diff --git a/docz/data/engines.xls b/docz/data/engines.xls
index b0b0ade..e6f06cd 100644
--- a/docz/data/engines.xls
+++ b/docz/data/engines.xls
@@ -77,9 +77,8 @@
✔ |
✔ |
✔ |
+ ✔ |
✔ |
- ✔ |
- |
Rhino |
@@ -147,9 +146,9 @@
✔ |
✔ |
✔ |
+ |
✔ |
- ✔ |
- ✔ |
+ |
Boa |
@@ -224,7 +223,7 @@
Engine |
- Binding |
+ Lang |
x64 |
ARM |
x64 |
@@ -249,7 +248,17 @@
|
|
|
+ ✔ |
|
+
+
+ Duktape |
+ Python |
+ ✔ |
+ |
+ |
+ |
+ ✔ |
|
@@ -261,16 +270,6 @@
✔ |
✔ |
-
- V8 |
- Python |
- ✔ |
- |
- |
- |
- |
- |
-
JSC |
Swift |
diff --git a/docz/docs/03-demos/01-math/21-pandas.md b/docz/docs/03-demos/01-math/21-pandas.md
new file mode 100644
index 0000000..ab1231e
--- /dev/null
+++ b/docz/docs/03-demos/01-math/21-pandas.md
@@ -0,0 +1,331 @@
+---
+title: Spreadsheet Data in Pandas
+sidebar_label: Python + Pandas
+description: Process structured data in Python with Pandas. Seamlessly integrate spreadsheets into your workflow with SheetJS. Analyze complex Excel spreadsheets with confidence.
+pagination_prev: demos/index
+pagination_next: demos/frontend/index
+---
+
+import current from '/version.js';
+import Tabs from '@theme/Tabs';
+import TabItem from '@theme/TabItem';
+import CodeBlock from '@theme/CodeBlock';
+
+Pandas[^1] is a Python software library for data analysis.
+
+[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing
+data from spreadsheets.
+
+This demo uses SheetJS to process data from a spreadsheet and translate to the
+Pandas DataFrame format. We'll explore how to load SheetJS from Python scripts,
+generate DataFrames from workbooks, and write DataFrames back to workbooks.
+
+The ["Complete Example"](#complete-example) includes a wrapper library that
+simplifies importing and exporting spreadsheets.
+
+:::info pass
+
+Pandas includes limited support for reading spreadsheets (`pandas.read_excel`)
+and writing XLSX spreadsheets (`pandas.DataFrame.to_excel`).
+
+**SheetJS supports common spreadsheet formats that Pandas cannot process.**
+
+SheetJS operations also offer more flexibility in processing complex worksheets.
+
+:::
+
+:::note Tested Environments
+
+This demo was tested in the following deployments:
+
+| Architecture | JS Engine | Pandas | Python | Date |
+|:-------------|:----------------|:-------|:-------|:-----------|
+| `darwin-x64` | Duktape `2.7.0` | 2.0.3 | 3.11.7 | 2024-01-29 |
+| `linux-x64` | Duktape `2.7.0` | 1.5.3 | 3.11.3 | 2024-01-29 |
+
+:::
+
+## Integration Details
+
+[`sheetjs.py`](pathname:///pandas/sheetjs.py) is a wrapper script that provides
+helper methods for reading and writing spreadsheets. Installation notes are
+included in the ["Complete Example"](#complete-example) section.
+
+### JS in Python
+
+JS code cannot be directly evaluated in Python implementations.
+
+To run JS code from Python, JavaScript engines[^2] can be embedded in Python
+modules or dynamically loaded using the `ctypes` foreign function library[^3].
+This demo uses `ctypes` with the [Duktape engine](/docs/demos/engines/duktape).
+
+### Wrapper
+
+The script exports a class named `SheetJSWrapper`. It is a context manager that
+initializes the Duktape engine and executes SheetJS scripts on entrance. All
+work should be performed in the context:
+
+```python title="Complete Example"
+#!/usr/bin/env python3
+from sheetjs import SheetJSWrapper
+
+with SheetJSWrapper() as sheetjs:
+
+ # Parse file
+ wb = sheetjs.read_file("pres.numbers")
+ print("Loaded file pres.numbers")
+
+ # Get first worksheet name
+ first_ws_name = wb.get_sheet_names()[0]
+ print(f"Reading from sheet {first_ws_name}")
+
+ # Generate DataFrame from first worksheet
+ df = wb.get_df(first_ws_name)
+ print(df.info())
+
+ # Export DataFrame to XLSB
+ sheetjs.write_df(df, "SheetJSPandas.xlsb", sheet_name="DataFrame")
+```
+
+### Reading Files
+
+`sheetjs.read_file` accepts a path to a spreadsheet file. It will parse the file
+and return an object representing the workbook.
+
+The `get_sheet_names` method of the workbook returns a list of sheet names.
+
+The `get_df` method of the workbook generates a DataFrame from the workbook. The
+specific sheet can be selected by passing the name.
+
+For example, the following code reads `pres.numbers` and generates a DataFrame
+from the second worksheet:
+
+```python title="Generating a DataFrame from the second worksheet"
+with SheetJSWrapper() as sheetjs:
+ # Parse file
+ wb = sheetjs.read_file(path)
+
+ # Generate DataFrame from second worksheet
+ ws_name = wb.get_sheet_names()[1]
+ df = wb.get_df(ws_name)
+
+ # Print metadata
+ print(df.info())
+```
+
+Under the hood, `sheetjs.py` performs the following steps:
+
+```mermaid
+flowchart LR
+ file[(workbook\nfile)]
+ subgraph SheetJS operations
+ bytes(Byte\nstring)
+ wb((SheetJS\nWorkbook))
+ csv(CSV\nstring)
+ end
+ subgraph Pandas operations
+ stream(CSV\nStream)
+ df[(Pandas\nDataFrame)]
+ end
+ file --> |`open`/`read`\nPython ops| bytes
+ bytes --> |`XLSX.read`\nParse Bytes| wb
+ wb --> |`sheet_to_csv`\nExtract Data| csv
+ csv --> |`StringIO`\nPython ops| stream
+ stream --> |`read_csv`\nParse CSV| df
+```
+
+1) Pure Python operations read the spreadsheet file and generate a byte string.
+
+2) SheetJS libraries parse the string and generate a clean CSV.
+
+- The `read` method[^4] parses file bytes into a SheetJS workbook object[^5]
+- After selecting a worksheet, `sheet_to_csv`[^6] generates a CSV string
+
+3) Python operations convert the CSV string to a stream object.[^7]
+
+4) The Pandas `read_csv` method[^8] ingests the stream and generates a DataFrame.
+
+### Writing Files
+
+`sheetjs.write_df` accepts a DataFrame and a path. It will attempt to export
+the data to a spreadsheet file.
+
+For example, the following code exports a DataFrame to `SheetJSPandas.xlsb`:
+
+```python title="Exporting a DataFrame to XLSB"
+with SheetJSWrapper() as sheetjs:
+ # Export DataFrame to XLSB
+ sheetjs.write_df(df, "SheetJSPandas.xlsb", sheet_name="DataFrame")
+```
+
+Under the hood, `sheetjs.py` performs the following steps:
+
+```mermaid
+flowchart LR
+ subgraph Pandas operations
+ df[(Pandas\nDataFrame)]
+ json(JSON\nString)
+ end
+ subgraph SheetJS operations
+ aoo(array of\nobjects)
+ wb((SheetJS\nWorkbook))
+ u8a(File\nbytes)
+ end
+ file[(workbook\nfile)]
+ df --> |`to_json`\nPandas ops| json
+ json --> |`JSON.parse`\nJS Engine| aoo
+ aoo --> |`json_to_sheet`\nSheetJS Ops| wb
+ wb --> |`XLSX.write`\nUint8Array| u8a
+ u8a --> |`open`/`write`\nPython ops| file
+```
+
+1) The Pandas DataFrame `to_json` method[^9] generates a JSON string.
+
+2) JS engine operations translate the JSON string to an array of objects.
+
+3) SheetJS libraries process the data array and generate file bytes.
+
+- The `json_to_sheet` method[^10] creates a SheetJS sheet object from the data.
+- The `book_new` method[^11] creates a SheetJS workbook that includes the sheet.
+- The `write` method[^12] generates the spreadsheet file bytes.
+
+4) Pure Python operations write the bytes to file.
+
+## Complete Example
+
+This example will extract data from an Apple Numbers spreadsheet and generate a
+DataFrame. The DataFrame will be exported to the binary XLSB spreadsheet format.
+
+0) Install Pandas:
+
+```bash
+sudo python3 -m pip install pandas
+```
+
+:::caution pass
+
+On Arch Linux-based platforms including the Steam Deck, the install may fail:
+
+```
+error: externally-managed-environment
+```
+
+In these situations, Pandas must be installed through the package manager:
+
+```bash
+sudo pacman -Syu python-pandas
+```
+
+:::
+
+1) Build the Duktape shared library:
+
+```bash
+curl -LO https://duktape.org/duktape-2.7.0.tar.xz
+tar -xJf duktape-2.7.0.tar.xz
+cd duktape-2.7.0
+make -f Makefile.sharedlibrary
+cd ..
+```
+
+2) Copy the shared library to the current folder. When the demo was last tested,
+the shared library file name differed by platform:
+
+| OS | name |
+|:-------|:--------------------------|
+| Darwin | `libduktape.207.20700.so` |
+| Linux | `libduktape.so.207.20700` |
+
+```bash
+cp duktape-*/libduktape.* .
+```
+
+3) Download the SheetJS Standalone script and move to the project directory:
+
+<CodeBlock language="bash">{`\
+curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/shim.min.js
+curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`}
+</CodeBlock>
+
+4) Download the following test scripts and files:
+
+- [`pres.numbers` test file](https://sheetjs.com/pres.numbers)
+- [`sheetjs.py` script](pathname:///pandas/sheetjs.py)
+- [`SheetJSPandas.py` script](pathname:///pandas/SheetJSPandas.py)
+
+```bash
+curl -LO https://sheetjs.com/pres.numbers
+curl -LO https://docs.sheetjs.com/pandas/sheetjs.py
+curl -LO https://docs.sheetjs.com/pandas/SheetJSPandas.py
+```
+
+5) Edit the `sheetjs.py` script.
+
+The `lib` variable declares the path to the library:
+
+```python title="sheetjs.py (edit highlighted line)"
+# highlight-next-line
+lib = "libduktape.207.20700.so"
+```
+
+<Tabs groupId="triple">
+  <TabItem value="darwin-x64" label="MacOS">
+
+The name of the library is `libduktape.207.20700.so`:
+
+```python title="sheetjs.py (change highlighted line)"
+# highlight-next-line
+lib = "libduktape.207.20700.so"
+```
+
+  </TabItem>
+  <TabItem value="linux-x64" label="Linux">
+
+The name of the library is `libduktape.so.207.20700`:
+
+```python title="sheetjs.py (change highlighted line)"
+# highlight-next-line
+lib = "libduktape.so.207.20700"
+```
+
+  </TabItem>
+</Tabs>
+
+6) Run the script:
+
+```bash
+python3 SheetJSPandas.py pres.numbers
+```
+
+If successful, the script will display DataFrame metadata:
+
+```
+RangeIndex: 5 entries, 0 to 4
+Data columns (total 2 columns):
+ # Column Non-Null Count Dtype
+--- ------ -------------- -----
+ 0 Name 5 non-null object
+ 1 Index 5 non-null int64
+dtypes: int64(1), object(1)
+```
+
+It will also export the DataFrame to `SheetJSPandas.xlsb`. The file can be
+inspected with a spreadsheet editor that supports XLSB files.
+
+[^1]: The official documentation site is <https://pandas.pydata.org/> and the official distribution point is <https://pypi.org/project/pandas/>
+[^2]: See ["Other Languages"](/docs/demos/engines/) for more examples.
+[^3]: See [`ctypes`](https://docs.python.org/3/library/ctypes.html) in the Python documentation.
+[^4]: See [`read` in "Reading Files"](/docs/api/parse-options)
+[^5]: See ["Workbook Object"](/docs/csf/book)
+[^6]: See [`sheet_to_csv` in "Utilities"](/docs/api/utilities/csv#delimiter-separated-output)
+[^7]: See [the examples in "IO tools"](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html) in the Pandas documentation.
+[^8]: See [`pandas.read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html) in the Pandas documentation.
+[^9]: See [`pandas.DataFrame.to_json`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_json.html) in the Pandas documentation.
+[^10]: See [`json_to_sheet` in "Utilities"](/docs/api/utilities/array#array-of-objects-input)
+[^11]: See [`book_new` in "Utilities"](/docs/api/utilities/wb)
+[^12]: See [`write` in "Writing Files"](/docs/api/write-options)
diff --git a/docz/docs/03-demos/42-engines/01-duktape.md b/docz/docs/03-demos/42-engines/01-duktape.md
index b578e80..d6c2832 100644
--- a/docz/docs/03-demos/42-engines/01-duktape.md
+++ b/docz/docs/03-demos/42-engines/01-duktape.md
@@ -361,11 +361,138 @@ This demo was tested in the following deployments:
| Architecture | Version | PHP Version | Date |
|:-------------|:--------|:------------|:-----------|
| `darwin-x64` | `2.7.0` | `8.3.2` | 2024-01-26 |
+| `linux-x64` | `2.7.0` | `8.2.7` | 2024-01-29 |
:::
0) Ensure `php` is installed and available on the system path.
+1) Inspect the `php.ini` configuration file. The location of the file can be
+found by running `php --ini`. The following output is from the last macOS test:
+
+```text pass
+Configuration File (php.ini) Path: /usr/local/etc/php/8.3
+// highlight-next-line
+Loaded Configuration File: /usr/local/etc/php/8.3/php.ini
+Scan for additional .ini files in: /usr/local/etc/php/8.3/conf.d
+Additional .ini files parsed: /usr/local/etc/php/8.3/conf.d/ext-opcache.ini
+```
+
+The following line should appear in the configuration:
+
+```ini title="php.ini (add to end)"
+extension=ffi
+```
+
+If this line is prefixed with a `;`, remove the semicolon. If this line does not
+appear in the file, add it to the end.
+
+2) Build the Duktape shared library:
+
+```bash
+curl -LO https://duktape.org/duktape-2.7.0.tar.xz
+tar -xJf duktape-2.7.0.tar.xz
+cd duktape-2.7.0
+make -f Makefile.sharedlibrary
+cd ..
+```
+
+3) Copy the shared library to the current folder. When the demo was last tested,
+the shared library file name differed by platform:
+
+| OS | name |
+|:-------|:--------------------------|
+| Darwin | `libduktape.207.20700.so` |
+| Linux | `libduktape.so.207.20700` |
+
+```bash
+cp duktape-*/libduktape.* .
+```
+
+4) Download the SheetJS Standalone script, shim script and test file. Move all
+three files to the project directory:
+
+<CodeBlock language="bash">{`\
+curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/shim.min.js
+curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js
+curl -LO https://sheetjs.com/pres.numbers`}
+</CodeBlock>
+
+5) Download [`SheetJSDuk.php`](pathname:///duk/SheetJSDuk.php):
+
+```bash
+curl -LO https://docs.sheetjs.com/duk/SheetJSDuk.php
+```
+
+6) Edit the `SheetJSDuk.php` script.
+
+The `$sofile` variable declares the path to the library:
+
+```php title="SheetJSDuk.php (edit highlighted line)"
+<?php
+
+// highlight-next-line
+$sofile = './libduktape.207.20700.so';
+```
+
+<Tabs groupId="triple">
+  <TabItem value="darwin-x64" label="MacOS">
+
+The name of the library is `libduktape.207.20700.so`:
+
+```php title="SheetJSDuk.php (change highlighted line)"
+// highlight-next-line
+$sofile = './libduktape.207.20700.so';
+```
+
+  </TabItem>
+  <TabItem value="linux-x64" label="Linux">
+
+The name of the library is `libduktape.so.207.20700`:
+
+```php title="SheetJSDuk.php (change highlighted line)"
+// highlight-next-line
+$sofile = './libduktape.so.207.20700';
+```
+
+  </TabItem>
+</Tabs>
+
+7) Run the script:
+
+```bash
+php SheetJSDuk.php pres.numbers
+```
+
+If the program succeeded, the CSV contents will be printed to console and the
+file `sheetjsw.xlsb` will be created. That file can be opened with Excel.
+
+### Python
+
+There is no official Python binding to the Duktape library. Instead, this demo
+uses the raw `ctypes` interface[^2] to the Duktape shared library.
+
+#### Python Demo
+
+:::note Tested Deployments
+
+This demo was tested in the following deployments:
+
+| Architecture | Version | Python | Date |
+|:-------------|:--------|:---------|:-----------|
+| `darwin-x64` | `2.7.0` | `3.11.7` | 2024-01-29 |
+| `linux-x64` | `2.7.0` | `3.11.3` | 2024-01-29 |
+
+:::
+
+0) Ensure `python` is installed and available on the system path.
+
1) Build the Duktape shared library:
```bash
@@ -377,10 +504,15 @@ cd ..
```
2) Copy the shared library to the current folder. When the demo was last tested,
-the file name was `libduktape.207.20700.so`:
+the shared library file name differed by platform:
+
+| OS | name |
+|:-------|:--------------------------|
+| Darwin | `libduktape.207.20700.so` |
+| Linux | `libduktape.so.207.20700` |
```bash
-cp duktape-*/libduktape.*.so .
+cp duktape-*/libduktape.* .
```
3) Download the SheetJS Standalone script, shim script and test file. Move all
@@ -398,16 +530,50 @@ curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js
curl -LO https://sheetjs.com/pres.numbers`}
-4) Download [`SheetJSDuk.php`](pathname:///duk/SheetJSDuk.php):
+4) Download [`SheetJSDuk.py`](pathname:///duk/SheetJSDuk.py):
```bash
-curl -LO https://docs.sheetjs.com/duk/SheetJSDuk.php
+curl -LO https://docs.sheetjs.com/duk/SheetJSDuk.py
```
-5) Run the script:
+5) Edit the `SheetJSDuk.py` script.
+
+The `lib` variable declares the path to the library:
+
+```python title="SheetJSDuk.py (edit highlighted line)"
+#!/usr/bin/env python3
+
+# highlight-next-line
+lib = "libduktape.207.20700.so"
+```
+
+<Tabs groupId="triple">
+  <TabItem value="darwin-x64" label="MacOS">
+
+The name of the library is `libduktape.207.20700.so`:
+
+```python title="SheetJSDuk.py (change highlighted line)"
+# highlight-next-line
+lib = "libduktape.207.20700.so"
+```
+
+  </TabItem>
+  <TabItem value="linux-x64" label="Linux">
+
+The name of the library is `libduktape.so.207.20700`:
+
+```python title="SheetJSDuk.py (change highlighted line)"
+# highlight-next-line
+lib = "libduktape.so.207.20700"
+```
+
+  </TabItem>
+</Tabs>
+
+6) Run the script:
```bash
-php SheetJSDuk.php pres.numbers
+python3 SheetJSDuk.py pres.numbers
```
If the program succeeded, the CSV contents will be printed to console and the
@@ -520,4 +686,5 @@ sudo cpan install File::Slurp
:::
-[^1]: See [Foreign Function Interface](https://www.php.net/manual/en/book.ffi.php) in the PHP documentation.
\ No newline at end of file
+[^1]: See [Foreign Function Interface](https://www.php.net/manual/en/book.ffi.php) in the PHP documentation.
+[^2]: See [`ctypes`](https://docs.python.org/3/library/ctypes.html) in the Python documentation.
\ No newline at end of file
diff --git a/docz/docs/03-demos/42-engines/14-pandas.md b/docz/docs/03-demos/42-engines/14-pandas.md
deleted file mode 100644
index d3e29bd..0000000
--- a/docz/docs/03-demos/42-engines/14-pandas.md
+++ /dev/null
@@ -1,440 +0,0 @@
----
-title: Spreadsheet Data in Pandas
-sidebar_label: Python (Pandas)
-description: Process structured data in Python with Pandas. Seamlessly integrate spreadsheets into your workflow with SheetJS. Analyze complex Excel spreadsheets with confidence.
-pagination_prev: demos/cloud/index
-pagination_next: demos/bigdata/index
----
-
-import current from '/version.js';
-import Tabs from '@theme/Tabs';
-import TabItem from '@theme/TabItem';
-import CodeBlock from '@theme/CodeBlock';
-
-Pandas[^1] is a Python software library for data analysis.
-
-[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing
-data from spreadsheets.
-
-This demo uses SheetJS to process data from a spreadsheet and translate to the
-Pandas DataFrame format. We'll explore how to load SheetJS from Python scripts,
-generate DataFrames from workbooks, and write DataFrames back to workbooks.
-
-:::note
-
-This demo was tested in the following deployments:
-
-| Architecture | V8 version | Pandas | Python | Date |
-|:-------------|:--------------|:-------|:-------|:-----------|
-| `darwin-x64` | `11.5.150.16` | 2.0.3 | 3.11.4 | 2023-07-29 |
-
-:::
-
-:::info pass
-
-Pandas includes limited support for reading spreadsheets (`pandas.from_excel`)
-and writing XLSX spreadsheets (`pandas.DataFrame.to_excel`).
-
-The SheetJS approach supports many common spreadsheet formats that are not
-supported by the current set of Pandas codecs and offers greater flexibility in
-processing complex worksheets.
-
-:::
-
-## Integration Details
-
-JS code cannot literally be run in the Python interpreter. To run JS code from
-Python, JavaScript engines[^2] can be embedded in CPython modules.
-
-### Loading SheetJS
-
-This demo uses the `STPyV8` module[^3] to access the V8 JavaScript engine.
-
-_Initialize V8_
-
-The engine library provides a convenient context manager `JSContext` for context
-resource management. Within the context, the `eval` method can evaluate code:
-
-```py
-from STPyV8 import JSContext
-
-# Initialize JS context
-with JSContext() as ctxt:
- # Run code
- res = ctxt.eval("'Sheet' + 'JS'")
-
- # print result
- print(res)
-```
-
-`STPyV8` handles data interchange for common types. Arrays and JS objects can be
-translated to Python `list` and `dict` respectively. The following `convert`
-function is used in the test suite[^4]
-
-```py
-# from `tests/test_Wrapper.py` in the STPyV8 library
-# License: Apache 2.0
-def convert(obj):
- if isinstance(obj, JSArray):
- return [convert(v) for v in obj]
- if isinstance(obj, JSObject):
- return dict([[str(k), convert(obj.__getattr__(str(k)))] for k in obj.__dir__()])
- return obj
-```
-
-_Loading the Library_
-
-The [SheetJS Standalone scripts](/docs/getting-started/installation/standalone)
-can be parsed and evaluated from the JS engine. Once evaluated, the `XLSX`
-variable is available as a global.
-
-Assuming the standalone library is in the same directory as the source file,
-the script can be evaluated with `eval`:
-
-```py
- # Within a JSContext, open `xlsx.full.min.js` and evaluate
- with open("xlsx.full.min.js") as f:
- ctxt.eval(f.read())
-```
-
-### Reading Files
-
-The following diagram depicts the spreadsheet salsa:
-
-```mermaid
-flowchart LR
- file[(workbook\nfile)]
- subgraph SheetJS operations
- base64(Base64\nstring)
- wb((SheetJS\nWorkbook))
- aoo(array of\nobjects)
- end
- subgraph Pandas operations
- lod(list of\nrecords)
- df[(Pandas\nDataFrame)]
- end
- file --> |`open`/`read`\nPython ops| base64
- base64 --> |`XLSX.read`\nParse Bytes| wb
- wb --> |`sheet_to_json`\nExtract Data| aoo
- aoo --> |`convert`\nPython ops|lod
- lod --> |`from_records`\nPandas ops| df
-```
-
-At a high level:
-
-1) Pure Python operations read the file and generate a Base64 string
-
-2) SheetJS libraries parse the string and generates JS records
-
-3) JS engine operations translate the rows to Python `list` of `dicts`
-
-4) Pandas operations translate the Python data to a DataFrame
-
-#### Read files
-
-The safest format for data interchange is Base64-encoded strings:
-
-```py
-from base64 import b64encode
-
-with open(path, mode="rb") as f:
- file_bytes = f.read()
- b64 = b64encode(file_bytes)
-```
-
-#### Parse bytes
-
-From JS code, `XLSX.read`[^5] parses the Base64 string
-
-```py
-wb = ctxt.eval("(b64 => XLSX.read(b64, {type: 'base64', dense: true}))")(b64)
-```
-
-The `wb` object follows the "Common Spreadsheet Format"[^6], an in-memory format
-for representing workbooks, worksheets, cells, and spreadsheet features.
-
-#### Get First Worksheet
-
-As explained in the "Workbook Object"[^7] section:
-- the `SheetNames` property is a ordered list of the sheet names in the workbook
-- the `Sheets` property of the workbook object is an object whose keys are sheet
- names and whose values are sheet objects.
-
-For use in Python, the `SheetNames` array must be converted to a `list`:
-
-```py
-sheet_names = convert(wb.SheetNames)
-first_sheet_name = sheet_names[0]
-```
-
-Since utility functions will process the worksheet object from JavaScript, it is
-preferable not to convert the object:
-
-```py
-first_sheet = wb.Sheets[first_sheet_name] # do not convert
-```
-
-#### Generate List of Records
-
-In JavaScript, the equivalent of the "`list` of `dict`s" or "`list` of records"
-is "array of objects". They can be created with `XLSX.utils.sheet_to_json`[^8]:
-
-```py
-rows = convert(ctxt.eval("(ws => XLSX.utils.sheet_to_json(ws))")(first_sheet))
-```
-
-#### Generate Pandas DataFrame
-
-`rows` is a `list` of `dict` objects. `from_records`[^9] understands this data
-shape and generates a proper DataFrame:
-
-```py
-df = pd.DataFrame.from_records(rows)
-```
-
-### Writing Files
-
-The writing process looks similar to the reading process in reverse:
-
-```mermaid
-flowchart LR
- subgraph Pandas operations
- df[(Pandas\nDataFrame)]
- json(JSON\nString)
- end
- subgraph SheetJS operations
- aoo(array of\nobjects)
- wb((SheetJS\nWorkbook))
- base64(Base64\nstring)
- end
- file[(workbook\nfile)]
- df --> |`to_json`\nPandas ops| json
- json --> |`JSON.parse`\nJS Engine| aoo
- aoo --> |`json_to_sheet`\nSheetJS Ops| wb
- wb --> |`XLSX.write`\nBase64| base64
- base64 --> |`open`/`write`\nPython ops| file
-```
-
-At a high level:
-
-1) Pandas operations translate the Python data to JSON string
-
-2) JS engine operations translate the JSON string to an array of objects
-
-3) SheetJS libraries parse the array and generate a Base64-encoded workbook
-
-4) Pure Python operations decode the Base64 string and write the bytes to file.
-
-#### Generate JSON
-
-`DataFrame#to_json`[^10] with the option `orient="records"` generates a JSON
-string that encodes an array of objects:
-
-```py
-json = df.to_json(orient="records")
-```
-
-#### Generate Worksheet
-
-In JavaScript, `JSON.parse` will interpret the string as an array of objects.
-`XLSX.utils.json_to_sheet`[^11] generates a SheetJS worksheet object:
-
-```py
-sheet = ctxt.eval("(json => XLSX.utils.json_to_sheet(JSON.parse(json)) )")(json)
-```
-
-#### Export Enhancements
-
-At this point, there are many options for improving the appearance of the sheet.
-For example, the "Export Tutorial"[^12] shows how to adjust column widths.
-
-:::tip pass
-
-[SheetJS Pro](https://sheetjs.com/pro) offers additional styling options such as
-cell styling and frozen rows.
-
-"Pro Edit" offers a special approach for inserting data into an existing file.
-
-:::
-
-#### Generate Workbook
-
-`XLSX.utils.book_new`[^13] creates a new workbook and `XLSX.utils.book_append_sheet`[^14]
-appends a worksheet to the workbook. The new worksheet will be called "Export":
-
-:::note pass
-
-The code in the string literal is reproduced below:
-
-```js
-(ws, name) => {
- const wb = XLSX.utils.book_new();
- XLSX.utils.book_append_sheet(wb, ws, name);
- return wb;
-}
-```
-
-:::
-
-```py
-book = ctxt.eval("""((ws, name) => {
- const wb = XLSX.utils.book_new();
- XLSX.utils.book_append_sheet(wb, ws, name);
- return wb;
-})""")(sheet, "Export")
-```
-
-#### Generate File
-
-`XLSX.write`[^15] with the option `type: "base64"` attempts to create a file and
-generate a Base64 string:
-
-```py
-b64 = ctxt.eval("(wb => XLSX.write(wb, {type:'base64', bookType:'xls'}))")(book)
-```
-
-With the Base64 string, standard Python operations can create a file:
-
-```py
-from base64 import b64decode
-
-raw = b64decode(b64)
-with open("export.xls", mode="wb") as f:
- f.write(raw)
-```
-
-## Complete Demo
-
-This example will extract data from an Apple Numbers spreadsheet and generate a
-DataFrame. The DataFrame will be exported to a legacy XLS spreadsheet.
-
-### Engine Setup
-
-0) Follow the official installation instructions[^16].
-
-Instructions for macOS 12 (click to show)
-
-- Install `boost-python3` package using `brew`:
-
-```bash
-brew install boost-python3
-```
-
-- Identify python version:
-
-```bash
-python3 --version
-```
-
-:::note pass
-
-When the demo was last tested, the version was `3.11.4`
-
-:::
-
-- [Download latest release](https://github.com/cloudflare/stpyv8/releases)
-
-```bash
-curl -LO https://github.com/cloudflare/stpyv8/releases/download/v11.5.150.16/stpyv8-macos-12-python-3.11.zip
-```
-
-- Extract ZIP file and enter folder
-
-```bash
-unzip stpyv8-macos-12-python-3.11.zip
-cd stpyv8-macos-12-3.11
-```
-
-- Move `icudtl.dat` to `/Library/Application Support/STPyV8/`:
-
-```bash
-sudo mkdir -p /Library/Application\ Support/STPyV8
-sudo mv icudtl.dat /Library/Application\ Support/STPyV8/
-```
-
-- Install wheel:
-
-```bash
-sudo python3 -m pip install --upgrade *.whl
-cd ..
-```
-
-
-
-### Demo
-
-1) Download the SheetJS Standalone script and move to the project directory:
-
-
-
-{`\
-curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`}
-
-
-2) Install Pandas. On macOS:
-
-```python
-sudo python3 -m pip install pandas
-```
-
-3) Download the following test scripts and files:
-
-- [`pres.numbers` test file](https://sheetjs.com/pres.numbers)
-- [`sheetjs.py` wrapper](pathname:///pandas/sheetjs.py)
-- [`SheetJSPandas.py` script](pathname:///pandas/SheetJSPandas.py)
-
-```bash
-curl -LO https://sheetjs.com/pres.numbers
-curl -LO https://docs.sheetjs.com/pandas/sheetjs.py
-curl -LO https://docs.sheetjs.com/pandas/SheetJSPandas.py
-```
-
-4) Run the script:
-
-```bash
-python3 SheetJSPandas.py pres.numbers
-```
-
-If successful, it will display data rows in the file:
-
-```
-Reading from sheet Sheet1
-{'Name': 'Bill Clinton', 'Index': 42}
-{'Name': 'GeorgeW Bush', 'Index': 43}
-{'Name': 'Barack Obama', 'Index': 44}
-{'Name': 'Donald Trump', 'Index': 45}
-{'Name': 'Joseph Biden', 'Index': 46}
-```
-
-If Pandas is installed, the script will display DataFrame metadata:
-
-```
-RangeIndex: 5 entries, 0 to 4
-Data columns (total 2 columns):
- # Column Non-Null Count Dtype
---- ------ -------------- -----
- 0 Name 5 non-null object
- 1 Index 5 non-null int64
-dtypes: int64(1), object(1)
-```
-
-It will also export to `pres.xls`. The file can be read in a spreadsheet editor.
-
-[^1]: The official documentation site is and the official distribution point is
-[^2]: See ["Other Languages"](/docs/demos/engines/) for more examples.
-[^3]: [`STPyV8`](https://github.com/cloudflare/stpyv8) is a fork of the original [`PyV8` project](https://pypi.org/project/PyV8/). It is available under the permissive Apache 2.0 License. Special thanks to Flier Lu and CloudFlare!
-[^4]: See [`tests/test_Wrapper.py`](https://github.com/cloudflare/stpyv8/blob/410b31abe7a103b408d362cb872ce81604281c48/tests/test_Wrapper.py#L15) in the `STPyV8` code repository.
-[^5]: See [`read` in "Reading Files"](/docs/api/parse-options)
-[^6]: See ["SheetJS Data Model"](/docs/csf/)
-[^7]: See ["Workbook Object"](/docs/csf/book)
-[^8]: See [`sheet_to_json` in "Utilities"](/docs/api/utilities/array#array-output)
-[^9]: See [`pandas.DataFrame.from_records`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.from_records.html) in the Pandas documentation.
-[^10]: See [`pandas.DataFrame.to_json`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_json.html) in the Pandas documentation.
-[^11]: See [`json_to_sheet` in "Utilities"](/docs/api/utilities/array#array-of-objects-input)
-[^12]: See ["Clean up Workbook"](/docs/getting-started/examples/export#clean-up-workbook) in "Export Tutorial".
-[^13]: See [`book_new` in "Utilities"](/docs/api/utilities/wb)
-[^14]: See [`book_append_sheet` in "Utilities"](/docs/api/utilities/wb)
-[^15]: See [`write` in "Writing Files"](/docs/api/write-options)
-[^16]: See ["Installing"](https://github.com/cloudflare/stpyv8#installing) in the `STPyV8` project documentation
\ No newline at end of file
diff --git a/docz/docusaurus.config.js b/docz/docusaurus.config.js
index 072f32c..b1049dd 100644
--- a/docz/docusaurus.config.js
+++ b/docz/docusaurus.config.js
@@ -146,7 +146,7 @@ const config = {
prism: {
theme: lightCodeTheme,
darkTheme: darkCodeTheme,
- additionalLanguages: [ "visual-basic", "swift", "java", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram", "matlab", "stata" ],
+ additionalLanguages: [ "visual-basic", "swift", "java", "php", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram", "matlab", "stata" ],
},
liveCodeBlock: {
playgroundPosition: 'top'
@@ -244,6 +244,7 @@ const config = {
/* math */
{ from: '/docs/demos/ml', to: '/docs/demos/math/' },
{ from: '/docs/demos/bigdata/ml', to: '/docs/demos/math/' },
+ { from: '/docs/demos/engines/pandas', to: '/docs/demos/math/pandas/' },
/* installation */
{ from: '/docs/installation/standalone', to: '/docs/getting-started/installation/standalone/' },
{ from: '/docs/installation/frameworks', to: '/docs/getting-started/installation/frameworks/' },
diff --git a/docz/static/duk/SheetJSDuk.php b/docz/static/duk/SheetJSDuk.php
index 40bb0b1..754bd4d 100644
--- a/docz/static/duk/SheetJSDuk.php
+++ b/docz/static/duk/SheetJSDuk.php
@@ -1,5 +1,7 @@
duk_create_heap(null, null, null, null, null); }
function duk_eval_string_noresult($context, $cmd) { global $ffi; return $ffi->duk_eval_raw($context, $cmd, 0, 1 | (1<<3) | (1<<9) | (1<<10) | (1<<8) | (1<<11) ); }
@@ -63,7 +65,6 @@ function save_file($context, $path, $var) {
fclose($fh);
}
-
function DOIT($cmd) { global $context; return duk_eval_string_noresult($context, $cmd); }
/* initialize */
diff --git a/docz/static/duk/SheetJSDuk.pl b/docz/static/duk/SheetJSDuk.pl
new file mode 100644
index 0000000..ef996bc
--- /dev/null
+++ b/docz/static/duk/SheetJSDuk.pl
@@ -0,0 +1,33 @@
+# usage: perl SheetJSDuk.pl path/to/file
+#
+# Parses the given spreadsheet with SheetJS running inside Duktape, prints the
+# first worksheet as CSV and writes the workbook to SheetJSDuk.xlsb.
+use strict;
+use JavaScript::Duktape::XS;
+use File::Slurp;
+use MIME::Base64 qw( encode_base64 decode_base64 );
+
+# Stop with a usage hint when no input file was given
+die "usage: perl SheetJSDuk.pl path/to/file\n" unless @ARGV;
+
+# Initialize
+my $js = JavaScript::Duktape::XS->new({ max_memory_bytes => 256 * 1024 * 1024 });
+$js->eval("var global = (function(){ return this; }).call(null);");
+
+# Load the ExtendScript build
+my $src = read_file('xlsx.extendscript.js', { binmode => ':raw' });
+# drop a leading UTF-8 byte order mark before evaluating the script
+$src =~ s/^\xEF\xBB\xBF//;
+my $XLSX = $js->eval($src);
+
+# Print version number
+$js->set('log' => sub { print $_[0], "\n"; });
+$js->eval("log('SheetJS library version ' + XLSX.version);");
+
+# Parse File
+# the file bytes cross into JavaScript as a single-line Base64 string
+my $raw_data = encode_base64(read_file($ARGV[0], { binmode => ':raw' }), "");
+$js->set("b64", $raw_data);
+$js->eval(qq{
+ global.wb = XLSX.read(b64, {type: "base64", WTF:1});
+ global.ws = wb.Sheets[wb.SheetNames[0]];
+ void 0;
+});
+
+# Print first worksheet CSV
+$js->eval('log(XLSX.utils.sheet_to_csv(global.ws))');
+
+# Write XLSB file
+my $xlsb = $js->eval("XLSX.write(global.wb, {type:'base64', bookType:'xlsb'})");
+# binary output: write raw bytes, matching the raw reads above
+write_file("SheetJSDuk.xlsb", { binmode => ':raw' }, decode_base64($xlsb));
\ No newline at end of file
diff --git a/docz/static/duk/SheetJSDuk.py b/docz/static/duk/SheetJSDuk.py
new file mode 100644
index 0000000..e7ac73d
--- /dev/null
+++ b/docz/static/duk/SheetJSDuk.py
@@ -0,0 +1,103 @@
+#!/usr/bin/env python3
+
+# Shared library file name handed to CDLL below.
+# NOTE(review): the name looks like a Duktape 2.7.0 build for Linux; adjust for other platforms/versions
+lib = "libduktape.207.20700.so"
+
+from ctypes import CDLL, byref, string_at, c_int, c_void_p, c_char_p, c_size_t
+
+# Library handle used by every wrapper function in this file
+duk = CDLL(lib)
+
+def str_to_c(s):
+ b = s.encode("utf8")
+ return [c_char_p(b), len(b)]
+
+def duk_create_heap_default():
+ duk.duk_create_heap.restype = c_void_p
+ return duk.duk_create_heap(None, None, None, None, None)
+
+def duk_eval_string_noresult(ctx, cmd):
+ [s, l] = str_to_c(cmd)
+ return duk.duk_eval_raw(ctx, s, l, 1 | (1<<3) | (1<<9) | (1<<10) | (1<<8) | (1<<11) )
+
+def duk_eval_string(ctx, cmd):
+ [s, l] = str_to_c(cmd)
+ return duk.duk_eval_raw(ctx, s, l, 0 | (1<<3) | (1<<9) | (1<<10) | (1<<11) )
+
+def duk_peval(ctx):
+ return duk.duk_eval_raw(ctx, None, 0, 1 | (1<<3) | (1<<7) | (1<<11) )
+
+def duk_get_string(ctx, idx):
+ duk.duk_get_string.restype = c_char_p
+ retval = duk.duk_get_string(ctx, idx)
+ return retval.decode("utf8")
+
+def eval_file(ctx, path):
+ with open(path, "r") as f:
+ code = f.read()
+ [s, l] = str_to_c(code)
+
+ duk.duk_push_lstring(ctx, s, l)
+ retval = duk_peval(ctx)
+ duk.duk_pop(ctx)
+ return retval
+
+def load_file(ctx, path, var):
+ # Read the file at `path` and expose its bytes to JavaScript as the global named `var`.
+ # Returns the bytes object; the caller keeps it referenced (see the "avoid GC" note at the call site).
+ with open(path, "rb") as f:
+ data = f.read()
+ # pointer to the bytes just read; nothing in this function copies them
+ ptr = c_char_p(data)
+ # NOTE(review): 1 | 2 presumably selects a dynamic, external buffer (DUK_BUF_FLAG_*); confirm against duktape.h
+ duk.duk_push_buffer_raw(ctx, 0, 1 | 2)
+ # attach pointer and length to the buffer at the top of the stack (index -1)
+ duk.duk_config_buffer(ctx, -1, ptr, len(data))
+ duk.duk_put_global_string(ctx, str_to_c(var)[0])
+ return data
+
+def save_file(ctx, path, var):
+ duk.duk_get_global_string(ctx, str_to_c(var)[0])
+ sz = c_size_t()
+ duk.duk_get_buffer_data.restype = c_void_p
+ buf = duk.duk_get_buffer_data(ctx, -1, byref(sz))
+ s = string_at(buf, sz.value)
+ with open(path, "wb") as f:
+ f.write(s)
+
+def process(path):
+ # initialize
+ context = duk_create_heap_default()
+ ctx = c_void_p(context)
+
+ def DOIT(cmd):
+ return duk_eval_string_noresult(ctx, cmd)
+
+ # duktape does not expose a standard "global" by default
+ DOIT("var global = (function(){ return this; }).call(null);")
+
+ # load library
+ eval_file(ctx, "shim.min.js")
+ eval_file(ctx, "xlsx.full.min.js")
+
+ # get version string
+ duk_eval_string(ctx, "XLSX.version")
+ print("SheetJS Library Version %s" % (duk_get_string(ctx, -1)))
+ duk.duk_pop(ctx)
+
+ # read file
+ # NOTE: data is captured here to avoid GC
+ data = load_file(ctx, path, "buf")
+ print("Loaded file %s" % (path))
+
+ # parse workbook
+ DOIT("wb = XLSX.read(buf.slice(0, buf.length));")
+ DOIT("ws = wb.Sheets[wb.SheetNames[0]]")
+
+ # print CSV
+ duk_eval_string(ctx, "XLSX.utils.sheet_to_csv(ws)")
+ print(duk_get_string(ctx, -1))
+ duk.duk_pop(ctx)
+
+ DOIT("newbuf = (XLSX.write(wb, {type:'buffer', bookType:'xlsb'}));")
+ save_file(ctx, "sheetjsw.xlsb", "newbuf")
+
+ duk.duk_destroy_heap(ctx)
+ return 0
+
+if("__main__" == __name__):
+ from sys import argv
+ process(argv[1])
\ No newline at end of file
diff --git a/docz/static/pandas/SheetJSPandas.py b/docz/static/pandas/SheetJSPandas.py
index 6652c8b..c624671 100644
--- a/docz/static/pandas/SheetJSPandas.py
+++ b/docz/static/pandas/SheetJSPandas.py
@@ -1,41 +1,25 @@
-from sheetjs import SheetJS
-from sys import argv, exit
+#!/usr/bin/env python3
-test_pandas = True
-try:
- import pandas as pd
-except:
- test_pandas = False
+from sheetjs import SheetJSWrapper
-# Parse file and generate row objects
-with SheetJS() as sheetjs:
- # Print library version number
- print(f"SheetJS Version {sheetjs.version()}")
+def process(path):
+ # Demo: parse the spreadsheet at `path`, report the first sheet name, build a
+ # DataFrame from the workbook and export it to SheetJSPandas.xlsb.
+ with SheetJSWrapper() as sheetjs:
- # Read and parse data from file
- wb = sheetjs.read_file(argv[1])
+ # Parse file
+ wb = sheetjs.read_file(path)
+ print(f"Loaded file {path}")
- # Get first worksheet name
- wsname = wb.sheet_names()[0]
- print(f"Reading from sheet {wsname}")
+ # Get first worksheet name
+ names = wb.get_sheet_names()
+ print(f"Reading from sheet {names[0]}")
- # Get data from first sheet
- ws = wb.get_sheet(wsname)
- rows = ws.get_rows()
- for row in rows: print(row)
+ # Generate DataFrame from first worksheet
+ df = wb.get_df()
+ # NOTE(review): DataFrame.info() prints its report itself and returns None, so this presumably also prints "None"; confirm
+ print(df.info())
-if not test_pandas:
- print("Pandas could not be loaded, skipping tests")
- exit()
+ # Export DataFrame to XLSB
+ sheetjs.write_df(df, "SheetJSPandas.xlsb", sheet_name="DataFrame")
-print("\n## Pandas DataFrame\n")
-
-# generate dataframe
-df = pd.DataFrame.from_records(rows)
-print(df.info())
-
-outf="pres.xls"
-print(f"\n## Writing to {outf}\n")
-# write JSON string to XLS worksheet
-with SheetJS() as sheetjs:
- sheetjs.book_from_df(df).to_file(outf)
+if("__main__" == __name__):
+ from sys import argv
+ process(argv[1])
diff --git a/docz/static/pandas/sheetjs.py b/docz/static/pandas/sheetjs.py
index 8dc265f..a4c3bc1 100644
--- a/docz/static/pandas/sheetjs.py
+++ b/docz/static/pandas/sheetjs.py
@@ -1,136 +1,154 @@
-from base64 import b64encode, b64decode
-from contextlib import contextmanager
-from STPyV8 import JSContext, JSArray, JSObject
-from functools import wraps
-from os.path import splitext
+# Shared library file name handed to CDLL below.
+# NOTE(review): the name looks like a Duktape 2.7.0 build for Linux; adjust for other platforms/versions
+lib = "libduktape.207.20700.so"
-def to_py(method):
- # `convert` from STPyV8 tests/test_Wrapper.py
- def convert(obj):
- if isinstance(obj, JSArray):
- return [convert(v) for v in obj]
- if isinstance(obj, JSObject):
- return dict([[str(k), convert(obj.__getattr__(str(k)))] for k in obj.__dir__()])
- return obj
+from ctypes import CDLL, byref, string_at, c_int, c_void_p, c_char_p, c_size_t
+from json import loads
+from io import StringIO
+from pandas import read_csv
- @wraps(method)
- def func(self, *args, **kwargs):
- res = method(self, *args, **kwargs)
- return convert(res)
- return func
+# Library handle used by every wrapper function in this module
+duk = CDLL(lib)
-class SheetJSWorksheet:
- ws = None
- ctxt = None
+def str_to_c(s):
+ b = s.encode("utf8")
+ return [c_char_p(b), len(b)]
- def __init__(self, ctxt, ws):
- self.ctxt = ctxt
- self.ws = ws
+def duk_create_heap_default():
+ duk.duk_create_heap.restype = c_void_p
+ return duk.duk_create_heap(None, None, None, None, None)
- def js(self): return self.ws
+def duk_eval_string_noresult(ctx, cmd):
+ [s, l] = str_to_c(cmd)
+ return duk.duk_eval_raw(ctx, s, l, 1 | (1<<3) | (1<<9) | (1<<10) | (1<<8) | (1<<11) )
- @to_py
- def get_rows(self):
- return self.ctxt.eval("(ws => XLSX.utils.sheet_to_json(ws))")(self.ws)
+def duk_eval_string(ctx, cmd):
+ [s, l] = str_to_c(cmd)
+ return duk.duk_eval_raw(ctx, s, l, 0 | (1<<3) | (1<<9) | (1<<10) | (1<<11) )
-class SheetJSWorkbook:
- wb = None
- ctxt = None
+def duk_peval(ctx):
+ return duk.duk_eval_raw(ctx, None, 0, 1 | (1<<3) | (1<<7) | (1<<11) )
- def __init__(self, ctxt, wb):
- self.ctxt = ctxt
+def duk_get_string(ctx, idx):
+ duk.duk_get_string.restype = c_char_p
+ retval = duk.duk_get_string(ctx, idx)
+ return retval.decode("utf8")
+
+def eval_file(ctx, path):
+ with open(path, "r") as f:
+ code = f.read()
+ [s, l] = str_to_c(code)
+
+ duk.duk_push_lstring(ctx, s, l)
+ retval = duk_peval(ctx)
+ duk.duk_pop(ctx)
+ return retval
+
+def load_file(ctx, path, var):
+ # Read the file at `path` and expose its bytes to JavaScript as the global named `var`.
+ # Returns the bytes object; the caller keeps it referenced (see the "avoid GC" note at the call site).
+ with open(path, "rb") as f:
+ data = f.read()
+ # pointer to the bytes just read; nothing in this function copies them
+ ptr = c_char_p(data)
+ # NOTE(review): 1 | 2 presumably selects a dynamic, external buffer (DUK_BUF_FLAG_*); confirm against duktape.h
+ duk.duk_push_buffer_raw(ctx, 0, 1 | 2)
+ # attach pointer and length to the buffer at the top of the stack (index -1)
+ duk.duk_config_buffer(ctx, -1, ptr, len(data))
+ duk.duk_put_global_string(ctx, str_to_c(var)[0])
+ return data
+
+def save_file(ctx, path, var):
+ duk.duk_get_global_string(ctx, str_to_c(var)[0])
+ sz = c_size_t()
+ duk.duk_get_buffer_data.restype = c_void_p
+ buf = duk.duk_get_buffer_data(ctx, -1, byref(sz))
+ s = string_at(buf, sz.value)
+ with open(path, "wb") as f:
+ f.write(s)
+
+def initialize():
+ # initialize
+ context = duk_create_heap_default()
+ ctx = c_void_p(context)
+
+ # duktape does not expose a standard "global" by default
+ duk_eval_string_noresult(ctx, "var global = (function(){ return this; }).call(null);")
+
+ # load library
+ eval_file(ctx, "shim.min.js")
+ eval_file(ctx, "xlsx.full.min.js")
+
+ # get version string
+ duk_eval_string(ctx, "XLSX.version")
+ print(f"SheetJS Library Version {duk_get_string(ctx, -1)}")
+ duk.duk_pop(ctx)
+ return [context, ctx]
+
+def parse_file(ctx, path, name):
+ # read file
+ # NOTE: data is captured here to avoid GC
+ data = load_file(ctx, path, "buf")
+
+ # parse workbook
+ duk_eval_string_noresult(ctx, f"{name} = XLSX.read(buf.slice(0, buf.length));")
+
+def get_sheet_names(ctx, wb):
+ duk_eval_string(ctx, f"JSON.stringify({wb}.SheetNames)")
+ wsnames = duk_get_string(ctx, -1)
+ names = loads(wsnames)
+ duk.duk_pop(ctx)
+ return names
+
+def get_csv_from_wb(ctx, wb, sheet_name=None):
+ if not sheet_name: sheet_name = f"{wb}.SheetNames[0]"
+ else: sheet_name = f"'{sheet_name}'"
+ duk_eval_string(ctx, f"XLSX.utils.sheet_to_csv({wb}.Sheets[{sheet_name}])")
+ csv = duk_get_string(ctx, -1)
+ duk.duk_pop(ctx)
+ return csv
+
+def export_df_to_wb(ctx, df, path, sheet_name="Sheet1", book_type=None):
+ json = df.to_json(orient="records")
+ [s, l] = str_to_c(json)
+ duk.duk_push_lstring(ctx, s, l)
+ duk.duk_put_global_string(ctx, str_to_c("json")[0])
+ if not book_type: book_type = path.split(".")[-1]
+ duk_eval_string_noresult(ctx, f"""
+ aoo = JSON.parse(json);
+ newws = XLSX.utils.json_to_sheet(aoo);
+ newwb = XLSX.utils.book_new(newws, '{sheet_name}');
+ newbuf = XLSX.write(newwb, {{type:'buffer', bookType:'{book_type}'}});
+ """)
+ save_file(ctx, path, "newbuf")
+
+def get_df_from_wb(ctx, wb, sheet_name=None):
+ csv = get_csv_from_wb(ctx, wb, sheet_name)
+ return read_csv(StringIO(csv))
+
+class SheetJSWorkbook(object):
+ # Handle for a workbook that lives in the JavaScript engine.
+ # `wb` is the name of the JavaScript variable holding the workbook (see SheetJS.read_file).
+ def __init__(self, sheetjs, wb):
+ self.ctx = sheetjs.ctx
self.wb = wb
- def js(self): return self.wb
+ def get_sheet_names(self):
+ # delegates to the module-level get_sheet_names helper
+ return get_sheet_names(self.ctx, self.wb)
- @to_py
- def sheet_names(self):
- return self.wb.SheetNames
-
- def get_sheet(self, name):
- return SheetJSWorksheet(self.ctxt, self.wb.Sheets[name])
-
- def to_file(self, path, book_type=""):
- b64ify = self.ctxt.eval("((wb, bT) => XLSX.write(wb, {type:'base64', bookType:bT}))")
- if not book_type: book_type = splitext(path)[1][1:]
- b64 = b64ify(self.wb, book_type)
- raw = b64decode(b64)
- with open(path, mode="wb") as f:
- f.write(raw)
-
-class SheetJSWrapper:
- ctxt = None
+ def get_df(self, sheet_name=None):
+ if sheet_name is None: sheet_name = self.get_sheet_names()[0]
+ return get_df_from_wb(self.ctx, self.wb, sheet_name)
+class SheetJS(object):
+ # Operations bound to one engine context `ctx`; wb_names records the
+ # JavaScript variable names assigned to workbooks read so far.
def __init__(self, ctx):
- self.ctxt = ctx
- with open("xlsx.full.min.js") as f: self.ctxt.eval(f.read())
-
- def version(self):
- return self.ctxt.eval("XLSX.version")
-
- def read_binary(self, data):
- read = self.ctxt.eval("(b64 => XLSX.read(b64, {type: 'base64', dense: true}))")
- return SheetJSWorkbook(self.ctxt, read(b64encode(data)))
+ self.ctx = ctx
+ self.wb_names = []
def read_file(self, path):
- with open(path, mode="rb") as f:
- return self.read_binary(f.read())
+ # each workbook gets its own variable name: wb0, wb1, ...
+ self.wb_names.append(f"wb{len(self.wb_names)}")
+ parse_file(self.ctx, path, self.wb_names[-1])
+ return SheetJSWorkbook(self, self.wb_names[-1])
- def sheet_from_json(self, json):
- jsonify = self.ctxt.eval("(json => XLSX.utils.json_to_sheet(JSON.parse(json)) )")
- return SheetJSWorksheet(self.ctxt, jsonify(json))
+ def write_df(self, df, path, sheet_name = None):
+ export_df_to_wb(self.ctx, df, path, sheet_name)
- def book_new(self):
- booknew = self.ctxt.eval("XLSX.utils.book_new()")
- return SheetJSWorkbook(self.ctxt, booknew)
+class SheetJSWrapper(object):
+ def __enter__(self):
+ [context, ctx] = initialize()
+ self.context = context
+ self.ctx = ctx
+ return SheetJS(ctx)
- def book_append_sheet(self, book, sheet, wsname):
- bas = self.ctxt.eval("((wb, ws, wsname) => XLSX.utils.book_append_sheet(wb, ws, wsname))")
- bas(book.js(), sheet.js(), wsname)
-
- def book_from_json(self, json, wsname = "Sheet1"):
- booknew = self.book_new()
- sheet = self.sheet_from_json(json)
- self.book_append_sheet(booknew, sheet, wsname)
- return booknew
-
- def book_from_df(self, df):
- # convert from dataframe to JSON string
- json = df.to_json(orient="records")
- return self.book_from_json(json)
-
-@contextmanager
-def SheetJS():
- """
- SheetJS Library context manager
-
- Returns an instance of the SheetJSWrapper class
-
- Reading data from file to Pandas DataFrame:
-
- ```py
- with SheetJS() as sheetjs:
- # read data from file
- wb = sheetjs.read_file(argv[1])
-
- # get first worksheet
- first_ws_name = wb.sheet_names()[0]
- ws = wb.get_sheet(wsname)
-
- # get data from first worksheet (list of dicts)
- rows = ws.get_rows()
-
- # generate pandas DataFrame
- df = pd.DataFrame.from_records(rows)
- ```
-
- Writing data from Pandas DataFrame to file:
-
- ```py
- with SheetJS() as sheetjs:
- sheetjs.book_from_df(df).to_file(outf)
- ```
-
- """
- with JSContext() as ctxt:
- yield SheetJSWrapper(ctxt)
+ def __exit__(self, exc_type, exc_value, traceback):
+ duk.duk_destroy_heap(self.ctx)
\ No newline at end of file