From 952244b91739a6a52622e8864cfb201ab65a8b12 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Tue, 30 Jan 2024 04:27:22 -0500 Subject: [PATCH] pandas-duktape --- docz/data/engines.xls | 29 +- docz/docs/03-demos/01-math/21-pandas.md | 331 +++++++++++++++ docz/docs/03-demos/42-engines/01-duktape.md | 181 +++++++- docz/docs/03-demos/42-engines/14-pandas.md | 440 -------------------- docz/docusaurus.config.js | 3 +- docz/static/duk/SheetJSDuk.php | 5 +- docz/static/duk/SheetJSDuk.pl | 33 ++ docz/static/duk/SheetJSDuk.py | 103 +++++ docz/static/pandas/SheetJSPandas.py | 52 +-- docz/static/pandas/sheetjs.py | 254 +++++------ 10 files changed, 814 insertions(+), 617 deletions(-) create mode 100644 docz/docs/03-demos/01-math/21-pandas.md delete mode 100644 docz/docs/03-demos/42-engines/14-pandas.md create mode 100644 docz/static/duk/SheetJSDuk.pl create mode 100644 docz/static/duk/SheetJSDuk.py diff --git a/docz/data/engines.xls b/docz/data/engines.xls index b0b0ade..e6f06cd 100644 --- a/docz/data/engines.xls +++ b/docz/data/engines.xls @@ -77,9 +77,8 @@ + - - Rhino @@ -147,9 +146,9 @@ + - - + Boa @@ -224,7 +223,7 @@ Engine - Binding + Lang x64 ARM x64 @@ -249,7 +248,17 @@ + + + + Duktape + Python + + + + + @@ -261,16 +270,6 @@ - - V8 - Python - - - - - - - JSC Swift diff --git a/docz/docs/03-demos/01-math/21-pandas.md b/docz/docs/03-demos/01-math/21-pandas.md new file mode 100644 index 0000000..ab1231e --- /dev/null +++ b/docz/docs/03-demos/01-math/21-pandas.md @@ -0,0 +1,331 @@ +--- +title: Spreadsheet Data in Pandas +sidebar_label: Python + Pandas +description: Process structured data in Python with Pandas. Seamlessly integrate spreadsheets into your workflow with SheetJS. Analyze complex Excel spreadsheets with confidence. 
+pagination_prev: demos/index +pagination_next: demos/frontend/index +--- + +import current from '/version.js'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; +import CodeBlock from '@theme/CodeBlock'; + +Pandas[^1] is a Python software library for data analysis. + +[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing +data from spreadsheets. + +This demo uses SheetJS to process data from a spreadsheet and translate to the +Pandas DataFrame format. We'll explore how to load SheetJS from Python scripts, +generate DataFrames from workbooks, and write DataFrames back to workbooks. + +The ["Complete Example"](#complete-example) includes a wrapper library that +simplifies importing and exporting spreadsheets. + +:::info pass + +Pandas includes limited support for reading spreadsheets (`pandas.read_excel`) +and writing XLSX spreadsheets (`pandas.DataFrame.to_excel`). + +**SheetJS supports common spreadsheet formats that Pandas cannot process.** + +SheetJS operations also offer more flexibility in processing complex worksheets. + +::: + +:::note Tested Environments + +This demo was tested in the following deployments: + +| Architecture | JS Engine | Pandas | Python | Date | +|:-------------|:----------------|:-------|:-------|:-----------| +| `darwin-x64` | Duktape `2.7.0` | 2.0.3 | 3.11.7 | 2024-01-29 | +| `linux-x64` | Duktape `2.7.0` | 1.5.3 | 3.11.3 | 2024-01-29 | + +::: + +## Integration Details + +[`sheetjs.py`](pathname:///pandas/sheetjs.py) is a wrapper script that provides +helper methods for reading and writing spreadsheets. Installation notes are +included in the ["Complete Example"](#complete-example) section. + +### JS in Python + +JS code cannot be directly evaluated in Python implementations. + +To run JS code from Python, JavaScript engines[^2] can be embedded in Python +modules or dynamically loaded using the `ctypes` foreign function library[^3]. 
+This demo uses `ctypes` with the [Duktape engine](/docs/demos/engines/duktape). + +### Wrapper + +The script exports a class named `SheetJSWrapper`. It is a context manager that +initializes the Duktape engine and executes SheetJS scripts on entrance. All +work should be performed in the context: + +```python title="Complete Example" +#!/usr/bin/env python3 +from sheetjs import SheetJSWrapper + +with SheetJSWrapper() as sheetjs: + + # Parse file + wb = sheetjs.read_file("pres.numbers") + print("Loaded file pres.numbers") + + # Get first worksheet name + first_ws_name = wb.get_sheet_names()[0] + print(f"Reading from sheet {first_ws_name}") + + # Generate DataFrame from first worksheet + df = wb.get_df(first_ws_name) + print(df.info()) + + # Export DataFrame to XLSB + sheetjs.write_df(df, "SheetJSPandas.xlsb", sheet_name="DataFrame") +``` + +### Reading Files + +`sheetjs.read_file` accepts a path to a spreadsheet file. It will parse the file +and return an object representing the workbook. + +The `get_sheet_names` method of the workbook returns a list of sheet names. + +The `get_df` method of the workbook generates a DataFrame from the workbook. The +specific sheet can be selected by passing the name. 
+ +For example, the following code reads `pres.numbers` and generates a DataFrame +from the second worksheet: + +```python title="Generating a DataFrame from the second worksheet" +with SheetJSWrapper() as sheetjs: + # Parse file + wb = sheetjs.read_file(path) + + # Generate DataFrame from second worksheet + ws_name = wb.get_sheet_names()[1] + df = wb.get_df(ws_name) + + # Print metadata + print(df.info()) +``` + +Under the hood, `sheetjs.py` performs the following steps: + +```mermaid +flowchart LR + file[(workbook\nfile)] + subgraph SheetJS operations + bytes(Byte\nstring) + wb((SheetJS\nWorkbook)) + csv(CSV\nstring) + end + subgraph Pandas operations + stream(CSV\nStream) + df[(Pandas\nDataFrame)] + end + file --> |`open`/`read`\nPython ops| bytes + bytes --> |`XLSX.read`\nParse Bytes| wb + wb --> |`sheet_to_csv`\nExtract Data| csv + csv --> |`StringIO`\nPython ops| stream + stream --> |`read_csv`\nParse CSV| df +``` + +1) Pure Python operations read the spreadsheet file and generate a byte string. + +2) SheetJS libraries parse the string and generate a clean CSV. + +- The `read` method[^4] parses file bytes into a SheetJS workbook object[^5] +- After selecting a worksheet, `sheet_to_csv`[^6] generates a CSV string + +3) Python operations convert the CSV string to a stream object.[^7] + +4) The Pandas `read_csv` method[^8] ingests the stream and generates a DataFrame. + +### Writing Files + +`sheetjs.write_df` accepts a DataFrame and a path. It will attempt to export +the data to a spreadsheet file. 
+ +For example, the following code exports a DataFrame to `SheetJSPandas.xlsb`: + +```python title="Exporting a DataFrame to XLSB" +with SheetJSWrapper() as sheetjs: + # Export DataFrame to XLSB + sheetjs.write_df(df, "SheetJSPandas.xlsb", sheet_name="DataFrame") +``` + +Under the hood, `sheetjs.py` performs the following steps: + +```mermaid +flowchart LR + subgraph Pandas operations + df[(Pandas\nDataFrame)] + json(JSON\nString) + end + subgraph SheetJS operations + aoo(array of\nobjects) + wb((SheetJS\nWorkbook)) + u8a(File\nbytes) + end + file[(workbook\nfile)] + df --> |`to_json`\nPandas ops| json + json --> |`JSON.parse`\nJS Engine| aoo + aoo --> |`json_to_sheet`\nSheetJS Ops| wb + wb --> |`XLSX.write`\nUint8Array| u8a + u8a --> |`open`/`write`\nPython ops| file +``` + +1) The Pandas DataFrame `to_json` method[^9] generates a JSON string. + +2) JS engine operations translate the JSON string to an array of objects. + +3) SheetJS libraries process the data array and generate file bytes. + +- The `json_to_sheet` method[^10] creates a SheetJS sheet object from the data. +- The `book_new` method[^11] creates a SheetJS workbook that includes the sheet. +- The `write` method[^12] generates the spreadsheet file bytes. + +4) Pure Python operations write the bytes to file. + +## Complete Example + +This example will extract data from an Apple Numbers spreadsheet and generate a +DataFrame. The DataFrame will be exported to the binary XLSB spreadsheet format. 
+ +0) Install Pandas: + +```bash +sudo python3 -m pip install pandas +``` + +:::caution pass + +On Arch Linux-based platforms including the Steam Deck, the install may fail: + +``` +error: externally-managed-environment +``` + +In these situations, Pandas must be installed through the package manager: + +```bash +sudo pacman -Syu python-pandas +``` + +::: + +1) Build the Duktape shared library: + +```bash +curl -LO https://duktape.org/duktape-2.7.0.tar.xz +tar -xJf duktape-2.7.0.tar.xz +cd duktape-2.7.0 +make -f Makefile.sharedlibrary +cd .. +``` + +2) Copy the shared library to the current folder. When the demo was last tested, +the shared library file name differed by platform: + +| OS | name | +|:-------|:--------------------------| +| Darwin | `libduktape.207.20700.so` | +| Linux | `libduktape.so.207.20700` | + +```bash +cp duktape-*/libduktape.* . +``` + +3) Download the SheetJS Standalone script and move to the project directory: + + + +{`\ +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/shim.min.js +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`} + + +4) Download the following test scripts and files: + +- [`pres.numbers` test file](https://sheetjs.com/pres.numbers) +- [`sheetjs.py` script](pathname:///pandas/sheetjs.py) +- [`SheetJSPandas.py` script](pathname:///pandas/SheetJSPandas.py) + +```bash +curl -LO https://sheetjs.com/pres.numbers +curl -LO https://docs.sheetjs.com/pandas/sheetjs.py +curl -LO https://docs.sheetjs.com/pandas/SheetJSPandas.py +``` + +5) Edit the `sheetjs.py` script. 
+ +The `lib` variable declares the path to the library: + +```python title="sheetjs.py (edit highlighted line)" +# highlight-next-line +lib = "libduktape.207.20700.so" +``` + + + + +The name of the library is `libduktape.207.20700.so`: + +```python title="sheetjs.py (change highlighted line)" +# highlight-next-line +lib = "libduktape.207.20700.so" +``` + + + + +The name of the library is `libduktape.so.207.20700`: + +```python title="sheetjs.py (change highlighted line)" +# highlight-next-line +lib = "libduktape.so.207.20700" +``` + + + + +6) Run the script: + +```bash +python3 SheetJSPandas.py pres.numbers +``` + +If successful, the script will display DataFrame metadata: + +``` +RangeIndex: 5 entries, 0 to 4 +Data columns (total 2 columns): + # Column Non-Null Count Dtype +--- ------ -------------- ----- + 0 Name 5 non-null object + 1 Index 5 non-null int64 +dtypes: int64(1), object(1) +``` + +It will also export the DataFrame to `SheetJSPandas.xlsb`. The file can be +inspected with a spreadsheet editor that supports XLSB files. + +[^1]: The official documentation site is <https://pandas.pydata.org/> and the official distribution point is <https://pypi.org/project/pandas/> +[^2]: See ["Other Languages"](/docs/demos/engines/) for more examples. +[^3]: See [`ctypes`](https://docs.python.org/3/library/ctypes.html) in the Python documentation. +[^4]: See [`read` in "Reading Files"](/docs/api/parse-options) +[^5]: See ["Workbook Object"](/docs/csf/book) +[^6]: See [`sheet_to_csv` in "Utilities"](/docs/api/utilities/csv#delimiter-separated-output) +[^7]: See [the examples in "IO tools"](https://pandas.pydata.org/pandas-docs/stable/user_guide/io.html) in the Pandas documentation. +[^8]: See [`pandas.read_csv`](https://pandas.pydata.org/docs/reference/api/pandas.read_csv.html) in the Pandas documentation. +[^9]: See [`pandas.DataFrame.to_json`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_json.html) in the Pandas documentation. 
+[^10]: See [`json_to_sheet` in "Utilities"](/docs/api/utilities/array#array-of-objects-input) +[^11]: See [`book_new` in "Utilities"](/docs/api/utilities/wb) +[^12]: See [`write` in "Writing Files"](/docs/api/write-options) diff --git a/docz/docs/03-demos/42-engines/01-duktape.md b/docz/docs/03-demos/42-engines/01-duktape.md index b578e80..d6c2832 100644 --- a/docz/docs/03-demos/42-engines/01-duktape.md +++ b/docz/docs/03-demos/42-engines/01-duktape.md @@ -361,11 +361,138 @@ This demo was tested in the following deployments: | Architecture | Version | PHP Version | Date | |:-------------|:--------|:------------|:-----------| | `darwin-x64` | `2.7.0` | `8.3.2` | 2024-01-26 | +| `linux-x64` | `2.7.0` | `8.2.7` | 2024-01-29 | ::: 0) Ensure `php` is installed and available on the system path. +1) Inspect the `php.ini` configuration file. The location of the file can be +found by running `php --ini`. The following output is from the last macOS test: + +```text pass +Configuration File (php.ini) Path: /usr/local/etc/php/8.3 +// highlight-next-line +Loaded Configuration File: /usr/local/etc/php/8.3/php.ini +Scan for additional .ini files in: /usr/local/etc/php/8.3/conf.d +Additional .ini files parsed: /usr/local/etc/php/8.3/conf.d/ext-opcache.ini +``` + +The following line should appear in the configuration: + +```ini title="php.ini (add to end)" +extension=ffi +``` + +If this line is prefixed with a `;`, remove the semicolon. If this line does not +appear in the file, add it to the end. + +2) Build the Duktape shared library: + +```bash +curl -LO https://duktape.org/duktape-2.7.0.tar.xz +tar -xJf duktape-2.7.0.tar.xz +cd duktape-2.7.0 +make -f Makefile.sharedlibrary +cd .. +``` + +3) Copy the shared library to the current folder. 
When the demo was last tested, +the shared library file name differed by platform: + +| OS | name | +|:-------|:--------------------------| +| Darwin | `libduktape.207.20700.so` | +| Linux | `libduktape.so.207.20700` | + +```bash +cp duktape-*/libduktape.* . +``` + +4) Download the SheetJS Standalone script, shim script and test file. Move all +three files to the project directory: + + + +{`\ +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/shim.min.js +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js +curl -LO https://sheetjs.com/pres.numbers`} + + +5) Download [`SheetJSDuk.php`](pathname:///duk/SheetJSDuk.php): + +```bash +curl -LO https://docs.sheetjs.com/duk/SheetJSDuk.php +``` + +6) Edit the `SheetJSDuk.php` script. + +The `$sofile` variable declares the path to the library: + +```php title="SheetJSDuk.php (edit highlighted line)" + + + +The name of the library is `libduktape.207.20700.so`: + +```php title="SheetJSDuk.php (change highlighted line)" +// highlight-next-line +$sofile = './libduktape.207.20700.so'; +``` + + + + +The name of the library is `libduktape.so.207.20700`: + +```php title="SheetJSDuk.php (change highlighted line)" +// highlight-next-line +$sofile = './libduktape.so.207.20700'; +``` + + + + +7) Run the script: + +```bash +php SheetJSDuk.php pres.numbers +``` + +If the program succeeded, the CSV contents will be printed to console and the +file `sheetjsw.xlsb` will be created. That file can be opened with Excel. + +### Python + +There is no official Python binding to the Duktape library. Instead, this demo +uses the raw `ctypes` interface[^2] to the Duktape shared library. 
+ +#### Python Demo + +:::note Tested Deployments + +This demo was tested in the following deployments: + +| Architecture | Version | Python | Date | +|:-------------|:--------|:---------|:-----------| +| `darwin-x64` | `2.7.0` | `3.11.7` | 2024-01-29 | +| `linux-x64` | `2.7.0` | `3.11.3` | 2024-01-29 | + +::: + +0) Ensure `python` is installed and available on the system path. + 1) Build the Duktape shared library: ```bash @@ -377,10 +504,15 @@ cd .. ``` 2) Copy the shared library to the current folder. When the demo was last tested, -the file name was `libduktape.207.20700.so`: +the shared library file name differed by platform: + +| OS | name | +|:-------|:--------------------------| +| Darwin | `libduktape.207.20700.so` | +| Linux | `libduktape.so.207.20700` | ```bash -cp duktape-*/libduktape.*.so . +cp duktape-*/libduktape.* . ``` 3) Download the SheetJS Standalone script, shim script and test file. Move all @@ -398,16 +530,50 @@ curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js curl -LO https://sheetjs.com/pres.numbers`} -4) Download [`SheetJSDuk.php`](pathname:///duk/SheetJSDuk.php): +4) Download [`SheetJSDuk.py`](pathname:///duk/SheetJSDuk.py): ```bash -curl -LO https://docs.sheetjs.com/duk/SheetJSDuk.php +curl -LO https://docs.sheetjs.com/duk/SheetJSDuk.py ``` -5) Run the script: +5) Edit the `SheetJSDuk.py` script. 
+ +The `lib` variable declares the path to the library: + +```python title="SheetJSDuk.py (edit highlighted line)" +#!/usr/bin/env python3 + +# highlight-next-line +lib = "libduktape.207.20700.so" +``` + + + + +The name of the library is `libduktape.207.20700.so`: + +```python title="SheetJSDuk.py (change highlighted line)" +# highlight-next-line +lib = "libduktape.207.20700.so" +``` + + + + +The name of the library is `libduktape.so.207.20700`: + +```python title="SheetJSDuk.py (change highlighted line)" +# highlight-next-line +lib = "libduktape.so.207.20700" +``` + + + + +6) Run the script: ```bash -php SheetJSDuk.php pres.numbers +python3 SheetJSDuk.py pres.numbers ``` If the program succeeded, the CSV contents will be printed to console and the @@ -520,4 +686,5 @@ sudo cpan install File::Slurp ::: -[^1]: See [Foreign Function Interface](https://www.php.net/manual/en/book.ffi.php) in the PHP documentation. \ No newline at end of file +[^1]: See [Foreign Function Interface](https://www.php.net/manual/en/book.ffi.php) in the PHP documentation. +[^2]: See [`ctypes`](https://docs.python.org/3/library/ctypes.html) in the Python documentation. \ No newline at end of file diff --git a/docz/docs/03-demos/42-engines/14-pandas.md b/docz/docs/03-demos/42-engines/14-pandas.md deleted file mode 100644 index d3e29bd..0000000 --- a/docz/docs/03-demos/42-engines/14-pandas.md +++ /dev/null @@ -1,440 +0,0 @@ ---- -title: Spreadsheet Data in Pandas -sidebar_label: Python (Pandas) -description: Process structured data in Python with Pandas. Seamlessly integrate spreadsheets into your workflow with SheetJS. Analyze complex Excel spreadsheets with confidence. -pagination_prev: demos/cloud/index -pagination_next: demos/bigdata/index ---- - -import current from '/version.js'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; -import CodeBlock from '@theme/CodeBlock'; - -Pandas[^1] is a Python software library for data analysis. 
- -[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing -data from spreadsheets. - -This demo uses SheetJS to process data from a spreadsheet and translate to the -Pandas DataFrame format. We'll explore how to load SheetJS from Python scripts, -generate DataFrames from workbooks, and write DataFrames back to workbooks. - -:::note - -This demo was tested in the following deployments: - -| Architecture | V8 version | Pandas | Python | Date | -|:-------------|:--------------|:-------|:-------|:-----------| -| `darwin-x64` | `11.5.150.16` | 2.0.3 | 3.11.4 | 2023-07-29 | - -::: - -:::info pass - -Pandas includes limited support for reading spreadsheets (`pandas.from_excel`) -and writing XLSX spreadsheets (`pandas.DataFrame.to_excel`). - -The SheetJS approach supports many common spreadsheet formats that are not -supported by the current set of Pandas codecs and offers greater flexibility in -processing complex worksheets. - -::: - -## Integration Details - -JS code cannot literally be run in the Python interpreter. To run JS code from -Python, JavaScript engines[^2] can be embedded in CPython modules. - -### Loading SheetJS - -This demo uses the `STPyV8` module[^3] to access the V8 JavaScript engine. - -_Initialize V8_ - -The engine library provides a convenient context manager `JSContext` for context -resource management. Within the context, the `eval` method can evaluate code: - -```py -from STPyV8 import JSContext - -# Initialize JS context -with JSContext() as ctxt: - # Run code - res = ctxt.eval("'Sheet' + 'JS'") - - # print result - print(res) -``` - -`STPyV8` handles data interchange for common types. Arrays and JS objects can be -translated to Python `list` and `dict` respectively. 
The following `convert` -function is used in the test suite[^4] - -```py -# from `tests/test_Wrapper.py` in the STPyV8 library -# License: Apache 2.0 -def convert(obj): - if isinstance(obj, JSArray): - return [convert(v) for v in obj] - if isinstance(obj, JSObject): - return dict([[str(k), convert(obj.__getattr__(str(k)))] for k in obj.__dir__()]) - return obj -``` - -_Loading the Library_ - -The [SheetJS Standalone scripts](/docs/getting-started/installation/standalone) -can be parsed and evaluated from the JS engine. Once evaluated, the `XLSX` -variable is available as a global. - -Assuming the standalone library is in the same directory as the source file, -the script can be evaluated with `eval`: - -```py - # Within a JSContext, open `xlsx.full.min.js` and evaluate - with open("xlsx.full.min.js") as f: - ctxt.eval(f.read()) -``` - -### Reading Files - -The following diagram depicts the spreadsheet salsa: - -```mermaid -flowchart LR - file[(workbook\nfile)] - subgraph SheetJS operations - base64(Base64\nstring) - wb((SheetJS\nWorkbook)) - aoo(array of\nobjects) - end - subgraph Pandas operations - lod(list of\nrecords) - df[(Pandas\nDataFrame)] - end - file --> |`open`/`read`\nPython ops| base64 - base64 --> |`XLSX.read`\nParse Bytes| wb - wb --> |`sheet_to_json`\nExtract Data| aoo - aoo --> |`convert`\nPython ops|lod - lod --> |`from_records`\nPandas ops| df -``` - -At a high level: - -1) Pure Python operations read the file and generate a Base64 string - -2) SheetJS libraries parse the string and generates JS records - -3) JS engine operations translate the rows to Python `list` of `dicts` - -4) Pandas operations translate the Python data to a DataFrame - -#### Read files - -The safest format for data interchange is Base64-encoded strings: - -```py -from base64 import b64encode - -with open(path, mode="rb") as f: - file_bytes = f.read() - b64 = b64encode(file_bytes) -``` - -#### Parse bytes - -From JS code, `XLSX.read`[^5] parses the Base64 string - -```py -wb 
= ctxt.eval("(b64 => XLSX.read(b64, {type: 'base64', dense: true}))")(b64) -``` - -The `wb` object follows the "Common Spreadsheet Format"[^6], an in-memory format -for representing workbooks, worksheets, cells, and spreadsheet features. - -#### Get First Worksheet - -As explained in the "Workbook Object"[^7] section: -- the `SheetNames` property is a ordered list of the sheet names in the workbook -- the `Sheets` property of the workbook object is an object whose keys are sheet - names and whose values are sheet objects. - -For use in Python, the `SheetNames` array must be converted to a `list`: - -```py -sheet_names = convert(wb.SheetNames) -first_sheet_name = sheet_names[0] -``` - -Since utility functions will process the worksheet object from JavaScript, it is -preferable not to convert the object: - -```py -first_sheet = wb.Sheets[first_sheet_name] # do not convert -``` - -#### Generate List of Records - -In JavaScript, the equivalent of the "`list` of `dict`s" or "`list` of records" -is "array of objects". They can be created with `XLSX.utils.sheet_to_json`[^8]: - -```py -rows = convert(ctxt.eval("(ws => XLSX.utils.sheet_to_json(ws))")(first_sheet)) -``` - -#### Generate Pandas DataFrame - -`rows` is a `list` of `dict` objects. 
`from_records`[^9] understands this data -shape and generates a proper DataFrame: - -```py -df = pd.DataFrame.from_records(rows) -``` - -### Writing Files - -The writing process looks similar to the reading process in reverse: - -```mermaid -flowchart LR - subgraph Pandas operations - df[(Pandas\nDataFrame)] - json(JSON\nString) - end - subgraph SheetJS operations - aoo(array of\nobjects) - wb((SheetJS\nWorkbook)) - base64(Base64\nstring) - end - file[(workbook\nfile)] - df --> |`to_json`\nPandas ops| json - json --> |`JSON.parse`\nJS Engine| aoo - aoo --> |`json_to_sheet`\nSheetJS Ops| wb - wb --> |`XLSX.write`\nBase64| base64 - base64 --> |`open`/`write`\nPython ops| file -``` - -At a high level: - -1) Pandas operations translate the Python data to JSON string - -2) JS engine operations translate the JSON string to an array of objects - -3) SheetJS libraries parse the array and generate a Base64-encoded workbook - -4) Pure Python operations decode the Base64 string and write the bytes to file. - -#### Generate JSON - -`DataFrame#to_json`[^10] with the option `orient="records"` generates a JSON -string that encodes an array of objects: - -```py -json = df.to_json(orient="records") -``` - -#### Generate Worksheet - -In JavaScript, `JSON.parse` will interpret the string as an array of objects. -`XLSX.utils.json_to_sheet`[^11] generates a SheetJS worksheet object: - -```py -sheet = ctxt.eval("(json => XLSX.utils.json_to_sheet(JSON.parse(json)) )")(json) -``` - -#### Export Enhancements - -At this point, there are many options for improving the appearance of the sheet. -For example, the "Export Tutorial"[^12] shows how to adjust column widths. - -:::tip pass - -[SheetJS Pro](https://sheetjs.com/pro) offers additional styling options such as -cell styling and frozen rows. - -"Pro Edit" offers a special approach for inserting data into an existing file. 
- -::: - -#### Generate Workbook - -`XLSX.utils.book_new`[^13] creates a new workbook and `XLSX.utils.book_append_sheet`[^14] -appends a worksheet to the workbook. The new worksheet will be called "Export": - -:::note pass - -The code in the string literal is reproduced below: - -```js -(ws, name) => { - const wb = XLSX.utils.book_new(); - XLSX.utils.book_append_sheet(wb, ws, name); - return wb; -} -``` - -::: - -```py -book = ctxt.eval("""((ws, name) => { - const wb = XLSX.utils.book_new(); - XLSX.utils.book_append_sheet(wb, ws, name); - return wb; -})""")(sheet, "Export") -``` - -#### Generate File - -`XLSX.write`[^15] with the option `type: "base64"` attempts to create a file and -generate a Base64 string: - -```py -b64 = ctxt.eval("(wb => XLSX.write(wb, {type:'base64', bookType:'xls'}))")(book) -``` - -With the Base64 string, standard Python operations can create a file: - -```py -from base64 import b64decode - -raw = b64decode(b64) -with open("export.xls", mode="wb") as f: - f.write(raw) -``` - -## Complete Demo - -This example will extract data from an Apple Numbers spreadsheet and generate a -DataFrame. The DataFrame will be exported to a legacy XLS spreadsheet. - -### Engine Setup - -0) Follow the official installation instructions[^16]. - -
Instructions for macOS 12 (click to show) - -- Install `boost-python3` package using `brew`: - -```bash -brew install boost-python3 -``` - -- Identify python version: - -```bash -python3 --version -``` - -:::note pass - -When the demo was last tested, the version was `3.11.4` - -::: - -- [Download latest release](https://github.com/cloudflare/stpyv8/releases) - -```bash -curl -LO https://github.com/cloudflare/stpyv8/releases/download/v11.5.150.16/stpyv8-macos-12-python-3.11.zip -``` - -- Extract ZIP file and enter folder - -```bash -unzip stpyv8-macos-12-python-3.11.zip -cd stpyv8-macos-12-3.11 -``` - -- Move `icudtl.dat` to `/Library/Application Support/STPyV8/`: - -```bash -sudo mkdir -p /Library/Application\ Support/STPyV8 -sudo mv icudtl.dat /Library/Application\ Support/STPyV8/ -``` - -- Install wheel: - -```bash -sudo python3 -m pip install --upgrade *.whl -cd .. -``` - -
- -### Demo - -1) Download the SheetJS Standalone script and move to the project directory: - - - -{`\ -curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`} - - -2) Install Pandas. On macOS: - -```python -sudo python3 -m pip install pandas -``` - -3) Download the following test scripts and files: - -- [`pres.numbers` test file](https://sheetjs.com/pres.numbers) -- [`sheetjs.py` wrapper](pathname:///pandas/sheetjs.py) -- [`SheetJSPandas.py` script](pathname:///pandas/SheetJSPandas.py) - -```bash -curl -LO https://sheetjs.com/pres.numbers -curl -LO https://docs.sheetjs.com/pandas/sheetjs.py -curl -LO https://docs.sheetjs.com/pandas/SheetJSPandas.py -``` - -4) Run the script: - -```bash -python3 SheetJSPandas.py pres.numbers -``` - -If successful, it will display data rows in the file: - -``` -Reading from sheet Sheet1 -{'Name': 'Bill Clinton', 'Index': 42} -{'Name': 'GeorgeW Bush', 'Index': 43} -{'Name': 'Barack Obama', 'Index': 44} -{'Name': 'Donald Trump', 'Index': 45} -{'Name': 'Joseph Biden', 'Index': 46} -``` - -If Pandas is installed, the script will display DataFrame metadata: - -``` -RangeIndex: 5 entries, 0 to 4 -Data columns (total 2 columns): - # Column Non-Null Count Dtype ---- ------ -------------- ----- - 0 Name 5 non-null object - 1 Index 5 non-null int64 -dtypes: int64(1), object(1) -``` - -It will also export to `pres.xls`. The file can be read in a spreadsheet editor. - -[^1]: The official documentation site is and the official distribution point is -[^2]: See ["Other Languages"](/docs/demos/engines/) for more examples. -[^3]: [`STPyV8`](https://github.com/cloudflare/stpyv8) is a fork of the original [`PyV8` project](https://pypi.org/project/PyV8/). It is available under the permissive Apache 2.0 License. Special thanks to Flier Lu and CloudFlare! 
-[^4]: See [`tests/test_Wrapper.py`](https://github.com/cloudflare/stpyv8/blob/410b31abe7a103b408d362cb872ce81604281c48/tests/test_Wrapper.py#L15) in the `STPyV8` code repository. -[^5]: See [`read` in "Reading Files"](/docs/api/parse-options) -[^6]: See ["SheetJS Data Model"](/docs/csf/) -[^7]: See ["Workbook Object"](/docs/csf/book) -[^8]: See [`sheet_to_json` in "Utilities"](/docs/api/utilities/array#array-output) -[^9]: See [`pandas.DataFrame.from_records`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.from_records.html) in the Pandas documentation. -[^10]: See [`pandas.DataFrame.to_json`](https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.to_json.html) in the Pandas documentation. -[^11]: See [`json_to_sheet` in "Utilities"](/docs/api/utilities/array#array-of-objects-input) -[^12]: See ["Clean up Workbook"](/docs/getting-started/examples/export#clean-up-workbook) in "Export Tutorial". -[^13]: See [`book_new` in "Utilities"](/docs/api/utilities/wb) -[^14]: See [`book_append_sheet` in "Utilities"](/docs/api/utilities/wb) -[^15]: See [`write` in "Writing Files"](/docs/api/write-options) -[^16]: See ["Installing"](https://github.com/cloudflare/stpyv8#installing) in the `STPyV8` project documentation \ No newline at end of file diff --git a/docz/docusaurus.config.js b/docz/docusaurus.config.js index 072f32c..b1049dd 100644 --- a/docz/docusaurus.config.js +++ b/docz/docusaurus.config.js @@ -146,7 +146,7 @@ const config = { prism: { theme: lightCodeTheme, darkTheme: darkCodeTheme, - additionalLanguages: [ "visual-basic", "swift", "java", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram", "matlab", "stata" ], + additionalLanguages: [ "visual-basic", "swift", "java", "php", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram", "matlab", "stata" ], }, liveCodeBlock: { playgroundPosition: 'top' @@ -244,6 +244,7 @@ const config = { /* math */ { from: '/docs/demos/ml', to: 
'/docs/demos/math/' }, { from: '/docs/demos/bigdata/ml', to: '/docs/demos/math/' }, + { from: '/docs/demos/engines/pandas', to: '/docs/demos/math/pandas/' }, /* installation */ { from: '/docs/installation/standalone', to: '/docs/getting-started/installation/standalone/' }, { from: '/docs/installation/frameworks', to: '/docs/getting-started/installation/frameworks/' }, diff --git a/docz/static/duk/SheetJSDuk.php b/docz/static/duk/SheetJSDuk.php index 40bb0b1..754bd4d 100644 --- a/docz/static/duk/SheetJSDuk.php +++ b/docz/static/duk/SheetJSDuk.php @@ -1,5 +1,7 @@ duk_create_heap(null, null, null, null, null); } function duk_eval_string_noresult($context, $cmd) { global $ffi; return $ffi->duk_eval_raw($context, $cmd, 0, 1 | (1<<3) | (1<<9) | (1<<10) | (1<<8) | (1<<11) ); } @@ -63,7 +65,6 @@ function save_file($context, $path, $var) { fclose($fh); } - function DOIT($cmd) { global $context; return duk_eval_string_noresult($context, $cmd); } /* initialize */ diff --git a/docz/static/duk/SheetJSDuk.pl b/docz/static/duk/SheetJSDuk.pl new file mode 100644 index 0000000..ef996bc --- /dev/null +++ b/docz/static/duk/SheetJSDuk.pl @@ -0,0 +1,33 @@ +# usage: perl SheetJSDuk.pl path/to/file +use JavaScript::Duktape::XS; +use File::Slurp; +use MIME::Base64 qw( encode_base64 decode_base64 ); + +# Initialize +my $js = JavaScript::Duktape::XS->new({ max_memory_bytes => 256 * 1024 * 1024 }); +$js->eval("var global = (function(){ return this; }).call(null);"); + +# Load the ExtendScript build +my $src = read_file('xlsx.extendscript.js', { binmode => ':raw' }); +$src =~ s/^\xEF\xBB\xBF//; +my $XLSX = $js->eval($src); + +# Print version number +$js->set('log' => sub { print $_[0], "\n"; }); +$js->eval("log('SheetJS library version ' + XLSX.version);"); + +# Parse File +my $raw_data = encode_base64(read_file($ARGV[0], { binmode => ':raw' }), ""); +$js->set("b64", $raw_data); +$js->eval(qq{ + global.wb = XLSX.read(b64, {type: "base64", WTF:1}); + global.ws = wb.Sheets[wb.SheetNames[0]]; + 
void 0; +}); + +# Print first worksheet CSV +$js->eval('log(XLSX.utils.sheet_to_csv(global.ws))'); + +# Write XLSB file +my $xlsb = $js->eval("XLSX.write(global.wb, {type:'base64', bookType:'xlsb'})"); +write_file("SheetJSDuk.xlsb", decode_base64($xlsb)); \ No newline at end of file diff --git a/docz/static/duk/SheetJSDuk.py b/docz/static/duk/SheetJSDuk.py new file mode 100644 index 0000000..e7ac73d --- /dev/null +++ b/docz/static/duk/SheetJSDuk.py @@ -0,0 +1,103 @@ +#!/usr/bin/env python3 + +lib = "libduktape.207.20700.so" + +from ctypes import CDLL, byref, string_at, c_int, c_void_p, c_char_p, c_size_t + +duk = CDLL(lib) + +def str_to_c(s): + b = s.encode("utf8") + return [c_char_p(b), len(b)] + +def duk_create_heap_default(): + duk.duk_create_heap.restype = c_void_p + return duk.duk_create_heap(None, None, None, None, None) + +def duk_eval_string_noresult(ctx, cmd): + [s, l] = str_to_c(cmd) + return duk.duk_eval_raw(ctx, s, l, 1 | (1<<3) | (1<<9) | (1<<10) | (1<<8) | (1<<11) ) + +def duk_eval_string(ctx, cmd): + [s, l] = str_to_c(cmd) + return duk.duk_eval_raw(ctx, s, l, 0 | (1<<3) | (1<<9) | (1<<10) | (1<<11) ) + +def duk_peval(ctx): + return duk.duk_eval_raw(ctx, None, 0, 1 | (1<<3) | (1<<7) | (1<<11) ) + +def duk_get_string(ctx, idx): + duk.duk_get_string.restype = c_char_p + retval = duk.duk_get_string(ctx, idx) + return retval.decode("utf8") + +def eval_file(ctx, path): + with open(path, "r") as f: + code = f.read() + [s, l] = str_to_c(code) + + duk.duk_push_lstring(ctx, s, l) + retval = duk_peval(ctx) + duk.duk_pop(ctx) + return retval + +def load_file(ctx, path, var): + with open(path, "rb") as f: + data = f.read() + ptr = c_char_p(data) + duk.duk_push_buffer_raw(ctx, 0, 1 | 2) + duk.duk_config_buffer(ctx, -1, ptr, len(data)) + duk.duk_put_global_string(ctx, str_to_c(var)[0]) + return data + +def save_file(ctx, path, var): + duk.duk_get_global_string(ctx, str_to_c(var)[0]) + sz = c_size_t() + duk.duk_get_buffer_data.restype = c_void_p + buf = 
duk.duk_get_buffer_data(ctx, -1, byref(sz)) + s = string_at(buf, sz.value) + with open(path, "wb") as f: + f.write(s) + +def process(path): + # initialize + context = duk_create_heap_default() + ctx = c_void_p(context) + + def DOIT(cmd): + return duk_eval_string_noresult(ctx, cmd) + + # duktape does not expose a standard "global" by default + DOIT("var global = (function(){ return this; }).call(null);") + + # load library + eval_file(ctx, "shim.min.js") + eval_file(ctx, "xlsx.full.min.js") + + # get version string + duk_eval_string(ctx, "XLSX.version") + print("SheetJS Library Version %s" % (duk_get_string(ctx, -1))) + duk.duk_pop(ctx) + + # read file + # NOTE: data is captured here to avoid GC + data = load_file(ctx, path, "buf") + print("Loaded file %s" % (path)) + + # parse workbook + DOIT("wb = XLSX.read(buf.slice(0, buf.length));") + DOIT("ws = wb.Sheets[wb.SheetNames[0]]") + + # print CSV + duk_eval_string(ctx, "XLSX.utils.sheet_to_csv(ws)") + print(duk_get_string(ctx, -1)) + duk.duk_pop(ctx) + + DOIT("newbuf = (XLSX.write(wb, {type:'buffer', bookType:'xlsb'}));") + save_file(ctx, "sheetjsw.xlsb", "newbuf") + + duk.duk_destroy_heap(ctx) + return 0 + +if("__main__" == __name__): + from sys import argv + process(argv[1]) \ No newline at end of file diff --git a/docz/static/pandas/SheetJSPandas.py b/docz/static/pandas/SheetJSPandas.py index 6652c8b..c624671 100644 --- a/docz/static/pandas/SheetJSPandas.py +++ b/docz/static/pandas/SheetJSPandas.py @@ -1,41 +1,25 @@ -from sheetjs import SheetJS -from sys import argv, exit +#!/usr/bin/env python3 -test_pandas = True -try: - import pandas as pd -except: - test_pandas = False +from sheetjs import SheetJSWrapper -# Parse file and generate row objects -with SheetJS() as sheetjs: - # Print library version number - print(f"SheetJS Version {sheetjs.version()}") +def process(path): + with SheetJSWrapper() as sheetjs: - # Read and parse data from file - wb = sheetjs.read_file(argv[1]) + # Parse file + wb = 
sheetjs.read_file(path) + print(f"Loaded file {path}") - # Get first worksheet name - wsname = wb.sheet_names()[0] - print(f"Reading from sheet {wsname}") + # Get first worksheet name + names = wb.get_sheet_names() + print(f"Reading from sheet {names[0]}") - # Get data from first sheet - ws = wb.get_sheet(wsname) - rows = ws.get_rows() - for row in rows: print(row) + # Generate DataFrame from first worksheet + df = wb.get_df() + print(df.info()) -if not test_pandas: - print("Pandas could not be loaded, skipping tests") - exit() + # Export DataFrame to XLSB + sheetjs.write_df(df, "SheetJSPandas.xlsb", sheet_name="DataFrame") -print("\n## Pandas DataFrame\n") - -# generate dataframe -df = pd.DataFrame.from_records(rows) -print(df.info()) - -outf="pres.xls" -print(f"\n## Writing to {outf}\n") -# write JSON string to XLS worksheet -with SheetJS() as sheetjs: - sheetjs.book_from_df(df).to_file(outf) +if("__main__" == __name__): + from sys import argv + process(argv[1]) diff --git a/docz/static/pandas/sheetjs.py b/docz/static/pandas/sheetjs.py index 8dc265f..a4c3bc1 100644 --- a/docz/static/pandas/sheetjs.py +++ b/docz/static/pandas/sheetjs.py @@ -1,136 +1,154 @@ -from base64 import b64encode, b64decode -from contextlib import contextmanager -from STPyV8 import JSContext, JSArray, JSObject -from functools import wraps -from os.path import splitext +lib = "libduktape.207.20700.so" -def to_py(method): - # `convert` from STPyV8 tests/test_Wrapper.py - def convert(obj): - if isinstance(obj, JSArray): - return [convert(v) for v in obj] - if isinstance(obj, JSObject): - return dict([[str(k), convert(obj.__getattr__(str(k)))] for k in obj.__dir__()]) - return obj +from ctypes import CDLL, byref, string_at, c_int, c_void_p, c_char_p, c_size_t +from json import loads +from io import StringIO +from pandas import read_csv - @wraps(method) - def func(self, *args, **kwargs): - res = method(self, *args, **kwargs) - return convert(res) - return func +duk = CDLL(lib) -class 
SheetJSWorksheet: - ws = None - ctxt = None +def str_to_c(s): + b = s.encode("utf8") + return [c_char_p(b), len(b)] - def __init__(self, ctxt, ws): - self.ctxt = ctxt - self.ws = ws +def duk_create_heap_default(): + duk.duk_create_heap.restype = c_void_p + return duk.duk_create_heap(None, None, None, None, None) - def js(self): return self.ws +def duk_eval_string_noresult(ctx, cmd): + [s, l] = str_to_c(cmd) + return duk.duk_eval_raw(ctx, s, l, 1 | (1<<3) | (1<<9) | (1<<10) | (1<<8) | (1<<11) ) - @to_py - def get_rows(self): - return self.ctxt.eval("(ws => XLSX.utils.sheet_to_json(ws))")(self.ws) +def duk_eval_string(ctx, cmd): + [s, l] = str_to_c(cmd) + return duk.duk_eval_raw(ctx, s, l, 0 | (1<<3) | (1<<9) | (1<<10) | (1<<11) ) -class SheetJSWorkbook: - wb = None - ctxt = None +def duk_peval(ctx): + return duk.duk_eval_raw(ctx, None, 0, 1 | (1<<3) | (1<<7) | (1<<11) ) - def __init__(self, ctxt, wb): - self.ctxt = ctxt +def duk_get_string(ctx, idx): + duk.duk_get_string.restype = c_char_p + retval = duk.duk_get_string(ctx, idx) + return retval.decode("utf8") + +def eval_file(ctx, path): + with open(path, "r") as f: + code = f.read() + [s, l] = str_to_c(code) + + duk.duk_push_lstring(ctx, s, l) + retval = duk_peval(ctx) + duk.duk_pop(ctx) + return retval + +def load_file(ctx, path, var): + with open(path, "rb") as f: + data = f.read() + ptr = c_char_p(data) + duk.duk_push_buffer_raw(ctx, 0, 1 | 2) + duk.duk_config_buffer(ctx, -1, ptr, len(data)) + duk.duk_put_global_string(ctx, str_to_c(var)[0]) + return data + +def save_file(ctx, path, var): + duk.duk_get_global_string(ctx, str_to_c(var)[0]) + sz = c_size_t() + duk.duk_get_buffer_data.restype = c_void_p + buf = duk.duk_get_buffer_data(ctx, -1, byref(sz)) + s = string_at(buf, sz.value) + with open(path, "wb") as f: + f.write(s) + +def initialize(): + # initialize + context = duk_create_heap_default() + ctx = c_void_p(context) + + # duktape does not expose a standard "global" by default + 
duk_eval_string_noresult(ctx, "var global = (function(){ return this; }).call(null);") + + # load library + eval_file(ctx, "shim.min.js") + eval_file(ctx, "xlsx.full.min.js") + + # get version string + duk_eval_string(ctx, "XLSX.version") + print(f"SheetJS Library Version {duk_get_string(ctx, -1)}") + duk.duk_pop(ctx) + return [context, ctx] + +def parse_file(ctx, path, name): + # read file + # NOTE: data is captured here to avoid GC + data = load_file(ctx, path, "buf") + + # parse workbook + duk_eval_string_noresult(ctx, f"{name} = XLSX.read(buf.slice(0, buf.length));") + +def get_sheet_names(ctx, wb): + duk_eval_string(ctx, f"JSON.stringify({wb}.SheetNames)") + wsnames = duk_get_string(ctx, -1) + names = loads(wsnames) + duk.duk_pop(ctx) + return names + +def get_csv_from_wb(ctx, wb, sheet_name=None): + if not sheet_name: sheet_name = f"{wb}.SheetNames[0]" + else: sheet_name = f"'{sheet_name}'" + duk_eval_string(ctx, f"XLSX.utils.sheet_to_csv({wb}.Sheets[{sheet_name}])") + csv = duk_get_string(ctx, -1) + duk.duk_pop(ctx) + return csv + +def export_df_to_wb(ctx, df, path, sheet_name="Sheet1", book_type=None): + json = df.to_json(orient="records") + [s, l] = str_to_c(json) + duk.duk_push_lstring(ctx, s, l) + duk.duk_put_global_string(ctx, str_to_c("json")[0]) + if not book_type: book_type = path.split(".")[-1] + duk_eval_string_noresult(ctx, f""" + aoo = JSON.parse(json); + newws = XLSX.utils.json_to_sheet(aoo); + newwb = XLSX.utils.book_new(newws, '{sheet_name}'); + newbuf = XLSX.write(newwb, {{type:'buffer', bookType:'{book_type}'}}); + """) + save_file(ctx, path, "newbuf") + +def get_df_from_wb(ctx, wb, sheet_name=None): + csv = get_csv_from_wb(ctx, wb, sheet_name) + return read_csv(StringIO(csv)) + +class SheetJSWorkbook(object): + def __init__(self, sheetjs, wb): + self.ctx = sheetjs.ctx self.wb = wb - def js(self): return self.wb + def get_sheet_names(self): + return get_sheet_names(self.ctx, self.wb) - @to_py - def sheet_names(self): - return 
self.wb.SheetNames - - def get_sheet(self, name): - return SheetJSWorksheet(self.ctxt, self.wb.Sheets[name]) - - def to_file(self, path, book_type=""): - b64ify = self.ctxt.eval("((wb, bT) => XLSX.write(wb, {type:'base64', bookType:bT}))") - if not book_type: book_type = splitext(path)[1][1:] - b64 = b64ify(self.wb, book_type) - raw = b64decode(b64) - with open(path, mode="wb") as f: - f.write(raw) - -class SheetJSWrapper: - ctxt = None + def get_df(self, sheet_name=None): + if sheet_name is None: sheet_name = self.get_sheet_names()[0] + return get_df_from_wb(self.ctx, self.wb, sheet_name) +class SheetJS(object): def __init__(self, ctx): - self.ctxt = ctx - with open("xlsx.full.min.js") as f: self.ctxt.eval(f.read()) - - def version(self): - return self.ctxt.eval("XLSX.version") - - def read_binary(self, data): - read = self.ctxt.eval("(b64 => XLSX.read(b64, {type: 'base64', dense: true}))") - return SheetJSWorkbook(self.ctxt, read(b64encode(data))) + self.ctx = ctx + self.wb_names = [] def read_file(self, path): - with open(path, mode="rb") as f: - return self.read_binary(f.read()) + self.wb_names.append(f"wb{len(self.wb_names)}") + parse_file(self.ctx, path, self.wb_names[-1]) + return SheetJSWorkbook(self, self.wb_names[-1]) - def sheet_from_json(self, json): - jsonify = self.ctxt.eval("(json => XLSX.utils.json_to_sheet(JSON.parse(json)) )") - return SheetJSWorksheet(self.ctxt, jsonify(json)) + def write_df(self, df, path, sheet_name = None): + export_df_to_wb(self.ctx, df, path, sheet_name) - def book_new(self): - booknew = self.ctxt.eval("XLSX.utils.book_new()") - return SheetJSWorkbook(self.ctxt, booknew) +class SheetJSWrapper(object): + def __enter__(self): + [context, ctx] = initialize() + self.context = context + self.ctx = ctx + return SheetJS(ctx) - def book_append_sheet(self, book, sheet, wsname): - bas = self.ctxt.eval("((wb, ws, wsname) => XLSX.utils.book_append_sheet(wb, ws, wsname))") - bas(book.js(), sheet.js(), wsname) - - def book_from_json(self, 
json, wsname = "Sheet1"): - booknew = self.book_new() - sheet = self.sheet_from_json(json) - self.book_append_sheet(booknew, sheet, wsname) - return booknew - - def book_from_df(self, df): - # convert from dataframe to JSON string - json = df.to_json(orient="records") - return self.book_from_json(json) - -@contextmanager -def SheetJS(): - """ - SheetJS Library context manager - - Returns an instance of the SheetJSWrapper class - - Reading data from file to Pandas DataFrame: - - ```py - with SheetJS() as sheetjs: - # read data from file - wb = sheetjs.read_file(argv[1]) - - # get first worksheet - first_ws_name = wb.sheet_names()[0] - ws = wb.get_sheet(wsname) - - # get data from first worksheet (list of dicts) - rows = ws.get_rows() - - # generate pandas DataFrame - df = pd.DataFrame.from_records(rows) - ``` - - Writing data from Pandas DataFrame to file: - - ```py - with SheetJS() as sheetjs: - sheetjs.book_from_df(df).to_file(outf) - ``` - - """ - with JSContext() as ctxt: - yield SheetJSWrapper(ctxt) + def __exit__(self, exc_type, exc_value, traceback): + duk.duk_destroy_heap(self.ctx) \ No newline at end of file