From 234c63dcaa517a894d8efdc38a6dcebcab5432e4 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Thu, 20 Jun 2024 03:30:34 -0400 Subject: [PATCH] V8 Java Binding demo --- docz/data/engines.xls | 12 +++- .../02-examples/06-loader.md | 39 ++++++++++- .../02-getting-started/02-examples/index.md | 4 +- .../docs/03-demos/03-net/01-network/index.mdx | 4 +- docz/docs/03-demos/03-net/02-upload/index.mdx | 4 +- docz/docs/03-demos/03-net/09-dom.md | 11 ++- docz/docs/03-demos/42-engines/02-v8.md | 67 +++++++++++++++++++ docz/static/v8/SheetJSJavet.java | 30 +++++++++ 8 files changed, 158 insertions(+), 13 deletions(-) create mode 100644 docz/static/v8/SheetJSJavet.java diff --git a/docz/data/engines.xls b/docz/data/engines.xls index 6c88411..c86c89a 100644 --- a/docz/data/engines.xls +++ b/docz/data/engines.xls @@ -244,7 +244,7 @@ - +
@@ -317,6 +317,16 @@ + + V8 + Java + + + + + + + JSC Swift diff --git a/docz/docs/02-getting-started/02-examples/06-loader.md b/docz/docs/02-getting-started/02-examples/06-loader.md index 46ba18a..056d89a 100644 --- a/docz/docs/02-getting-started/02-examples/06-loader.md +++ b/docz/docs/02-getting-started/02-examples/06-loader.md @@ -23,8 +23,11 @@ In ["SheetJS Conversion"](#sheetjs-conversion), we will use SheetJS libraries to generate CSV files for the LangChain CSV loader. These conversions can be run in a preprocessing step without disrupting existing CSV workflows. -In ["SheetJS Loader"](#sheetjs-loader), we will use SheetJS libraries in a custom -loader to directly generate documents and metadata. +In ["SheetJS Loader"](#sheetjs-loader), we will use SheetJS libraries in a +custom loader to directly generate documents and metadata. + +["SheetJS Loader Demo"](#sheetjs-loader-demo) is a complete demo that uses the +SheetJS Loader to answer questions based on data from an XLS workbook. :::note Tested Deployments @@ -34,6 +37,7 @@ This demo was tested in the following configurations: |:-----------|:--------------------------------------------------------------| | 2024-06-19 | Apple M2 Max 12-Core CPU + 30-Core GPU (32 GB unified memory) | | 2024-06-19 | NVIDIA RTX 4080 SUPER (16 GB VRAM) + i9-10910 (128 GB RAM) | +| 2024-06-19 | NVIDIA RTX 3090 (24 GB VRAM) + Ryzen 9 3900XT (128 GB RAM) | This explanation was verified against LangChain 0.2. @@ -103,7 +107,8 @@ Document { The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be imported in NodeJS scripts that use LangChain and other JavaScript libraries. 
-A simple pre-processing step can convert workbooks to spreadsheets +A simple pre-processing step can convert workbooks to CSV files that can be +processed by the existing CSV tooling: ```mermaid flowchart LR @@ -150,6 +155,23 @@ const csv = utils.sheet_to_csv(first_ws); console.log(csv); ``` +:::note pass + +A number of demos cover spiritually similar workflows: + +- [Stata](/docs/demos/extensions/stata), [MATLAB](/docs/demos/extensions/matlab) +and [Maple](/docs/demos/extensions/maple/) support XLSX data import. The SheetJS +integrations generate clean XLSX workbooks from user-supplied spreadsheets. + +- [TensorFlow.js](/docs/demos/math/tensorflow), [Pandas](/docs/demos/math/pandas) +and [Mathematica](/docs/demos/extensions/mathematica) support CSV. The SheetJS +integrations generate clean CSVs and use built-in CSV processors. + +- The ["Command-Line Tools"](/docs/demos/cli/) demo covers techniques for making +standalone command-line tools for file conversion. + +::: + ### Single Worksheet For a single worksheet, a SheetJS pre-processing step can write the CSV rows to @@ -257,6 +279,17 @@ The demo [`LoadOfSheet` loader](pathname:///loadofsheet/loadofsheet.mjs) will generate one Document per data row across all worksheets. It will also attempt to build metadata and attributes for use in self-querying retrievers. +```js title="Sample usage" +/* read and parse `data.xlsb` */ +const loader = new LoadOfSheet("./data.xlsb"); + +/* generate documents */ +const docs = await loader.load(); + +/* synthesized attributes for the SelfQueryRetriever */ +const attributes = loader.attributes; +``` +
Sample SheetJS Loader (click to show) diff --git a/docz/docs/02-getting-started/02-examples/index.md b/docz/docs/02-getting-started/02-examples/index.md index 179f25a..3347a7b 100644 --- a/docz/docs/02-getting-started/02-examples/index.md +++ b/docz/docs/02-getting-started/02-examples/index.md @@ -25,6 +25,6 @@ ultimately displayed to the user in a HTML table. ## Loading Sheets -["Loading Sheets"](/docs/getting-started/examples/loader) explores deep SheetJS +The ["Loader Tutorial"](/docs/getting-started/examples/loader) explores SheetJS integrations. Based on the existing CSV and binary loaders, a spreadsheet loader -for LangChain is developed and tested. +is developed and tested in a natural language query workflow. diff --git a/docz/docs/03-demos/03-net/01-network/index.mdx b/docz/docs/03-demos/03-net/01-network/index.mdx index f3881a3..c987d63 100644 --- a/docz/docs/03-demos/03-net/01-network/index.mdx +++ b/docz/docs/03-demos/03-net/01-network/index.mdx @@ -81,8 +81,8 @@ Each browser demo was tested in the following environments: | Browser | Date | |:------------|:-----------| -| Chrome 120 | 2024-01-30 | -| Safari 17.2 | 2024-01-15 | +| Chrome 126 | 2024-06-19 | +| Safari 17.3 | 2024-06-19 | ::: diff --git a/docz/docs/03-demos/03-net/02-upload/index.mdx b/docz/docs/03-demos/03-net/02-upload/index.mdx index 2dfd67a..53572e8 100644 --- a/docz/docs/03-demos/03-net/02-upload/index.mdx +++ b/docz/docs/03-demos/03-net/02-upload/index.mdx @@ -135,8 +135,8 @@ Each browser demo was tested in the following environments: | Browser | Date | |:------------|:-----------| -| Chrome 120 | 2024-01-15 | -| Safari 17.3 | 2024-02-21 | +| Chrome 126 | 2024-06-19 | +| Safari 17.3 | 2024-06-19 | ::: diff --git a/docz/docs/03-demos/03-net/09-dom.md b/docz/docs/03-demos/03-net/09-dom.md index 1799c76..1607138 100644 --- a/docz/docs/03-demos/03-net/09-dom.md +++ b/docz/docs/03-demos/03-net/09-dom.md @@ -288,7 +288,7 @@ The script will create a file `SheetJSCheerio.xlsx` that can be 
opened. ### DenoDOM [DenoDOM](https://deno.land/x/deno_dom) provides a DOM framework for Deno. For -the tested version (`0.1.43`), the following patches were needed: +the tested version (`0.1.46`), the following patches were needed: - TABLE `rows` property (explained above) - TR `cells` property (explained above) @@ -299,7 +299,7 @@ This example fetches [a sample table](pathname:///dom/SheetJSTable.html): // @deno-types="https://cdn.sheetjs.com/xlsx-${current}/package/types/index.d.ts" import * as XLSX from 'https://cdn.sheetjs.com/xlsx-${current}/package/xlsx.mjs'; \n\ -import { DOMParser } from 'https://deno.land/x/deno_dom@v0.1.43/deno-dom-wasm.ts'; +import { DOMParser } from 'https://deno.land/x/deno_dom@v0.1.46/deno-dom-wasm.ts'; \n\ const doc = new DOMParser().parseFromString( await (await fetch('https://docs.sheetjs.com/dom/SheetJSTable.html')).text(), @@ -323,7 +323,12 @@ XLSX.writeFile(workbook, "SheetJSDenoDOM.xlsx");`} :::note Tested Deployments -This demo was last tested on 2024 January 27 against DenoDOM `0.1.43` +This demo was tested in the following deployments: + +| Architecture | DenoDOM | Deno | Date | +|:-------------|:--------|:-------|:-----------| +| `darwin-x64` | 0.1.46 | 1.44.4 | 2024-06-19 | +| `darwin-arm` | 0.1.46 | 1.44.4 | 2024-06-19 | ::: diff --git a/docz/docs/03-demos/42-engines/02-v8.md b/docz/docs/03-demos/42-engines/02-v8.md index f27aef5..a4d87d5 100644 --- a/docz/docs/03-demos/42-engines/02-v8.md +++ b/docz/docs/03-demos/42-engines/02-v8.md @@ -970,6 +970,73 @@ cargo run pres.numbers If the program succeeded, the CSV contents will be printed to console and the file `sheetjsw.xlsb` will be created. That file can be opened with Excel. +### Java + +[Javet](https://www.caoccao.com/Javet/) is a Java binding to the V8 engine. +Javet simplifies conversions between Java data structures and V8 equivalents. + +Java byte arrays (`byte[]`) are projected in V8 as `Int8Array`. The SheetJS +`read` method expects a `Uint8Array`. 
The following script snippet performs a +zero-copy conversion: + +```js title="Zero-copy conversion from Int8Array to Uint8Array" +// assuming `i8` is an Int8Array +const u8 = new Uint8Array(i8.buffer, i8.byteOffset, i8.length); +``` + +:::note Tested Deployments + +This demo was last tested in the following deployments: + +| Architecture | V8 Version | Javet | Java | Date | +|:-------------|:--------------|:--------|:--------|:-----------| +| `darwin-x64` | `12.6.228.13` | `3.1.3` | 22 | 2024-06-19 | +| `darwin-arm` | `12.6.228.13` | `3.1.3` | 11.0.23 | 2024-06-19 | + +::: + +1) Create a new project: + +```bash +mkdir sheetjs-javet +cd sheetjs-javet +``` + +2) Download the Javet JAR. There are different archives for different platforms. +The following command runs on `darwin-x64` and `darwin-arm`: + +```bash +curl -LO https://repo1.maven.org/maven2/com/caoccao/javet/javet-macos/3.1.3/javet-macos-3.1.3.jar +``` + +3) Download the SheetJS Standalone script and test file. Save both files in the +project directory: + + + +{`\ +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js +curl -LO https://docs.sheetjs.com/pres.xlsx`} + + +4) Download [`SheetJSJavet.java`](pathname:///v8/SheetJSJavet.java): + +```bash +curl -LO https://docs.sheetjs.com/v8/SheetJSJavet.java +``` + +5) Build and run the Java application: + +```bash +javac -cp ".:javet-macos-3.1.3.jar" SheetJSJavet.java +java -cp ".:javet-macos-3.1.3.jar" SheetJSJavet pres.xlsx +``` + +If the program succeeded, the CSV contents will be printed to console. + ## Snapshots At a high level, V8 snapshots are raw dumps of the V8 engine state. 
It is much diff --git a/docz/static/v8/SheetJSJavet.java b/docz/static/v8/SheetJSJavet.java new file mode 100644 index 0000000..eec11b4 --- /dev/null +++ b/docz/static/v8/SheetJSJavet.java @@ -0,0 +1,30 @@ +import java.nio.file.Files; +import java.nio.file.Paths; +import java.util.Scanner; +import com.caoccao.javet.interop.V8Host; +import com.caoccao.javet.interop.V8Runtime; + +public class SheetJSJavet { + public static void main(String[] args) throws Exception { + /* initialize */ + V8Runtime v8Runtime = V8Host.getV8Instance().createV8Runtime(); + + /* read script file */ + v8Runtime.getExecutor("var global = (function(){ return this; }).call(null);").executeVoid(); + v8Runtime.getExecutor(new Scanner(SheetJSJavet.class.getResourceAsStream("/xlsx.full.min.js")).useDelimiter("\\Z").next()).executeVoid(); + + System.out.println(v8Runtime.getExecutor("'SheetJS Version ' + XLSX.version").executeString()); + + /* read spreadsheet bytes */ + v8Runtime.getGlobalObject().set("i8", Files.readAllBytes(Paths.get(args[0]))); + v8Runtime.getExecutor("var u8 = new Uint8Array(i8.buffer, i8.byteOffset, i8.length);").executeVoid(); + + /* parse workbook */ + v8Runtime.getExecutor("var wb = XLSX.read(u8, {type: 'array'})").executeVoid(); + + /* get first worksheet as CSV */ + v8Runtime.getExecutor("var ws = wb.Sheets[wb.SheetNames[0]];").executeVoid(); + String res = v8Runtime.getExecutor("XLSX.utils.sheet_to_csv(ws)").executeString(); + System.out.println(res); + } +}