From 7f0bda6af676924e757af0b46326bdc4980bed47 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Sun, 30 Jun 2024 23:59:01 -0400 Subject: [PATCH] Perl JE demo refresh --- .../02-examples/06-loader.md | 85 +++++++++++++++++-- docz/docs/03-demos/01-math/21-pandas.md | 10 ++- docz/docs/03-demos/03-net/03-server/index.md | 7 +- docz/docs/03-demos/27-local/02-websql.md | 11 +-- docz/docs/03-demos/27-local/05-clipboard.md | 5 +- .../03-demos/32-extensions/02-chromium.md | 2 +- docz/docs/03-demos/32-extensions/06-osa.md | 23 ++++- docz/docs/03-demos/37-bigdata/01-stream.md | 26 +++--- docz/docs/03-demos/42-engines/01-duktape.md | 6 +- docz/docs/03-demos/42-engines/04-jsc.md | 2 +- docz/docs/03-demos/42-engines/22-perl.md | 57 ++++++++++--- docz/static/loadofsheet/query.mjs | 18 +++- tests/bigdata-stream-nodejs.sh | 21 +++++ tests/server-express-worker.sh | 80 +++++++++++++++++ 14 files changed, 297 insertions(+), 56 deletions(-) create mode 100755 tests/bigdata-stream-nodejs.sh create mode 100755 tests/server-express-worker.sh diff --git a/docz/docs/02-getting-started/02-examples/06-loader.md b/docz/docs/02-getting-started/02-examples/06-loader.md index 660eb80..8a4f7fd 100644 --- a/docz/docs/02-getting-started/02-examples/06-loader.md +++ b/docz/docs/02-getting-started/02-examples/06-loader.md @@ -36,15 +36,43 @@ This demo was tested in the following configurations: | Date | Platform | |:-----------|:--------------------------------------------------------------| | 2024-06-19 | Apple M2 Max 12-Core CPU + 30-Core GPU (32 GB unified memory) | +| 2024-06-28 | NVIDIA RTX 4090 (24 GB VRAM) + i9-10910 (128 GB RAM) | | 2024-06-19 | NVIDIA RTX 4080 SUPER (16 GB VRAM) + i9-10910 (128 GB RAM) | -| 2024-06-19 | NVIDIA RTX 3090 (24 GB VRAM) + Ryzen 9 3900XT (128 GB RAM) | -This explanation was verified against LangChain 0.2. +SheetJS users have verified this demo in other configurations: + +
<details> + <summary><b>Other tested configurations</b> (click to show)</summary> + +| Demo | Platform | +|:------------|:-------------------------------------------------------------| +| LangChainJS | NVIDIA RTX 4070 Ti (12 GB VRAM) + Ryzen 7 5800x (64 GB RAM) | +| LangChainJS | NVIDIA RTX 3090 (24 GB VRAM) + Ryzen 9 3900XT (128 GB RAM) | +| LangChainJS | NVIDIA RTX 3060 (12 GB VRAM) + i5-11400 (32 GB RAM) | +| LangChainJS | NVIDIA RTX 2080 (12 GB VRAM) + i7-9700K (16 GB RAM) | +| LangChainJS | NVIDIA RTX 2060 (6 GB VRAM) + Ryzen 5 3600 (32 GB RAM) | +| LangChainJS | NVIDIA GTX 1080 (8 GB VRAM) + Ryzen 7 5800x (64 GB RAM) | + +</details>
+ +Special thanks to: + +- [`@Rasmus`](https://tengstedt.dev/) +- [Ben Halverson](https://benhalverson.dev/) +- [Navid Nami](https://github.com/CaseoJKL) +- [`@Smor`](https://smor.dev/) +- [`@timbr`](https://timbr.dev/) ::: ## CSV Loader +:::note pass + +This explanation was verified against LangChain 0.2. + +::: + Document loaders generate data objects ("documents") and associated metadata from data sources. @@ -638,12 +666,22 @@ export default class LoadOfSheet extends BufferLoader { The demo performs the query "Which rows have over 40 miles per gallon?" against a [sample cars dataset](pathname:///cd.xls) and displays the results. +:::note pass + +SheetJS team members have tested this demo on Windows 10 and Windows 11 using +PowerShell and Ollama for Windows. + +SheetJS users have also tested this demo within Windows Subsystem for Linux. + +::: + :::caution pass This demo was tested using the ChatQA-1.5 model[^9] in Ollama[^10]. -The tested model requires 9.2GB VRAM. It is strongly recommended to run the demo -on a newer Apple Silicon Mac or a PC with an Nvidia GPU with at least 12GB VRAM. +The tested model used up to 9.2GB VRAM. It is strongly recommended to run the +demo on a newer Apple Silicon Mac or a PC with an Nvidia GPU with at least 12GB +VRAM. SheetJS users have tested the demo on systems with as little as 6GB VRAM. ::: @@ -665,16 +703,33 @@ curl -LO https://docs.sheetjs.com/loadofsheet/query.mjs curl -LO https://docs.sheetjs.com/loadofsheet/loadofsheet.mjs ``` +:::note pass + +In PowerShell, the command may fail with a parameter error: + +``` +Invoke-WebRequest : A parameter cannot be found that matches parameter name 'LO'. 
+``` + +`curl.exe` must be invoked directly: + +```bash +curl.exe -LO https://docs.sheetjs.com/loadofsheet/query.mjs +curl.exe -LO https://docs.sheetjs.com/loadofsheet/loadofsheet.mjs +``` + +::: + 2) Install the SheetJS NodeJS module: {`\ npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz`} -3) Install LangChain and HNSWLib dependencies: +3) Install dependencies: ```bash -npm i --save @langchain/community@0.2.0 @langchain/core@0.2.6 langchain@0.2.5 hnswlib-node@3.0.0 peggy@3.0.2 +npm i --save @langchain/community@0.2.0 @langchain/core@0.2.6 langchain@0.2.5 peggy@3.0.2 ``` 4) Download the [cars dataset](pathname:///cd.xls): @@ -683,6 +738,22 @@ npm i --save @langchain/community@0.2.0 @langchain/core@0.2.6 langchain@0.2.5 hn curl -LO https://docs.sheetjs.com/cd.xls ``` +:::note pass + +In PowerShell, the command may fail with a parameter error: + +``` +Invoke-WebRequest : A parameter cannot be found that matches parameter name 'LO'. +``` + +`curl.exe` must be invoked directly: + +```bash +curl.exe -LO https://docs.sheetjs.com/cd.xls +``` + +::: + 5) Install the `llama3-chatqa:8b-v1.5-q8_0` model using Ollama: ```bash @@ -705,7 +776,7 @@ node query.mjs The demo performs the query "Which rows have over 40 miles per gallon?". 
It will print the following nine results: -```js title="Expected output" +```js title="Expected output (order of lines may differ)" { Name: 'volkswagen rabbit custom diesel', MPG: 43.1 } { Name: 'vw rabbit c (diesel)', MPG: 44.3 } { Name: 'renault lecar deluxe', MPG: 40.9 } diff --git a/docz/docs/03-demos/01-math/21-pandas.md b/docz/docs/03-demos/01-math/21-pandas.md index ed56f92..11d2dde 100644 --- a/docz/docs/03-demos/01-math/21-pandas.md +++ b/docz/docs/03-demos/01-math/21-pandas.md @@ -41,7 +41,7 @@ This demo was tested in the following deployments: | Architecture | JS Engine | Pandas | Python | Date | |:-------------|:----------------|:-------|:-------|:-----------| | `darwin-x64` | Duktape `2.7.0` | 2.2.1 | 3.12.2 | 2024-03-15 | -| `darwin-arm` | Duktape `2.7.0` | 2.0.3 | 3.11.7 | 2024-02-13 | +| `darwin-arm` | Duktape `2.7.0` | 2.2.2 | 3.12.3 | 2024-06-30 | | `win10-x64` | Duktape `2.7.0` | 2.2.1 | 3.12.2 | 2024-03-25 | | `win11-arm` | Duktape `2.7.0` | 2.2.2 | 3.11.5 | 2024-06-20 | | `linux-x64` | Duktape `2.7.0` | 1.5.3 | 3.11.3 | 2024-03-21 | @@ -520,7 +520,7 @@ This demo was tested in the following deployments: | Architecture | JS Engine | Polars | Python | Date | |:-------------|:----------------|:--------|:-------|:-----------| | `darwin-x64` | Duktape `2.7.0` | 0.20.15 | 3.12.2 | 2024-03-15 | -| `darwin-arm` | Duktape `2.7.0` | 0.20.7 | 3.11.7 | 2024-02-13 | +| `darwin-arm` | Duktape `2.7.0` | 0.20.31 | 3.12.3 | 2024-06-30 | | `win10-x64` | Duktape `2.7.0` | 0.20.16 | 3.12.2 | 2024-03-25 | | `win10-arm` | Duktape `2.7.0` | 0.20.31 | 3.11.5 | 2024-06-20 | | `linux-x64` | Duktape `2.7.0` | 0.20.16 | 3.11.3 | 2024-03-21 | @@ -612,6 +612,12 @@ sudo apt-get install python3.11-venv - `venv` is included in the `python` package in Arch Linux-based platforms. 
+- macOS systems with a Python version from Homebrew: + +```bash +brew install pyenv-virtualenv +``` + After installing `venv`, the following commands set up the virtual environment: ```bash diff --git a/docz/docs/03-demos/03-net/03-server/index.md b/docz/docs/03-demos/03-net/03-server/index.md index 720894c..c825005 100644 --- a/docz/docs/03-demos/03-net/03-server/index.md +++ b/docz/docs/03-demos/03-net/03-server/index.md @@ -129,8 +129,9 @@ This demo was tested in the following environments: | NodeJS | Date | Dependencies | |:----------|:-----------|:------------------------------------| -| `18.19.1` | 2024-02-23 | ExpressJS 4.18.2 + Formidable 2.1.2 | -| `20.11.1` | 2024-02-23 | ExpressJS 4.18.2 + Formidable 2.1.2 | +| `18.20.3` | 2024-06-30 | ExpressJS 4.19.2 + Formidable 2.1.2 | +| `20.15.0` | 2024-06-30 | ExpressJS 4.19.2 + Formidable 2.1.2 | +| `22.3.0` | 2024-06-30 | ExpressJS 4.19.2 + Formidable 2.1.2 | ::: @@ -145,7 +146,7 @@ echo '{ "type": "module" }' > package.json 1) Install the dependencies: {`\ -npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz express@4.18.2 formidable@2.1.2`} +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz express@4.19.2 formidable@2.1.2`} 2) Create a worker script `worker.js` that listens for messages. When a message diff --git a/docz/docs/03-demos/27-local/02-websql.md b/docz/docs/03-demos/27-local/02-websql.md index cf6f995..5074fce 100644 --- a/docz/docs/03-demos/27-local/02-websql.md +++ b/docz/docs/03-demos/27-local/02-websql.md @@ -139,9 +139,6 @@ db.readTransaction(tx => Using `book_new` and `book_append_sheet`[^5], a workbook object can be created. This workbook is typically exported to the filesystem with `writeFile`[^6]. 
- - - ## Live Demo :::note Tested Deployments @@ -150,15 +147,15 @@ This browser demo was tested in the following environments: | Browser | Date | |:------------|:-----------| -| Chrome 118 | 2024-02-11 | +| Chrome 118 | 2024-06-29 | Browsers that do not support WebSQL will throw errors: | Browser | Date | Error Message | |:------------|:-----------|:------------------------------| -| Chrome 120 | 2024-02-11 | `openDatabase is not defined` | -| Safari 17.4 | 2024-03-15 | `Web SQL is deprecated` | -| Firefox 122 | 2024-03-15 | `openDatabase is not defined` | +| Chrome 126 | 2024-06-29 | `openDatabase is not defined` | +| Safari 17.1 | 2024-06-29 | `Web SQL is deprecated` | +| Firefox 127 | 2024-06-29 | `openDatabase is not defined` | ::: diff --git a/docz/docs/03-demos/27-local/05-clipboard.md b/docz/docs/03-demos/27-local/05-clipboard.md index f05140f..fbc34e7 100644 --- a/docz/docs/03-demos/27-local/05-clipboard.md +++ b/docz/docs/03-demos/27-local/05-clipboard.md @@ -22,9 +22,8 @@ Each browser demo was tested in the following environments: | Browser | Date | Notes |:------------|:-----------|:-------------------------| -| Chrome 121 | 2024-02-21 | | -| Safari 17.3 | 2024-02-21 | `text/rtf` not supported | -| Brave 1.59 | 2024-02-21 | | +| Chrome 126 | 2024-06-30 | | +| Safari 17.3 | 2024-06-30 | `text/rtf` not supported | ::: diff --git a/docz/docs/03-demos/32-extensions/02-chromium.md b/docz/docs/03-demos/32-extensions/02-chromium.md index 5b0a3c1..8a8df01 100644 --- a/docz/docs/03-demos/32-extensions/02-chromium.md +++ b/docz/docs/03-demos/32-extensions/02-chromium.md @@ -171,7 +171,7 @@ npm install "version": "0.0.0", "author": "**", // highlight-next-line - "description": "Sample Extension using SheetJS to interact with Chrome", + "description": "Sample Extension using SheetJS to interact with Chrome", ``` 3) Edit `manifest.ts` and add to the `permissions` array: diff --git a/docz/docs/03-demos/32-extensions/06-osa.md 
b/docz/docs/03-demos/32-extensions/06-osa.md index 2c25da7..fbacac4 100644 --- a/docz/docs/03-demos/32-extensions/06-osa.md +++ b/docz/docs/03-demos/32-extensions/06-osa.md @@ -18,14 +18,18 @@ OSA originally supported the "AppleScript" language. Modern macOS releases [SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing data from spreadsheets. +This demo uses SheetJS in OSA Scripts to pull data from a spreadsheet. We'll +explore how to use SheetJS libraries in AppleScript and JavaScript scripts. The +["Complete Demo"](#complete-demo) parses workbooks and generates CSV rows. + :::note Tested Environments This demo was tested in the following environments: -| macOS | Language | Date | -|:---------|:------------------|:-----------| -| `14.3.1` | AppleScript (OSA) | 2024-02-21 | -| `14.3.1` | JavaScript (JXA) | 2024-02-21 | +| macOS | Language | Date | +|:-------|:------------------|:-----------| +| `14.5` | AppleScript (OSA) | 2024-06-30 | +| `14.5` | JavaScript (JXA) | 2024-06-30 | ::: @@ -263,5 +267,16 @@ chmod +x sheetosa.scpt +If successful, CSV rows from the first worksheet will be printed: + +``` +Name,Index +Bill Clinton,42 +GeorgeW Bush,43 +Barack Obama,44 +Donald Trump,45 +Joseph Biden,46 +``` + [^1]: See ["Introduction to AppleScript Overview"](https://developer.apple.com/library/archive/documentation/AppleScript/Conceptual/AppleScriptX/AppleScriptX.html) in the Apple Developer documentation for more details. [^2]: See ["Introduction to JavaScript for Automation Release Notes"](https://developer.apple.com/library/archive/releasenotes/InterapplicationCommunication/RN-JavaScriptForAutomation/Articles/Introduction.html) in the Apple Developer documentation for more details. 
diff --git a/docz/docs/03-demos/37-bigdata/01-stream.md b/docz/docs/03-demos/37-bigdata/01-stream.md index de4462e..3f405b0 100644 --- a/docz/docs/03-demos/37-bigdata/01-stream.md +++ b/docz/docs/03-demos/37-bigdata/01-stream.md @@ -145,17 +145,17 @@ This demo was tested in the following deployments: | Node Version | Date | Node Status when tested | |:-------------|:-----------|:------------------------| -| `0.12.18` | 2024-02-23 | End-of-Life | -| `4.9.1` | 2024-02-23 | End-of-Life | -| `6.17.1` | 2024-02-23 | End-of-Life | -| `8.17.0` | 2024-02-23 | End-of-Life | -| `10.24.1` | 2024-02-23 | End-of-Life | -| `12.22.12` | 2024-02-23 | End-of-Life | -| `14.21.3` | 2024-02-23 | End-of-Life | -| `16.20.2` | 2024-02-23 | End-of-Life | -| `18.19.1` | 2024-02-23 | Maintenance LTS | -| `20.11.1` | 2024-02-23 | Active LTS | -| `22.0.0` | 2024-04-25 | Current | +| `0.12.18` | 2024-06-30 | End-of-Life | +| `4.9.1` | 2024-06-30 | End-of-Life | +| `6.17.1` | 2024-06-30 | End-of-Life | +| `8.17.0` | 2024-06-30 | End-of-Life | +| `10.24.1` | 2024-06-30 | End-of-Life | +| `12.22.12` | 2024-06-30 | End-of-Life | +| `14.21.3` | 2024-06-30 | End-of-Life | +| `16.20.2` | 2024-06-30 | End-of-Life | +| `18.20.3` | 2024-06-30 | Maintenance LTS | +| `20.15.0` | 2024-06-30 | Active LTS | +| `22.3.0` | 2024-06-30 | Current | While streaming methods work in End-of-Life versions of NodeJS, production deployments should upgrade to a Current or LTS version of NodeJS. 
@@ -220,8 +220,8 @@ Each browser demo was tested in the following environments: | Browser | Date | |:------------|:-----------| -| Chrome 121 | 2024-02-23 | -| Safari 17.3 | 2024-02-23 | +| Chrome 126 | 2024-06-30 | +| Safari 17.3 | 2024-06-30 | ::: diff --git a/docz/docs/03-demos/42-engines/01-duktape.md b/docz/docs/03-demos/42-engines/01-duktape.md index f70f40f..bf8d6ea 100644 --- a/docz/docs/03-demos/42-engines/01-duktape.md +++ b/docz/docs/03-demos/42-engines/01-duktape.md @@ -405,7 +405,7 @@ This demo was tested in the following deployments: | Architecture | Version | PHP | Date | |:-------------|:--------|:---------|:-----------| | `darwin-x64` | `2.7.0` | `8.3.4` | 2024-03-15 | -| `darwin-arm` | `2.7.0` | `8.3.2` | 2024-02-13 | +| `darwin-arm` | `2.7.0` | `8.3.8` | 2024-06-30 | | `linux-x64` | `2.7.0` | `8.2.7` | 2024-03-21 | | `linux-arm` | `2.7.0` | `8.2.18` | 2024-05-25 | @@ -547,7 +547,7 @@ This demo was tested in the following deployments: | Architecture | Version | Python | Date | |:-------------|:--------|:---------|:-----------| | `darwin-x64` | `2.7.0` | `3.12.2` | 2024-03-15 | -| `darwin-arm` | `2.7.0` | `3.11.7` | 2024-02-13 | +| `darwin-arm` | `2.7.0` | `3.12.3` | 2024-06-30 | | `linux-x64` | `2.7.0` | `3.11.3` | 2024-03-21 | | `linux-arm` | `2.7.0` | `3.11.2` | 2024-05-25 | @@ -1015,7 +1015,7 @@ This demo was tested in the following deployments: | Architecture | Version | Date | |:-------------|:--------|:-----------| | `darwin-x64` | `2.2.0` | 2024-03-15 | -| `darwin-arm` | `2.2.0` | 2024-02-13 | +| `darwin-arm` | `2.2.0` | 2024-06-30 | | `linux-x64` | `2.2.0` | 2024-03-21 | | `linux-arm` | `2.2.0` | 2024-05-25 | diff --git a/docz/docs/03-demos/42-engines/04-jsc.md b/docz/docs/03-demos/42-engines/04-jsc.md index 19de00f..5ea8604 100644 --- a/docz/docs/03-demos/42-engines/04-jsc.md +++ b/docz/docs/03-demos/42-engines/04-jsc.md @@ -319,7 +319,7 @@ Swift on MacOS supports JavaScriptCore without additional dependencies. 
| Architecture | Swift | Date | |:-------------|:--------|:-----------| | `darwin-x64` | `5.10` | 2024-04-04 | -| `darwin-arm` | `5.9.2` | 2024-02-21 | +| `darwin-arm` | `5.10` | 2024-06-30 | **Compiled** diff --git a/docz/docs/03-demos/42-engines/22-perl.md b/docz/docs/03-demos/42-engines/22-perl.md index a6a3768..0e798a6 100644 --- a/docz/docs/03-demos/42-engines/22-perl.md +++ b/docz/docs/03-demos/42-engines/22-perl.md @@ -1,5 +1,6 @@ --- -title: Perl + JE +title: Data Processing with JE +sidebar_label: Perl + JE pagination_prev: demos/bigdata/index pagination_next: solutions/input --- @@ -14,17 +15,35 @@ C engine like [`JavaScript::Duktape`](/docs/demos/engines/duktape#perl) ::: -JE is a pure-Perl JavaScript engine. +[`JE`](https://metacpan.org/pod/JE) is a pure-Perl JavaScript engine. -The [ExtendScript build](/docs/getting-started/installation/extendscript) can be -parsed and evaluated in a JE context. +[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing +data from spreadsheets. +This demo uses JE and SheetJS to pull data from a spreadsheet and print CSV +rows. We'll explore how to load SheetJS in a JE context and process spreadsheets +from Perl scripts. + +The ["Complete Example"](#complete-example) section includes a complete script +for reading data from XLS files, printing CSV rows, and writing FODS workbooks. ## Integration Details -The engine deviates from ES3. Modifying prototypes can fix some behavior: +The [SheetJS ExtendScript build](/docs/getting-started/installation/extendscript) +can be parsed and evaluated in a JE context. -```js +The engine deviates from ES3. Modifying prototypes can fix some behavior: + +
<details> + <summary><b>Required shim to support JE</b> (click to show)</summary> + +The following features are implemented: + +- simple string `charCodeAt` +- Number `charCodeAt` (to work around string `split` bug) +- String `match` (to work around a bug when there are no matches) + +```js title="Required shim to support JE" /* String#charCodeAt is missing */ var string = ""; for(var i = 0; i < 256; ++i) string += String.fromCharCode(i); @@ -45,6 +64,8 @@ String.prototype.match = function(p) { }; ``` +</details>
+ When loading the ExtendScript build, the BOM must be removed: ```perl @@ -106,9 +127,9 @@ This demo was tested in the following deployments: | Architecture | Version | Date | |:-------------|:--------|:-----------| -| `darwin-x64` | `0.066` | 2024-02-13 | +| `darwin-x64` | `0.066` | 2024-06-29 | | `darwin-arm` | `0.066` | 2024-05-25 | -| `linux-x64` | `0.066` | 2024-02-13 | +| `linux-x64` | `0.066` | 2024-06-29 | | `linux-arm` | `0.066` | 2024-05-25 | ::: @@ -119,7 +140,23 @@ This demo was tested in the following deployments: cpan install JE File::Slurp ``` -2) Download the [ExtendScript build](/docs/getting-started/installation/extendscript): +:::note pass + +There were permissions errors in some test runs: + +``` +mkdir /Library/Perl/5.30/File: Permission denied at /System/Library/Perl/5.30/ExtUtils/Install.pm line 489. +``` + +On macOS, the commands should be run through `sudo`: + +```bash +sudo cpan install JE File::Slurp +``` + +::: + +2) Download the [SheetJS ExtendScript build](/docs/getting-started/installation/extendscript): {`\ curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.extendscript.js`} @@ -131,7 +168,7 @@ curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.extendscript. 
curl -LO https://docs.sheetjs.com/perl/SheetJE.pl ``` -4) Download a test file and run: +4) Download the [test file](pathname:///cd.xls) and run: ```bash curl -LO https://docs.sheetjs.com/cd.xls diff --git a/docz/static/loadofsheet/query.mjs b/docz/static/loadofsheet/query.mjs index 676bc00..e6d4e96 100644 --- a/docz/static/loadofsheet/query.mjs +++ b/docz/static/loadofsheet/query.mjs @@ -1,7 +1,9 @@ -import { existsSync } from 'fs'; +/* NOTE: hnswlib-node@3.0.0 does not install on a fresh Windows 11 setup */ +// import { existsSync } from 'fs'; import { ChatOllama } from "@langchain/community/chat_models/ollama"; import { OllamaEmbeddings } from "@langchain/community/embeddings/ollama" -import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; +// import { HNSWLib } from "@langchain/community/vectorstores/hnswlib"; +import { MemoryVectorStore } from "langchain/vectorstores/memory"; import { SelfQueryRetriever } from "langchain/retrievers/self_query"; import { FunctionalTranslator } from "@langchain/core/structured_query"; @@ -9,12 +11,19 @@ import LoadOfSheet from "./loadofsheet.mjs"; const modelName = "llama3-chatqa:8b-v1.5-q8_0"; +console.log(`Using model ${modelName}`); + const model = new ChatOllama({ baseUrl: "http://localhost:11434", model: modelName }); const embeddings = new OllamaEmbeddings({model: modelName}); +console.time("load of sheet"); const loader = new LoadOfSheet("./cd.xls"); const docs = await loader.load(); +console.timeEnd("load of sheet"); +console.time("vector store"); +const vectorstore = await MemoryVectorStore.fromDocuments(docs, embeddings); +/* const vectorstore = await (async() => { if(!existsSync("store/hnswlib.index")) { const vectorstore = await HNSWLib.fromDocuments(docs, embeddings); @@ -23,7 +32,10 @@ const vectorstore = await (async() => { } return await HNSWLib.load("store", embeddings); })(); +*/ +console.timeEnd("vector store"); +console.time("query"); const selfQueryRetriever = SelfQueryRetriever.fromLLM({ llm: 
model, vectorStore: vectorstore, @@ -36,4 +48,6 @@ const selfQueryRetriever = SelfQueryRetriever.fromLLM({ const res = await selfQueryRetriever.invoke( "Which rows have over 40 miles per gallon?" ); +console.timeEnd("query"); + res.forEach(({metadata}) => { console.log({ Name: metadata.Name, MPG: metadata.Miles_per_Gallon }); }); diff --git a/tests/bigdata-stream-nodejs.sh b/tests/bigdata-stream-nodejs.sh new file mode 100755 index 0000000..95cb996 --- /dev/null +++ b/tests/bigdata-stream-nodejs.sh @@ -0,0 +1,21 @@ +#!/bin/bash +# https://docs.sheetjs.com/docs/demos/bigdata/stream#nodejs + +cd /tmp +rm -rf sheetjs-stream + +mkdir sheetjs-stream +cd sheetjs-stream + +npm i --save https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz + +curl -LO https://docs.sheetjs.com/stream/SheetJSNodeJStream.js + +curl -LO https://docs.sheetjs.com/pres.xlsx + +# this version uses `n` to cycle through node versions +for n in 0.12 4 6 8 10 12 14 16 18 20 22; do + sudo n $n + node --version + node SheetJSNodeJStream.js pres.xlsx +done diff --git a/tests/server-express-worker.sh b/tests/server-express-worker.sh new file mode 100755 index 0000000..7bca13c --- /dev/null +++ b/tests/server-express-worker.sh @@ -0,0 +1,80 @@ +#!/bin/bash +# https://docs.sheetjs.com/docs/demos/net/server#worker-threads + +cd /tmp +rm -rf sheetjs-worker + +mkdir sheetjs-worker +cd sheetjs-worker +echo '{ "type": "module" }' > package.json + +npm i --save https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz express@4.19.2 formidable@2.1.2 + +cat >worker.js <<EOF +/* load the worker_threads module */ +import { parentPort } from 'node:worker_threads'; + +/* load the SheetJS module and hook to FS */ +import { set_fs, readFile, write } from 'xlsx'; +import * as fs from 'fs'; +set_fs(fs); + +/* the server will send a message with the \`path\` field */ +parentPort.on('message', (task) => { + // read file + const wb = readFile(task.path, { dense: true }); + // send back XLSX + parentPort.postMessage(write(wb, { type: "buffer", bookType: "xlsx" })); + // remove file + fs.unlink(task.path, ()=>{}); +}); +EOF + +curl -LO https://docs.sheetjs.com/server/worker_pool.js +curl -LO https://docs.sheetjs.com/pres.numbers + +cat >main.mjs <<EOF +/* load dependencies */ +import os from 'node:os'; +import process from 'node:process' +import express from 'express'; +import formidable from 'formidable'; + +/* load worker pool */ +import WorkerPool from './worker_pool.js'; + +const pool = new WorkerPool(os.cpus().length); +process.on("exit", () => { pool.close(); }) + +/* create server */ +const app = express(); +app.post('/', (req, res, next) => { + // 
parse body + const form = formidable({}); + form.parse(req, (err, fields, files) => { + // look for "upload" field + if(err) return next(err); + if(!files["upload"]) return next(new Error("missing \`upload\` file")); + + // send a message to the worker with the path to the uploaded file + pool.runTask({ path: files["upload"].filepath }, (err, result) => { + if(err) return next(err); + // send the file back as an attachment + res.attachment("SheetJSPool.xlsx"); + res.status(200).end(result); + }); + }); +}); + +// start server +app.listen(7262, () => { console.log(\`Example app listening on port 7262\`); }); +EOF + +# this version uses `n` to cycle through node versions +for n in 18 20 22; do + sudo n $n + node --version + npx -y concurrently -k 'node main.mjs' 'sleep 2; curl -X POST -F upload=@pres.numbers http://localhost:7262/ -J -O' + npx -y xlsx-cli SheetJSPool.xlsx + rm -f SheetJSPool.xlsx +done