TensorFlow NodeJS / Kaioken demos

This commit is contained in:
SheetJS 2024-08-16 07:15:33 -04:00
parent 6836b4b450
commit bbaf012efd
5 changed files with 443 additions and 46 deletions

@ -5,6 +5,11 @@ pagination_prev: demos/index
pagination_next: demos/frontend/index
---
import current from '/version.js';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
<head>
<script src="https://docs.sheetjs.com/tfjs/tf.min.js"></script>
</head>
@ -25,27 +30,151 @@ results back to spreadsheets.
- ["JS Array Interchange"](#js-array-interchange) uses SheetJS to process sheets
and generate rows of objects that can be post-processed.
:::info pass
Live code blocks in this page use the TF.js `4.14.0` standalone build.
For use in web frameworks, the `@tensorflow/tfjs` module should be used.
For use in NodeJS, the native bindings module is `@tensorflow/tfjs-node`.
:::
:::note Tested Deployments
Each browser demo was tested in the following environments:
| Browser | TF.js version | Date |
|:------------|:--------------|:-----------|
| Chrome 122 | `4.14.0` | 2024-04-07 |
| Safari 17.4 | `4.14.0` | 2024-03-23 |
| Chrome 127 | `4.20.0` | 2024-08-16 |
| Safari 17.4 | `4.20.0` | 2024-08-16 |
The NodeJS demo was tested in the following environments:
| NodeJS | TF.js version | Date |
|:---------|:------------------------------|:-----------|
| `22.3.0` | `4.20.0` (`@tensorflow/tfjs`) | 2024-08-16 |
The Kaioken demo was tested in the following environments:
| Kaioken | TF.js version | Date |
|:----------|:--------------|:-----------|
| `0.25.3` | `4.20.0` | 2024-08-16 |
:::
## Installation
#### Standalone Browser Scripts
Live code blocks in this page use the TF.js `4.20.0` standalone build.
Standalone scripts are available on various CDNs including UNPKG. The latest
version can be loaded with the following `SCRIPT` tag.
The [SheetJS Standalone scripts](/docs/getting-started/installation/standalone)
can be loaded after the TF.js standalone script.
<CodeBlock language="html">{`\
<!-- latest version of TF.js -->
<script src="https://unpkg.com/@tensorflow/tfjs@latest/dist/tf.min.js"></script>
<!-- use version ${current} -->
<script lang="javascript" src="https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js"></script>`}
</CodeBlock>
#### Frameworks and Bundlers
[The "Frameworks" section](/docs/getting-started/installation/frameworks) covers
installation with Yarn and other package managers.
`@tensorflow/tfjs` and SheetJS modules should be installed using a package manager:
<Tabs groupId="pm">
<TabItem value="npm" label="npm">
<CodeBlock language="bash">{`\
npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs`}
</CodeBlock>
</TabItem>
<TabItem value="pnpm" label="pnpm">
<CodeBlock language="bash">{`\
pnpm install --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs`}
</CodeBlock>
</TabItem>
<TabItem value="yarn" label="Yarn" default>
<CodeBlock language="bash">{`\
yarn add https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs`}
</CodeBlock>
:::caution pass
Newer releases of Yarn may throw an error:
```
Usage Error: It seems you are trying to add a package using a https:... url; we now require package names to be explicitly specified.
Try running the command again with the package name prefixed: yarn add my-package@https:...
```
The workaround is to prepend the URL with `xlsx@`:
<CodeBlock language="bash">{`\
yarn add xlsx@https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs`}
</CodeBlock>
:::
</TabItem>
</Tabs>
#### NodeJS
The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be
imported in NodeJS scripts that use TF.js.
There are two options for NodeJS:
- the pure JavaScript bindings module is `@tensorflow/tfjs`
- the native bindings module is `@tensorflow/tfjs-node`
:::danger pass
When this demo was last tested, there were issues with the native binding:
```
Error: The specified module could not be found.
\\?\C:\Users\SheetJS\node_modules\@tensorflow\tfjs-node\lib\napi-v8\tfjs_binding.node
```
For general compatibility, the demos use the pure `@tensorflow/tfjs` binding.
:::
<Tabs groupId="pm">
<TabItem value="npm" label="npm">
<CodeBlock language="bash">{`\
npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs @tensorflow/tfjs-node`}
</CodeBlock>
</TabItem>
<TabItem value="pnpm" label="pnpm">
<CodeBlock language="bash">{`\
pnpm install --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs @tensorflow/tfjs-node`}
</CodeBlock>
</TabItem>
<TabItem value="yarn" label="Yarn" default>
<CodeBlock language="bash">{`\
yarn add https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs @tensorflow/tfjs-node`}
</CodeBlock>
:::caution pass
Newer releases of Yarn may throw an error:
```
Usage Error: It seems you are trying to add a package using a https:... url; we now require package names to be explicitly specified.
Try running the command again with the package name prefixed: yarn add my-package@https:...
```
The workaround is to prepend the URL with `xlsx@`:
<CodeBlock language="bash">{`\
yarn add xlsx@https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs @tensorflow/tfjs-node`}
</CodeBlock>
:::
</TabItem>
</Tabs>
## CSV Data Interchange
`tf.data.csv`[^1] generates a Dataset from CSV data. The function expects a URL.
@ -126,6 +255,8 @@ The demo builds a model for predicting MPG from Horsepower data. It:
- builds a model and trains with `fitDataset`[^8]
- predicts MPG from a set of sample inputs and displays results in a table
#### Live Demo
<details>
<summary><b>Live Demo</b> (click to show)</summary>
@ -186,8 +317,8 @@ function SheetJSToTFJSCSV() {
hasHeader: true,
configuredColumnsOnly: true,
columnConfigs:{
"Horsepower": {required: false, default: 0},
"Miles_per_Gallon":{required: false, default: 0, isLabel:true}
"Horsepower": { required: false, default: 0 },
"Miles_per_Gallon": { required: false, default: 0, isLabel: true }
}
});
@ -234,6 +365,111 @@ function SheetJSToTFJSCSV() {
</details>
#### NodeJS Demo
0) Create a new project:
```bash
mkdir sheetjs-tfjs-csv
cd sheetjs-tfjs-csv
npm init -y
```
1) Download [`SheetJSTF.js`](pathname:///tfjs/SheetJSTF.js):
```bash
curl -LO https://docs.sheetjs.com/tfjs/SheetJSTF.js
```
2) Install SheetJS and TF.js dependencies:
<CodeBlock language="bash">{`\
npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs @tensorflow/tfjs-node`}
</CodeBlock>
3) Run the script:
```bash
node SheetJSTF.js
```
#### Kaioken Demo
:::tip pass
[Kaioken](/docs/demos/frontend/kaioken) is a popular front-end framework that
uses patterns that will be familiar to ReactJS developers.
The SheetJS team strongly recommends using Kaioken in projects using TF.js.
:::
1) Create a new site.
```bash
npm create vite sheetjs-tfjs-kaioken -- --template vanilla-ts
cd sheetjs-tfjs-kaioken
npm add --save kaioken
npm add --save vite-plugin-kaioken -D
```
2) Create a new file `vite.config.ts` with the following content:
```ts title="vite.config.ts (create new file)"
import { defineConfig } from "vite"
import kaioken from "vite-plugin-kaioken"
export default defineConfig({
plugins: [kaioken()],
})
```
3) Edit `tsconfig.json` and add `"jsx": "preserve"` within `compilerOptions`:
```js title="tsconfig.json (add highlighted line)"
{
"compilerOptions": {
// highlight-next-line
"jsx": "preserve",
```
4) Replace `src/main.ts` with the following codeblock:
```js title="src/main.ts"
import { mount } from "kaioken";
import App from "./SheetJSTF";
const root = document.getElementById("app");
mount(App, root!);
```
5) Download [`SheetJSTF.tsx`](pathname:///tfjs/SheetJSTF.tsx) to the `src` directory:
```bash
curl -L -o src/SheetJSTF.tsx https://docs.sheetjs.com/tfjs/SheetJSTF.tsx
```
6) Install SheetJS and TF.js dependencies:
<CodeBlock language="bash">{`\
npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz @tensorflow/tfjs`}
</CodeBlock>
7) Start the development server:
```bash
npm run dev
```
The process will display a URL:
```
➜ Local: http://localhost:5173/
```
Open the displayed URL (`http://localhost:5173/` in this example) with a web
browser. Click the "Click to run" button to see the results.
## JS Array Interchange
[The official Linear Regression tutorial](https://www.tensorflow.org/js/tutorials/training/linear_regression)
@ -306,7 +542,7 @@ The SheetJS `sheet_to_json` method[^10] will translate worksheet objects into an
array of row objects:
```js
var aoo = [
const aoo = [
{"sepal length": 5.1, "sepal width": 3.5, ...},
{"sepal length": 4.9, "sepal width": 3, ...},
...
@ -316,18 +552,18 @@ var aoo = [
TF.js and other libraries tend to operate on individual columns, equivalent to:
```js
var sepal_lengths = [5.1, 4.9, ...];
var sepal_widths = [3.5, 3, ...];
const sepal_lengths = [5.1, 4.9, ...];
const sepal_widths = [3.5, 3, ...];
```
When a `tensor2d` can be exported, it will look different from the spreadsheet:
```js
var data_set_2d = [
const data_set_2d = [
[5.1, 4.9, ...],
[3.5, 3, ...],
...
]
// ...
];
```
This is the transpose of how people use spreadsheets!
@ -340,35 +576,35 @@ transposed. To export multiple data sets, the data should be transposed:
```js
/* assuming data is an array of typed arrays */
var aoa = [];
for(var i = 0; i < data.length; ++i) {
for(var j = 0; j < data[i].length; ++j) {
const aoa = [];
for(let i = 0; i < data.length; ++i) {
for(let j = 0; j < data[i].length; ++j) {
if(!aoa[j]) aoa[j] = [];
aoa[j][i] = data[i][j];
}
}
/* aoa can be directly converted to a worksheet object */
var ws = XLSX.utils.aoa_to_sheet(aoa);
const ws = XLSX.utils.aoa_to_sheet(aoa);
```
### Importing Data from a Spreadsheet
`sheet_to_json` with the option `header:1`[^12] will generate a row-major array
`sheet_to_json` with the option `header: 1`[^12] will generate a row-major array
of arrays that can be transposed. However, it is more efficient to walk the
sheet manually:
```js
/* find worksheet range */
var range = XLSX.utils.decode_range(ws['!ref']);
var out = []
const range = XLSX.utils.decode_range(ws['!ref']);
const out = [];
/* walk the columns */
for(var C = range.s.c; C <= range.e.c; ++C) {
for(let C = range.s.c; C <= range.e.c; ++C) {
/* create the typed array */
var ta = new Float32Array(range.e.r - range.s.r + 1);
const ta = new Float32Array(range.e.r - range.s.r + 1);
/* walk the rows */
for(var R = range.s.r; R <= range.e.r; ++R) {
for(let R = range.s.r; R <= range.e.r; ++R) {
/* find the cell, skip it if the cell isn't numeric or boolean */
var cell = ws["!data"] ? (ws["!data"][R]||[])[C] : ws[XLSX.utils.encode_cell({r:R, c:C})];
const cell = ws["!data"] ? (ws["!data"][R]||[])[C] : ws[XLSX.utils.encode_cell({r:R, c:C})];
if(!cell || cell.t != 'n' && cell.t != 'b') continue;
/* assign to the typed array */
ta[R - range.s.r] = cell.v;
@ -393,38 +629,38 @@ const tensor = tf.tensor1d(lengths);
`tf.Tensor` objects can be directly transposed using `transpose`:
```js
var aoo = XLSX.utils.sheet_to_json(worksheet);
const aoo = XLSX.utils.sheet_to_json(worksheet);
// "x" and "y" are the fields we want to pull from the data
var data = aoo.map(row => ([row["x"], row["y"]]));
const data = aoo.map(row => ([row["x"], row["y"]]));
// create a tensor representing two column datasets
var tensor = tf.tensor2d(data).transpose();
const tensor = tf.tensor2d(data).transpose();
// individual columns can be accessed
var col1 = tensor.slice([0,0], [1,tensor.shape[1]]).flatten();
var col2 = tensor.slice([1,0], [1,tensor.shape[1]]).flatten();
const col1 = tensor.slice([0,0], [1,tensor.shape[1]]).flatten();
const col2 = tensor.slice([1,0], [1,tensor.shape[1]]).flatten();
```
For exporting, `stack` can be used to collapse the columns into a linear array:
```js
/* pull data into a Float32Array */
var result = tf.stack([col1, col2]).transpose();
var shape = tensor.shape;
var f32 = tensor.dataSync();
const result = tf.stack([col1, col2]).transpose();
const shape = result.shape;
const f32 = result.dataSync();
/* construct an array of arrays of the data in spreadsheet order */
var aoa = [];
for(var j = 0; j < shape[0]; ++j) {
const aoa = [];
for(let j = 0; j < shape[0]; ++j) {
aoa[j] = [];
for(var i = 0; i < shape[1]; ++i) aoa[j][i] = f32[j * shape[1] + i];
for(let i = 0; i < shape[1]; ++i) aoa[j][i] = f32[j * shape[1] + i];
}
/* add headers to the top */
aoa.unshift(["x", "y"]);
/* generate worksheet */
var worksheet = XLSX.utils.aoa_to_sheet(aoa);
const worksheet = XLSX.utils.aoa_to_sheet(aoa);
```
[^1]: See [`tf.data.csv`](https://js.tensorflow.org/api/latest/#data.csv) in the TensorFlow.js documentation

@ -828,4 +828,5 @@ file named `SheetJSNS.xls`.
[^4]: See [`write` in "Writing Files"](/docs/api/write-options)
[^5]: See [`json_to_sheet` in "Utilities"](/docs/api/utilities/array#array-of-objects-input)
[^6]: See ["Workbook Helpers" in "Utilities"](/docs/api/utilities/wb) for details on `book_new` and `book_append_sheet`.
[^7]: See [`read` in "Reading Files"](/docs/api/parse-options)
[^7]: See [`read` in "Reading Files"](/docs/api/parse-options)
[^8]: See ["Local setup"](https://docs.nativescript.org/setup/#local-setup) in the NativeScript documentation. For Windows and Linux, follow the "Android" instructions. For macOS, follow both the iOS and Android instructions.

@ -0,0 +1,68 @@
const XLSX = require('xlsx');
const tf = require("@tensorflow/tfjs");
//const tf = require("@tensorflow/tfjs-node");
/**
 * Serialize a worksheet to CSV text and expose it through a blob URL.
 *
 * @param {object} worksheet - SheetJS worksheet object to serialize
 * @returns {string} object URL created by `URL.createObjectURL`
 */
function worksheet_to_csv_url(worksheet) {
  /* worksheet -> CSV text -> encoded bytes */
  const bytes = new TextEncoder().encode(XLSX.utils.sheet_to_csv(worksheet));

  /* wrap the bytes in a CSV-typed Blob and hand back its object URL */
  return URL.createObjectURL(new Blob([bytes], { type: "text/csv" }));
}
/*
 * Script entry point.
 *
 * Downloads a sample workbook, converts the first worksheet to CSV, feeds the
 * CSV to `tf.data.csv` through a blob URL, trains a one-unit dense model on
 * the normalized Horsepower / Miles_per_Gallon columns and prints predictions.
 *
 * Per-epoch loss is written to stderr; predicted rows ("horsepower mpg") are
 * written to stdout. Any failure is reported as `ERROR: ...` on stderr.
 */
(async() => {
  /* declared outside `try` so that `finally` can release the blob URL */
  let url;
  try {
    /* fetch file */
    const f = await fetch("https://docs.sheetjs.com/cd.xls");
    /* an HTTP error page is not a workbook; fail before trying to parse it */
    if(!f.ok) throw new Error(`Unable to fetch workbook (HTTP status ${f.status})`);
    const ab = await f.arrayBuffer();

    /* parse file and get first worksheet */
    const wb = XLSX.read(ab);
    const ws = wb.Sheets[wb.SheetNames[0]];

    /* generate blob URL */
    url = worksheet_to_csv_url(ws);

    /* feed to tf.js */
    const dataset = tf.data.csv(url, {
      hasHeader: true,
      configuredColumnsOnly: true,
      columnConfigs: {
        "Horsepower": { required: false, default: 0 },
        "Miles_per_Gallon": { required: false, default: 0, isLabel: true }
      }
    });

    /* pre-process data: object rows -> arrays, keep rows where every value is positive */
    let flat = dataset
      .map(({xs, ys}) => ({ xs: Object.values(xs), ys: Object.values(ys) }))
      .filter(({xs, ys}) => [...xs, ...ys].every(v => v > 0));

    /* normalize manually :( */
    let minX = Infinity, maxX = -Infinity, minY = Infinity, maxY = -Infinity;
    await flat.forEachAsync(({xs, ys}) => {
      minX = Math.min(minX, xs[0]); maxX = Math.max(maxX, xs[0]);
      minY = Math.min(minY, ys[0]); maxY = Math.max(maxY, ys[0]);
    });
    /* no surviving rows or a constant column would make the scaling below divide by zero */
    if(!(maxX > minX) || !(maxY > minY)) throw new Error("Not enough distinct data points to normalize");
    flat = flat.map(({xs, ys}) => ({
      xs: xs.map(v => (v - minX) / (maxX - minX)),
      ys: ys.map(v => (v - minY) / (maxY - minY))
    }));
    flat = flat.batch(32);

    /* build and train model */
    const model = tf.sequential();
    model.add(tf.layers.dense({ inputShape: [1], units: 1 }));
    model.compile({ optimizer: tf.train.sgd(0.000001), loss: 'meanSquaredError' });
    await model.fitDataset(flat, { epochs: 100, callbacks: { onEpochEnd: async (epoch, logs) => {
      console.error(`${epoch}:${logs.loss}`);
    }}});

    /* predict values for 9 evenly spaced normalized inputs */
    const inp = tf.linspace(0, 1, 9);
    const pred = model.predict(inp);
    /* `data()` is the promise-based accessor; `dataSync()` is synchronous and should not be awaited */
    const [xs, ys] = await Promise.all([inp.data(), pred.data()]);

    /* undo the normalization before printing */
    for(let i = 0; i < xs.length; ++i) {
      console.log([xs[i] * (maxX - minX) + minX, ys[i] * (maxY - minY) + minY].join(" "));
    }
  } catch(e) {
    console.error(`ERROR: ${String(e)}`);
  } finally {
    /* the dataset is no longer read once training and prediction are done */
    if(url) URL.revokeObjectURL(url);
  }
})();

@ -0,0 +1,92 @@
import { useState, useCallback } from "kaioken";
import { TensorContainerObject, data, layers, linspace, train, sequential } from "@tensorflow/tfjs";
import { read, utils } from "xlsx";
import type { Tensor, Rank } from "@tensorflow/tfjs";
import type { WorkSheet } from "xlsx";
/*
 * Element shape that the CSV dataset is cast to before it is mapped and
 * filtered in the component below (each element is destructured as
 * `{xs, ys}`).
 * NOTE(review): `xs` presumably carries the feature column(s) and `ys` the
 * label column(s) -- confirm against the `tf.data.csv` documentation.
 */
interface Data extends TensorContainerObject {
xs: Tensor;
ys: Tensor;
}
/* Dataset whose elements are typed as `Data` */
type DSet = data.Dataset<Data>;
/**
 * Kaioken component that trains a one-unit dense model on spreadsheet data.
 *
 * Clicking the button downloads a sample workbook, converts the first
 * worksheet to CSV, feeds the CSV to TF.js through a blob URL, trains on the
 * normalized Horsepower / Miles_per_Gallon columns and renders a table of
 * predicted values. While training, the latest `epoch:loss` pair is shown;
 * failures are shown as `ERROR: ...`.
 */
export default function SheetJSToTFJSCSV() {
  /* status text: current epoch/loss during training, or an error message */
  const [output, setOutput] = useState("");
  /* [horsepower, predicted mpg] pairs rendered in the table */
  const [results, setResults] = useState<[number, number][]>([]);
  /* true while a run is in progress, to block overlapping runs */
  const [disabled, setDisabled] = useState(false);

  /* Serialize a worksheet to CSV and return a blob URL pointing at the text */
  function worksheet_to_csv_url(worksheet: WorkSheet) {
    /* generate CSV */
    const csv = utils.sheet_to_csv(worksheet);

    /* CSV -> Uint8Array -> Blob */
    const u8 = new TextEncoder().encode(csv);
    const blob = new Blob([u8], { type: "text/csv" });

    /* generate a blob URL */
    return URL.createObjectURL(blob);
  }

  const doit = useCallback(async () => {
    setResults([]); setOutput(""); setDisabled(true);
    /* declared outside `try` so that `finally` can release the blob URL */
    let url: string | undefined;
    try {
      /* fetch file */
      const f = await fetch("https://docs.sheetjs.com/cd.xls");
      /* an HTTP error page is not a workbook; fail before trying to parse it */
      if(!f.ok) throw new Error(`Unable to fetch workbook (HTTP status ${f.status})`);
      const ab = await f.arrayBuffer();

      /* parse file and get first worksheet */
      const wb = read(ab);
      const ws = wb.Sheets[wb.SheetNames[0]];

      /* generate blob URL */
      url = worksheet_to_csv_url(ws);

      /* feed to tf.js */
      const dataset = data.csv(url, {
        hasHeader: true,
        configuredColumnsOnly: true,
        columnConfigs: {
          "Horsepower": { required: false, default: 0 },
          "Miles_per_Gallon": { required: false, default: 0, isLabel: true }
        }
      });

      /* pre-process data: object rows -> arrays, keep rows where every value is positive */
      let flat = (dataset as unknown as DSet)
        .map(({xs,ys}) =>({xs: Object.values(xs), ys: Object.values(ys)}))
        .filter(({xs,ys}) => [...xs,...ys].every(v => v>0));

      /* normalize manually :( */
      let minX = Infinity, maxX = -Infinity, minY = Infinity, maxY = -Infinity;
      await flat.forEachAsync(({xs, ys}) => {
        minX = Math.min(minX, xs[0]); maxX = Math.max(maxX, xs[0]);
        minY = Math.min(minY, ys[0]); maxY = Math.max(maxY, ys[0]);
      });
      /* no surviving rows or a constant column would make the scaling below divide by zero */
      if(!(maxX > minX) || !(maxY > minY)) throw new Error("Not enough distinct data points to normalize");
      flat = flat.map(({xs, ys}) => ({xs:xs.map(v => (v-minX)/(maxX - minX)),ys:ys.map(v => (v-minY)/(maxY-minY))}));
      let batch = flat.batch(32);

      /* build and train model */
      const model = sequential();
      model.add(layers.dense({inputShape: [1], units: 1}));
      model.compile({ optimizer: train.sgd(0.000001), loss: 'meanSquaredError' });
      await model.fitDataset(batch, { epochs: 100, callbacks: { onEpochEnd: async (epoch, logs) => {
        setOutput(`${epoch}:${logs?.loss}`);
      }}});

      /* predict values for 9 evenly spaced normalized inputs */
      const inp = linspace(0, 1, 9);
      const pred = model.predict(inp) as Tensor<Rank>;
      /* `data()` is the promise-based accessor; `dataSync()` is synchronous and should not be awaited */
      const [xs, ys] = await Promise.all([inp.data(), pred.data()]);

      /* undo the normalization before displaying */
      setResults(Array.from(xs).map((x, i) => [ x * (maxX - minX) + minX, ys[i] * (maxY - minY) + minY ]));
      setOutput("");
    } catch(e) {
      setOutput(`ERROR: ${String(e)}`);
    } finally {
      /* every click creates a new blob URL; release it once the run is over */
      if(url) URL.revokeObjectURL(url);
      setDisabled(false);
    }
  }, []);

  return ( <>
    <button onclick={doit} disabled={disabled}>Click to run</button><br/>
    {output && <pre>{output}</pre> || <></>}
    {results.length && <table><thead><tr><th>Horsepower</th><th>MPG</th></tr></thead><tbody>
    {results.map((r,i) => <tr key={i}><td>{r[0]}</td><td>{r[1].toFixed(2)}</td></tr>)}
    </tbody></table> || <></>}
  </> );
}

File diff suppressed because one or more lines are too long