);
+}
+```
+
+
+
+In the other direction, `XLSX.read` will readily parse CSV exports.
+
+## JS Array Interchange
+
+[The official Linear Regression tutorial](https://www.tensorflow.org/js/tutorials/training/linear_regression)
+loads data from a JSON file:
+
+```json
+[
+ {
+ "Name": "chevrolet chevelle malibu",
+ "Miles_per_Gallon": 18,
+ "Cylinders": 8,
+ "Displacement": 307,
+ "Horsepower": 130,
+ "Weight_in_lbs": 3504,
+ "Acceleration": 12,
+ "Year": "1970-01-01",
+ "Origin": "USA"
+ },
+ {
+ "Name": "buick skylark 320",
+ "Miles_per_Gallon": 15,
+ "Cylinders": 8,
+ "Displacement": 350,
+ "Horsepower": 165,
+ "Weight_in_lbs": 3693,
+ "Acceleration": 11.5,
+ "Year": "1970-01-01",
+ "Origin": "USA"
+ },
+ // ...
+]
+```
+
+In real use cases, data is stored in [spreadsheets](https://sheetjs.com/cd.xls):
+
+![cd.xls screenshot](pathname:///files/cd.png)
+
+Following the tutorial, the data fetching method is easily adapted. Differences
+from the official example are highlighted below:
+
+```js
+/**
+ * Get the car data reduced to just the variables we are interested
+ * and cleaned of missing data.
+ */
+async function getData() {
+ // highlight-start
+ /* fetch file */
+ const carsDataResponse = await fetch('https://sheetjs.com/cd.xls');
+ /* get file data (ArrayBuffer) */
+ const carsDataAB = await carsDataResponse.arrayBuffer();
+ /* parse */
+ const carsDataWB = XLSX.read(carsDataAB);
+ /* get first worksheet */
+ const carsDataWS = carsDataWB.Sheets[carsDataWB.SheetNames[0]];
+ /* generate array of JS objects */
+ const carsData = XLSX.utils.sheet_to_json(carsDataWS);
+ // highlight-end
+ const cleaned = carsData.map(car => ({
+ mpg: car.Miles_per_Gallon,
+ horsepower: car.Horsepower,
+ }))
+ .filter(car => (car.mpg != null && car.horsepower != null));
+
+ return cleaned;
+}
+```
+
+## Low-Level Operations
+
+:::caution
+
+While it is more efficient to use low-level operations, JS or CSV interchange
+is strongly recommended when possible.
+
+:::
+
+### Data Transposition
+
+A typical dataset in a spreadsheet will start with one header row and represent
+each data record in its own row. For example, the Iris dataset might look like
+
+![Iris dataset](pathname:///files/iris.png)
+
+`XLSX.utils.sheet_to_json` will translate this into an array of row objects:
+
+```js
+var aoo = [
+ {"sepal length": 5.1, "sepal width": 3.5, ...},
+ {"sepal length": 4.9, "sepal width": 3, ...},
+ ...
+];
+```
+
+TF.js and other libraries tend to operate on individual columns, equivalent to:
+
+```js
+var sepal_lengths = [5.1, 4.9, ...];
+var sepal_widths = [3.5, 3, ...];
+```
+
+When a 2D tensor can be exported, it will look different from the spreadsheet:
+
+```js
+var data_set_2d = [
+ [5.1, 4.9, ...],
+ [3.5, 3, ...],
+ ...
+]
+```
+
+This is the transpose of how people use spreadsheets!
+
+#### Typed Arrays and Columns
+
+A single typed array can be converted to a pure JS array with `Array.from`:
+
+```js
+var column = Array.from(dataset_typedarray);
+```
+
+Similarly, `Float32Array.from` generates a typed array from a normal array:
+
+```js
+var dataset = Float32Array.from(column);
+```
+
+### Exporting Datasets to a Worksheet
+
+`XLSX.utils.aoa_to_sheet` can generate a worksheet from an array of arrays.
+ML libraries typically provide APIs to pull an array of arrays, but it will
+be transposed relative to a row-major array of arrays. To export multiple data
+sets, "transpose" the data:
+
+```js
+/* assuming data is an array of typed arrays */
+var aoa = [];
+for(var i = 0; i < data.length; ++i) {
+ for(var j = 0; j < data[i].length; ++j) {
+ if(!aoa[j]) aoa[j] = [];
+ aoa[j][i] = data[i][j];
+ }
+}
+/* aoa can be directly converted to a worksheet object */
+var ws = XLSX.utils.aoa_to_sheet(aoa);
+```
+
+### Importing Data from a Spreadsheet
+
+`sheet_to_json` with the option `header:1` will generate a row-major array of
+arrays that can be transposed. However, it is more efficient to walk the sheet
+manually:
+
+```js
+/* find worksheet range */
+var range = XLSX.utils.decode_range(ws['!ref']);
+var out = []
+/* walk the columns */
+for(var C = range.s.c; C <= range.e.c; ++C) {
+ /* create the typed array */
+ var ta = new Float32Array(range.e.r - range.s.r + 1);
+ /* walk the rows */
+ for(var R = range.s.r; R <= range.e.r; ++R) {
+ /* find the cell, skip it if the cell isn't numeric or boolean */
+ var cell = ws[XLSX.utils.encode_cell({r:R, c:C})];
+ if(!cell || cell.t != 'n' && cell.t != 'b') continue;
+ /* assign to the typed array */
+ ta[R - range.s.r] = cell.v;
+ }
+ out.push(ta);
+}
+```
+
+If the data set has a header row, the loop can be adjusted to skip that row.
+
+### TF.js Tensors
+
+A single `Array#map` can pull individual named fields from the result, which
+can be used to construct TensorFlow.js tensor objects:
+
+```js
+const aoo = XLSX.utils.sheet_to_json(worksheet);
+const lengths = aoo.map(row => row["sepal length"]);
+const tensor = tf.tensor1d(lengths);
+```
+
+`tf.Tensor` objects can be directly transposed using `transpose`:
+
+```js
+var aoo = XLSX.utils.sheet_to_json(worksheet);
+// "x" and "y" are the fields we want to pull from the data
+var data = aoo.map(row => ([row["x"], row["y"]]));
+
+// create a tensor representing two column datasets
+var tensor = tf.tensor2d(data).transpose();
+
+// individual columns can be accessed
+var col1 = tensor.slice([0,0], [1,tensor.shape[1]]).flatten();
+var col2 = tensor.slice([1,0], [1,tensor.shape[1]]).flatten();
+```
+
+For exporting, `stack` can be used to linearize the columns:
+
+```js
+/* pull data into a Float32Array */
+var result = tf.stack([col1, col2]).transpose();
+var shape = result.shape;
+var f32 = result.dataSync();
+
+/* construct an array of arrays of the data in spreadsheet order */
+var aoa = [];
+for(var j = 0; j < shape[0]; ++j) {
+ aoa[j] = [];
+ for(var i = 0; i < shape[1]; ++i) aoa[j][i] = f32[j * shape[1] + i];
+}
+
+/* add headers to the top */
+aoa.unshift(["x", "y"]);
+
+/* generate worksheet */
+var worksheet = XLSX.utils.aoa_to_sheet(aoa);
+```
+
diff --git a/docz/docs/04-getting-started/03-demos/index.md b/docz/docs/04-getting-started/03-demos/index.md
index a142e57..1a86f40 100644
--- a/docz/docs/04-getting-started/03-demos/index.md
+++ b/docz/docs/04-getting-started/03-demos/index.md
@@ -11,7 +11,7 @@ The demo projects include small runnable examples and short explainers.
- [`XMLHttpRequest and fetch`](https://github.com/SheetJS/SheetJS/tree/master/demos/xhr/)
- [`Clipboard Data`](./clipboard)
-- [`Typed Arrays and Math`](https://github.com/SheetJS/SheetJS/tree/master/demos/array/)
+- [`Typed Arrays for Machine Learning`](./ml)
### Frameworks
diff --git a/docz/docs/06-solutions/01-input.md b/docz/docs/06-solutions/01-input.md
index 459f6cc..ed5fb8a 100644
--- a/docz/docs/06-solutions/01-input.md
+++ b/docz/docs/06-solutions/01-input.md
@@ -732,13 +732,15 @@ the optional `opts` argument in more detail.
["Complete Example"](../example) contains a detailed example "Get Data
from a JSON Endpoint and Generate a Workbook"
-
[`x-spreadsheet`](https://github.com/myliang/x-spreadsheet) is an interactive
data grid for previewing and modifying structured data in the web browser. The
[demo](https://github.com/sheetjs/sheetjs/tree/master/demos/xspreadsheet)
includes a sample script with the `xtos` function for converting from
x-spreadsheet to a workbook. Live Demo:
+["Typed Arrays and ML"](../getting-started/demos/ml) covers strategies for
+creating worksheets from ML library exports (datasets stored in Typed Arrays).
+
Records from a database query (SQL or no-SQL) (click to show)
@@ -748,44 +750,6 @@ databases and query results.
-
- Numerical Computations with TensorFlow.js (click to show)
-
-`@tensorflow/tfjs` and other libraries expect data in simple arrays, well-suited
-for worksheets where each column is a data vector. That is the transpose of how
-most people use spreadsheets, where each row is a vector.
-
-When recovering data from `tfjs`, the returned data points are stored in a typed
-array. An array of arrays can be constructed with loops. `Array#unshift` can
-prepend a title row before the conversion:
-
-```js
-const XLSX = require("xlsx");
-const tf = require('@tensorflow/tfjs');
-
-/* suppose xs and ys are vectors (1D tensors) -> tfarr will be a typed array */
-const tfdata = tf.stack([xs, ys]).transpose();
-const shape = tfdata.shape;
-const tfarr = tfdata.dataSync();
-
-/* construct the array of arrays */
-const aoa = [];
-for(let j = 0; j < shape[0]; ++j) {
- aoa[j] = [];
- for(let i = 0; i < shape[1]; ++i) aoa[j][i] = tfarr[j * shape[1] + i];
-}
-/* add headers to the top */
-aoa.unshift(["x", "y"]);
-
-/* generate worksheet */
-const worksheet = XLSX.utils.aoa_to_sheet(aoa);
-```
-
-The [`array` demo](https://github.com/SheetJS/SheetJS/tree/master/demos/array/) shows a complete example.
-
-
-
-
## Processing HTML Tables
#### API
diff --git a/docz/docs/06-solutions/05-output.md b/docz/docs/06-solutions/05-output.md
index 523f2da..fc9043b 100644
--- a/docz/docs/06-solutions/05-output.md
+++ b/docz/docs/06-solutions/05-output.md
@@ -450,6 +450,9 @@ simple VueJS 3 data table. It is featured in the
### Example: Data Loading
+["Typed Arrays and ML"](../getting-started/demos/ml) covers strategies for
+generating typed arrays and tensors from worksheet data.
+
Populating a database (SQL or no-SQL) (click to show)
@@ -458,44 +461,7 @@ includes examples of working with databases and query results.
-
- Numerical Computations with TensorFlow.js (click to show)
-`@tensorflow/tfjs` and other libraries expect data in simple arrays, well-suited
-for worksheets where each column is a data vector. That is the transpose of how
-most people use spreadsheets, where each row is a vector.
-
-A single `Array#map` can pull individual named rows from `sheet_to_json` export:
-
-```js
-const XLSX = require("xlsx");
-const tf = require('@tensorflow/tfjs');
-
-const key = "age"; // this is the field we want to pull
-const ages = XLSX.utils.sheet_to_json(worksheet).map(r => r[key]);
-const tf_data = tf.tensor1d(ages);
-```
-
-All fields can be processed at once using a transpose of the 2D tensor generated
-with the `sheet_to_json` export with `header: 1`. The first row, if it contains
-header labels, should be removed with a slice:
-
-```js
-const XLSX = require("xlsx");
-const tf = require('@tensorflow/tfjs');
-
-/* array of arrays of the data starting on the second row */
-const aoa = XLSX.utils.sheet_to_json(worksheet, {header: 1}).slice(1);
-/* dataset in the "correct orientation" */
-const tf_dataset = tf.tensor2d(aoa).transpose();
-/* pull out each dataset with a slice */
-const tf_field0 = tf_dataset.slice([0,0], [1,tensor.shape[1]]).flatten();
-const tf_field1 = tf_dataset.slice([1,0], [1,tensor.shape[1]]).flatten();
-```
-
-The [`array` demo](https://github.com/SheetJS/SheetJS/tree/master/demos/array/) shows a complete example.
-
-
## Generating HTML Tables
diff --git a/docz/static/files/cd.png b/docz/static/files/cd.png
new file mode 100644
index 0000000..fab628c
Binary files /dev/null and b/docz/static/files/cd.png differ
diff --git a/docz/static/files/iris.png b/docz/static/files/iris.png
new file mode 100644
index 0000000..a2cc8a7
Binary files /dev/null and b/docz/static/files/iris.png differ