);
}
```
In the other direction, `XLSX.read` will readily parse CSV exports.
## JS Array Interchange
[The official Linear Regression tutorial](https://www.tensorflow.org/js/tutorials/training/linear_regression)
loads data from a JSON file:
```json
[
{
"Name": "chevrolet chevelle malibu",
"Miles_per_Gallon": 18,
"Cylinders": 8,
"Displacement": 307,
"Horsepower": 130,
"Weight_in_lbs": 3504,
"Acceleration": 12,
"Year": "1970-01-01",
"Origin": "USA"
},
{
"Name": "buick skylark 320",
"Miles_per_Gallon": 15,
"Cylinders": 8,
"Displacement": 350,
"Horsepower": 165,
"Weight_in_lbs": 3693,
"Acceleration": 11.5,
"Year": "1970-01-01",
"Origin": "USA"
},
// ...
]
```
In real use cases, data is stored in [spreadsheets](https://sheetjs.com/cd.xls)
![cd.xls screenshot](pathname:///files/cd.png)
Following the tutorial, the data fetching method is easily adapted. Differences
from the official example are highlighted below:
```js
/**
* Get the car data reduced to just the variables we are interested
* and cleaned of missing data.
*/
async function getData() {
// highlight-start
/* fetch file */
const carsDataResponse = await fetch('https://sheetjs.com/cd.xls');
/* get file data (ArrayBuffer) */
const carsDataAB = await carsDataResponse.arrayBuffer();
/* parse */
const carsDataWB = XLSX.read(carsDataAB);
/* get first worksheet */
const carsDataWS = carsDataWB.Sheets[carsDataWB.SheetNames[0]];
/* generate array of JS objects */
const carsData = XLSX.utils.sheet_to_json(carsDataWS);
// highlight-end
const cleaned = carsData.map(car => ({
mpg: car.Miles_per_Gallon,
horsepower: car.Horsepower,
}))
.filter(car => (car.mpg != null && car.horsepower != null));
return cleaned;
}
```
## Low-Level Operations
:::caution
While it is more efficient to use low-level operations, JS or CSV interchange
is strongly recommended when possible.
:::
### Data Transposition
A typical dataset in a spreadsheet will start with one header row and represent
each data record in its own row. For example, the Iris dataset might look like
![Iris dataset](pathname:///files/iris.png)
`XLSX.utils.sheet_to_json` will translate this into an array of row objects:
```js
var aoo = [
{"sepal length": 5.1, "sepal width": 3.5, ...},
{"sepal length": 4.9, "sepal width": 3, ...},
...
];
```
TF.js and other libraries tend to operate on individual columns, equivalent to:
```js
var sepal_lengths = [5.1, 4.9, ...];
var sepal_widths = [3.5, 3, ...];
```
When a 2D tensor can be exported, it will look different from the spreadsheet:
```js
var data_set_2d = [
[5.1, 4.9, ...],
[3.5, 3, ...],
...
]
```
This is the transpose of how people use spreadsheets!
#### Typed Arrays and Columns
A single typed array can be converted to a pure JS array with `Array.from`:
```js
var column = Array.from(dataset_typedarray);
```
Similarly, `Float32Array.from` generates a typed array from a normal array:
```js
var dataset = Float32Array.from(column);
```
### Exporting Datasets to a Worksheet
`XLSX.utils.aoa_to_sheet` can generate a worksheet from an array of arrays.
ML libraries typically provide APIs to pull an array of arrays, but it will
be transponsed
a row-major array of arrays. To export multiple data
sets, "transpose" the data:
```js
/* assuming data is an array of typed arrays */
var aoa = [];
for(var i = 0; i < data.length; ++i) {
for(var j = 0; j < data[i].length; ++j) {
if(!aoa[j]) aoa[j] = [];
aoa[j][i] = data[i][j];
}
}
/* aoa can be directly converted to a worksheet object */
var ws = XLSX.utils.aoa_to_sheet(aoa);
```
### Importing Data from a Spreadsheet
`sheet_to_json` with the option `header:1` will generate a row-major array of
arrays that can be transposed. However, it is more efficient to walk the sheet
manually:
```js
/* find worksheet range */
var range = XLSX.utils.decode_range(ws['!ref']);
var out = []
/* walk the columns */
for(var C = range.s.c; C <= range.e.c; ++C) {
/* create the typed array */
var ta = new Float32Array(range.e.r - range.s.r + 1);
/* walk the rows */
for(var R = range.s.r; R <= range.e.r; ++R) {
/* find the cell, skip it if the cell isn't numeric or boolean */
var cell = ws[XLSX.utils.encode_cell({r:R, c:C})];
if(!cell || cell.t != 'n' && cell.t != 'b') continue;
/* assign to the typed array */
ta[R - range.s.r] = cell.v;
}
out.push(ta);
}
```
If the data set has a header row, the loop can be adjusted to skip those rows.
### TF.js Tensors
A single `Array#map` can pull individual named fields from the result, which
can be used to construct TensorFlow.js tensor objects:
```js
const aoo = XLSX.utils.sheet_to_json(worksheet);
const lengths = aoo.map(row => row["sepal length"]);
const tensor = tf.tensor1d(lengths);
```
`tf.Tensor` objects can be directly transposed using `transpose`:
```js
var aoo = XLSX.utils.sheet_to_json(worksheet);
// "x" and "y" are the fields we want to pull from the data
var data = aoo.map(row => ([row["x"], row["y"]]));
// create a tensor representing two column datasets
var tensor = tf.tensor2d(data).transpose();
// individual columns can be accessed
var col1 = tensor.slice([0,0], [1,tensor.shape[1]]).flatten();
var col2 = tensor.slice([1,0], [1,tensor.shape[1]]).flatten();
```
For exporting, `stack` can be used to linearize the columns:
```js
/* pull data into a Float32Array */
var result = tf.stack([col1, col2]).transpose();
var shape = tensor.shape;
var f32 = tensor.dataSync();
/* construct an array of arrays of the data in spreadsheet order */
var aoa = [];
for(var j = 0; j < shape[0]; ++j) {
aoa[j] = [];
for(var i = 0; i < shape[1]; ++i) aoa[j][i] = f32[j * shape[1] + i];
}
/* add headers to the top */
aoa.unshift(["x", "y"]);
/* generate worksheet */
var worksheet = XLSX.utils.aoa_to_sheet(aoa);
```