2022-02-05 13:59:25 +00:00
|
|
|
## Acquiring and Extracting Data
|
2017-03-20 09:02:25 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
### Parsing Workbooks
|
2017-03-20 09:02:25 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
#### API
|
2017-03-20 09:02:25 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
_Extract data from spreadsheet bytes_
|
2017-09-30 06:18:11 +00:00
|
|
|
|
2017-03-20 09:02:25 +00:00
|
|
|
```js
|
2022-02-05 13:59:25 +00:00
|
|
|
var workbook = XLSX.read(data, opts);
|
2017-03-29 19:14:15 +00:00
|
|
|
```
|
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
The `read` method can extract data from spreadsheet bytes stored in a JS string,
|
|
|
|
"binary string", NodeJS buffer or typed array (`Uint8Array` or `ArrayBuffer`).
|
2017-04-30 20:37:53 +00:00
|
|
|
|
2018-02-08 18:21:39 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
_Read spreadsheet bytes from a local file and extract data_
|
2018-02-08 18:21:39 +00:00
|
|
|
|
|
|
|
```js
|
2022-02-05 13:59:25 +00:00
|
|
|
var workbook = XLSX.readFile(filename, opts);
|
2018-02-08 18:21:39 +00:00
|
|
|
```
|
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
The `readFile` method attempts to read a spreadsheet file at the supplied path.
|
|
|
|
Browsers generally do not allow reading files in this way (it is deemed a
|
|
|
|
security risk), and attempts to read files in this way will throw an error.
|
2018-02-08 18:21:39 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
The second `opts` argument is optional. ["Parsing Options"](#parsing-options)
|
|
|
|
covers the supported properties and behaviors.
|
2018-02-08 18:21:39 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
#### Examples
|
2017-03-29 19:14:15 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
Here are a few common scenarios (click on each subtitle to see the code):
|
2017-09-30 06:18:11 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
<details>
|
|
|
|
<summary><b>Local file in a NodeJS server</b> (click to show)</summary>
|
2017-03-20 09:02:25 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
`readFile` uses `fs.readFileSync` under the hood:
|
2018-03-19 21:42:55 +00:00
|
|
|
|
|
|
|
```js
|
2022-02-05 13:59:25 +00:00
|
|
|
var XLSX = require("xlsx");
|
2017-09-30 06:18:11 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
var workbook = XLSX.readFile("test.xlsx");
|
2017-09-30 06:18:11 +00:00
|
|
|
```
|
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
For Node ESM, the `readFile` helper is not enabled. Instead, `fs.readFileSync`
|
|
|
|
should be used to read the file data as a `Buffer` for use with `XLSX.read`:
|
2017-03-20 09:02:25 +00:00
|
|
|
|
|
|
|
```js
|
2022-02-05 13:59:25 +00:00
|
|
|
import { readFileSync } from "fs";
|
|
|
|
import { read } from "xlsx/xlsx.mjs";
|
2017-03-20 09:02:25 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
const buf = readFileSync("test.xlsx");
|
|
|
|
/* buf is a Buffer */
|
|
|
|
const workbook = read(buf);
|
2017-03-20 09:02:25 +00:00
|
|
|
```
|
|
|
|
|
2017-04-30 20:37:53 +00:00
|
|
|
</details>
|
|
|
|
|
|
|
|
<details>
|
2022-02-05 13:59:25 +00:00
|
|
|
<summary><b>User-submitted file in a web page ("Drag-and-Drop")</b> (click to show)</summary>
|
2017-04-30 20:37:53 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
For modern websites targeting Chrome 76+, `File#arrayBuffer` is recommended:
|
2021-10-03 01:41:36 +00:00
|
|
|
|
|
|
|
```js
|
2022-02-05 13:59:25 +00:00
|
|
|
// XLSX is a global from the standalone script
|
|
|
|
|
2021-10-03 01:41:36 +00:00
|
|
|
async function handleDropAsync(e) {
|
|
|
|
e.stopPropagation(); e.preventDefault();
|
2022-02-05 13:59:25 +00:00
|
|
|
const f = e.dataTransfer.files[0];
|
|
|
|
/* f is a File */
|
2021-10-03 01:41:36 +00:00
|
|
|
const data = await f.arrayBuffer();
|
2022-02-05 13:59:25 +00:00
|
|
|
/* data is an ArrayBuffer */
|
2021-10-03 01:41:36 +00:00
|
|
|
const workbook = XLSX.read(data);
|
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
}
|
2022-02-05 13:59:25 +00:00
|
|
|
drop_dom_element.addEventListener("drop", handleDropAsync, false);
|
2021-10-03 01:41:36 +00:00
|
|
|
```
|
|
|
|
|
|
|
|
For maximal compatibility, the `FileReader` API should be used:
|
2017-03-20 09:02:25 +00:00
|
|
|
|
|
|
|
```js
|
|
|
|
function handleDrop(e) {
|
2017-09-24 23:40:09 +00:00
|
|
|
e.stopPropagation(); e.preventDefault();
|
2021-10-03 01:41:36 +00:00
|
|
|
var f = e.dataTransfer.files[0];
|
2022-02-05 13:59:25 +00:00
|
|
|
/* f is a File */
|
2017-09-24 23:40:09 +00:00
|
|
|
var reader = new FileReader();
|
|
|
|
reader.onload = function(e) {
|
2022-02-05 13:59:25 +00:00
|
|
|
var data = e.target.result;
|
|
|
|
/* reader.readAsArrayBuffer(file) -> data will be an ArrayBuffer */
|
|
|
|
var workbook = XLSX.read(data);
|
2017-09-24 23:40:09 +00:00
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
};
|
2018-11-19 20:44:54 +00:00
|
|
|
reader.readAsArrayBuffer(f);
|
2017-03-20 09:02:25 +00:00
|
|
|
}
|
2022-02-05 13:59:25 +00:00
|
|
|
drop_dom_element.addEventListener("drop", handleDrop, false);
|
2017-03-20 09:02:25 +00:00
|
|
|
```
|
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
<https://oss.sheetjs.com/sheetjs/> demonstrates the FileReader technique.
|
|
|
|
|
2017-04-30 20:37:53 +00:00
|
|
|
</details>
|
|
|
|
|
|
|
|
<details>
|
2022-02-05 13:59:25 +00:00
|
|
|
<summary><b>User-submitted file with an HTML INPUT element</b> (click to show)</summary>
|
2017-09-24 23:40:09 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
Starting with an HTML INPUT element with `type="file"`:
|
2021-10-03 01:41:36 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
```html
|
|
|
|
<input type="file" id="input_dom_element">
|
|
|
|
```
|
|
|
|
|
|
|
|
For modern websites targeting Chrome 76+, `Blob#arrayBuffer` is recommended:
|
2021-10-03 01:41:36 +00:00
|
|
|
|
|
|
|
```js
|
2022-02-05 13:59:25 +00:00
|
|
|
// XLSX is a global from the standalone script
|
|
|
|
|
2021-10-03 01:41:36 +00:00
|
|
|
async function handleFileAsync(e) {
|
2021-12-29 10:36:44 +00:00
|
|
|
const file = e.target.files[0];
|
2021-10-03 01:41:36 +00:00
|
|
|
const data = await file.arrayBuffer();
|
2022-02-05 13:59:25 +00:00
|
|
|
/* data is an ArrayBuffer */
|
2021-10-03 01:41:36 +00:00
|
|
|
const workbook = XLSX.read(data);
|
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
}
|
2022-02-05 13:59:25 +00:00
|
|
|
input_dom_element.addEventListener("change", handleFileAsync, false);
|
2021-10-03 01:41:36 +00:00
|
|
|
```
|
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
For broader support (including IE10+), the `FileReader` approach is recommended:
|
2017-03-20 09:02:25 +00:00
|
|
|
|
|
|
|
```js
|
|
|
|
function handleFile(e) {
|
2022-02-05 13:59:25 +00:00
|
|
|
var file = e.target.files[0];
|
2017-09-24 23:40:09 +00:00
|
|
|
var reader = new FileReader();
|
|
|
|
reader.onload = function(e) {
|
2022-02-05 13:59:25 +00:00
|
|
|
var data = e.target.result;
|
|
|
|
/* reader.readAsArrayBuffer(file) -> data will be an ArrayBuffer */
|
2021-10-03 01:41:36 +00:00
|
|
|
var workbook = XLSX.read(data);
|
2017-09-24 23:40:09 +00:00
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
};
|
2022-02-05 13:59:25 +00:00
|
|
|
reader.readAsArrayBuffer(file);
|
2017-03-20 09:02:25 +00:00
|
|
|
}
|
2022-02-05 13:59:25 +00:00
|
|
|
input_dom_element.addEventListener("change", handleFile, false);
|
2017-03-20 09:02:25 +00:00
|
|
|
```
|
|
|
|
|
2018-02-14 20:06:35 +00:00
|
|
|
The [`oldie` demo](demos/oldie/) shows an IE-compatible fallback scenario.
|
|
|
|
|
2017-04-30 20:37:53 +00:00
|
|
|
</details>
|
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
<details>
|
|
|
|
<summary><b>Fetching a file in the web browser ("Ajax")</b> (click to show)</summary>
|
|
|
|
|
|
|
|
For modern websites targeting Chrome 42+, `fetch` is recommended:
|
|
|
|
|
|
|
|
```js
|
|
|
|
// XLSX is a global from the standalone script
|
|
|
|
|
|
|
|
(async() => {
|
|
|
|
const url = "http://oss.sheetjs.com/test_files/formula_stress_test.xlsx";
|
|
|
|
const data = await (await fetch(url)).arrayBuffer();
|
|
|
|
/* data is an ArrayBuffer */
|
|
|
|
const workbook = XLSX.read(data);
|
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
})();
|
|
|
|
```
|
|
|
|
|
|
|
|
For broader support, the `XMLHttpRequest` approach is recommended:
|
|
|
|
|
|
|
|
```js
|
|
|
|
var url = "http://oss.sheetjs.com/test_files/formula_stress_test.xlsx";
|
|
|
|
|
|
|
|
/* set up async GET request */
|
|
|
|
var req = new XMLHttpRequest();
|
|
|
|
req.open("GET", url, true);
|
|
|
|
req.responseType = "arraybuffer";
|
|
|
|
|
|
|
|
req.onload = function(e) {
|
|
|
|
var workbook = XLSX.read(req.response);
|
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
};
|
|
|
|
|
|
|
|
req.send();
|
|
|
|
```
|
|
|
|
|
|
|
|
The [`xhr` demo](demos/xhr/) includes a longer discussion and more examples.
|
|
|
|
|
|
|
|
<http://oss.sheetjs.com/sheetjs/ajax.html> shows fallback approaches for IE6+.
|
|
|
|
|
|
|
|
</details>
|
|
|
|
|
|
|
|
<details>
|
|
|
|
<summary><b>Local file in a Photoshop or InDesign plugin</b> (click to show)</summary>
|
|
|
|
|
|
|
|
`readFile` wraps the `File` logic in Photoshop and other ExtendScript targets.
|
|
|
|
The specified path should be an absolute path:
|
|
|
|
|
|
|
|
```js
|
|
|
|
#include "xlsx.extendscript.js"
|
|
|
|
|
|
|
|
/* Read test.xlsx from the Documents folder */
|
|
|
|
var workbook = XLSX.readFile(Folder.myDocuments + "/test.xlsx");
|
|
|
|
```
|
|
|
|
|
|
|
|
The [`extendscript` demo](demos/extendscript/) includes a more complex example.
|
|
|
|
|
|
|
|
</details>
|
|
|
|
|
|
|
|
<details>
|
|
|
|
<summary><b>Local file in an Electron app</b> (click to show)</summary>
|
|
|
|
|
|
|
|
`readFile` can be used in the renderer process:
|
|
|
|
|
|
|
|
```js
|
|
|
|
/* From the renderer process */
|
|
|
|
var XLSX = require("xlsx");
|
|
|
|
|
|
|
|
var workbook = XLSX.readFile(path);
|
|
|
|
```
|
|
|
|
|
|
|
|
Electron APIs have changed over time. The [`electron` demo](demos/electron/)
|
|
|
|
shows a complete example and details the required version-specific settings.
|
|
|
|
|
|
|
|
</details>
|
|
|
|
|
|
|
|
<details>
|
|
|
|
<summary><b>Local file in a mobile app with React Native</b> (click to show)</summary>
|
|
|
|
|
|
|
|
The [`react` demo](demos/react) includes a sample React Native app.
|
|
|
|
|
|
|
|
Since React Native does not provide a way to read files from the filesystem, a
|
|
|
|
third-party library must be used. The following libraries have been tested:
|
|
|
|
|
|
|
|
- [`react-native-file-access`](https://npm.im/react-native-file-access)
|
|
|
|
|
|
|
|
The `base64` encoding returns strings compatible with the `base64` type:
|
|
|
|
|
|
|
|
```js
|
|
|
|
import XLSX from "xlsx";
|
|
|
|
import { FileSystem } from "react-native-file-access";
|
|
|
|
|
|
|
|
const b64 = await FileSystem.readFile(path, "base64");
|
|
|
|
/* b64 is a base64 string */
|
|
|
|
const workbook = XLSX.read(b64, {type: "base64"});
|
|
|
|
```
|
|
|
|
|
|
|
|
- [`react-native-fs`](https://npm.im/react-native-fs)
|
|
|
|
|
|
|
|
The `ascii` encoding returns binary strings compatible with the `binary` type:
|
|
|
|
|
|
|
|
```js
|
|
|
|
import XLSX from "xlsx";
|
|
|
|
import { readFile } from "react-native-fs";
|
|
|
|
|
|
|
|
const bstr = await readFile(path, "ascii");
|
|
|
|
/* bstr is a binary string */
|
|
|
|
const workbook = XLSX.read(bstr, {type: "binary"});
|
|
|
|
```
|
|
|
|
|
|
|
|
</details>
|
|
|
|
|
|
|
|
<details>
|
|
|
|
<summary><b>NodeJS Server File Uploads</b> (click to show)</summary>
|
|
|
|
|
|
|
|
`read` can accept a NodeJS buffer. `readFile` can read files generated by an
|
|
|
|
HTTP POST request body parser like [`formidable`](https://npm.im/formidable):
|
|
|
|
|
|
|
|
```js
|
|
|
|
const XLSX = require("xlsx");
|
|
|
|
const http = require("http");
|
|
|
|
const formidable = require("formidable");
|
|
|
|
|
|
|
|
const server = http.createServer((req, res) => {
|
|
|
|
const form = new formidable.IncomingForm();
|
|
|
|
form.parse(req, (err, fields, files) => {
|
|
|
|
/* grab the first file */
|
|
|
|
const f = Object.entries(files)[0][1];
|
|
|
|
const path = f.filepath;
|
|
|
|
const workbook = XLSX.readFile(path);
|
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
});
|
|
|
|
}).listen(process.env.PORT || 7262);
|
|
|
|
```
|
|
|
|
|
|
|
|
The [`server` demo](demos/server) has more advanced examples.
|
|
|
|
|
|
|
|
</details>
|
|
|
|
|
|
|
|
<details>
|
|
|
|
<summary><b>Download files in a NodeJS process</b> (click to show)</summary>
|
|
|
|
|
|
|
|
Node 17.5 (with the `--experimental-fetch` flag) and 18.0+ have native support for `fetch`:
|
|
|
|
|
|
|
|
```js
|
|
|
|
const XLSX = require("xlsx");
|
2017-04-30 20:37:53 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
const data = await (await fetch(url)).arrayBuffer();
|
|
|
|
/* data is an ArrayBuffer */
|
|
|
|
const workbook = XLSX.read(data);
|
|
|
|
```
|
|
|
|
|
|
|
|
For broader compatibility, third-party modules are recommended.
|
|
|
|
|
|
|
|
[`request`](https://npm.im/request) requires a `null` encoding to yield Buffers:
|
|
|
|
|
|
|
|
```js
|
|
|
|
var XLSX = require("xlsx");
|
|
|
|
var request = require("request");
|
2017-04-16 04:32:13 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
request({url: url, encoding: null}, function(err, resp, body) {
|
|
|
|
var workbook = XLSX.read(body);
|
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
});
|
|
|
|
```
|
|
|
|
|
|
|
|
[`axios`](https://npm.im/axios) works the same way in browser and in NodeJS:
|
|
|
|
|
|
|
|
```js
|
|
|
|
const XLSX = require("xlsx");
|
|
|
|
const axios = require("axios");
|
|
|
|
|
|
|
|
(async() => {
|
|
|
|
const res = await axios.get(url, {responseType: "arraybuffer"});
|
|
|
|
/* res.data is a Buffer */
|
|
|
|
const workbook = XLSX.read(res.data);
|
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
})();
|
|
|
|
```
|
2017-04-16 04:32:13 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
</details>
|
2017-06-03 07:19:09 +00:00
|
|
|
|
|
|
|
<details>
|
2022-02-05 13:59:25 +00:00
|
|
|
<summary><b>Download files in an Electron app</b> (click to show)</summary>
|
|
|
|
|
|
|
|
The `net` module in the main process can make HTTP/HTTPS requests to external
|
|
|
|
resources. Responses should be manually concatenated using `Buffer.concat`:
|
|
|
|
|
|
|
|
```js
|
|
|
|
const XLSX = require("xlsx");
|
|
|
|
const { net } = require("electron");
|
2017-06-03 07:19:09 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
const req = net.request(url);
|
|
|
|
req.on("response", (res) => {
|
|
|
|
const bufs = []; // this array will collect all of the buffers
|
|
|
|
res.on("data", (chunk) => { bufs.push(chunk); });
|
|
|
|
res.on("end", () => {
|
|
|
|
const workbook = XLSX.read(Buffer.concat(bufs));
|
2017-04-16 04:32:13 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
/* DO SOMETHING WITH workbook HERE */
|
|
|
|
});
|
|
|
|
});
|
|
|
|
req.end();
|
2017-04-16 04:32:13 +00:00
|
|
|
```
|
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
</details>
|
2017-04-16 04:32:13 +00:00
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
<details>
|
|
|
|
<summary><b>Readable Streams in NodeJS</b> (click to show)</summary>
|
|
|
|
|
|
|
|
When dealing with Readable Streams, the easiest approach is to buffer the stream
|
|
|
|
and process the whole thing at the end:
|
|
|
|
|
|
|
|
```js
|
|
|
|
var fs = require("fs");
|
|
|
|
var XLSX = require("xlsx");
|
|
|
|
|
|
|
|
function process_RS(stream, cb) {
|
|
|
|
var buffers = [];
|
|
|
|
stream.on("data", function(data) { buffers.push(data); });
|
|
|
|
stream.on("end", function() {
|
|
|
|
var buffer = Buffer.concat(buffers);
|
|
|
|
var workbook = XLSX.read(buffer, {type:"buffer"});
|
|
|
|
|
|
|
|
/* DO SOMETHING WITH workbook IN THE CALLBACK */
|
|
|
|
cb(workbook);
|
|
|
|
});
|
|
|
|
}
|
2017-04-16 04:32:13 +00:00
|
|
|
```
|
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
</details>
|
|
|
|
|
|
|
|
<details>
|
|
|
|
<summary><b>ReadableStream in the browser</b> (click to show)</summary>
|
|
|
|
|
|
|
|
When dealing with `ReadableStream`, the easiest approach is to buffer the stream
|
|
|
|
and process the whole thing at the end:
|
|
|
|
|
|
|
|
```js
|
|
|
|
// XLSX is a global from the standalone script
|
|
|
|
|
|
|
|
async function process_RS(stream) {
|
|
|
|
/* collect data */
|
|
|
|
const buffers = [];
|
|
|
|
const reader = stream.getReader();
|
|
|
|
for(;;) {
|
|
|
|
const res = await reader.read();
|
|
|
|
if(res.value) buffers.push(res.value);
|
|
|
|
if(res.done) break;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* concat */
|
|
|
|
const out = new Uint8Array(buffers.reduce((acc, v) => acc + v.length, 0));
|
|
|
|
|
|
|
|
let off = 0;
|
|
|
|
for(const u8 of buffers) {
|
|
|
|
out.set(u8, off);
|
|
|
|
off += u8.length;
|
|
|
|
}
|
|
|
|
|
|
|
|
return out;
|
|
|
|
}
|
|
|
|
|
|
|
|
const data = await process_RS(stream);
|
|
|
|
/* data is Uint8Array */
|
|
|
|
const workbook = XLSX.read(data);
|
|
|
|
```
|
2017-04-16 04:32:13 +00:00
|
|
|
|
2017-06-03 07:19:09 +00:00
|
|
|
</details>
|
|
|
|
|
2022-02-05 13:59:25 +00:00
|
|
|
More detailed examples are covered in the [included demos](demos/).
|
|
|
|
|
2017-04-16 04:32:13 +00:00
|
|
|
|