diff --git a/.gitignore b/.gitignore
index 60d61c8..5f4b662 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,4 @@
+.*.sw*
*.bak
package-lock.json
pnpm-lock.yaml
diff --git a/.spelling b/.spelling
index 2a16e2d..0513a27 100644
--- a/.spelling
+++ b/.spelling
@@ -76,6 +76,7 @@ WK3
WK4
WKS
WK_
+WPS
WQ
WQ1
WQ2
@@ -97,6 +98,7 @@ macrosheets
tooltip
tooltips
标文通
+电子表格
# Other terms
1.x
@@ -117,6 +119,7 @@ BOM
Base64
Base64-encoded
Big5
+BitBucket
Booleans
Browserify
Bundlers
@@ -151,6 +154,8 @@ ExpressJS
ExtendScript
Fastify
FileReader
+FileReaderSync
+FileSaver
GBK
GatsbyJS
GitLab
diff --git a/docz/docs/03-demos/07-worker.md b/docz/docs/03-demos/07-worker.md
index d7c7439..425c0ff 100644
--- a/docz/docs/03-demos/07-worker.md
+++ b/docz/docs/03-demos/07-worker.md
@@ -30,7 +30,7 @@ For production use, it is highly encouraged to download and host the script.
:::note Browser Compatibility
ESM is supported in Web Workers in the Chromium family of browsers (including
-Chrome and Edge) as well as in Webkit-based browsers (including Safari).
+Chrome and Edge) as well as in browsers powered by WebKit (including Safari).
For support in legacy browsers like Firefox, `importScripts` should be used.
@@ -102,7 +102,7 @@ self.addEventListener('message', async(e) => {
const ab = await res.arrayBuffer();
/* Parse file */
- const wb = XLSX.read(ab);
+ const wb = XLSX.read(ab, {dense: true});
const ws = wb.Sheets[wb.SheetNames[0]];
/* Generate HTML */
@@ -162,7 +162,7 @@ self.addEventListener('message', async(e) => {
SheetJS,in,Web,Workers
வணக்கம்,สวัสดี,你好,가지마
1,2,3,4\`;
- const wb = XLSX.read(csv, { type: "string" });
+ const wb = XLSX.read(csv, { type: "string", dense: true });
/* Write XLSB data (Uint8Array) */
const u8 = XLSX.write(wb, { bookType: "xlsb", type: "buffer" });
@@ -234,7 +234,7 @@ self.addEventListener('message', (e) => {
const ab = new FileReaderSync().readAsArrayBuffer(e.data.file);
/* Parse file */
- const wb = XLSX.read(ab);
+ const wb = XLSX.read(ab, {dense: true});
const ws = wb.Sheets[wb.SheetNames[0]];
/* Generate HTML */
@@ -264,4 +264,125 @@ self.addEventListener('message', (e) => {
> );
}
-```
\ No newline at end of file
+```
+
+## Streaming Write
+
+A more general discussion, including row-oriented processing demos, is included
+in the ["Large Datasets"](/docs/demos/stream#browser) demo.
+
+#### File System Access API
+
+:::note
+
+At the time of writing, the File System Access API is only available in Chromium
+and Chromium-based browsers like Chrome and Edge.
+
+:::
+
+The following live demo fetches and parses a file in a Web Worker. The script:
+
+- prompts user to save file (`window.showSaveFilePicker` in the main thread)
+- passes the URL and the file object to the Web Worker
+- loads the SheetJS library in the Web Worker
+- fetches the requested URL and parses the workbook from the Worker
+- creates a Writable Stream from the file object
+- uses `XLSX.stream.to_csv` to generate CSV rows of the first worksheet
+ + on each row, the data is written to the file stream
+ + every 10th row, a progress message is sent back to the main thread
+ + at the end, a completion message is sent back to the main thread
+
+The demo has a URL input box. Feel free to change the URL. For example,
+
+`https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls`
+is an XLS file over 50 MB
+
+`https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx`
+is an XLSX file with 300000 rows (approximately 20 MB)
+
+```jsx live
+function SheetJSFetchCSVStreamFile() {
+ const [state, setState] = React.useState("");
+ const [cnt, setCnt] = React.useState(0);
+ const [url, setUrl] = React.useState("https://oss.sheetjs.com/test_files/large_strings.xlsx");
+
+ return ( <>
+ URL: setUrl(e.target.value)} size="80"/>
+
+
State: {state} Number of rows: {cnt}
+ > );
+}
+```
diff --git a/docz/docs/03-demos/08-stream.md b/docz/docs/03-demos/08-stream.md
new file mode 100644
index 0000000..f8ee8b2
--- /dev/null
+++ b/docz/docs/03-demos/08-stream.md
@@ -0,0 +1,354 @@
+---
+title: Large Datasets
+---
+
+For maximal compatibility, the library reads entire files at once and generates
+files at once. Browsers and other JS engines enforce tight memory limits. In
+these cases, the library offers strategies to optimize for memory or space by
+using platform-specific APIs.
+
+## Dense Mode
+
+The `dense` option (supported in `read`, `readFile` and `aoa_to_sheet`) creates
+worksheet objects that use arrays of arrays under the hood:
+
+```js
+var dense_wb = XLSX.read(ab, {dense: true});
+
+var dense_sheet = XLSX.utils.aoa_to_sheet(aoa);
+```
+
+Historical Note (click to show)
+
+The earliest versions of the library aimed for IE6+ compatibility. In early
+testing, both in Chrome 26 and in IE6, the most efficient worksheet storage for
+small sheets was a large object whose keys were cell addresses.
+
+Over time, V8 (the engine behind Chrome and NodeJS) evolved in a way that made
+the array of arrays approach more efficient but reduced the performance of the
+large object approach.
+
+In the interest of preserving backwards compatibility, the library opts to make
+the array of arrays approach available behind a special `dense` option.
+
+
+
+The various API functions will seamlessly handle dense and sparse worksheets.
+
+## Streaming Write
+
+The streaming write functions are available in the `XLSX.stream` object. They
+take the same arguments as the normal write functions:
+
+- `XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`.
+- `XLSX.stream.to_html` is the streaming version of `XLSX.utils.sheet_to_html`.
+- `XLSX.stream.to_json` is the streaming version of `XLSX.utils.sheet_to_json`.
+
+"Stream" refers to the NodeJS push streams API.
+
+Historical Note (click to show)
+
+NodeJS push streams were introduced in 2012.
+
+The first streaming write function, `to_csv`, was introduced in April 2017. It
+used and still uses the same NodeJS streaming API.
+
+Years later, browser vendors are settling on a different stream API.
+
+For maximal compatibility, the library uses NodeJS push streams.
+
+
+
+### NodeJS
+
+:::note
+
+In a CommonJS context, NodeJS Streams and `fs` immediately work with SheetJS:
+
+```js
+const XLSX = require("xlsx"); // "just works"
+```
+
+In NodeJS ESM, the dependency must be loaded manually:
+
+```js
+import * as XLSX from 'xlsx';
+import { Readable } from 'stream';
+
+XLSX.stream.set_readable(Readable); // manually load stream helpers
+```
+
+Additionally, for file-related operations in NodeJS ESM, `fs` must be loaded:
+
+```js
+import * as XLSX from 'xlsx';
+import * as fs from 'fs';
+
+XLSX.set_fs(fs); // manually load fs helpers
+```
+
+**It is strongly encouraged to use CommonJS in NodeJS whenever possible.**
+
+:::
+
+This example reads a workbook file passed as an argument to the script, pulls
+the first worksheet, converts to CSV and writes to `out.csv`:
+
+```js
+var XLSX = require("xlsx");
+var workbook = XLSX.readFile(process.argv[2]);
+var worksheet = workbook.Sheets[workbook.SheetNames[0]];
+// highlight-next-line
+var stream = XLSX.stream.to_csv(worksheet);
+
+var output_file_name = "out.csv";
+// highlight-next-line
+stream.pipe(fs.createWriteStream(output_file_name));
+```
+
+`stream.to_json` uses Object-mode streams. A `Transform` stream can be used to
+generate a normal stream for streaming to a file or the screen:
+
+```js
+var XLSX = require("xlsx");
+var workbook = XLSX.readFile(process.argv[2], {dense: true});
+var worksheet = workbook.Sheets[workbook.SheetNames[0]];
+/* to_json returns an object-mode stream */
+// highlight-next-line
+var stream = XLSX.stream.to_json(worksheet, {raw:true});
+
+/* this Transform stream converts JS objects to text and prints to screen */
+var conv = new Transform({writableObjectMode:true});
+conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); };
+conv.pipe(process.stdout);
+
+// highlight-next-line
+stream.pipe(conv);
+```
+
+### Browser
+
+Live Demo (click to show)
+
+The following live demo fetches and parses a file in a Web Worker. The `to_csv`
+streaming function is used to generate CSV rows and pass them back to the main
+thread for further processing.
+
+:::note
+
+For Chromium browsers, the File System Access API provides a modern worker-only
+approach. [The Web Workers demo](/docs/demos/worker#streaming-write) includes a
+live example of CSV streaming write.
+
+:::
+
+The demo has a URL input box. Feel free to change the URL. For example,
+
+`https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls`
+is an XLS file over 50 MB
+
+`https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx`
+is an XLSX file with 300000 rows (approximately 20 MB)
+
+```jsx live
+function SheetJSFetchCSVStreamWorker() {
+ const [__html, setHTML] = React.useState("");
+ const [state, setState] = React.useState("");
+ const [cnt, setCnt] = React.useState(0);
+ const [url, setUrl] = React.useState("https://oss.sheetjs.com/test_files/large_strings.xlsx");
+
+ return ( <>
+ URL: setUrl(e.target.value)} size="80"/>
+
+
State: {state} Number of rows: {cnt}
+
+ > );
+}
+```
+
+
+
+NodeJS streaming APIs are not available in the browser. The following function
+supplies a pseudo stream object compatible with the `to_csv` function:
+
+```js
+function sheet_to_csv_cb(ws, cb, opts, batch = 1000) { // stream CSV chunks of `ws` to `cb`, at most `batch` reads per timer tick
+ XLSX.stream.set_readable(() => ({ // register a factory for the pseudo stream in place of a NodeJS Readable
+ __done: false, // becomes true once the end of the stream is reached
+ // placeholder: the SheetJS stream methods assign the real `_read`
+ _read: function() { this.__done = true; },
+ // called by the stream methods with each chunk; a null chunk is forwarded to `cb` and ends the stream
+ push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
+ resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); } // pull up to `batch` times, then reschedule with setTimeout until done
+ }));
+ return XLSX.stream.to_csv(ws, opts); // the caller starts the flow with `.resume()`
+}
+
+// assuming `workbook` is a workbook, stream the first sheet
+const ws = workbook.Sheets[workbook.SheetNames[0]];
+const strm = sheet_to_csv_cb(ws, (csv)=>{ if(csv != null) console.log(csv); }); // the final null chunk is skipped
+strm.resume();
+```
+
+#### Web Workers
+
+For processing large files in the browser, it is strongly encouraged to use Web
+Workers. The [Worker demo](/docs/demos/worker#streaming-write) includes examples
+using the File System Access API.
+
+Typically, the file and stream processing occurs in the Web Worker. CSV rows
+can be sent back to the main thread in the callback:
+
+```js title="worker.js"
+/* load standalone script from CDN */
+importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
+
+function sheet_to_csv_cb(ws, cb, opts, batch = 1000) { // stream CSV chunks of `ws` to `cb`, at most `batch` reads per timer tick
+ XLSX.stream.set_readable(() => ({ // register a factory for the pseudo stream in place of a NodeJS Readable
+ __done: false, // becomes true once the end of the stream is reached
+ // placeholder: the SheetJS stream methods assign the real `_read`
+ _read: function() { this.__done = true; },
+ // called by the stream methods with each chunk; a null chunk is forwarded to `cb` and ends the stream
+ push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
+ resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); } // pull up to `batch` times, then reschedule with setTimeout until done
+ }));
+ return XLSX.stream.to_csv(ws, opts); // the caller starts the flow with `.resume()`
+}
+
+/* this callback will run once the main context sends a message */
+self.addEventListener('message', async(e) => {
+ try {
+ postMessage({state: "fetching " + e.data.url});
+ /* Fetch file */
+ const res = await fetch(e.data.url);
+ const ab = await res.arrayBuffer();
+
+ /* Parse file */
+ postMessage({state: "parsing"});
+ const wb = XLSX.read(ab, {dense: true});
+ const ws = wb.Sheets[wb.SheetNames[0]];
+
+ /* Generate CSV rows */
+ postMessage({state: "csv"});
+ const strm = sheet_to_csv_cb(ws, (csv) => {
+ if(csv != null) postMessage({csv});
+ else postMessage({state: "done"});
+ });
+ strm.resume();
+ } catch(e) {
+ /* Pass the error message back */
+ postMessage({error: String(e.message || e) });
+ }
+}, false);
+```
+
+The main thread will receive messages with CSV rows for further processing:
+
+```js
+worker.onmessage = function(e) {
+ if(e.data.error) { console.error(e.data.error); /* show an error message */ }
+ else if(e.data.state) { console.info(e.data.state); /* current state */ }
+ else {
+ /* e.data.csv is the row generated by the stream */
+ console.log(e.data.csv);
+ }
+};
+```
+
+### Deno
+
+Deno does not support NodeJS streams in normal execution, so a wrapper is used.
+This example fetches and prints CSV rows:
+
+```ts title="sheet2csv.ts"
+// @deno-types="https://cdn.sheetjs.com/xlsx-latest/package/types/index.d.ts"
+import { read, stream, Sheet2CSVOpts, WorkSheet } from 'https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs';
+
+/* the caller only needs `resume` from the pseudo stream */
+interface Resumable { resume:()=>void; };
+/* Generate row strings from a worksheet: `cb` receives each chunk and a final
+   `null`; at most `batch` reads run before yielding with setTimeout */
+function sheet_to_csv_cb(ws: WorkSheet, cb:(d:string|null)=>void, opts: Sheet2CSVOpts = {}, batch = 1000): Resumable {
+  stream.set_readable(() => ({
+    __done: false,
+    // this function will be assigned by the SheetJS stream methods
+    _read: function() { this.__done = true; },
+    // this function is called by the stream methods
+    push: function(d: any) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
+    resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
+  }));
+  return stream.to_csv(ws, opts) as Resumable;
+}
+
+/* one encoder is reused for every row */
+const encoder = new TextEncoder();
+
+/* Callback invoked on each row (string) and at the end (null) */
+const csv_cb = (d:string|null) => {
+  if(d == null) return;
+  /* The strings include line endings, so raw write ops should be used */
+  Deno.stdout.write(encoder.encode(d));
+};
+
+/* Fetch https://sheetjs.com/pres.numbers, parse, and get first worksheet */
+const ab = await (await fetch("https://sheetjs.com/pres.numbers")).arrayBuffer();
+const wb = read(ab, { dense: true });
+const ws = wb.Sheets[wb.SheetNames[0]];
+
+/* Create and start CSV stream */
+sheet_to_csv_cb(ws, csv_cb).resume();
+```
diff --git a/docz/docs/03-demos/19-bundler.md b/docz/docs/03-demos/19-bundler.md
index 83fefa5..ab3f5a6 100644
--- a/docz/docs/03-demos/19-bundler.md
+++ b/docz/docs/03-demos/19-bundler.md
@@ -1061,8 +1061,8 @@ Access http://localhost:8080 in your web browser.
:::note
-The [Vite section of the Content demo](/docs/demos/content#vitejs) covers SheetJS-powered
-asset loaders, suitable for static sites pulling data from fixed spreadsheets.
+The [Vite section of the Content demo](/docs/demos/content#vitejs) covers asset
+loaders. They are ideal for static sites pulling data from sheets at build time.
:::
diff --git a/docz/docs/03-demos/33-localfile.md b/docz/docs/03-demos/33-localfile.md
index 1a4759c..1915664 100644
--- a/docz/docs/03-demos/33-localfile.md
+++ b/docz/docs/03-demos/33-localfile.md
@@ -76,8 +76,7 @@ self.addEventListener('message', (e) => {
IE10 Binary Strings (click to show)
-In IE10, binary strings are more performant than `ArrayBuffer`. `XLSX.read`
-supports binary strings with `type: "binary"`:
+`XLSX.read` supports binary strings with `type: "binary"`:
```js
// usage: file_bs_to_wb(file, function(wb) { /* wb is a workbook object */ });
diff --git a/docz/docs/06-solutions/05-output.md b/docz/docs/06-solutions/05-output.md
index 6106bd2..260d80f 100644
--- a/docz/docs/06-solutions/05-output.md
+++ b/docz/docs/06-solutions/05-output.md
@@ -840,103 +840,6 @@ Readable Stream.
- `XLSX.stream.to_html` is the streaming version of `XLSX.utils.sheet_to_html`.
- `XLSX.stream.to_json` is the streaming version of `XLSX.utils.sheet_to_json`.
-
-
-
-:::note
-
-In a CommonJS context, NodeJS Streams and `fs` immediately work with SheetJS:
-
-```js
-const XLSX = require("xlsx"); // "just works"
-```
-
-In NodeJS ESM, the dependency must be loaded manually:
-
-```js
-import * as XLSX from 'xlsx';
-import { Readable } from 'stream';
-
-XLSX.stream.set_readable(Readable); // manually load stream helpers
-```
-
-Additionally, for file-related operations in NodeJS ESM, `fs` must be loaded:
-
-```js
-import * as XLSX from 'xlsx';
-import * as fs from 'fs';
-
-XLSX.set_fs(fs); // manually load fs helpers
-```
-
-**It is strongly encouraged to use CommonJS in NodeJS whenever possible.**
-
-:::
-
-This example reads a worksheet passed as an argument to the script, pulls the
-first worksheet, converts to CSV and writes to `out.csv`:
-
-```js
-const workbook = XLSX.readFile(process.argv[2]);
-const worksheet = workbook.Sheets[workbook.SheetNames[0]];
-// highlight-next-line
-const stream = XLSX.stream.to_csv(worksheet);
-
-const output_file_name = "out.csv";
-// highlight-next-line
-stream.pipe(fs.createWriteStream(output_file_name));
-```
-
-`stream.to_json` uses Object-mode streams. A `Transform` stream can be used to
-generate a normal stream for streaming to a file or the screen:
-
-```js
-/* to_json returns an object-mode stream */
-// highlight-next-line
-var stream = XLSX.stream.to_json(worksheet, {raw:true});
-
-/* this Transform stream converts JS objects to text and prints to screen */
-var conv = new Transform({writableObjectMode:true});
-conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); };
-conv.pipe(process.stdout);
-
-// highlight-next-line
-stream.pipe(conv);
-```
-
-
-
-
-Deno does not support NodeJS streams in normal execution, so a wrapper is used.
-This demo converts a worksheet to CSV and prints each row to the screen:
-
-```ts
-// @deno-types="https://cdn.sheetjs.com/xlsx-latest/package/types/index.d.ts"
-import {utils, stream, set_cptable} from 'https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs';
-
-/* `Readable` will be compatible with how SheetJS uses `stream.Readable` */
-function NodeReadableCB(cb:(d:any)=>void) {
- var rd = {
- __done: false,
- _read: function() {},
- push: function(d: any) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
- resume: function pump() {for(var i = 0; i < 10000 && !this.__done; ++i) rd._read(); if(!rd.__done) setTimeout(pump, 0); }
- };
- return rd;
-}
-function NodeReadable(rd: any) { return function() { return rd; }; }
-/* The callback gets each CSV row. It will be `null` when the stream is drained */
-const rt = NodeReadableCB((d: any) => { if(d != null) console.log(d); });
-const Readable = NodeReadable(rt);
-stream.set_readable(Readable);
-
-/* wire up and start the stream */
-const rd = stream.to_csv(worksheet);
-rd.resume();
-```
-
-
-
-
+Examples are included in ["Large Datasets"](/docs/demos/stream#streaming-write).
pipes write streams to nodejs response.
diff --git a/docz/docs/07-csf/07-features/01-formulae.md b/docz/docs/07-csf/07-features/01-formulae.md
index a50d5dd..bf4fed7 100644
--- a/docz/docs/07-csf/07-features/01-formulae.md
+++ b/docz/docs/07-csf/07-features/01-formulae.md
@@ -599,7 +599,7 @@ Z.TEST
In some cases, seemingly valid formulae may be rejected by spreadsheet software.
-`EVALUATE` unprefixed function is supported in WPS Office formulae. It is not
-valid in a cell formula in Excel. It can be used in an Excel defined name when
-exporting to XLSM format but not XLSX. This is a limitation of Excel. Since WPS
-Office accepts files with `EVALUATE`, the writer does not warn or throw errors.
\ No newline at end of file
+`EVALUATE` is a supported function in WPS Office. It is not valid in a cell
+formula in Excel. It can be used in an Excel defined name when exporting to XLSM
+format but not XLSX. This is a limitation of Excel. Since WPS Office accepts
+files with `EVALUATE`, the writer does not warn or throw errors.
\ No newline at end of file
diff --git a/docz/docs/08-api/05-parse-options.md b/docz/docs/08-api/05-parse-options.md
index c2e3332..2c68d17 100644
--- a/docz/docs/08-api/05-parse-options.md
+++ b/docz/docs/08-api/05-parse-options.md
@@ -70,7 +70,7 @@ The read functions accept an options argument:
errors on single worksheets, allowing you to read from the worksheets that do
parse properly. Setting `WTF:true` forces those errors to be thrown.
- By default, "sparse" mode worksheets are generated. Individual cells are
- accessed by indexing the worksheet object with an A1-style address. "dense"
+ accessed by indexing the worksheet object with an A1-Style address. "dense"
worksheets store cells in an array of arrays at `sheet["!data"]`.
### Input Type
diff --git a/docz/docs/09-miscellany/02-errors.md b/docz/docs/09-miscellany/02-errors.md
index c268023..9570a49 100644
--- a/docz/docs/09-miscellany/02-errors.md
+++ b/docz/docs/09-miscellany/02-errors.md
@@ -37,8 +37,8 @@ manifest with error messages such as `Invalid string length`.
There are memory bottlenecks associated with string addresses. A number of bugs
have been reported to the V8 and Chromium projects on this subject. While those
-bugs are being resolved, for sheets containing >100K rows, dense mode worksheets
-should be used.
+bugs are being resolved, for sheets containing hundreds of thousands of rows,
+dense mode worksheets should be used.
diff --git a/docz/package.json b/docz/package.json
index 8a23db2..7f230a7 100644
--- a/docz/package.json
+++ b/docz/package.json
@@ -15,11 +15,11 @@
},
"dependencies": {
"@cmfcmf/docusaurus-search-local": "0.11.0",
- "@docusaurus/core": "2.1.0",
- "@docusaurus/plugin-client-redirects": "2.1.0",
- "@docusaurus/preset-classic": "2.1.0",
- "@docusaurus/theme-common": "2.1.0",
- "@docusaurus/theme-live-codeblock": "2.1.0",
+ "@docusaurus/core": "2.2.0",
+ "@docusaurus/plugin-client-redirects": "2.2.0",
+ "@docusaurus/preset-classic": "2.2.0",
+ "@docusaurus/theme-common": "2.2.0",
+ "@docusaurus/theme-live-codeblock": "2.2.0",
"@mdx-js/react": "1.6.22",
"clsx": "1.2.1",
"prism-react-renderer": "1.3.5",
@@ -28,7 +28,7 @@
"xlsx": "https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz"
},
"devDependencies": {
- "@docusaurus/module-type-aliases": "2.1.0"
+ "@docusaurus/module-type-aliases": "2.2.0"
},
"browserslist": {
"production": [