From f7d9712b245668e3f7026348a0c039e798cf28f6 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Mon, 31 Oct 2022 03:26:13 -0400 Subject: [PATCH] slow web apis --- docz/docs/03-demos/07-worker.md | 137 ++++++++++++++++++++++++-------- 1 file changed, 106 insertions(+), 31 deletions(-) diff --git a/docz/docs/03-demos/07-worker.md b/docz/docs/03-demos/07-worker.md index 425c0ff..0a23022 100644 --- a/docz/docs/03-demos/07-worker.md +++ b/docz/docs/03-demos/07-worker.md @@ -15,9 +15,55 @@ added later. Feature testing is highly recommended. ::: +:::info Inline Workers + +Due to limitations of the live codeblocks, all of the workers in this section +are in-line. The code is embedded in template literals. For production sites, +typically workers are written in separate JS files. + +
Example (click to show) + +For example, an in-line worker like + +```js + const worker = new Worker(URL.createObjectURL(new Blob([`\ +/* load standalone script from CDN */ +importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js"); + +/* this callback will run once the main context sends a message */ +self.addEventListener('message', (e) => { + /* Pass the version string back */ + postMessage({ version: XLSX.version }); +}, false); + `]))); +``` + +would typically be stored in a separate JS file like "worker.js": + +```js title="worker.js" +/* load standalone script from CDN */ +importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js"); + +/* this callback will run once the main context sends a message */ +self.addEventListener('message', (e) => { + /* Pass the version string back */ + postMessage({ version: XLSX.version }); +}, false); +``` + +and the main script would pass a URL: + +```js + const worker = new Worker("./worker.js"); +``` + +
+ +::: + ## Installation -In all cases, `importScripts` can load the [Standalone scripts](/docs/getting-started/installation/standalone) +In all cases, `importScripts` in a Worker can load the [Standalone scripts](/docs/getting-started/installation/standalone) ```js importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js"); @@ -280,6 +326,16 @@ and Chromium-based browsers like Chrome and Edge. ::: +:::caution Performance + +In local testing, committing each CSV row as it is generated is significantly +slower than accumulating and writing once at the end. + +When the target CSV is known to be less than 500 MB, it is preferable to batch. +Strings larger than 500 MB may hit browser length limits. + +::: + The following live demo fetches and parses a file in a Web Worker. The script: - prompts user to save file (`window.showSaveFilePicker` in the main thread) @@ -288,26 +344,33 @@ The following live demo fetches and parses a file in a Web Worker. The script: - fetches the requested URL and parses the workbook from the Worker - creates a Writable Stream from the file object. - uses `XLSX.stream.to_csv` to generate CSV rows of the first worksheet - + on each row, the data is written to the file stream - + every 10th row, a progress message is sent back to the main thread + + every 100th row, a progress message is sent back to the main thread + at the end, a completion message is sent back to the main thread -The demo has a URL input box. Feel free to change the URL. For example, +The demo has a checkbox. If it is not checked (default), the Worker will +collect each CSV row and write once at the end. If it is checked, the Worker +will try to commit each row as it is generated. + +The demo also has a URL input box. Feel free to change the URL. For example: `https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls` -is an XLS file over 50 MB +is an XLS file over 50 MB. The generated CSV file is about 55 MB. 
`https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx` -is an XLSX file with 300000 rows (approximately 20 MB) +is an XLSX file with 300000 rows (approximately 20 MB) yielding a CSV of 10 MB. ```jsx live function SheetJSFetchCSVStreamFile() { const [state, setState] = React.useState(""); + const [__html, setHTML] = React.useState(""); const [cnt, setCnt] = React.useState(0); + const [hz, setHz] = React.useState(0); const [url, setUrl] = React.useState("https://oss.sheetjs.com/test_files/large_strings.xlsx"); + const ref = React.useRef(null); return ( <> - URL: setUrl(e.target.value)} size="80"/> + URL: setUrl(e.target.value)} size="80"/>
+ Commit each row:
-
State: {state}
Number of rows: {cnt}
+
State: {state}
Count: {cnt} ({hz|0} Hz)
+
    );
 }
 ```