slow web apis

This commit is contained in:
SheetJS 2022-10-31 03:26:13 -04:00
parent a77f991b86
commit f7d9712b24

@ -15,9 +15,55 @@ added later. Feature testing is highly recommended.
:::
:::info Inline Workers
Due to limitations of the live codeblocks, all of the workers in this section
are in-line. The code is embedded in template literals. For production sites,
workers are typically written in separate JS files.
<details><summary><b>Example</b> (click to show)</summary>
For example, an in-line worker like
```js
const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
/* this callback will run once the main context sends a message */
self.addEventListener('message', (e) => {
/* Pass the version string back */
postMessage({ version: XLSX.version });
}, false);
`])));
```
would typically be stored in a separate JS file like "worker.js":
```js title="worker.js"
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
/* this callback will run once the main context sends a message */
self.addEventListener('message', (e) => {
/* Pass the version string back */
postMessage({ version: XLSX.version });
}, false);
```
and the main script would pass a URL:
```js
const worker = new Worker("./worker.js");
```
</details>
:::
## Installation
In all cases, `importScripts` can load the [Standalone scripts](/docs/getting-started/installation/standalone)
In all cases, `importScripts` in a Worker can load the [Standalone scripts](/docs/getting-started/installation/standalone):
```js
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
@ -280,6 +326,16 @@ and Chromium-based browsers like Chrome and Edge.
:::
:::caution Performance
In local testing, committing each CSV row as it is generated is significantly
slower than accumulating and writing once at the end.
When the target CSV is known to be less than 500 MB, it is preferable to batch
the rows and write once at the end.
Strings larger than 500 MB may hit browser length limits.
:::
The following live demo fetches and parses a file in a Web Worker. The script:
- prompts user to save file (`window.showSaveFilePicker` in the main thread)
@ -288,26 +344,33 @@ The following live demo fetches and parses a file in a Web Worker. The script:
- fetches the requested URL and parses the workbook from the Worker
- creates a Writable Stream from the file object.
- uses `XLSX.stream.to_csv` to generate CSV rows of the first worksheet
+ on each row, the data is either written to the file stream or added to a buffer
+ every 10th row, a progress message is sent back to the main thread
+ every 100th row, a progress message is sent back to the main thread
+ at the end, a completion message is sent back to the main thread
The demo has a URL input box. Feel free to change the URL. For example,
The demo has a checkbox. If it is not checked (default), the Worker will
collect each CSV row and write once at the end. If it is checked, the Worker
will try to commit each row as it is generated.
The demo also has a URL input box. Feel free to change the URL. For example:
`https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls`
is an XLS file over 50 MB
is an XLS file over 50 MB. The generated CSV file is about 55 MB.
`https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx`
is an XLSX file with 300000 rows (approximately 20 MB)
is an XLSX file with 300000 rows (approximately 20 MB) yielding a CSV of 10 MB.
```jsx live
function SheetJSFetchCSVStreamFile() {
const [state, setState] = React.useState("");
const [__html, setHTML] = React.useState("");
const [cnt, setCnt] = React.useState(0);
const [hz, setHz] = React.useState(0);
const [url, setUrl] = React.useState("https://oss.sheetjs.com/test_files/large_strings.xlsx");
const ref = React.useRef(null);
return ( <>
<b>URL: </b><input type="text" value={url} onChange={(e) => setUrl(e.target.value)} size="80"/>
<b>URL: </b><input type="text" value={url} onChange={(e) => setUrl(e.target.value)} size="80"/><br/>
<b>Commit each row: </b><input type="checkbox" ref={ref}/><br/>
<button onClick={async() => {
/* this mantra embeds the worker source in the function */
const worker = new Worker(URL.createObjectURL(new Blob([`\
@ -320,8 +383,12 @@ function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
// this function will be assigned by the SheetJS stream methods
_read: function() { this.__done = true; },
// this function is called by the stream methods
push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
push: function(d) {
if(!this.__done) cb(d);
if(d == null) this.__done = true; },
resume: function pump() {
for(var i = 0; i < batch && !this.__done; ++i) this._read();
if(!this.__done) setTimeout(pump.bind(this), 0); }
}));
return XLSX.stream.to_csv(ws, opts);
}
@ -329,31 +396,34 @@ function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
try {
postMessage({state: "fetching"});
/* Fetch file */
postMessage({state: "fetching"});
var t = Date.now();
const res = await fetch(e.data.url);
const ab = await res.arrayBuffer();
postMessage({time: "fetch", ts: Date.now() - t});
/* Parse file */
let len = ab.byteLength;
if(len < 1024) len += " bytes"; else { len /= 1024;
if(len < 1024) len += " KB"; else { len /= 1024; len += " MB"; }
}
postMessage({state: "parsing"});
t = Date.now();
const wb = XLSX.read(ab, {dense: true});
const ws = wb.Sheets[wb.SheetNames[0]];
postMessage({time: "parse", ts: Date.now() - t});
/* Generate CSV rows */
postMessage({state: "begin"});
t = Date.now();
const wstream = await e.data.wFile.createWritable();
let rows = 0;
let c = 0, buf = "", each = !!e.data.each;
const strm = sheet_to_csv_cb(ws, async(csv) => {
if(csv != null) {
await wstream.write(csv);
if(!(++rows % 100)) postMessage({ state: "processing", rows });
if(each) await wstream.write(csv);
else buf += csv;
if(!(++c % 100)) postMessage({ state: "writing", c, ts: Date.now() - t });
} else {
if(buf) await wstream.write(buf);
await wstream.close();
postMessage({state: "done", rows });
postMessage({state: "done", c, ts: Date.now() - t });
}
});
strm.resume();
@ -364,25 +434,30 @@ self.addEventListener('message', async(e) => {
}, false);
`])));
/* when the worker sends back data, add it to the DOM */
const log = (s, t) => setHTML(h => h + `${s}: ${(t/1000).toFixed(3).padStart(8)} sec\n`);
worker.onmessage = function(e) {
if(e.data.error) return setHTML(e.data.error);
if(e.data.error) return setState(`Processing Error: ${e.data.error}`);
else if(e.data.state) {
setState(e.data.state);
if(e.data.rows) setCnt(e.data.rows);
}
if(e.data.c) setCnt(e.data.c);
if(e.data.ts) setHz((e.data.c || cnt) * 1000 / e.data.ts);
if(e.data.state == "done") log("write", e.data.ts);
} else if(e.data.time) log(e.data.time, e.data.ts);
};
setCnt(0); setState("");
setCnt(0); setHz(0); setState(""); setHTML("");
try {
/* Show picker and get handle to file */
const wFile = await window.showSaveFilePicker({
suggestedName: "SheetJSStream.csv",
types: [ { description: 'csv', accept: { 'text/csv': ['.csv'] } } ]
});
/* Show picker and get handle to file */
const wFile = await window.showSaveFilePicker({
suggestedName: "SheetJSStream.csv",
types: [ { description: 'csv', accept: { 'text/csv': ['.csv'] } } ]
});
/* post a message to the worker with the URL to fetch */
if(wFile) worker.postMessage({url, wFile});
/* post a message to the worker with the URL to fetch */
worker.postMessage({url, wFile, each: !!ref.current.checked});
} catch(e) { setState(`Selection Error: ${e && e.message || e}`); }
}}><b>Click to Start</b></button>
<pre>State: <b>{state}</b><br/>Number of rows: <b>{cnt}</b></pre>
<pre>State: <b>{state}</b><br/>Count: <b>{cnt}</b> <b>({hz|0} Hz)</b></pre>
<pre dangerouslySetInnerHTML={{__html}}/>
</> );
}
```