forked from sheetjs/docs.sheetjs.com
slow web apis
This commit is contained in:
parent
a77f991b86
commit
f7d9712b24
@ -15,9 +15,55 @@ added later. Feature testing is highly recommended.
|
||||
|
||||
:::
|
||||
|
||||
:::info Inline Workers
|
||||
|
||||
Due to limitations of the live codeblocks, all of the workers in this section
|
||||
are in-line. The code is embedded in template literals. For production sites,
|
||||
workers are typically written in separate JS files.
|
||||
|
||||
<details><summary><b>Example</b> (click to show)</summary>
|
||||
|
||||
For example, an in-line worker like
|
||||
|
||||
```js
|
||||
const worker = new Worker(URL.createObjectURL(new Blob([`\
|
||||
/* load standalone script from CDN */
|
||||
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
|
||||
|
||||
/* this callback will run once the main context sends a message */
|
||||
self.addEventListener('message', (e) => {
|
||||
/* Pass the version string back */
|
||||
postMessage({ version: XLSX.version });
|
||||
}, false);
|
||||
`])));
|
||||
```
|
||||
|
||||
would typically be stored in a separate JS file like "worker.js":
|
||||
|
||||
```js title="worker.js"
|
||||
/* load standalone script from CDN */
|
||||
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
|
||||
|
||||
/* this callback will run once the main context sends a message */
|
||||
self.addEventListener('message', (e) => {
|
||||
/* Pass the version string back */
|
||||
postMessage({ version: XLSX.version });
|
||||
}, false);
|
||||
```
|
||||
|
||||
and the main script would pass a URL:
|
||||
|
||||
```js
|
||||
const worker = new Worker("./worker.js");
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
:::
|
||||
|
||||
## Installation
|
||||
|
||||
In all cases, `importScripts` can load the [Standalone scripts](/docs/getting-started/installation/standalone)
|
||||
In all cases, `importScripts` in a Worker can load the [Standalone scripts](/docs/getting-started/installation/standalone):
|
||||
|
||||
```js
|
||||
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
|
||||
@ -280,6 +326,16 @@ and Chromium-based browsers like Chrome and Edge.
|
||||
|
||||
:::
|
||||
|
||||
:::caution Performance
|
||||
|
||||
In local testing, committing each CSV row as it is generated is significantly
|
||||
slower than accumulating and writing once at the end.
|
||||
|
||||
When the target CSV is known to be less than 500MB, it is preferable to batch.
|
||||
Strings larger than 500MB may hit browser string length limits.
|
||||
|
||||
:::
|
||||
|
||||
The following live demo fetches and parses a file in a Web Worker. The script:
|
||||
|
||||
- prompts user to save file (`window.showSaveFilePicker` in the main thread)
|
||||
@ -288,26 +344,33 @@ The following live demo fetches and parses a file in a Web Worker. The script:
|
||||
- fetches the requested URL and parses the workbook from the Worker
|
||||
- creates a Writable Stream from the file object.
|
||||
- uses `XLSX.stream.to_csv` to generate CSV rows of the first worksheet
|
||||
+ on each row, the data is written to the file stream
|
||||
+ every 10th row, a progress message is sent back to the main thread
|
||||
+ every 100th row, a progress message is sent back to the main thread
|
||||
+ at the end, a completion message is sent back to the main thread
|
||||
|
||||
The demo has a URL input box. Feel free to change the URL. For example,
|
||||
The demo has a checkbox. If it is not checked (default), the Worker will
|
||||
collect each CSV row and write once at the end. If it is checked, the Worker
|
||||
will try to commit each row as it is generated.
|
||||
|
||||
The demo also has a URL input box. Feel free to change the URL. For example:
|
||||
|
||||
`https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls`
|
||||
is an XLS file over 50 MB
|
||||
is an XLS file over 50 MB. The generated CSV file is about 55 MB.
|
||||
|
||||
`https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx`
|
||||
is an XLSX file with 300000 rows (approximately 20 MB)
|
||||
is an XLSX file with 300000 rows (approximately 20 MB) yielding a CSV of 10 MB.
|
||||
|
||||
```jsx live
|
||||
function SheetJSFetchCSVStreamFile() {
|
||||
const [state, setState] = React.useState("");
|
||||
const [__html, setHTML] = React.useState("");
|
||||
const [cnt, setCnt] = React.useState(0);
|
||||
const [hz, setHz] = React.useState(0);
|
||||
const [url, setUrl] = React.useState("https://oss.sheetjs.com/test_files/large_strings.xlsx");
|
||||
const ref = React.useRef(null);
|
||||
|
||||
return ( <>
|
||||
<b>URL: </b><input type="text" value={url} onChange={(e) => setUrl(e.target.value)} size="80"/>
|
||||
<b>URL: </b><input type="text" value={url} onChange={(e) => setUrl(e.target.value)} size="80"/><br/>
|
||||
<b>Commit each row: </b><input type="checkbox" ref={ref}/><br/>
|
||||
<button onClick={async() => {
|
||||
/* this mantra embeds the worker source in the function */
|
||||
const worker = new Worker(URL.createObjectURL(new Blob([`\
|
||||
@ -320,8 +383,12 @@ function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
|
||||
// this function will be assigned by the SheetJS stream methods
|
||||
_read: function() { this.__done = true; },
|
||||
// this function is called by the stream methods
|
||||
push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
|
||||
resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
|
||||
push: function(d) {
|
||||
if(!this.__done) cb(d);
|
||||
if(d == null) this.__done = true; },
|
||||
resume: function pump() {
|
||||
for(var i = 0; i < batch && !this.__done; ++i) this._read();
|
||||
if(!this.__done) setTimeout(pump.bind(this), 0); }
|
||||
}));
|
||||
return XLSX.stream.to_csv(ws, opts);
|
||||
}
|
||||
@ -329,31 +396,34 @@ function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
|
||||
/* this callback will run once the main context sends a message */
|
||||
self.addEventListener('message', async(e) => {
|
||||
try {
|
||||
postMessage({state: "fetching"});
|
||||
/* Fetch file */
|
||||
postMessage({state: "fetching"});
|
||||
var t = Date.now();
|
||||
const res = await fetch(e.data.url);
|
||||
const ab = await res.arrayBuffer();
|
||||
postMessage({time: "fetch", ts: Date.now() - t});
|
||||
|
||||
/* Parse file */
|
||||
let len = ab.byteLength;
|
||||
if(len < 1024) len += " bytes"; else { len /= 1024;
|
||||
if(len < 1024) len += " KB"; else { len /= 1024; len += " MB"; }
|
||||
}
|
||||
postMessage({state: "parsing"});
|
||||
t = Date.now();
|
||||
const wb = XLSX.read(ab, {dense: true});
|
||||
const ws = wb.Sheets[wb.SheetNames[0]];
|
||||
postMessage({time: "parse", ts: Date.now() - t});
|
||||
|
||||
/* Generate CSV rows */
|
||||
postMessage({state: "begin"});
|
||||
t = Date.now();
|
||||
const wstream = await e.data.wFile.createWritable();
|
||||
let rows = 0;
|
||||
let c = 0, buf = "", each = !!e.data.each;
|
||||
const strm = sheet_to_csv_cb(ws, async(csv) => {
|
||||
if(csv != null) {
|
||||
await wstream.write(csv);
|
||||
if(!(++rows % 100)) postMessage({ state: "processing", rows });
|
||||
if(each) await wstream.write(csv);
|
||||
else buf += csv;
|
||||
if(!(++c % 100)) postMessage({ state: "writing", c, ts: Date.now() - t });
|
||||
} else {
|
||||
if(buf) await wstream.write(buf);
|
||||
await wstream.close();
|
||||
postMessage({state: "done", rows });
|
||||
postMessage({state: "done", c, ts: Date.now() - t });
|
||||
}
|
||||
});
|
||||
strm.resume();
|
||||
@ -364,25 +434,30 @@ self.addEventListener('message', async(e) => {
|
||||
}, false);
|
||||
`])));
|
||||
/* when the worker sends back data, add it to the DOM */
|
||||
const log = (s, t) => setHTML(h => h + `${s}: ${(t/1000).toFixed(3).padStart(8)} sec\n`);
|
||||
worker.onmessage = function(e) {
|
||||
if(e.data.error) return setHTML(e.data.error);
|
||||
if(e.data.error) return setState(`Processing Error: ${e.data.error}`);
|
||||
else if(e.data.state) {
|
||||
setState(e.data.state);
|
||||
if(e.data.rows) setCnt(e.data.rows);
|
||||
}
|
||||
if(e.data.c) setCnt(e.data.c);
|
||||
if(e.data.ts) setHz((e.data.c || cnt) * 1000 / e.data.ts);
|
||||
if(e.data.state == "done") log("write", e.data.ts);
|
||||
} else if(e.data.time) log(e.data.time, e.data.ts);
|
||||
};
|
||||
setCnt(0); setState("");
|
||||
setCnt(0); setHz(0); setState(""); setHTML("");
|
||||
try {
|
||||
/* Show picker and get handle to file */
|
||||
const wFile = await window.showSaveFilePicker({
|
||||
suggestedName: "SheetJSStream.csv",
|
||||
types: [ { description: 'csv', accept: { 'text/csv': ['.csv'] } } ]
|
||||
});
|
||||
|
||||
/* Show picker and get handle to file */
|
||||
const wFile = await window.showSaveFilePicker({
|
||||
suggestedName: "SheetJSStream.csv",
|
||||
types: [ { description: 'csv', accept: { 'text/csv': ['.csv'] } } ]
|
||||
});
|
||||
|
||||
/* post a message to the worker with the URL to fetch */
|
||||
if(wFile) worker.postMessage({url, wFile});
|
||||
/* post a message to the worker with the URL to fetch */
|
||||
worker.postMessage({url, wFile, each: !!ref.current.checked});
|
||||
} catch(e) { setState(`Selection Error: ${e && e.message || e}`); }
|
||||
}}><b>Click to Start</b></button>
|
||||
<pre>State: <b>{state}</b><br/>Number of rows: <b>{cnt}</b></pre>
|
||||
<pre>State: <b>{state}</b><br/>Count: <b>{cnt}</b> <b>({hz|0} Hz)</b></pre>
|
||||
<pre dangerouslySetInnerHTML={{__html}}/>
|
||||
</> );
|
||||
}
|
||||
```
|
||||
|
Loading…
Reference in New Issue
Block a user