docs.sheetjs.com/docz/docs/03-demos/07-worker.md

389 lines
12 KiB
Markdown
Raw Normal View History

2022-10-19 10:05:59 +00:00
---
title: Web Workers
---
Parsing and writing large spreadsheets takes time. During the process, if the
SheetJS library is running in the web browser, the website may freeze.
Workers provide a way to off-load the hard work so that the website does not
freeze during processing.
:::note Browser Compatibility
IE10+ and modern browsers support basic Web Workers. Some APIs like `fetch` were
added later. Feature testing is highly recommended.
:::
## Installation
2022-10-30 05:45:37 +00:00
In all cases, `importScripts` can load the [Standalone scripts](/docs/getting-started/installation/standalone):
2022-10-19 10:05:59 +00:00
```js
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
```
For production use, it is highly encouraged to download and host the script.
2022-10-21 00:10:10 +00:00
<details><summary><b>ECMAScript Module Support</b> (click to show)</summary>
:::note Browser Compatibility
ESM is supported in Web Workers in the Chromium family of browsers (including
2022-10-31 00:58:49 +00:00
Chrome and Edge) as well as in browsers powered by WebKit (including Safari).
2022-10-21 00:10:10 +00:00
In browsers that lack ESM support in Web Workers, such as older releases of Firefox, `importScripts` should be used.
:::
```js
import * as XLSX from "https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs";
```
When using modules, the script must be served with the correct MIME type and the
Worker constructor must set the `type` option:
```js
/* source text of the module worker (the trailing backslash drops the first newline) */
const worker_code = `\
/* load standalone script from CDN */
import * as XLSX from "https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs";
// ... do something with XLSX here ...
`;

/* step 1: wrap the source text in a Blob tagged with a JavaScript MIME type */
const worker_blob = new Blob(
  [ worker_code ],
  // highlight-next-line
  { type: "text/javascript" } // second argument to the Blob constructor
);

/* step 2: start the worker from an object URL, flagged as a module script */
const worker = new Worker(
  URL.createObjectURL(worker_blob),
  // highlight-next-line
  {type: "module"} // second argument to Worker constructor
);
```
</details>
2022-10-19 10:05:59 +00:00
## Downloading a Remote File
2022-10-21 00:10:10 +00:00
:::note fetch in Web Workers
2022-10-19 10:05:59 +00:00
`fetch` was enabled in Web Workers in Chrome 42 and Safari 10.3.
:::
Typically the Web Worker performs the `fetch` operation, processes the workbook,
and sends a final result to the main browser context for processing.
In the following example, the script:
- downloads <https://sheetjs.com/pres.numbers> in a Web Worker
- loads the SheetJS library and parses the file in the Worker
- generates an HTML string of the first table in the Worker
- sends the string to the main browser context
- adds the HTML to the page in the main browser context
```jsx live
function SheetJSFetchDLWorker() {
2022-10-21 00:10:10 +00:00
const [__html, setHTML] = React.useState("");
2022-10-19 10:05:59 +00:00
return ( <>
<button onClick={() => {
/* this mantra embeds the worker source in the function */
const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
try {
/* Fetch file */
const res = await fetch("https://sheetjs.com/pres.numbers");
const ab = await res.arrayBuffer();
/* Parse file */
2022-10-31 00:58:49 +00:00
const wb = XLSX.read(ab, {dense: true});
2022-10-19 10:05:59 +00:00
const ws = wb.Sheets[wb.SheetNames[0]];
/* Generate HTML */
const html = XLSX.utils.sheet_to_html(ws);
/* Reply with result */
2022-10-21 00:10:10 +00:00
postMessage({ html });
2022-10-19 10:05:59 +00:00
} catch(e) {
/* Pass the error message back */
postMessage({html: String(e.message || e).bold() });
}
}, false);
`])));
/* when the worker sends back the HTML, add it to the DOM */
worker.onmessage = function(e) { setHTML(e.data.html); };
/* post a message to the worker */
worker.postMessage({});
}}><b>Click to Start</b></button>
2022-10-21 00:10:10 +00:00
<div dangerouslySetInnerHTML={{ __html }}/>
2022-10-19 10:05:59 +00:00
</> );
}
```
## Creating a Local File
2022-10-21 00:10:10 +00:00
:::caution Writing files from Web Workers
2022-10-19 10:05:59 +00:00
`XLSX.writeFile` will not work in Web Workers! Raw file data can be passed from
the Web Worker to the main browser context for downloading.
:::
In the following example, the script:
- generates a workbook object in the Web Worker
- generates an XLSB file using `XLSX.write` in the Web Worker
2022-10-19 21:12:12 +00:00
- generates an object URL in the Web Worker
- sends the object URL to the main browser context
2022-10-19 10:05:59 +00:00
- performs a download action in the main browser context
```jsx live
function SheetJSWriteFileWorker() {
2022-10-21 00:10:10 +00:00
const [__html, setHTML] = React.useState("");
2022-10-19 10:05:59 +00:00
return ( <>
<button onClick={() => { setHTML("");
/* this mantra embeds the worker source in the function */
const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
try {
/* Create a new Workbook (in this case, from a CSV string) */
const csv = \`\
SheetJS,in,Web,Workers
வணக்கம்,สวัสดี,你好,가지마
1,2,3,4\`;
2022-10-31 00:58:49 +00:00
const wb = XLSX.read(csv, { type: "string", dense: true });
2022-10-19 10:05:59 +00:00
2022-10-19 21:12:12 +00:00
/* Write XLSB data (Uint8Array) */
2022-10-19 10:05:59 +00:00
const u8 = XLSX.write(wb, { bookType: "xlsb", type: "buffer" });
2022-10-19 21:12:12 +00:00
/* Generate URL */
const url = URL.createObjectURL(new Blob([u8]));
2022-10-19 10:05:59 +00:00
/* Reply with result */
2022-10-19 21:12:12 +00:00
postMessage({ url });
2022-10-19 10:05:59 +00:00
} catch(e) {
/* Pass the error message back */
postMessage({error: String(e.message || e).bold() });
}
}, false);
`])));
/* when the worker sends back the data, create a download */
worker.onmessage = function(e) {
if(e.data.error) return setHTML(e.data.error);
/* this mantra is the standard HTML5 download attribute technique */
const a = document.createElement("a");
a.download = "SheetJSWriteFileWorker.xlsb";
2022-10-19 21:12:12 +00:00
a.href = e.data.url;
2022-10-19 10:05:59 +00:00
document.body.appendChild(a);
a.click();
document.body.removeChild(a);
};
/* post a message to the worker */
worker.postMessage({});
}}><b>Click to Start</b></button>
2022-10-21 00:10:10 +00:00
<div dangerouslySetInnerHTML={{ __html }}/>
2022-10-19 10:05:59 +00:00
</> );
}
```
## User-Submitted File
2022-10-21 00:10:10 +00:00
:::note FileReaderSync
2022-10-19 10:05:59 +00:00
Typically `FileReader` is used in the main browser context. In Web Workers, the
synchronous version `FileReaderSync` is more efficient.
:::
2022-10-21 00:10:10 +00:00
In the following example, when a file is dropped over the DIV or when the INPUT
element is used to select a file, the script:
2022-10-19 10:05:59 +00:00
- sends the `File` object to the Web Worker
- loads the SheetJS library and parses the file in the Worker
- generates an HTML string of the first table in the Worker
- sends the string to the main browser context
- adds the HTML to the page in the main browser context
```jsx live
function SheetJSDragDropWorker() {
2022-10-21 00:10:10 +00:00
const [__html, setHTML] = React.useState("");
2022-10-19 21:12:12 +00:00
/* suppress default behavior for drag and drop */
2022-10-19 10:05:59 +00:00
function suppress(e) { e.stopPropagation(); e.preventDefault(); }
2022-10-21 00:10:10 +00:00
/* this worker is shared between drag-drop and file input element */
const worker = new Worker(URL.createObjectURL(new Blob([`\
2022-10-19 10:05:59 +00:00
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
/* this callback will run once the main context sends a message */
2022-10-19 21:12:12 +00:00
self.addEventListener('message', (e) => {
2022-10-19 10:05:59 +00:00
try {
/* Read file data */
const ab = new FileReaderSync().readAsArrayBuffer(e.data.file);
/* Parse file */
2022-10-31 00:58:49 +00:00
const wb = XLSX.read(ab, {dense: true});
2022-10-19 10:05:59 +00:00
const ws = wb.Sheets[wb.SheetNames[0]];
/* Generate HTML */
const html = XLSX.utils.sheet_to_html(ws);
/* Reply with result */
2022-10-21 00:10:10 +00:00
postMessage({ html });
2022-10-19 10:05:59 +00:00
} catch(e) {
/* Pass the error message back */
postMessage({html: String(e.message || e).bold() });
}
}, false);
2022-10-21 00:10:10 +00:00
`])));
/* when the worker sends back the HTML, add it to the DOM */
worker.onmessage = function(e) { setHTML(e.data.html); };
return ( <>
<div onDragOver={suppress} onDragEnter={suppress} onDrop={(e) => {
suppress(e);
2022-10-19 10:05:59 +00:00
/* post a message with the first File to the worker */
worker.postMessage({ file: e.dataTransfer.files[0] });
2022-10-21 00:10:10 +00:00
}}>Drag a file to this DIV to process! (or use the file input)</div>
<input type="file" onChange={(e) => {
suppress(e);
/* post a message with the first File to the worker */
worker.postMessage({ file: e.target.files[0] });
}}/>
<div dangerouslySetInnerHTML={{ __html }}/>
2022-10-19 10:05:59 +00:00
</> );
}
2022-10-31 00:58:49 +00:00
```
## Streaming Write
A more general discussion, including row-oriented processing demos, is included
in the ["Large Datasets"](/docs/demos/stream#browser) demo.
#### File System Access API
:::note
At the time of writing, the File System Access API is only available in Chromium
and Chromium-based browsers like Chrome and Edge.
:::
The following live demo fetches and parses a file in a Web Worker. The script:
- prompts user to save file (`window.showSaveFilePicker` in the main thread)
- passes the URL and the file object to the Web Worker
- loads the SheetJS library in the Web Worker
- fetches the requested URL and parses the workbook from the Worker
- creates a Writable Stream from the file object
- uses `XLSX.stream.to_csv` to generate CSV rows of the first worksheet
+ on each row, the data is written to the file stream
+ every 100th row, a progress message is sent back to the main thread
+ at the end, a completion message is sent back to the main thread
The demo has a URL input box. Feel free to change the URL. For example,
`https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls`
is an XLS file over 50 MB.
`https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx`
is an XLSX file with 300000 rows (approximately 20 MB).
```jsx live
function SheetJSFetchCSVStreamFile() {
const [state, setState] = React.useState("");
const [cnt, setCnt] = React.useState(0);
const [url, setUrl] = React.useState("https://oss.sheetjs.com/test_files/large_strings.xlsx");
return ( <>
<b>URL: </b><input type="text" value={url} onChange={(e) => setUrl(e.target.value)} size="80"/>
<button onClick={async() => {
/* this mantra embeds the worker source in the function */
const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
XLSX.stream.set_readable(() => ({
__done: false,
// this function will be assigned by the SheetJS stream methods
_read: function() { this.__done = true; },
// this function is called by the stream methods
push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
}));
return XLSX.stream.to_csv(ws, opts);
}
/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
try {
postMessage({state: "fetching"});
/* Fetch file */
const res = await fetch(e.data.url);
const ab = await res.arrayBuffer();
/* Parse file */
let len = ab.byteLength;
if(len < 1024) len += " bytes"; else { len /= 1024;
if(len < 1024) len += " KB"; else { len /= 1024; len += " MB"; }
}
postMessage({state: "parsing"});
const wb = XLSX.read(ab, {dense: true});
const ws = wb.Sheets[wb.SheetNames[0]];
/* Generate CSV rows */
postMessage({state: "begin"});
const wstream = await e.data.wFile.createWritable();
let rows = 0;
const strm = sheet_to_csv_cb(ws, async(csv) => {
if(csv != null) {
await wstream.write(csv);
if(!(++rows % 100)) postMessage({ state: "processing", rows });
} else {
await wstream.close();
postMessage({state: "done", rows });
}
});
strm.resume();
} catch(e) {
/* Pass the error message back */
postMessage({error: String(e.message || e) });
}
}, false);
`])));
/* when the worker sends back data, add it to the DOM */
worker.onmessage = function(e) {
if(e.data.error) return setHTML(e.data.error);
else if(e.data.state) {
setState(e.data.state);
if(e.data.rows) setCnt(e.data.rows);
}
};
setCnt(0); setState("");
/* Show picker and get handle to file */
const wFile = await window.showSaveFilePicker({
suggestedName: "SheetJSStream.csv",
types: [ { description: 'csv', accept: { 'text/csv': ['.csv'] } } ]
});
/* post a message to the worker with the URL to fetch */
if(wFile) worker.postMessage({url, wFile});
}}><b>Click to Start</b></button>
<pre>State: <b>{state}</b><br/>Number of rows: <b>{cnt}</b></pre>
</> );
}
```