2022-10-19 10:05:59 +00:00
|
|
|
---
|
|
|
|
title: Web Workers
|
2023-02-28 11:40:44 +00:00
|
|
|
pagination_prev: demos/extensions/index
|
|
|
|
pagination_next: demos/engines/index
|
|
|
|
sidebar_custom_props:
|
|
|
|
summary: Run large data flows without freezing the browser
|
2022-10-19 10:05:59 +00:00
|
|
|
---
|
|
|
|
|
2023-04-29 11:21:37 +00:00
|
|
|
import current from '/version.js';
|
|
|
|
import CodeBlock from '@theme/CodeBlock';
|
|
|
|
|
2022-10-19 10:05:59 +00:00
|
|
|
Parsing and writing large spreadsheets takes time. During the process, if the
|
|
|
|
SheetJS library is running in the web browser, the website may freeze.
|
|
|
|
|
|
|
|
Workers provide a way to off-load the hard work so that the website does not
|
2023-09-02 09:26:57 +00:00
|
|
|
freeze during processing. The work is still performed locally. No data is sent
|
2022-12-28 11:56:52 +00:00
|
|
|
to a remote server.
|
|
|
|
|
|
|
|
The following diagrams show the normal and Web Worker flows when exporting a
|
|
|
|
dataset. The regions with a red background mark when the browser is frozen.
|
|
|
|
|
2024-04-12 01:04:37 +00:00
|
|
|
<table>
|
|
|
|
<thead><tr><th>Normal Export</th><th>Web Worker Export</th></tr></thead>
|
|
|
|
<tbody><tr><td>
|
2022-12-28 11:56:52 +00:00
|
|
|
|
|
|
|
```mermaid
|
|
|
|
sequenceDiagram
|
|
|
|
autonumber
|
|
|
|
actor User
|
|
|
|
participant Page
|
|
|
|
User->>Page: click button
|
|
|
|
rect rgba(255,0,0,.2)
|
|
|
|
activate Page
|
|
|
|
Note over Page: collect dataset
|
|
|
|
Note over Page: generate workbook
|
|
|
|
Note over Page: create file
|
|
|
|
Note over Page: setup download
|
|
|
|
end
|
|
|
|
Page->>User: download workbook
|
|
|
|
deactivate Page
|
|
|
|
```
|
|
|
|
|
|
|
|
</td><td>
|
|
|
|
|
|
|
|
```mermaid
|
|
|
|
sequenceDiagram
|
|
|
|
autonumber
|
|
|
|
actor User
|
|
|
|
participant Page
|
|
|
|
participant Worker
|
|
|
|
User->>Page: click button
|
|
|
|
rect rgba(255,0,0,.2)
|
|
|
|
activate Page
|
|
|
|
Note over Page: collect dataset
|
|
|
|
Page->>Worker: transfer dataset
|
|
|
|
end
|
|
|
|
deactivate Page
|
|
|
|
activate Worker
|
|
|
|
Note over Worker: generate workbook
|
|
|
|
Note over Worker: create file
|
|
|
|
Note over Worker: setup download
|
|
|
|
Worker->>Page: URL to download
|
|
|
|
deactivate Worker
|
|
|
|
activate Page
|
|
|
|
Page->>User: download workbook
|
|
|
|
deactivate Page
|
|
|
|
```
|
|
|
|
|
|
|
|
</td></tr></tbody></table>
|
|
|
|
|
2022-10-19 10:05:59 +00:00
|
|
|
|
|
|
|
:::note Browser Compatibility
|
|
|
|
|
|
|
|
IE10+ and modern browsers support basic Web Workers. Some APIs like `fetch` were
|
2023-09-02 09:26:57 +00:00
|
|
|
added later. Feature testing is strongly recommended.
|
2022-10-19 10:05:59 +00:00
|
|
|
|
|
|
|
:::
|
|
|
|
|
2022-10-31 07:26:13 +00:00
|
|
|
:::info Inline Workers
|
|
|
|
|
2022-12-28 11:56:52 +00:00
|
|
|
Due to limitations of the live code blocks, all of the workers in this section
|
2022-10-31 07:26:13 +00:00
|
|
|
are in-line. The code is embedded in template literals. For production sites,
|
|
|
|
workers are typically written in separate JS files.
|
|
|
|
|
2024-04-08 04:47:04 +00:00
|
|
|
<details>
|
|
|
|
<summary><b>Example</b> (click to show)</summary>
|
2022-10-31 07:26:13 +00:00
|
|
|
|
|
|
|
For example, an in-line worker like
|
|
|
|
|
2023-04-29 11:21:37 +00:00
|
|
|
<CodeBlock language="js">{`\
|
|
|
|
const worker = new Worker(URL.createObjectURL(new Blob([\`\\
|
2022-10-31 07:26:13 +00:00
|
|
|
/* load standalone script from CDN */
|
2023-04-29 11:21:37 +00:00
|
|
|
importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js");
|
|
|
|
\n\
|
2022-10-31 07:26:13 +00:00
|
|
|
/* this callback will run once the main context sends a message */
|
|
|
|
self.addEventListener('message', (e) => {
|
|
|
|
/* Pass the version string back */
|
|
|
|
postMessage({ version: XLSX.version });
|
|
|
|
}, false);
|
2023-04-29 11:21:37 +00:00
|
|
|
\`])));`}
|
|
|
|
</CodeBlock>
|
2022-10-31 07:26:13 +00:00
|
|
|
|
|
|
|
would typically be stored in a separate JS file like "worker.js":
|
|
|
|
|
2023-04-29 11:21:37 +00:00
|
|
|
<CodeBlock language="js" title="worker.js">{`\
|
2022-10-31 07:26:13 +00:00
|
|
|
/* load standalone script from CDN */
|
2023-04-29 11:21:37 +00:00
|
|
|
importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js");
|
|
|
|
\n\
|
2022-10-31 07:26:13 +00:00
|
|
|
/* this callback will run once the main context sends a message */
|
|
|
|
self.addEventListener('message', (e) => {
|
|
|
|
/* Pass the version string back */
|
|
|
|
postMessage({ version: XLSX.version });
|
2023-04-29 11:21:37 +00:00
|
|
|
}, false);`}
|
|
|
|
</CodeBlock>
|
2022-10-31 07:26:13 +00:00
|
|
|
|
2023-04-29 11:21:37 +00:00
|
|
|
and the main script would pass a URL to the `Worker` constructor:
|
2022-10-31 07:26:13 +00:00
|
|
|
|
|
|
|
```js
|
2023-04-29 11:21:37 +00:00
|
|
|
const worker = new Worker("./worker.js");
|
2022-10-31 07:26:13 +00:00
|
|
|
```
|
|
|
|
|
|
|
|
</details>
|
|
|
|
|
|
|
|
:::
|
|
|
|
|
2022-10-19 10:05:59 +00:00
|
|
|
## Installation
|
|
|
|
|
2023-09-22 06:32:55 +00:00
|
|
|
In all cases, `importScripts` in a Worker can load the
|
|
|
|
[SheetJS Standalone scripts](/docs/getting-started/installation/standalone):
|
2022-10-19 10:05:59 +00:00
|
|
|
|
2023-04-29 11:21:37 +00:00
|
|
|
<CodeBlock language="js">{`\
|
|
|
|
importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js");`}
|
|
|
|
</CodeBlock>
|
2022-10-19 10:05:59 +00:00
|
|
|
|
|
|
|
For production use, it is highly encouraged to download and host the script.
|
|
|
|
|
2024-04-08 04:47:04 +00:00
|
|
|
<details open>
|
|
|
|
<summary><b>ECMAScript Module Support</b> (click to hide)</summary>
|
2022-10-21 00:10:10 +00:00
|
|
|
|
|
|
|
:::note Browser Compatibility
|
|
|
|
|
|
|
|
ESM is supported in Web Workers in the Chromium family of browsers (including
|
2022-10-31 00:58:49 +00:00
|
|
|
Chrome and Edge) as well as in browsers powered by WebKit (including Safari).
|
2022-10-21 00:10:10 +00:00
|
|
|
|
2023-09-02 09:26:57 +00:00
|
|
|
For legacy browsers including Firefox and IE, `importScripts` should be used.
|
2022-10-21 00:10:10 +00:00
|
|
|
|
|
|
|
:::
|
|
|
|
|
2023-02-26 11:38:03 +00:00
|
|
|
Browser ESM imports require a complete URL including the `.mjs` extension:
|
|
|
|
|
2023-04-29 11:21:37 +00:00
|
|
|
<CodeBlock language="js">{`\
|
|
|
|
import * as XLSX from "https://cdn.sheetjs.com/xlsx-${current}/package/xlsx.mjs";`}
|
|
|
|
</CodeBlock>
|
2022-10-21 00:10:10 +00:00
|
|
|
|
2023-02-26 11:38:03 +00:00
|
|
|
When using Worker ESM, the Worker constructor must set the `type` option:
|
|
|
|
|
|
|
|
```js
|
|
|
|
const worker = new Worker(
|
|
|
|
url_to_worker_script,
|
|
|
|
// highlight-next-line
|
|
|
|
{ type: "module" } // second argument to Worker constructor
|
|
|
|
);
|
|
|
|
```
|
|
|
|
|
|
|
|
Inline workers additionally require the Blob MIME type `text/javascript`:
|
2022-10-21 00:10:10 +00:00
|
|
|
|
2023-05-01 01:27:02 +00:00
|
|
|
<CodeBlock language="js">{`\
|
|
|
|
const worker_code = \`\\
|
2022-10-21 00:10:10 +00:00
|
|
|
/* load ESM script from CDN */
|
2023-05-01 01:27:02 +00:00
|
|
|
import * as XLSX from "https://cdn.sheetjs.com/xlsx-${current}/package/xlsx.mjs";
|
2022-10-21 00:10:10 +00:00
|
|
|
// ... do something with XLSX here ...
|
2023-05-01 01:27:02 +00:00
|
|
|
\`;
|
2022-10-21 00:10:10 +00:00
|
|
|
const worker = new Worker(
|
|
|
|
URL.createObjectURL(
|
|
|
|
new Blob(
|
|
|
|
[ worker_code ],
|
|
|
|
// highlight-next-line
|
|
|
|
{ type: "text/javascript" } // second argument to the Blob constructor
|
|
|
|
)
|
|
|
|
),
|
|
|
|
// highlight-next-line
|
2023-02-26 11:38:03 +00:00
|
|
|
{ type: "module" } // second argument to Worker constructor
|
2023-05-01 01:27:02 +00:00
|
|
|
);`}
|
|
|
|
</CodeBlock>
|
2022-10-21 00:10:10 +00:00
|
|
|
|
|
|
|
</details>
|
|
|
|
|
2023-02-26 11:38:03 +00:00
|
|
|
## Live Demos
|
|
|
|
|
2024-03-12 06:47:52 +00:00
|
|
|
:::note Tested Deployments
|
2023-05-23 06:28:14 +00:00
|
|
|
|
|
|
|
Each browser demo was tested in the following environments:
|
|
|
|
|
|
|
|
| Browser | Date | Comments |
|
|
|
|
|:------------|:-----------|:----------------------------------------|
|
2025-01-06 02:51:20 +00:00
|
|
|
| Chrome 131 | 2024-12-31 | |
|
|
|
|
| Edge 131 | 2024-12-31 | |
|
|
|
|
| Safari 17.5 | 2024-12-31 | File System Access API is not supported |
|
|
|
|
| Brave 1.63 | 2024-12-31 | File System Access API is not supported |
|
|
|
|
| Firefox 133 | 2024-12-31 | File System Access API is not supported |
|
2023-05-23 06:28:14 +00:00
|
|
|
|
|
|
|
:::
|
|
|
|
|
2023-02-26 11:38:03 +00:00
|
|
|
### Downloading a Remote File
|
2022-10-19 10:05:59 +00:00
|
|
|
|
2022-10-21 00:10:10 +00:00
|
|
|
:::note fetch in Web Workers
|
2022-10-19 10:05:59 +00:00
|
|
|
|
|
|
|
`fetch` was enabled in Web Workers in Chrome 42 and Safari 10.3.
|
|
|
|
|
|
|
|
:::
|
|
|
|
|
|
|
|
Typically the Web Worker performs the `fetch` operation, processes the workbook,
|
2022-12-28 11:56:52 +00:00
|
|
|
and sends a final result (HTML table or raw data) to the main browser context:
|
|
|
|
|
|
|
|
```mermaid
|
|
|
|
sequenceDiagram
|
|
|
|
autonumber
|
|
|
|
actor User
|
|
|
|
participant Page
|
|
|
|
participant Worker
|
|
|
|
User->>Page: click button
|
|
|
|
activate Page
|
|
|
|
Page->>Worker: send URL
|
|
|
|
deactivate Page
|
|
|
|
activate Worker
|
|
|
|
Note over Worker: fetch file
|
|
|
|
Note over Worker: parse file
|
|
|
|
Note over Worker: generate table
|
|
|
|
Worker->>Page: HTML table
|
|
|
|
deactivate Worker
|
|
|
|
activate Page
|
|
|
|
Note over Page: add to DOM
|
|
|
|
Page->>User: table is visible
|
|
|
|
deactivate Page
|
|
|
|
```
|
|
|
|
|
2024-04-08 04:47:04 +00:00
|
|
|
<details>
|
|
|
|
<summary><b>Live Demo</b> (click to show)</summary>
|
2022-10-19 10:05:59 +00:00
|
|
|
|
|
|
|
In the following example, the script:
|
|
|
|
|
2024-04-26 04:16:13 +00:00
|
|
|
- downloads https://docs.sheetjs.com/pres.numbers in a Web Worker
|
2022-10-19 10:05:59 +00:00
|
|
|
- loads the SheetJS library and parses the file in the Worker
|
|
|
|
- generates an HTML string of the first table in the Worker
|
|
|
|
- sends the string to the main browser context
|
|
|
|
- adds the HTML to the page in the main browser context
|
|
|
|
|
2023-05-01 01:27:02 +00:00
|
|
|
<CodeBlock language="jsx" live>{`\
|
2022-10-19 10:05:59 +00:00
|
|
|
function SheetJSFetchDLWorker() {
|
2022-10-21 00:10:10 +00:00
|
|
|
const [__html, setHTML] = React.useState("");
|
2023-05-01 01:27:02 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
return ( <>
|
|
|
|
<button onClick={() => {
|
|
|
|
/* this mantra embeds the worker source in the function */
|
2023-05-01 01:27:02 +00:00
|
|
|
const worker = new Worker(URL.createObjectURL(new Blob([\`\\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* load standalone script from CDN */
|
2023-05-01 01:27:02 +00:00
|
|
|
importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js");
|
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* this callback will run once the main context sends a message */
|
|
|
|
self.addEventListener('message', async(e) => {
|
|
|
|
try {
|
|
|
|
/* Fetch file */
|
2024-04-26 04:16:13 +00:00
|
|
|
const res = await fetch("https://docs.sheetjs.com/pres.numbers");
|
2022-10-19 10:05:59 +00:00
|
|
|
const ab = await res.arrayBuffer();
|
2023-05-01 01:27:02 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* Parse file */
|
2022-10-31 00:58:49 +00:00
|
|
|
const wb = XLSX.read(ab, {dense: true});
|
2022-10-19 10:05:59 +00:00
|
|
|
const ws = wb.Sheets[wb.SheetNames[0]];
|
2023-05-01 01:27:02 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* Generate HTML */
|
|
|
|
const html = XLSX.utils.sheet_to_html(ws);
|
2023-05-01 01:27:02 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* Reply with result */
|
2022-10-21 00:10:10 +00:00
|
|
|
postMessage({ html });
|
2022-10-19 10:05:59 +00:00
|
|
|
} catch(e) {
|
|
|
|
/* Pass the error message back */
|
|
|
|
postMessage({html: String(e.message || e).bold() });
|
|
|
|
}
|
|
|
|
}, false);
|
2023-05-01 01:27:02 +00:00
|
|
|
\`])));
|
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* when the worker sends back the HTML, add it to the DOM */
|
|
|
|
worker.onmessage = function(e) { setHTML(e.data.html); };
|
|
|
|
/* post a message to the worker */
|
|
|
|
worker.postMessage({});
|
|
|
|
}}><b>Click to Start</b></button>
|
2022-10-21 00:10:10 +00:00
|
|
|
<div dangerouslySetInnerHTML={{ __html }}/>
|
2022-10-19 10:05:59 +00:00
|
|
|
</> );
|
2023-05-01 01:27:02 +00:00
|
|
|
}`}
|
|
|
|
</CodeBlock>
|
2022-10-19 10:05:59 +00:00
|
|
|
|
2022-12-28 11:56:52 +00:00
|
|
|
</details>
|
|
|
|
|
2023-02-26 11:38:03 +00:00
|
|
|
### Creating a Local File
|
2022-10-19 10:05:59 +00:00
|
|
|
|
2022-10-21 00:10:10 +00:00
|
|
|
:::caution Writing files from Web Workers
|
2022-10-19 10:05:59 +00:00
|
|
|
|
|
|
|
`XLSX.writeFile` will not work in Web Workers! Raw file data can be passed from
|
|
|
|
the Web Worker to the main browser context for downloading.
|
|
|
|
|
|
|
|
:::
|
|
|
|
|
2022-12-28 11:56:52 +00:00
|
|
|
Typically the Web Worker receives an array of JS objects, generates a workbook,
|
|
|
|
and sends a URL to the main browser context for download:
|
|
|
|
|
|
|
|
```mermaid
|
|
|
|
sequenceDiagram
|
|
|
|
autonumber
|
|
|
|
actor User
|
|
|
|
participant Page
|
|
|
|
participant Worker
|
|
|
|
User->>Page: click button
|
|
|
|
activate Page
|
|
|
|
Note over Page: collect dataset
|
|
|
|
Page->>Worker: transfer dataset
|
|
|
|
deactivate Page
|
|
|
|
activate Worker
|
|
|
|
Note over Worker: generate workbook
|
|
|
|
Note over Worker: create file
|
|
|
|
Note over Worker: setup download
|
|
|
|
Worker->>Page: URL to download
|
|
|
|
deactivate Worker
|
|
|
|
activate Page
|
|
|
|
Page->>User: download workbook
|
|
|
|
deactivate Page
|
|
|
|
```
|
|
|
|
|
2024-04-08 04:47:04 +00:00
|
|
|
<details>
|
|
|
|
<summary><b>Live Demo</b> (click to show)</summary>
|
2022-12-28 11:56:52 +00:00
|
|
|
|
2022-10-19 10:05:59 +00:00
|
|
|
In the following example, the script:
|
|
|
|
|
2022-12-28 11:56:52 +00:00
|
|
|
- sends a dataset (array of JS objects) to the Web Worker
|
2022-10-19 10:05:59 +00:00
|
|
|
- generates a workbook object in the Web Worker
|
|
|
|
- generates an XLSB file using `XLSX.write` in the Web Worker
|
2022-10-19 21:12:12 +00:00
|
|
|
- generates an object URL in the Web Worker
|
|
|
|
- sends the object URL to the main browser context
|
2022-10-19 10:05:59 +00:00
|
|
|
- performs a download action in the main browser context
|
|
|
|
|
2023-05-03 03:40:40 +00:00
|
|
|
<CodeBlock language="jsx" live>{`\
|
2022-10-19 10:05:59 +00:00
|
|
|
function SheetJSWriteFileWorker() {
|
2022-10-21 00:10:10 +00:00
|
|
|
const [__html, setHTML] = React.useState("");
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-12-28 11:56:52 +00:00
|
|
|
const data = [
|
|
|
|
{ "SheetJS": "வணக்கம்", "in": "สวัสดี", "Web": "你好", "Workers": "가지마" },
|
|
|
|
{ "SheetJS": 1, "in": 2, "Web": 3, "Workers": 4 },
|
|
|
|
];
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
return ( <>
|
|
|
|
<button onClick={() => { setHTML("");
|
|
|
|
/* this mantra embeds the worker source in the function */
|
2023-05-03 03:40:40 +00:00
|
|
|
const worker = new Worker(URL.createObjectURL(new Blob([\`\\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* load standalone script from CDN */
|
2023-05-03 03:40:40 +00:00
|
|
|
importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js");
|
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* this callback will run once the main context sends a message */
|
|
|
|
self.addEventListener('message', async(e) => {
|
|
|
|
try {
|
2022-12-28 11:56:52 +00:00
|
|
|
/* Create a new workbook from the data */
|
|
|
|
const ws = XLSX.utils.json_to_sheet(e.data.data);
|
|
|
|
const wb = XLSX.utils.book_new();
|
|
|
|
XLSX.utils.book_append_sheet(wb, ws, "Data");
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-19 21:12:12 +00:00
|
|
|
/* Write XLSB data (Uint8Array) */
|
2022-10-19 10:05:59 +00:00
|
|
|
const u8 = XLSX.write(wb, { bookType: "xlsb", type: "buffer" });
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-19 21:12:12 +00:00
|
|
|
/* Generate URL */
|
|
|
|
const url = URL.createObjectURL(new Blob([u8]));
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* Reply with result */
|
2022-10-19 21:12:12 +00:00
|
|
|
postMessage({ url });
|
2022-10-19 10:05:59 +00:00
|
|
|
} catch(e) {
|
|
|
|
/* Pass the error message back */
|
|
|
|
postMessage({error: String(e.message || e).bold() });
|
|
|
|
}
|
|
|
|
}, false);
|
2023-05-03 03:40:40 +00:00
|
|
|
\`])));
|
2022-10-19 10:05:59 +00:00
|
|
|
/* when the worker sends back the data, create a download */
|
|
|
|
worker.onmessage = function(e) {
|
|
|
|
if(e.data.error) return setHTML(e.data.error);
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* this mantra is the standard HTML5 download attribute technique */
|
|
|
|
const a = document.createElement("a");
|
|
|
|
a.download = "SheetJSWriteFileWorker.xlsb";
|
2022-10-19 21:12:12 +00:00
|
|
|
a.href = e.data.url;
|
2022-10-19 10:05:59 +00:00
|
|
|
document.body.appendChild(a);
|
|
|
|
a.click();
|
|
|
|
document.body.removeChild(a);
|
|
|
|
};
|
|
|
|
/* post a message to the worker */
|
2022-12-28 11:56:52 +00:00
|
|
|
worker.postMessage({ data });
|
2022-10-19 10:05:59 +00:00
|
|
|
}}><b>Click to Start</b></button>
|
2022-10-21 00:10:10 +00:00
|
|
|
<div dangerouslySetInnerHTML={{ __html }}/>
|
2022-10-19 10:05:59 +00:00
|
|
|
</> );
|
2023-05-03 03:40:40 +00:00
|
|
|
}`}
|
|
|
|
</CodeBlock>
|
2022-10-19 10:05:59 +00:00
|
|
|
|
2022-12-28 11:56:52 +00:00
|
|
|
</details>
|
|
|
|
|
2023-02-26 11:38:03 +00:00
|
|
|
### User-Submitted File
|
2022-10-19 10:05:59 +00:00
|
|
|
|
2022-10-21 00:10:10 +00:00
|
|
|
:::note FileReaderSync
|
2022-10-19 10:05:59 +00:00
|
|
|
|
|
|
|
Typically `FileReader` is used in the main browser context. In Web Workers, the
|
|
|
|
synchronous version `FileReaderSync` is more efficient.
|
|
|
|
|
|
|
|
:::
|
|
|
|
|
2022-12-28 11:56:52 +00:00
|
|
|
Typically the Web Worker receives a file pointer, reads and parses the file,
|
|
|
|
and sends a final result (HTML table or raw data) to the main browser context:
|
|
|
|
|
|
|
|
```mermaid
|
|
|
|
sequenceDiagram
|
|
|
|
autonumber
|
|
|
|
actor User
|
|
|
|
participant Page
|
|
|
|
participant Worker
|
2023-02-26 11:38:03 +00:00
|
|
|
User->>Page: submit file
|
2022-12-28 11:56:52 +00:00
|
|
|
activate Page
|
2023-02-26 11:38:03 +00:00
|
|
|
Page->>Worker: send pointer
|
2022-12-28 11:56:52 +00:00
|
|
|
deactivate Page
|
|
|
|
activate Worker
|
|
|
|
Note over Worker: read file
|
|
|
|
Note over Worker: parse file
|
|
|
|
Note over Worker: generate table
|
|
|
|
Worker->>Page: HTML table
|
|
|
|
deactivate Worker
|
|
|
|
activate Page
|
|
|
|
Note over Page: add to DOM
|
|
|
|
Page->>User: table is visible
|
|
|
|
deactivate Page
|
|
|
|
```
|
|
|
|
|
2024-04-08 04:47:04 +00:00
|
|
|
<details>
|
|
|
|
<summary><b>Live Demo</b> (click to show)</summary>
|
2022-12-28 11:56:52 +00:00
|
|
|
|
2022-10-21 00:10:10 +00:00
|
|
|
In the following example, when a file is dropped over the DIV or when the INPUT
|
|
|
|
element is used to select a file, the script:
|
2022-10-19 10:05:59 +00:00
|
|
|
|
|
|
|
- sends the `File` object to the Web Worker
|
|
|
|
- loads the SheetJS library and parses the file in the Worker
|
|
|
|
- generates an HTML string of the first table in the Worker
|
|
|
|
- sends the string to the main browser context
|
|
|
|
- adds the HTML to the page in the main browser context
|
|
|
|
|
2023-05-03 03:40:40 +00:00
|
|
|
<CodeBlock language="jsx" live>{`\
|
2022-10-19 10:05:59 +00:00
|
|
|
function SheetJSDragDropWorker() {
|
2022-10-21 00:10:10 +00:00
|
|
|
const [__html, setHTML] = React.useState("");
|
2022-10-19 21:12:12 +00:00
|
|
|
/* suppress default behavior for drag and drop */
|
2022-10-19 10:05:59 +00:00
|
|
|
function suppress(e) { e.stopPropagation(); e.preventDefault(); }
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-21 00:10:10 +00:00
|
|
|
/* this worker is shared between drag-drop and file input element */
|
2023-05-03 03:40:40 +00:00
|
|
|
const worker = new Worker(URL.createObjectURL(new Blob([\`\\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* load standalone script from CDN */
|
2023-05-03 03:40:40 +00:00
|
|
|
importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js");
|
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* this callback will run once the main context sends a message */
|
2022-10-19 21:12:12 +00:00
|
|
|
self.addEventListener('message', (e) => {
|
2022-10-19 10:05:59 +00:00
|
|
|
try {
|
|
|
|
/* Read file data */
|
|
|
|
const ab = new FileReaderSync().readAsArrayBuffer(e.data.file);
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* Parse file */
|
2022-10-31 00:58:49 +00:00
|
|
|
const wb = XLSX.read(ab, {dense: true});
|
2022-10-19 10:05:59 +00:00
|
|
|
const ws = wb.Sheets[wb.SheetNames[0]];
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* Generate HTML */
|
|
|
|
const html = XLSX.utils.sheet_to_html(ws);
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-19 10:05:59 +00:00
|
|
|
/* Reply with result */
|
2022-10-21 00:10:10 +00:00
|
|
|
postMessage({ html });
|
2022-10-19 10:05:59 +00:00
|
|
|
} catch(e) {
|
|
|
|
/* Pass the error message back */
|
|
|
|
postMessage({html: String(e.message || e).bold() });
|
|
|
|
}
|
|
|
|
}, false);
|
2023-05-03 03:40:40 +00:00
|
|
|
\`])));
|
2022-10-21 00:10:10 +00:00
|
|
|
/* when the worker sends back the HTML, add it to the DOM */
|
|
|
|
worker.onmessage = function(e) { setHTML(e.data.html); };
|
|
|
|
return ( <>
|
|
|
|
<div onDragOver={suppress} onDragEnter={suppress} onDrop={(e) => {
|
|
|
|
suppress(e);
|
2022-10-19 10:05:59 +00:00
|
|
|
/* post a message with the first File to the worker */
|
|
|
|
worker.postMessage({ file: e.dataTransfer.files[0] });
|
2022-10-21 00:10:10 +00:00
|
|
|
}}>Drag a file to this DIV to process! (or use the file input)</div>
|
|
|
|
<input type="file" onChange={(e) => {
|
|
|
|
suppress(e);
|
|
|
|
/* post a message with the first File to the worker */
|
|
|
|
worker.postMessage({ file: e.target.files[0] });
|
|
|
|
}}/>
|
|
|
|
<div dangerouslySetInnerHTML={{ __html }}/>
|
2022-10-19 10:05:59 +00:00
|
|
|
</> );
|
2023-05-03 03:40:40 +00:00
|
|
|
}`}
|
|
|
|
</CodeBlock>
|
2022-10-31 00:58:49 +00:00
|
|
|
|
2022-12-28 11:56:52 +00:00
|
|
|
</details>
|
|
|
|
|
2023-02-26 11:38:03 +00:00
|
|
|
### Streaming Write
|
2022-10-31 00:58:49 +00:00
|
|
|
|
2024-07-18 22:19:02 +00:00
|
|
|
The ["Stream Export"](/docs/api/stream) section covers the streaming write
|
|
|
|
methods in more detail.
|
2022-10-31 00:58:49 +00:00
|
|
|
|
2024-07-18 22:19:02 +00:00
|
|
|
The ["Large Datasets"](/docs/demos/bigdata/stream#browser) demo includes browser
|
|
|
|
live examples.
|
2022-12-28 11:56:52 +00:00
|
|
|
|
2022-10-31 00:58:49 +00:00
|
|
|
#### File System Access API
|
|
|
|
|
2024-04-14 07:40:38 +00:00
|
|
|
:::danger Browser Compatibility
|
2022-10-31 00:58:49 +00:00
|
|
|
|
|
|
|
At the time of writing, the File System Access API is only available in Chromium
|
|
|
|
and Chromium-based browsers like Chrome and Edge.
|
|
|
|
|
|
|
|
:::
|
|
|
|
|
2022-10-31 07:26:13 +00:00
|
|
|
:::caution Performance
|
|
|
|
|
|
|
|
In local testing, committing each CSV row as it is generated is significantly
|
|
|
|
slower than accumulating and writing once at the end.
|
|
|
|
|
|
|
|
When the target CSV is known to be less than 500MB, it is preferable to batch.
|
2023-04-19 20:03:23 +00:00
|
|
|
Larger files may hit browser length limits.
|
2022-10-31 07:26:13 +00:00
|
|
|
|
|
|
|
:::
|
|
|
|
|
2022-12-28 11:56:52 +00:00
|
|
|
```mermaid
|
|
|
|
sequenceDiagram
|
|
|
|
autonumber
|
|
|
|
actor User
|
|
|
|
participant Page
|
|
|
|
participant Worker
|
|
|
|
User->>Page: click button
|
|
|
|
activate Page
|
|
|
|
Page->>User: seek permission
|
|
|
|
User->>Page: grant permission
|
|
|
|
Note over Page: collect dataset
|
|
|
|
Page->>Worker: transfer dataset
|
|
|
|
deactivate Page
|
|
|
|
activate Worker
|
|
|
|
Note over Worker: setup stream
|
|
|
|
Worker->>User: start download
|
|
|
|
loop every 100 rows
|
|
|
|
Note over Worker: generate rows
|
|
|
|
Worker->>User: write rows to file (without freezing)
|
|
|
|
Worker->>Page: progress message
|
|
|
|
activate Page
|
|
|
|
Page->>User: display progress
|
|
|
|
deactivate Page
|
|
|
|
end
|
|
|
|
Worker->>User: finish download
|
2023-04-03 09:09:59 +00:00
|
|
|
Worker->>Page: send completion message
|
2022-12-28 11:56:52 +00:00
|
|
|
deactivate Worker
|
|
|
|
activate Page
|
|
|
|
Page->>User: download complete
|
|
|
|
deactivate Page
|
|
|
|
```
|
|
|
|
|
|
|
|
|
2024-04-08 04:47:04 +00:00
|
|
|
<details>
|
|
|
|
<summary><b>Live Demo</b> (click to show)</summary>
|
2022-12-28 11:56:52 +00:00
|
|
|
|
2022-10-31 00:58:49 +00:00
|
|
|
The following live demo fetches and parses a file in a Web Worker. The script:
|
|
|
|
|
|
|
|
- prompts user to save file (`window.showSaveFilePicker` in the main thread)
|
|
|
|
- passes the URL and the file object to the Web Worker
|
|
|
|
- loads the SheetJS library in the Web Worker
|
|
|
|
- fetches the requested URL and parses the workbook from the Worker
|
|
|
|
- creates a Writable Stream from the file object
|
|
|
|
- uses `XLSX.stream.to_csv` to generate CSV rows of the first worksheet
|
2022-10-31 07:26:13 +00:00
|
|
|
+ every 100th row, a progress message is sent back to the main thread
|
2022-10-31 00:58:49 +00:00
|
|
|
+ at the end, a completion message is sent back to the main thread
|
|
|
|
|
2022-10-31 07:26:13 +00:00
|
|
|
The demo has a checkbox. If it is not checked (default), the Worker will
|
|
|
|
collect each CSV row and write once at the end. If it is checked, the Worker
|
|
|
|
will try to commit each row as it is generated.
|
|
|
|
|
|
|
|
The demo also has a URL input box. Feel free to change the URL. For example:
|
2022-10-31 00:58:49 +00:00
|
|
|
|
|
|
|
`https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls`
|
2022-10-31 07:26:13 +00:00
|
|
|
is an XLS file over 50 MB. The generated CSV file is about 55 MB.
|
2022-10-31 00:58:49 +00:00
|
|
|
|
|
|
|
`https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx`
|
2022-10-31 07:26:13 +00:00
|
|
|
is an XLSX file with 300000 rows (approximately 20 MB) yielding a CSV of 10 MB.
|
2022-10-31 00:58:49 +00:00
|
|
|
|
2023-05-03 03:40:40 +00:00
|
|
|
<CodeBlock language="jsx" live>{`\
|
2022-10-31 00:58:49 +00:00
|
|
|
function SheetJSFetchCSVStreamFile() {
|
2022-12-01 01:13:00 +00:00
|
|
|
const [state, setState] = React.useState("");
|
|
|
|
const [__html, setHTML] = React.useState("");
|
2022-10-31 00:58:49 +00:00
|
|
|
const [cnt, setCnt] = React.useState(0);
|
2022-10-31 07:26:13 +00:00
|
|
|
const [hz, setHz] = React.useState(0);
|
2023-06-05 20:12:53 +00:00
|
|
|
const [url, setUrl] = React.useState("https://docs.sheetjs.com/test_files/large_strings.xlsx");
|
2022-10-31 07:26:13 +00:00
|
|
|
const ref = React.useRef(null);
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-31 00:58:49 +00:00
|
|
|
return ( <>
|
2022-10-31 07:26:13 +00:00
|
|
|
<b>URL: </b><input type="text" value={url} onChange={(e) => setUrl(e.target.value)} size="80"/><br/>
|
|
|
|
<b>Commit each row: </b><input type="checkbox" ref={ref}/><br/>
|
2022-10-31 00:58:49 +00:00
|
|
|
<button onClick={async() => {
|
|
|
|
/* this mantra embeds the worker source in the function */
|
2023-05-03 03:40:40 +00:00
|
|
|
const worker = new Worker(URL.createObjectURL(new Blob([\`\\
|
2022-10-31 00:58:49 +00:00
|
|
|
/* load standalone script from CDN */
|
2023-05-03 03:40:40 +00:00
|
|
|
importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js");
|
|
|
|
\n\
|
2022-10-31 00:58:49 +00:00
|
|
|
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
|
|
|
|
XLSX.stream.set_readable(() => ({
|
|
|
|
__done: false,
|
|
|
|
// this function will be assigned by the SheetJS stream methods
|
|
|
|
_read: function() { this.__done = true; },
|
|
|
|
// this function is called by the stream methods
|
2022-10-31 07:26:13 +00:00
|
|
|
push: function(d) {
|
|
|
|
if(!this.__done) cb(d);
|
|
|
|
if(d == null) this.__done = true; },
|
|
|
|
resume: function pump() {
|
|
|
|
for(var i = 0; i < batch && !this.__done; ++i) this._read();
|
|
|
|
if(!this.__done) setTimeout(pump.bind(this), 0); }
|
2022-10-31 00:58:49 +00:00
|
|
|
}));
|
|
|
|
return XLSX.stream.to_csv(ws, opts);
|
|
|
|
}
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-31 00:58:49 +00:00
|
|
|
/* this callback will run once the main context sends a message */
|
|
|
|
self.addEventListener('message', async(e) => {
|
|
|
|
try {
|
|
|
|
/* Fetch file */
|
2022-10-31 07:26:13 +00:00
|
|
|
postMessage({state: "fetching"});
|
|
|
|
var t = Date.now();
|
2022-10-31 00:58:49 +00:00
|
|
|
const res = await fetch(e.data.url);
|
|
|
|
const ab = await res.arrayBuffer();
|
2022-10-31 07:26:13 +00:00
|
|
|
postMessage({time: "fetch", ts: Date.now() - t});
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-31 00:58:49 +00:00
|
|
|
/* Parse file */
|
|
|
|
postMessage({state: "parsing"});
|
2022-10-31 07:26:13 +00:00
|
|
|
t = Date.now();
|
2022-10-31 00:58:49 +00:00
|
|
|
const wb = XLSX.read(ab, {dense: true});
|
|
|
|
const ws = wb.Sheets[wb.SheetNames[0]];
|
2022-10-31 07:26:13 +00:00
|
|
|
postMessage({time: "parse", ts: Date.now() - t});
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-31 00:58:49 +00:00
|
|
|
/* Generate CSV rows */
|
|
|
|
postMessage({state: "begin"});
|
2022-10-31 07:26:13 +00:00
|
|
|
t = Date.now();
|
2022-10-31 00:58:49 +00:00
|
|
|
const wstream = await e.data.wFile.createWritable();
|
2022-10-31 07:26:13 +00:00
|
|
|
let c = 0, buf = "", each = !!e.data.each;
|
2022-10-31 00:58:49 +00:00
|
|
|
const strm = sheet_to_csv_cb(ws, async(csv) => {
|
|
|
|
if(csv != null) {
|
2022-10-31 07:26:13 +00:00
|
|
|
if(each) await wstream.write(csv);
|
|
|
|
else buf += csv;
|
|
|
|
if(!(++c % 100)) postMessage({ state: "writing", c, ts: Date.now() - t });
|
2022-10-31 00:58:49 +00:00
|
|
|
} else {
|
2022-10-31 07:26:13 +00:00
|
|
|
if(buf) await wstream.write(buf);
|
2022-10-31 00:58:49 +00:00
|
|
|
await wstream.close();
|
2022-10-31 07:26:13 +00:00
|
|
|
postMessage({state: "done", c, ts: Date.now() - t });
|
2022-10-31 00:58:49 +00:00
|
|
|
}
|
|
|
|
});
|
|
|
|
strm.resume();
|
|
|
|
} catch(e) {
|
|
|
|
/* Pass the error message back */
|
|
|
|
postMessage({error: String(e.message || e) });
|
|
|
|
}
|
|
|
|
}, false);
|
2023-05-03 03:40:40 +00:00
|
|
|
\`])));
|
2022-10-31 00:58:49 +00:00
|
|
|
/* when the worker sends back data, add it to the DOM */
|
2023-05-03 03:40:40 +00:00
|
|
|
const log = (s, t) => setHTML(h => h + \`\${s}: \${(t/1000).toFixed(3).padStart(8)} sec\\n\`);
|
2022-10-31 00:58:49 +00:00
|
|
|
worker.onmessage = function(e) {
|
2023-05-03 03:40:40 +00:00
|
|
|
if(e.data.error) return setState(\`Processing Error: \${e.data.error}\`);
|
2022-10-31 00:58:49 +00:00
|
|
|
else if(e.data.state) {
|
|
|
|
setState(e.data.state);
|
2022-10-31 07:26:13 +00:00
|
|
|
if(e.data.c) setCnt(e.data.c);
|
|
|
|
if(e.data.ts) setHz((e.data.c || cnt) * 1000 / e.data.ts);
|
|
|
|
if(e.data.state == "done") log("write", e.data.ts);
|
|
|
|
} else if(e.data.time) log(e.data.time, e.data.ts);
|
2022-10-31 00:58:49 +00:00
|
|
|
};
|
2022-10-31 07:26:13 +00:00
|
|
|
setCnt(0); setHz(0); setState(""); setHTML("");
|
2023-09-02 09:26:57 +00:00
|
|
|
if(!window.showSaveFilePicker) setState("Browser missing API support!");
|
|
|
|
else try {
|
2022-10-31 07:26:13 +00:00
|
|
|
/* Show picker and get handle to file */
|
|
|
|
const wFile = await window.showSaveFilePicker({
|
|
|
|
suggestedName: "SheetJSStream.csv",
|
|
|
|
types: [ { description: 'csv', accept: { 'text/csv': ['.csv'] } } ]
|
|
|
|
});
|
2023-05-03 03:40:40 +00:00
|
|
|
\n\
|
2022-10-31 07:26:13 +00:00
|
|
|
/* post a message to the worker with the URL to fetch */
|
|
|
|
worker.postMessage({url, wFile, each: !!ref.current.checked});
|
2023-05-03 03:40:40 +00:00
|
|
|
} catch(e) { setState(\`Selection Error: \${e && e.message || e}\`); }
|
2022-10-31 00:58:49 +00:00
|
|
|
}}><b>Click to Start</b></button>
|
2022-10-31 07:26:13 +00:00
|
|
|
<pre>State: <b>{state}</b><br/>Count: <b>{cnt}</b> <b>({hz|0} Hz)</b></pre>
|
|
|
|
<pre dangerouslySetInnerHTML={{__html}}/>
|
2022-10-31 00:58:49 +00:00
|
|
|
</> );
|
2023-05-03 03:40:40 +00:00
|
|
|
}`}
|
|
|
|
</CodeBlock>
|
2022-12-28 11:56:52 +00:00
|
|
|
|
|
|
|
</details>
|