docs.sheetjs.com/docz/docs/03-demos/07-worker.md
2022-10-20 20:10:10 -04:00

8.1 KiB

title
Web Workers

Parsing and writing large spreadsheets takes time. During the process, if the SheetJS library is running in the web browser, the website may freeze.

Workers provide a way to off-load the hard work so that the website does not freeze during processing.

:::note Browser Compatibility

IE10+ and modern browsers support basic Web Workers. Some APIs like fetch were added later. Feature testing is highly recommended.

:::

Installation

In all cases, importScripts can load the Standalone scripts:

importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");

For production use, it is highly encouraged to download and host the script.

ECMAScript Module Support (click to show)

:::note Browser Compatibility

ESM is supported in Web Workers in the Chromium family of browsers (including Chrome and Edge) as well as in Webkit-based browsers (including Safari).

For browsers that do not support ESM in Web Workers (such as Firefox at the time of writing), importScripts should be used.

:::

import * as XLSX from "https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs";

When using modules, the script must be served with the correct MIME type and the Worker constructor must set the type option:

const worker_code = `\
/* load standalone script from CDN */
import * as XLSX from "https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs";
// ... do something with XLSX here ...
`;
/* wrap the worker source in a Blob tagged with a JavaScript MIME type */
const worker_blob = new Blob(
  [ worker_code ],
  // highlight-next-line
  { type: "text/javascript" } // second argument to the Blob constructor
);
/* the `type` option tells the browser to load the script as a module */
const worker = new Worker(
  URL.createObjectURL(worker_blob),
  // highlight-next-line
  {type: "module"} // second argument to Worker constructor
);

Downloading a Remote File

:::note fetch in Web Workers

fetch was enabled in Web Workers in Chrome 42 and Safari 10.3.

:::

Typically the Web Worker performs the fetch operation, processes the workbook, and sends a final result to the main browser context for processing.

In the following example, the script:

  • downloads https://sheetjs.com/pres.numbers in a Web Worker
  • loads the SheetJS library and parses the file in the Worker
  • generates an HTML string of the first table in the Worker
  • sends the string to the main browser context
  • adds the HTML to the page in the main browser context
function SheetJSFetchDLWorker() {
  const [__html, setHTML] = React.useState("");

  return ( <>
    <button onClick={() => {
      /* this mantra embeds the worker source in the function */
      const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");

/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
  try {
    /* Fetch file */
    const res = await fetch("https://sheetjs.com/pres.numbers");
    const ab = await res.arrayBuffer();

    /* Parse file */
    const wb = XLSX.read(ab);
    const ws = wb.Sheets[wb.SheetNames[0]];

    /* Generate HTML */
    const html = XLSX.utils.sheet_to_html(ws);

    /* Reply with result */
    postMessage({ html });
  } catch(e) {
    /* Pass the error message back */
    postMessage({html: String(e.message || e).bold() });
  }
}, false);
      `])));
      /* when the worker sends back the HTML, add it to the DOM */
      worker.onmessage = function(e) { setHTML(e.data.html); };
      /* post a message to the worker */
      worker.postMessage({});
    }}><b>Click to Start</b></button>
    <div dangerouslySetInnerHTML={{ __html }}/>
  </> );
}

Creating a Local File

:::caution Writing files from Web Workers

XLSX.writeFile will not work in Web Workers! Raw file data can be passed from the Web Worker to the main browser context for downloading.

:::

In the following example, the script:

  • generates a workbook object in the Web Worker
  • generates an XLSB file using XLSX.write in the Web Worker
  • generates an object URL in the Web Worker
  • sends the object URL to the main browser context
  • performs a download action in the main browser context
function SheetJSWriteFileWorker() {
  const [__html, setHTML] = React.useState("");

  return ( <>
    <button onClick={() => { setHTML("");
      /* this mantra embeds the worker source in the function */
      const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");

/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
  try {
    /* Create a new Workbook (in this case, from a CSV string) */
    const csv = \`\
SheetJS,in,Web,Workers
வணக்கம்,สวัสดี,你好,가지마
1,2,3,4\`;
    const wb = XLSX.read(csv, { type: "string" });

    /* Write XLSB data (Uint8Array) */
    const u8 = XLSX.write(wb, { bookType: "xlsb", type: "buffer" });

    /* Generate URL */
    const url = URL.createObjectURL(new Blob([u8]));

    /* Reply with result */
    postMessage({ url });
  } catch(e) {
    /* Pass the error message back */
    postMessage({error: String(e.message || e).bold() });
  }
}, false);
      `])));
      /* when the worker sends back the data, create a download */
      worker.onmessage = function(e) {
        if(e.data.error) return setHTML(e.data.error);

        /* this mantra is the standard HTML5 download attribute technique */
        const a = document.createElement("a");
        a.download = "SheetJSWriteFileWorker.xlsb";
        a.href = e.data.url;
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
      };
      /* post a message to the worker */
      worker.postMessage({});
    }}><b>Click to Start</b></button>
    <div dangerouslySetInnerHTML={{ __html }}/>
  </> );
}

User-Submitted File

:::note FileReaderSync

Typically FileReader is used in the main browser context. In Web Workers, the synchronous version FileReaderSync is more efficient.

:::

In the following example, when a file is dropped over the DIV or when the INPUT element is used to select a file, the script:

  • sends the File object to the Web Worker
  • loads the SheetJS library and parses the file in the Worker
  • generates an HTML string of the first table in the Worker
  • sends the string to the main browser context
  • adds the HTML to the page in the main browser context
function SheetJSDragDropWorker() {
  const [__html, setHTML] = React.useState("");
  /* suppress default behavior for drag and drop */
  function suppress(e) { e.stopPropagation(); e.preventDefault(); }

  /* this worker is shared between drag-drop and file input element */
  const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");

/* this callback will run once the main context sends a message */
self.addEventListener('message', (e) => {
  try {
    /* Read file data */
    const ab = new FileReaderSync().readAsArrayBuffer(e.data.file);

    /* Parse file */
    const wb = XLSX.read(ab);
    const ws = wb.Sheets[wb.SheetNames[0]];

    /* Generate HTML */
    const html = XLSX.utils.sheet_to_html(ws);

    /* Reply with result */
    postMessage({ html });
  } catch(e) {
    /* Pass the error message back */
    postMessage({html: String(e.message || e).bold() });
  }
}, false);
  `])));
    /* when the worker sends back the HTML, add it to the DOM */
    worker.onmessage = function(e) { setHTML(e.data.html); };
  return ( <>
    <div onDragOver={suppress} onDragEnter={suppress} onDrop={(e) => {
      suppress(e);
      /* post a message with the first File to the worker */
      worker.postMessage({ file: e.dataTransfer.files[0] });
    }}>Drag a file to this DIV to process! (or use the file input)</div>
    <input type="file" onChange={(e) => {
      suppress(e);
      /* post a message with the first File to the worker */
      worker.postMessage({ file: e.target.files[0] });
    }}/>
    <div dangerouslySetInnerHTML={{ __html }}/>
  </> );
}