docs.sheetjs.com/docz/docs/03-demos/07-worker.md
2022-10-31 00:47:55 -04:00

12 KiB

title
Web Workers

Parsing and writing large spreadsheets takes time. During the process, if the SheetJS library is running in the web browser, the website may freeze.

Workers provide a way to off-load the hard work so that the website does not freeze during processing.

:::note Browser Compatibility

IE10+ and modern browsers support basic Web Workers. Some APIs like fetch were added later. Feature testing is highly recommended.

:::

Installation

In all cases, importScripts can load the Standalone scripts:

importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");

For production use, it is highly encouraged to download and host the script.

ECMAScript Module Support (click to show)

:::note Browser Compatibility

ESM is supported in Web Workers in the Chromium family of browsers (including Chrome and Edge) as well as in browsers powered by WebKit (including Safari).

For browsers that do not support module workers (including Firefox at the time of writing), importScripts should be used.

:::

import * as XLSX from "https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs";

When using modules, the script must be served with the correct MIME type and the Worker constructor must set the type option:

/* source text of the worker; it is loaded as a module so `import` can be used */
const worker_code = `\
/* load standalone script from CDN */
import * as XLSX from "https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs";
// ... do something with XLSX here ...
`;

/* wrap the source in a Blob; the MIME type is the second argument to the Blob constructor */
// highlight-next-line
const worker_blob = new Blob([ worker_code ], { type: "text/javascript" });

/* start the worker from an object URL; the type option is the second argument to the Worker constructor */
// highlight-next-line
const worker = new Worker(URL.createObjectURL(worker_blob), { type: "module" });

Downloading a Remote File

:::note fetch in Web Workers

fetch was enabled in Web Workers in Chrome 42 and Safari 10.3

:::

Typically the Web Worker performs the fetch operation, processes the workbook, and sends a final result to the main browser context for processing.

In the following example, the script:

  • downloads https://sheetjs.com/pres.numbers in a Web Worker
  • loads the SheetJS library and parses the file in the Worker
  • generates an HTML string of the first table in the Worker
  • sends the string to the main browser context
  • adds the HTML to the page in the main browser context
function SheetJSFetchDLWorker() {
  const [__html, setHTML] = React.useState("");

  return ( <>
    <button onClick={() => {
      /* this mantra embeds the worker source in the function */
      const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");

/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
  try {
    /* Fetch file */
    const res = await fetch("https://sheetjs.com/pres.numbers");
    const ab = await res.arrayBuffer();

    /* Parse file */
    const wb = XLSX.read(ab, {dense: true});
    const ws = wb.Sheets[wb.SheetNames[0]];

    /* Generate HTML */
    const html = XLSX.utils.sheet_to_html(ws);

    /* Reply with result */
    postMessage({ html });
  } catch(e) {
    /* Pass the error message back */
    postMessage({html: String(e.message || e).bold() });
  }
}, false);
      `])));
      /* when the worker sends back the HTML, add it to the DOM */
      worker.onmessage = function(e) { setHTML(e.data.html); };
      /* post a message to the worker */
      worker.postMessage({});
    }}><b>Click to Start</b></button>
    <div dangerouslySetInnerHTML={{ __html }}/>
  </> );
}

Creating a Local File

:::caution Writing files from Web Workers

XLSX.writeFile will not work in Web Workers! Raw file data can be passed from the Web Worker to the main browser context for downloading.

:::

In the following example, the script:

  • generates a workbook object in the Web Worker
  • generates an XLSB file using XLSX.write in the Web Worker
  • generates an object URL in the Web Worker
  • sends the object URL to the main browser context
  • performs a download action in the main browser context
/**
 * Live demo component: creates a workbook in a Web Worker, writes it as XLSB
 * there, and triggers the download from the main browser context.
 *
 * Renders a button that starts the process and a DIV that shows the error
 * message if the worker reports one.
 */
function SheetJSWriteFileWorker() {
  /* HTML string shown under the button; this demo only sets it to "" or to a worker error */
  const [__html, setHTML] = React.useState("");

  return ( <>
    <button onClick={() => { setHTML("");
      /* this mantra embeds the worker source in the function */
      /* NOTE(review): a new worker and Blob URL are created on every click; no terminate or revoke call is visible in this block */
      const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");

/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
  try {
    /* Create a new Workbook (in this case, from a CSV string) */
    const csv = \`\
SheetJS,in,Web,Workers
வணக்கம்,สวัสดี,你好,가지마
1,2,3,4\`;
    const wb = XLSX.read(csv, { type: "string", dense: true });

    /* Write XLSB data (Uint8Array) */
    const u8 = XLSX.write(wb, { bookType: "xlsb", type: "buffer" });

    /* Generate URL */
    const url = URL.createObjectURL(new Blob([u8]));

    /* Reply with result */
    postMessage({ url });
  } catch(e) {
    /* Pass the error message back */
    postMessage({error: String(e.message || e).bold() });
  }
}, false);
      `])));
      /* when the worker sends back the data, create a download */
      worker.onmessage = function(e) {
        /* the worker replies with either { error } or { url } */
        if(e.data.error) return setHTML(e.data.error);

        /* this mantra is the standard HTML5 download attribute technique */
        const a = document.createElement("a");
        a.download = "SheetJSWriteFileWorker.xlsb";
        /* object URL that was generated inside the worker */
        a.href = e.data.url;
        /* temporarily attach the link, trigger it, then remove it */
        document.body.appendChild(a);
        a.click();
        document.body.removeChild(a);
      };
      /* post a message to the worker */
      /* the payload is empty: the worker's message handler does not read e.data */
      worker.postMessage({});
    }}><b>Click to Start</b></button>
    <div dangerouslySetInnerHTML={{ __html }}/>
  </> );
}

User-Submitted File

:::note FileReaderSync

Typically FileReader is used in the main browser context. In Web Workers, the synchronous version FileReaderSync is more efficient.

:::

In the following example, when a file is dropped over the DIV or when the INPUT element is used to select a file, the script:

  • sends the File object to the Web Worker
  • loads the SheetJS library and parses the file in the Worker
  • generates an HTML string of the first table in the Worker
  • sends the string to the main browser context
  • adds the HTML to the page in the main browser context
function SheetJSDragDropWorker() {
  const [__html, setHTML] = React.useState("");
  /* suppress default behavior for drag and drop */
  function suppress(e) { e.stopPropagation(); e.preventDefault(); }

  /* this worker is shared between drag-drop and file input element */
  const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");

/* this callback will run once the main context sends a message */
self.addEventListener('message', (e) => {
  try {
    /* Read file data */
    const ab = new FileReaderSync().readAsArrayBuffer(e.data.file);

    /* Parse file */
    const wb = XLSX.read(ab, {dense: true});
    const ws = wb.Sheets[wb.SheetNames[0]];

    /* Generate HTML */
    const html = XLSX.utils.sheet_to_html(ws);

    /* Reply with result */
    postMessage({ html });
  } catch(e) {
    /* Pass the error message back */
    postMessage({html: String(e.message || e).bold() });
  }
}, false);
  `])));
    /* when the worker sends back the HTML, add it to the DOM */
    worker.onmessage = function(e) { setHTML(e.data.html); };
  return ( <>
    <div onDragOver={suppress} onDragEnter={suppress} onDrop={(e) => {
      suppress(e);
      /* post a message with the first File to the worker */
      worker.postMessage({ file: e.dataTransfer.files[0] });
    }}>Drag a file to this DIV to process! (or use the file input)</div>
    <input type="file" onChange={(e) => {
      suppress(e);
      /* post a message with the first File to the worker */
      worker.postMessage({ file: e.target.files[0] });
    }}/>
    <div dangerouslySetInnerHTML={{ __html }}/>
  </> );
}

Streaming Write

A more general discussion, including row-oriented processing demos, is included in the "Large Datasets" demo.

File System Access API

:::note

At the time of writing, the File System Access API is only available in Chromium and Chromium-based browsers like Chrome and Edge.

:::

The following live demo fetches and parses a file in a Web Worker. The script:

  • prompts user to save file (window.showSaveFilePicker in the main thread)
  • passes the URL and the file object to the Web Worker
  • loads the SheetJS library in the Web Worker
  • fetches the requested URL and parses the workbook from the Worker
  • creates a Writable Stream from the file object.
  • uses XLSX.stream.to_csv to generate CSV rows of the first worksheet
    • on each row, the data is written to the file stream
    • every 100th row, a progress message is sent back to the main thread
    • at the end, a completion message is sent back to the main thread

The demo has a URL input box. Feel free to change the URL. For example,

https://raw.githubusercontent.com/SheetJS/test_files/master/large_strings.xls is an XLS file over 50 MB

https://raw.githubusercontent.com/SheetJS/libreoffice_test-files/master/calc/xlsx-import/perf/8-by-300000-cells.xlsx is an XLSX file with 300000 rows (approximately 20 MB)

function SheetJSFetchCSVStreamFile() {
  const [state, setState]  = React.useState("");
  const [cnt, setCnt] = React.useState(0);
  const [url, setUrl] = React.useState("https://oss.sheetjs.com/test_files/large_strings.xlsx");

  return ( <>
    <b>URL: </b><input type="text" value={url} onChange={(e) => setUrl(e.target.value)} size="80"/>
    <button onClick={async() => {
      /* this mantra embeds the worker source in the function */
      const worker = new Worker(URL.createObjectURL(new Blob([`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js");

function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
  XLSX.stream.set_readable(() => ({
    __done: false,
    // this function will be assigned by the SheetJS stream methods
    _read: function() { this.__done = true; },
    // this function is called by the stream methods
    push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
    resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
  }));
  return XLSX.stream.to_csv(ws, opts);
}

/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
  try {
    postMessage({state: "fetching"});
    /* Fetch file */
    const res = await fetch(e.data.url);
    const ab = await res.arrayBuffer();

    /* Parse file */
    let len = ab.byteLength;
    if(len < 1024) len += " bytes"; else { len /= 1024;
      if(len < 1024) len += " KB"; else { len /= 1024; len += " MB"; }
    }
    postMessage({state: "parsing"});
    const wb = XLSX.read(ab, {dense: true});
    const ws = wb.Sheets[wb.SheetNames[0]];

    /* Generate CSV rows */
    postMessage({state: "begin"});
    const wstream = await e.data.wFile.createWritable();
    let rows = 0;
    const strm = sheet_to_csv_cb(ws, async(csv) => {
      if(csv != null) {
        await wstream.write(csv);
        if(!(++rows % 100)) postMessage({ state: "processing", rows });
      } else {
        await wstream.close();
        postMessage({state: "done", rows });
      }
    });
    strm.resume();
  } catch(e) {
    /* Pass the error message back */
    postMessage({error: String(e.message || e) });
  }
}, false);
      `])));
      /* when the worker sends back data, add it to the DOM */
      worker.onmessage = function(e) {
        if(e.data.error) return setHTML(e.data.error);
        else if(e.data.state) {
          setState(e.data.state);
          if(e.data.rows) setCnt(e.data.rows);
        }
      };
      setCnt(0); setState("");

      /* Show picker and get handle to file */
      const wFile = await window.showSaveFilePicker({
        suggestedName: "SheetJSStream.csv",
        types: [ { description: 'csv', accept: { 'text/csv': ['.csv'] } } ]
      });

      /* post a message to the worker with the URL to fetch */
      if(wFile) worker.postMessage({url, wFile});
    }}><b>Click to Start</b></button>
    <pre>State: <b>{state}</b><br/>Number of rows: <b>{cnt}</b></pre>
  </> );
}