From 3645643606c0001040fdc6dae8e28aa29422a4f0 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Thu, 18 Jul 2024 18:19:02 -0400 Subject: [PATCH] Stream Export docs --- .../01-installation/01-standalone.mdx | 2 +- .../01-installation/03-nodejs.md | 4 +- .../01-installation/07-bun.md | 2 +- docz/docs/03-demos/37-bigdata/01-stream.md | 139 +++++--- docz/docs/03-demos/37-bigdata/02-worker.md | 7 +- docz/docs/06-solutions/05-output.md | 3 +- docz/docs/08-api/11-stream.md | 303 ++++++++++++++++++ docz/docs/08-api/index.md | 3 +- docz/docs/09-miscellany/04-testing.md | 37 ++- tests/bigdata-stream-bun.sh | 17 + tests/bigdata-stream-nodejs.sh | 2 +- tests/bundler-browserify.sh | 2 +- tests/bundler-esbuild.sh | 2 +- tests/bundler-parcel.sh | 2 +- tests/bundler-requirejs.sh | 2 +- tests/bundler-vite.sh | 2 +- tests/cli-boxednode.sh | 2 +- tests/cli-bunsea.sh | 2 +- tests/cli-nexe.sh | 2 +- tests/cli-nodesea.sh | 2 +- tests/cli-pkg.sh | 2 +- tests/dom-happydom.sh | 2 +- tests/dom-jsdom.sh | 2 +- tests/engines-jurassic.sh | 4 +- tests/engines-rhino.sh | 2 +- tests/headless-playwright.sh | 4 +- tests/headless-puppeteer-deno.sh | 2 +- tests/headless-puppeteer.sh | 4 +- tests/server-express-worker.sh | 2 +- tests/static-vite.sh | 4 +- 30 files changed, 487 insertions(+), 78 deletions(-) create mode 100644 docz/docs/08-api/11-stream.md create mode 100755 tests/bigdata-stream-bun.sh diff --git a/docz/docs/02-getting-started/01-installation/01-standalone.mdx b/docz/docs/02-getting-started/01-installation/01-standalone.mdx index 8c80b0f..58fefb3 100644 --- a/docz/docs/02-getting-started/01-installation/01-standalone.mdx +++ b/docz/docs/02-getting-started/01-installation/01-standalone.mdx @@ -51,7 +51,7 @@ reading and writing many spreadsheet formats. - CSV and SYLK encodings (directly affecting users outside of the United States) - XLSB / XLS / Lotus 1-2-3 / SpreadsheetML 2003 / Numbers file formats -- Stream utility functions +- [Stream utility functions](/docs/api/stream)
How to integrate the mini build (click to show) diff --git a/docz/docs/02-getting-started/01-installation/03-nodejs.md b/docz/docs/02-getting-started/01-installation/03-nodejs.md index 2584787..422f379 100644 --- a/docz/docs/02-getting-started/01-installation/03-nodejs.md +++ b/docz/docs/02-getting-started/01-installation/03-nodejs.md @@ -218,7 +218,7 @@ The package supports CommonJS `require` and ESM `import` module systems. ### CommonJS `require` By default, the module supports `require` and it will automatically add support -for streams and file system access: +for encodings, streams and file system access: ```js var XLSX = require("xlsx"); @@ -246,7 +246,7 @@ XLSX.set_fs(fs); #### Stream Operations The `set_readable` method accepts a `stream.Readable` instance for use in stream -methods such as `XLSX.stream.to_csv`: +methods including [`XLSX.stream.to_csv`](/docs/api/stream): ```js import * as XLSX from 'xlsx'; diff --git a/docz/docs/02-getting-started/01-installation/07-bun.md b/docz/docs/02-getting-started/01-installation/07-bun.md index 0cc5de3..9213c94 100644 --- a/docz/docs/02-getting-started/01-installation/07-bun.md +++ b/docz/docs/02-getting-started/01-installation/07-bun.md @@ -76,7 +76,7 @@ The package supports CommonJS `require` and ESM `import` module systems. 
### CommonJS `require` By default, the module supports `require` and it will automatically add support -for streams and file system access: +for encodings, streams and file system access: ```js const { readFile } = require("xlsx"); diff --git a/docz/docs/03-demos/37-bigdata/01-stream.md b/docz/docs/03-demos/37-bigdata/01-stream.md index 3f405b0..fb37ba4 100644 --- a/docz/docs/03-demos/37-bigdata/01-stream.md +++ b/docz/docs/03-demos/37-bigdata/01-stream.md @@ -3,19 +3,24 @@ title: Large Datasets pagination_prev: demos/extensions/index pagination_next: demos/engines/index sidebar_custom_props: - summary: Dense Mode + Incremental CSV / HTML / JSON Export + summary: Dense Mode + Incremental CSV / HTML / JSON / XLML Export --- import current from '/version.js'; +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; import CodeBlock from '@theme/CodeBlock'; -For maximal compatibility, the library reads entire files at once and generates -files at once. Browsers and other JS engines enforce tight memory limits. In -these cases, the library offers strategies to optimize for memory or space by -using platform-specific APIs. +For maximal compatibility, SheetJS API functions read entire files into memory +and write files in memory. Browsers and other JS engines enforce tight memory +limits. The library offers alternate strategies to optimize for memory usage. ## Dense Mode +[Dense mode worksheets](/docs/csf/sheet#dense-mode), which store cells in arrays +of arrays, are designed to work around Google Chrome performance regressions. +For backwards compatibility, dense mode worksheets are not created by default. + `read`, `readFile` and `aoa_to_sheet` accept the `dense` option. When enabled, the methods create worksheet objects that store cells in arrays of arrays: @@ -51,24 +56,18 @@ take the same arguments as the normal write functions: - `XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`. 
- `XLSX.stream.to_html` is the streaming version of `XLSX.utils.sheet_to_html`. - `XLSX.stream.to_json` is the streaming version of `XLSX.utils.sheet_to_json`. +- `XLSX.stream.to_xlml` is the streaming SpreadsheetML2003 workbook writer. -"Stream" refers to the NodeJS push streams API. +These functions are covered in the ["Stream Export"](/docs/api/stream) section. -
- Historical Note (click to show) +:::tip pass -NodeJS push streams were introduced in 2012. The text streaming methods `to_csv` -and `to_html` are supported in NodeJS v0.10 and later while the object streaming -method `to_json` is supported in NodeJS v0.12 and later. +This feature was expanded in version `0.20.3`. It is strongly recommended to +[upgrade to the latest version](/docs/getting-started/installation/). -The first SheetJS streaming write function, `to_csv`, was introduced in 2017. It -used and still uses the same NodeJS streaming API. +::: -Years later, browser vendors are settling on a different stream API. -For maximal compatibility, the library uses NodeJS push streams. - -
### NodeJS @@ -102,7 +101,11 @@ XLSX.set_fs(fs); // manually load fs helpers ::: -**`XLSX.stream.to_csv`** +#### Text Streams + +`to_csv`, `to_html`, and `to_xlml` emit strings. The data can be directly pushed +to a `Writable` stream. `fs.createWriteStream`[^1] is the recommended approach +for streaming to a file in NodeJS. This example reads a worksheet passed as an argument to the script, pulls the first worksheet, converts to CSV and writes to `SheetJSNodeJStream.csv`: @@ -110,22 +113,37 @@ first worksheet, converts to CSV and writes to `SheetJSNodeJStream.csv`: ```js var XLSX = require("xlsx"), fs = require("fs"); -var wb = XLSX.readFile(process.argv[2]); +/* read file */ +var wb = XLSX.readFile(process.argv[2], {dense: true}); + +/* get first worksheet */ var ws = wb.Sheets[wb.SheetNames[0]]; + +/* create CSV stream */ +var csvstream = XLSX.stream.to_csv(ws); + +/* create output stream */ var ostream = fs.createWriteStream("SheetJSNodeJStream.csv"); +/* write data from CSV stream to output file */ // highlight-next-line -XLSX.stream.to_csv(ws).pipe(ostream); +csvstream.pipe(ostream); ``` -**`XLSX.stream.to_json`** +#### Object Streams -`stream.to_json` uses Object-mode streams. A `Transform` stream can be used to -generate a normal stream for streaming to a file or the screen: +`to_json` uses Object-mode streams[^2]. A `Transform` stream[^3] can be used to +generate a text stream for streaming to a file or the screen. 
+ +The following example prints data by writing to the `process.stdout` stream: ```js var XLSX = require("xlsx"), Transform = require("stream").Transform; + +/* read file */ var wb = XLSX.readFile(process.argv[2], {dense: true}); + +/* get first worksheet */ var ws = wb.Sheets[wb.SheetNames[0]]; /* this Transform stream converts JS objects to text */ @@ -137,7 +155,18 @@ conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); }; XLSX.stream.to_json(ws, {raw: true}).pipe(conv).pipe(process.stdout); ``` -**Demo** +#### BunJS + +BunJS is directly compatible with NodeJS streams. + +:::caution Bun support is considered experimental. + +Great open source software grows with user tests and reports. Any issues should +be reported to the Bun project for further diagnosis. + +::: + +#### NodeJS Demo :::note Tested Deployments @@ -145,29 +174,44 @@ This demo was tested in the following deployments: | Node Version | Date | Node Status when tested | |:-------------|:-----------|:------------------------| -| `0.12.18` | 2024-06-30 | End-of-Life | -| `4.9.1` | 2024-06-30 | End-of-Life | -| `6.17.1` | 2024-06-30 | End-of-Life | -| `8.17.0` | 2024-06-30 | End-of-Life | -| `10.24.1` | 2024-06-30 | End-of-Life | -| `12.22.12` | 2024-06-30 | End-of-Life | -| `14.21.3` | 2024-06-30 | End-of-Life | -| `16.20.2` | 2024-06-30 | End-of-Life | -| `18.20.3` | 2024-06-30 | Maintenance LTS | -| `20.15.0` | 2024-06-30 | Active LTS | -| `22.3.0` | 2024-06-30 | Current | +| `0.12.18` | 2024-07-18 | End-of-Life | +| `4.9.1` | 2024-07-18 | End-of-Life | +| `6.17.1` | 2024-07-18 | End-of-Life | +| `8.17.0` | 2024-07-18 | End-of-Life | +| `10.24.1` | 2024-07-18 | End-of-Life | +| `12.22.12` | 2024-07-18 | End-of-Life | +| `14.21.3` | 2024-07-18 | End-of-Life | +| `16.20.2` | 2024-07-18 | End-of-Life | +| `18.20.4` | 2024-07-18 | Maintenance LTS | +| `20.15.1` | 2024-07-18 | Active LTS | +| `22.5.0` | 2024-07-18 | Current | While streaming methods work in End-of-Life versions of 
NodeJS, production deployments should upgrade to a Current or LTS version of NodeJS. +This demo was also tested against BunJS `1.1.18` on 2024-07-18. + ::: 1) Install the [NodeJS module](/docs/getting-started/installation/nodejs) + + + {`\ npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz`} + + + +{`\ +bun i --save xlsx@https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz`} + + + + + 2) Download [`SheetJSNodeJStream.js`](pathname:///stream/SheetJSNodeJStream.js): ```bash @@ -182,10 +226,23 @@ curl -LO https://docs.sheetjs.com/pres.xlsx 4) Run the script: + + + ```bash node SheetJSNodeJStream.js pres.xlsx ``` + + + +```bash +bun SheetJSNodeJStream.js pres.xlsx +``` + + + +
Expected Output (click to show) @@ -220,8 +277,8 @@ Each browser demo was tested in the following environments: | Browser | Date | |:------------|:-----------| -| Chrome 126 | 2024-06-30 | -| Safari 17.3 | 2024-06-30 | +| Chrome 126 | 2024-07-18 | +| Safari 17.4 | 2024-07-18 | ::: @@ -453,7 +510,7 @@ stream.to_csv(wb.Sheets[wb.SheetNames[0]]).resume();`} :::note Tested Deployments -This demo was last tested on 2024-04-25 against Deno `1.42.4`. +This demo was last tested on 2024-07-18 against Deno `1.45.2`. ::: @@ -468,4 +525,8 @@ deno run -A https://docs.sheetjs.com/stream/SheetJSDenoStream.ts ``` This script will fetch [`pres.numbers`](https://docs.sheetjs.com/pres.numbers) and -generate CSV rows. The result will be printed to the terminal window. \ No newline at end of file +generate CSV rows. The result will be printed to the terminal window. + +[^1]: See [`fs.createWriteStream`](https://nodejs.org/api/fs.html#fscreatewritestreampath-options) in the NodeJS documentation. +[^2]: See ["Object mode"](https://nodejs.org/api/stream.html#object-mode) in the NodeJS documentation. +[^3]: See [`Transform`](https://nodejs.org/api/stream.html#class-streamtransform) in the NodeJS documentation. diff --git a/docz/docs/03-demos/37-bigdata/02-worker.md b/docz/docs/03-demos/37-bigdata/02-worker.md index 55411e7..d3825f2 100644 --- a/docz/docs/03-demos/37-bigdata/02-worker.md +++ b/docz/docs/03-demos/37-bigdata/02-worker.md @@ -497,10 +497,11 @@ self.addEventListener('message', (e) => { ### Streaming Write -A more general discussion, including row-oriented processing demos, is included -in the ["Large Datasets"](/docs/demos/bigdata/stream#browser) demo. +The ["Stream Export"](/docs/api/stream) section covers the streaming write +methods in more detail. -`XLSX.stream.to_csv` incrementally generates CSV rows. +The ["Large Datasets"](/docs/demos/bigdata/stream#browser) demo includes browser +live examples. 
#### File System Access API diff --git a/docz/docs/06-solutions/05-output.md b/docz/docs/06-solutions/05-output.md index 7eb1b57..2838604 100644 --- a/docz/docs/06-solutions/05-output.md +++ b/docz/docs/06-solutions/05-output.md @@ -853,5 +853,6 @@ Readable Stream. - `XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`. - `XLSX.stream.to_html` is the streaming version of `XLSX.utils.sheet_to_html`. - `XLSX.stream.to_json` is the streaming version of `XLSX.utils.sheet_to_json`. +- `XLSX.stream.to_xlml` is the streaming SpreadsheetML2003 workbook writer. -Examples are included in ["Large Datasets"](/docs/demos/bigdata/stream#streaming-write) +["Stream Export"](/docs/api/stream) describes these functions in more detail. diff --git a/docz/docs/08-api/11-stream.md b/docz/docs/08-api/11-stream.md new file mode 100644 index 0000000..db5aebe --- /dev/null +++ b/docz/docs/08-api/11-stream.md @@ -0,0 +1,303 @@ +--- +title: Stream Export +sidebar_position: 11 +hide_table_of_contents: true +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +Many platforms offer methods to write files. These methods typically expect the +entire file to be generated before writing. Large workbook files may exceed +platform-specific size limits. + +Some platforms also offer a "streaming" or "incremental" approach. Instead of +writing the entire file at once, these methods can accept small chunks of data +and incrementally write to the filesystem. + +The [Streaming Write](/docs/demos/bigdata/stream#streaming-write) demo includes +live browser demos and notes for platforms that do not support SheetJS streams. + +:::tip pass + +This feature was expanded in version `0.20.3`. It is strongly recommended to +[upgrade to the latest version](/docs/getting-started/installation/). + +::: + +## Streaming Basics + +SheetJS streams use the NodeJS push streams API. It is strongly recommended to +review the official NodeJS "Stream" documentation[^1]. +
+ Historical Note (click to show) + +NodeJS push streams were introduced in 2012. The text streaming methods `to_csv` +and `to_html` are supported in NodeJS v0.10 and later while the object streaming +method `to_json` is supported in NodeJS v0.12 and later. + +The first SheetJS streaming write function, `to_csv`, was introduced in 2017. It +used and still uses the battle-tested NodeJS streaming API. + +Years later, browser vendors opted to standardize a different stream API. + +For maximal compatibility, the library uses NodeJS push streams. + +
+ +#### NodeJS ECMAScript Module Support + +In CommonJS modules, libraries can load the `stream` module using `require`. +SheetJS libraries will load streaming support where applicable. + +Due to ESM limitations, libraries cannot freely import the `stream` module. + +:::danger ECMAScript Module Limitations + +The original specification only supported top-level imports: + +```js +import { Readable } from 'stream'; +``` + +If a module is unavailable, there is no way for scripts to gracefully fail or +ignore the error. + +--- + +Patches to the specification added two different solutions to the problem: + +- "dynamic imports" will throw errors that can be handled by libraries. Dynamic +imports will taint APIs that do not use Promise-based methods. + +```js +/* Readable will be undefined if stream cannot be imported */ +const Readable = await (async() => { + try { + return (await import("stream"))?.Readable; + } catch(e) { /* silently ignore error */ } +})(); +``` + +- "import maps" control module resolution, allowing library users to manually +shunt unsupported modules. + +**These patches were released after browsers adopted ESM!** A number of browsers +and other platforms support top-level imports but do not support the patches. + +--- + +**Due to ESM woes, it is strongly recommended to use CommonJS when possible!** + +::: + +For maximal platform support, SheetJS libraries expose a special `set_readable` +method to provide a `Readable` implementation: + +```js title="SheetJS NodeJS ESM streaming support" +import { stream as SheetJStream } from 'xlsx'; +import { Readable } from 'stream'; + +SheetJStream.set_readable(Readable); +``` + +## Worksheet Export + +The worksheet export methods accept a SheetJS worksheet object. + +### CSV Export + +**Export worksheet data in "Comma-Separated Values" (CSV)** + +```js +var csvstream = XLSX.stream.to_csv(ws, opts); +``` + +`to_csv` creates a NodeJS text stream. 
The options mirror the non-streaming +[`sheet_to_csv`](/docs/api/utilities/csv#delimiter-separated-output) method. + +The following NodeJS script fetches https://docs.sheetjs.com/pres.numbers and +streams CSV rows to the terminal. + + + + +```js title="Streaming CSV Print Example" +const XLSX = require("xlsx"); + +(async() => { + var ab = await (await fetch("https://docs.sheetjs.com/pres.numbers")).arrayBuffer() + var wb = XLSX.read(ab); + var ws = wb.Sheets[wb.SheetNames[0]]; + XLSX.stream.to_csv(ws).pipe(process.stdout); +})(); +``` + + + + +```js title="Streaming CSV Print Example" +import { read, stream } from "xlsx"; +import { Readable } from "stream"; +stream.set_readable(Readable); + +var ab = await (await fetch("https://docs.sheetjs.com/pres.numbers")).arrayBuffer() +var wb = read(ab); +var ws = wb.Sheets[wb.SheetNames[0]]; +stream.to_csv(ws).pipe(process.stdout); +``` + + + + +### JSON Export + +**Export worksheet data to "Arrays of Arrays" or "Arrays of Objects"** + +```js +var jsonstream = XLSX.stream.to_json(ws, opts); +``` + +`to_json` creates a NodeJS object stream. The options mirror the non-streaming +[`sheet_to_json`](/docs/api/utilities/array#array-output) method. + +The following NodeJS script fetches https://docs.sheetjs.com/pres.numbers and +streams JSON rows to the terminal. A `Transform`[^2] stream generates text from +the object streams. 
+ + + + +```js title="Streaming Objects Print Example" +const XLSX = require("xlsx") +const { Transform } = require("stream"); + +/* this Transform stream converts JS objects to text */ +var conv = new Transform({writableObjectMode:true}); +conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); }; + +(async() => { + var ab = await (await fetch("https://docs.sheetjs.com/pres.numbers")).arrayBuffer() + var wb = XLSX.read(ab); + var ws = wb.Sheets[wb.SheetNames[0]]; + XLSX.stream.to_json(ws, {raw: true}).pipe(conv).pipe(process.stdout); +})(); +``` + + + + +```js title="Streaming Objects Print Example" +import { read, stream } from "xlsx"; +import { Readable, Transform } from "stream"; +stream.set_readable(Readable); + +/* this Transform stream converts JS objects to text */ +var conv = new Transform({writableObjectMode:true}); +conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); }; + +var ab = await (await fetch("https://docs.sheetjs.com/pres.numbers")).arrayBuffer() +var wb = read(ab); +var ws = wb.Sheets[wb.SheetNames[0]]; +stream.to_json(ws, {raw: true}).pipe(conv).pipe(process.stdout); +``` + + + + +### HTML Export + +**Export worksheet data to HTML TABLE** + +```js +var htmlstream = XLSX.stream.to_html(ws, opts); +``` + +`to_html` creates a NodeJS text stream. The options mirror the non-streaming +[`sheet_to_html`](/docs/api/utilities/html#html-table-output) method. + +The following NodeJS script fetches https://docs.sheetjs.com/pres.numbers and +streams HTML TABLE rows to the terminal. 
+ + + + +```js title="Streaming HTML Print Example" +const XLSX = require("xlsx"); + +(async() => { + var ab = await (await fetch("https://docs.sheetjs.com/pres.numbers")).arrayBuffer() + var wb = XLSX.read(ab); + var ws = wb.Sheets[wb.SheetNames[0]]; + XLSX.stream.to_html(ws).pipe(process.stdout); +})(); +``` + + + + +```js title="Streaming HTML Print Example" +import { read, stream } from "xlsx"; +import { Readable } from "stream"; +stream.set_readable(Readable); + +var ab = await (await fetch("https://docs.sheetjs.com/pres.numbers")).arrayBuffer() +var wb = read(ab); +var ws = wb.Sheets[wb.SheetNames[0]]; +stream.to_html(ws).pipe(process.stdout); +``` + + + + +## Workbook Export + +The workbook export methods accept a SheetJS workbook object. + +### XLML Export + +**Export workbook data to SpreadsheetML2003 XML files** + +```js +var xlmlstream = XLSX.stream.to_xlml(wb, opts); +``` + +`to_xlml` creates a NodeJS text stream. The options mirror the non-streaming +[`write`](/docs/api/write-options) method using the `xlml` book type. + +The following NodeJS script fetches https://docs.sheetjs.com/pres.numbers and +writes a SpreadsheetML2003 workbook to `SheetJStream.xml.xls`: + + + + +```js title="Streaming XLML Write Example" +const XLSX = require("xlsx"), fs = require("fs"); + +(async() => { + var ab = await (await fetch("https://docs.sheetjs.com/pres.numbers")).arrayBuffer() + var wb = XLSX.read(ab); + XLSX.stream.to_xlml(wb).pipe(fs.createWriteStream("SheetJStream.xml.xls")); +})(); +``` + + + + +```js title="Streaming XLML Write Example" +import { read, stream } from "xlsx"; +import { Readable } from "stream"; +stream.set_readable(Readable); +import { createWriteStream } from "fs"; + +var ab = await (await fetch("https://docs.sheetjs.com/pres.numbers")).arrayBuffer() +var wb = read(ab); +stream.to_xlml(wb).pipe(createWriteStream("SheetJStream.xml.xls")); +``` + + + + +[^1]: See ["Stream"](https://nodejs.org/api/stream.html) in the NodeJS documentation. 
+[^2]: See [`Transform`](https://nodejs.org/api/stream.html#class-streamtransform) in the NodeJS documentation. diff --git a/docz/docs/08-api/index.md b/docz/docs/08-api/index.md index 8006646..ae2af78 100644 --- a/docz/docs/08-api/index.md +++ b/docz/docs/08-api/index.md @@ -135,8 +135,9 @@ _Miscellaneous_ - `to_csv(sheet, opts)` streams CSV rows - `to_html(sheet, opts)` streams an HTML table incrementally - `to_json(sheet, opts)` streams JS objects (object-mode stream) +- `to_xlml(book, opts)` streams a SpreadsheetML2003 workbook incrementally -Streaming write functions are described in the [Streaming Write demo](/docs/demos/bigdata/stream#streaming-write). +Stream methods are described in the ["Stream Export"](/docs/api/stream) section. ### ESM Helpers diff --git a/docz/docs/09-miscellany/04-testing.md b/docz/docs/09-miscellany/04-testing.md index edfeebd..871eb64 100644 --- a/docz/docs/09-miscellany/04-testing.md +++ b/docz/docs/09-miscellany/04-testing.md @@ -7,7 +7,11 @@ hide_table_of_contents: true import Tabs from '@theme/Tabs'; import TabItem from '@theme/TabItem'; - +SheetJS libraries have extensive test suites. + +### Local Tests + + `make test` will run the NodeJS tests. By default it runs tests on files in @@ -91,10 +95,11 @@ the Edge tests are run on major releases ending in `5` (starting from Edge 85). **Server Platforms** - NodeJS `0.8`, `0.10`, `0.12`, and every major version starting from `4` - io.js 1/2/3 + - Bun latest - Deno latest -The test suite also includes tests for various time zones. To change -the timezone locally, set the `TZ` environment variable: +The test suite also includes tests for various time zones. Timezones can be +controlled by setting the `TZ` environment variable: ```bash env TZ="Asia/Kolkata" WTF=1 make test_misc @@ -104,7 +109,27 @@ env TZ="Asia/Kolkata" WTF=1 make test_misc ### Test Files -The test files were hosted on [GitHub](https://github.com/SheetJS/test_files). 
+[Download the test artifacts](https://test-files.sheetjs.com/test_files.zip). -Due to breaking changes in the GitHub infrastructure, the test artifacts are now -distributed in a [ZIP archive](https://test-files.sheetjs.com/test_files.zip) +#### Artifact Sources + +Test files include derivatives of files from external sources. Files were opened +in various spreadsheet software and exported to CSV and other file formats. The +enclosed `README.md` file explains the naming conventions and file origins. + +External sources typically distribute files under open source licenses. Some +sources have dedicated files to the public domain. + +It is assumed that external sources have proper authorization to release files +under the asserted license terms. For example, if an external source releases a +file under the Apache 2.0 license, it is assumed that they either generated the +file directly or obtained permission from the creator. + +#### Requests for Removal + +External sources may have added files from contributors without proper consent. +Users are encouraged to submit reports if files contain private information that +was not properly vetted by the parties that posted the original content. + +Please [send an email](mailto:support@sheetjs.com?subject=removal%20request) or +[file an issue in the main source repository](/docs/miscellany/source). 
diff --git a/tests/bigdata-stream-bun.sh b/tests/bigdata-stream-bun.sh new file mode 100755 index 0000000..3d4b98d --- /dev/null +++ b/tests/bigdata-stream-bun.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# https://docs.sheetjs.com/docs/demos/bigdata/stream#nodejs + +cd /tmp +rm -rf sheetjs-stream + +mkdir sheetjs-stream +cd sheetjs-stream + +bun i xlsx@https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz + +curl -LO https://docs.sheetjs.com/stream/SheetJSNodeJStream.js + +curl -LO https://docs.sheetjs.com/pres.xlsx + +bun --version +bun SheetJSNodeJStream.js pres.xlsx diff --git a/tests/bigdata-stream-nodejs.sh b/tests/bigdata-stream-nodejs.sh index 95cb996..01b0a9c 100755 --- a/tests/bigdata-stream-nodejs.sh +++ b/tests/bigdata-stream-nodejs.sh @@ -7,7 +7,7 @@ rm -rf sheetjs-stream mkdir sheetjs-stream cd sheetjs-stream -npm i --save https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz +npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz curl -LO https://docs.sheetjs.com/stream/SheetJSNodeJStream.js diff --git a/tests/bundler-browserify.sh b/tests/bundler-browserify.sh index f3ce482..3484662 100755 --- a/tests/bundler-browserify.sh +++ b/tests/bundler-browserify.sh @@ -13,7 +13,7 @@ mkdir sheetjs-browserify-$n cd sheetjs-browserify-$n npm init -y -npm i --save https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz +npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz cat >index.js <index.html < diff --git a/tests/bundler-requirejs.sh b/tests/bundler-requirejs.sh index 7e8b8cf..2a07db3 100755 --- a/tests/bundler-requirejs.sh +++ b/tests/bundler-requirejs.sh @@ -9,7 +9,7 @@ rm -rf sheetjs-requirejs mkdir sheetjs-requirejs cd sheetjs-requirejs -curl -LO https://cdn.sheetjs.com/xlsx-0.20.2/package/dist/xlsx.full.min.js +curl -LO https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js cat >SheetJSRequire.js <SheetJSDOM.js <Program.cs <SheetJSPlaywright.js < { diff --git a/tests/headless-puppeteer-deno.sh b/tests/headless-puppeteer-deno.sh index 
d268835..d5824e6 100755 --- a/tests/headless-puppeteer-deno.sh +++ b/tests/headless-puppeteer-deno.sh @@ -20,7 +20,7 @@ await page.setViewport({width: 1920, height: 1080}); await page.goto('https://sheetjs.com/demos/table'); /* (2) Load the standalone SheetJS build from the CDN */ -await page.addScriptTag({ url: 'https://cdn.sheetjs.com/xlsx-0.20.2/package/dist/xlsx.full.min.js' }); +await page.addScriptTag({ url: 'https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js' }); /* (3) Run the snippet in browser and return data */ const b64 = await page.evaluate(() => { diff --git a/tests/headless-puppeteer.sh b/tests/headless-puppeteer.sh index 14ddc23..9473f1b 100755 --- a/tests/headless-puppeteer.sh +++ b/tests/headless-puppeteer.sh @@ -6,7 +6,7 @@ rm -rf sheetjs-puppeteer mkdir sheetjs-puppeteer cd sheetjs-puppeteer -npm i --save https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz puppeteer@22.12.0 +npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz puppeteer@22.12.0 cat >SheetJSPuppeteer.js < { diff --git a/tests/server-express-worker.sh b/tests/server-express-worker.sh index 7bca13c..dabf310 100755 --- a/tests/server-express-worker.sh +++ b/tests/server-express-worker.sh @@ -8,7 +8,7 @@ mkdir sheetjs-worker cd sheetjs-worker echo '{ "type": "module" }' > package.json -npm i --save https://cdn.sheetjs.com/xlsx-0.20.2/xlsx-0.20.2.tgz express@4.18.2 formidable@2.1.2 +npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz express@4.18.2 formidable@2.1.2 cat >worker.js < { page.on("console", msg => console.log("PAGE LOG:", msg.text())); await page.setViewport({width: 1920, height: 1080}); await page.goto('http://localhost:7262/'); - await page.addScriptTag({ url: 'https://cdn.sheetjs.com/xlsx-0.20.2/package/dist/xlsx.full.min.js' }); + await page.addScriptTag({ url: 'https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js' }); await new Promise((res,rej) => setTimeout(res, 1000)); const csv = await page.evaluate(() => { 
const tbl = document.querySelector('table');