From f78c866cf4c87c1d3ff6ab6c3841feaefc302b61 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Tue, 8 Feb 2022 04:50:51 -0500 Subject: [PATCH] Parse non-conformant records from Access export --- .spelling | 1 + README.md | 580 +++++++++++++++++++++++++++----- bits/28_binstructs.js | 3 +- bits/30_ctype.js | 2 +- bits/68_wsbin.js | 8 + bits/77_parsetab.js | 2 +- bits/79_html.js | 3 +- demos/headless/.eslintrc | 11 + demos/headless/README.md | 54 +-- demos/headless/chromeless.js | 9 - demos/headless/html.js | 51 +++ demos/headless/phantomjs.js | 36 +- demos/headless/puppeteer.js | 14 - demos/headless/slimerjs.js | 15 - demos/headless/test.html | 35 ++ demos/headless/xlsx.full.min.js | 1 + docbits/13_usage.md | 1 - docbits/20_import.md | 8 +- docbits/22_ingress.md | 169 +++++++++- docbits/30_export.md | 213 ++++++++---- docbits/31_writestream.md | 39 --- docbits/32_egress.md | 217 ++++++++++++ misc/docs/README.md | 528 +++++++++++++++++++++++++---- misc/docs/SUMMARY.md | 11 +- types/index.d.ts | 4 +- 25 files changed, 1681 insertions(+), 334 deletions(-) create mode 100644 demos/headless/.eslintrc delete mode 100644 demos/headless/chromeless.js create mode 100755 demos/headless/html.js delete mode 100644 demos/headless/puppeteer.js delete mode 100644 demos/headless/slimerjs.js create mode 100644 demos/headless/test.html create mode 120000 demos/headless/xlsx.full.min.js delete mode 100644 docbits/31_writestream.md create mode 100644 docbits/32_egress.md diff --git a/.spelling b/.spelling index 300f8b1..99aa170 100644 --- a/.spelling +++ b/.spelling @@ -49,6 +49,7 @@ SessionStorage SQLite SystemJS VueJS +WebKit WebSQL WK_ iOS diff --git a/README.md b/README.md index 80226dd..8ffa936 100644 --- a/README.md +++ b/README.md @@ -44,19 +44,16 @@ port calculations to web apps; automate common spreadsheet tasks, and much more! 
* [JS Ecosystem Demos](#js-ecosystem-demos) - [Acquiring and Extracting Data](#acquiring-and-extracting-data) * [Parsing Workbooks](#parsing-workbooks) - + [API](#api) - + [Examples](#examples) * [Processing JSON and JS Data](#processing-json-and-js-data) - + [API](#api-1) - + [Examples](#examples-1) * [Processing HTML Tables](#processing-html-tables) - + [API](#api-2) - + [Examples](#examples-2) - [Working with the Workbook](#working-with-the-workbook) * [Parsing and Writing Examples](#parsing-and-writing-examples) -- [Writing Workbooks](#writing-workbooks) +- [Packaging and Releasing Data](#packaging-and-releasing-data) + * [Writing Workbooks](#writing-workbooks) * [Writing Examples](#writing-examples) * [Streaming Write](#streaming-write) + * [Generating JSON and JS Data](#generating-json-and-js-data) + * [Generating HTML Tables](#generating-html-tables) - [Interface](#interface) * [Parsing functions](#parsing-functions) * [Writing functions](#writing-functions) @@ -263,7 +260,6 @@ and approaches for steps 1 and 5. Utility functions help with step 3. - ### The Zen of SheetJS _Data processing should fit in any workflow_ @@ -525,7 +521,7 @@ Other examples are included in the [showcase](demos/showcase/). ### Parsing Workbooks -#### API +**API** _Extract data from spreadsheet bytes_ @@ -550,7 +546,7 @@ security risk), and attempts to read files in this way will throw an error. The second `opts` argument is optional. ["Parsing Options"](#parsing-options) covers the supported properties and behaviors. -#### Examples +**Examples** Here are a few common scenarios (click on each subtitle to see the code): @@ -565,7 +561,7 @@ var XLSX = require("xlsx"); var workbook = XLSX.readFile("test.xlsx"); ``` -For Node ESM, the `readFile` helper is not enabled. Instead, `fs.readFileSync` +For Node ESM, the `readFile` helper is not enabled. 
Instead, `fs.readFileSync` should be used to read the file data as a `Buffer` for use with `XLSX.read`: ```js @@ -946,8 +942,6 @@ const workbook = XLSX.read(data); More detailed examples are covered in the [included demos](demos/) - - ### Processing JSON and JS Data JSON and JS data tend to represent single worksheets. This section will use a @@ -973,7 +967,7 @@ The third argument specifies the desired worksheet name. Multiple worksheets can be added to a workbook by calling the function multiple times. -#### API +**API** _Create a worksheet from an array of arrays of JS values_ @@ -1011,17 +1005,68 @@ control the column order and header output. ["Array of Objects Input"](#array-of-arrays-input) describes the function and the optional `opts` argument in more detail. -#### Examples +**Examples** ["Zen of SheetJS"](#the-zen-of-sheetjs) contains a detailed example "Get Data from a JSON Endpoint and Generate a Workbook" + +[`x-spreadsheet`](https://github.com/myliang/x-spreadsheet) is an interactive +data grid for previewing and modifying structured data in the web browser. The +[`xspreadsheet` demo](/demos/xspreadsheet) includes a sample script with the +`xtos` function for converting from x-spreadsheet data object to a workbook. + is a live demo. + +
+ Records from a database query (SQL or no-SQL) (click to show) + The [`database` demo](/demos/database/) includes examples of working with databases and query results. +
+ + +
+ Numerical Computations with TensorFlow.js (click to show) + +[`@tensorflow/tfjs`](@tensorflow/tfjs) and other libraries expect data in simple +arrays, well-suited for worksheets where each column is a data vector. That is +the transpose of how most people use spreadsheets, where each row is a vector. + +When recovering data from `tfjs`, the returned data points are stored in a typed +array. An array of arrays can be constructed with loops. `Array#unshift` can +prepend a title row before the conversion: + +```js +const XLSX = require("xlsx"); +const tf = require('@tensorflow/tfjs'); + +/* suppose xs and ys are vectors (1D tensors) -> tfarr will be a typed array */ +const tfdata = tf.stack([xs, ys]).transpose(); +const shape = tfdata.shape; +const tfarr = tfdata.dataSync(); + +/* construct the array of arrays */ +const aoa = []; +for(let j = 0; j < shape[0]; ++j) { + aoa[j] = []; + for(let i = 0; i < shape[1]; ++i) aoa[j][i] = tfarr[j * shape[1] + i]; +} +/* add headers to the top */ +aoa.unshift(["x", "y"]); + +/* generate worksheet */ +const worksheet = XLSX.utils.aoa_to_sheet(aoa); +``` + +The [`array` demo](demos/array/) shows a complete example. + +
+ + ### Processing HTML Tables -#### API +**API** _Create a worksheet by scraping an HTML TABLE in the page_ @@ -1049,7 +1094,7 @@ The options argument supports the same options as `table_to_sheet`, with the addition of a `sheet` property to control the worksheet name. If the property is missing or no options are specified, the default name `Sheet1` is used. -#### Examples +**Examples** Here are a few common scenarios (click on each subtitle to see the code): @@ -1130,6 +1175,116 @@ chrome.runtime.onMessage.addListener(function(msg, sender, cb) { +
+ Server-Side HTML Tables with Headless Chrome (click to show) + +The [`headless` demo](demos/headless/) includes a complete demo to convert HTML +files to XLSB workbooks. The core idea is to add the script to the page, parse +the table in the page context, generate a `base64` workbook and send it back +for further processing: + +```js +const XLSX = require("xlsx"); +const { readFileSync } = require("fs"), puppeteer = require("puppeteer"); + +const url = `https://sheetjs.com/demos/table`; + +/* get the standalone build source (node_modules/xlsx/dist/xlsx.full.min.js) */ +const lib = readFileSync(require.resolve("xlsx/dist/xlsx.full.min.js"), "utf8"); + +(async() => { + /* start browser and go to web page */ + const browser = await puppeteer.launch(); + const page = await browser.newPage(); + await page.goto(url, {waitUntil: "networkidle2"}); + + /* inject library */ + await page.addScriptTag({content: lib}); + + /* this function `s5s` will be called by the script below, receiving the Base64-encoded file */ + await page.exposeFunction("s5s", async(b64) => { + const workbook = XLSX.read(b64, {type: "base64" }); + + /* DO SOMETHING WITH workbook HERE */ + }); + + /* generate XLSB file in webpage context and send back result */ + await page.addScriptTag({content: ` + /* call table_to_book on first table */ + var workbook = XLSX.utils.table_to_book(document.querySelector("TABLE")); + + /* generate XLSB file */ + var b64 = XLSX.write(workbook, {type: "base64", bookType: "xlsb"}); + + /* call "s5s" hook exposed from the node process */ + window.s5s(b64); + `}); + + /* cleanup */ + await browser.close(); +})(); +``` + +
+ +
+ Server-Side HTML Tables with Headless WebKit (click to show) + +The [`headless` demo](demos/headless/) includes a complete demo to convert HTML +files to XLSB workbooks using [PhantomJS](https://phantomjs.org/). The core idea +is to add the script to the page, parse the table in the page context, generate +a `binary` workbook and send it back for further processing: + +```js +var XLSX = require('xlsx'); +var page = require('webpage').create(); + +/* this code will be run in the page */ +var code = [ "function(){", + /* call table_to_book on first table */ + "var wb = XLSX.utils.table_to_book(document.body.getElementsByTagName('table')[0]);", + + /* generate XLSB file and return binary string */ + "return XLSX.write(wb, {type: 'binary', bookType: 'xlsb'});", +"}" ].join(""); + +page.open('https://sheetjs.com/demos/table', function() { + /* Load the browser script from the UNPKG CDN */ + page.includeJs("https://unpkg.com/xlsx/dist/xlsx.full.min.js", function() { + /* The code will return an XLSB file encoded as binary string */ + var bin = page.evaluateJavaScript(code); + + var workbook = XLSX.read(bin, {type: "binary"}); + /* DO SOMETHING WITH workbook HERE */ + + phantom.exit(); + }); +}); +``` + +
+ +
+ NodeJS HTML Tables without a browser (click to show) + +NodeJS does not include a DOM implementation and Puppeteer requires a hefty +Chromium build. [`jsdom`](https://npm.im/jsdom) is a lightweight alternative: + +```js +const XLSX = require("xlsx"); +const { readFileSync } = require("fs"); +const { JSDOM } = require("jsdom"); + +/* obtain HTML string. This example reads from test.html */ +const html_str = readFileSync("test.html", "utf8"); +/* get first TABLE element */ +const doc = new JSDOM(html_str).window.document.querySelector("table"); +/* generate workbook */ +const workbook = XLSX.utils.table_to_book(doc); +``` + +
+ ## Working with the Workbook The full object format is described later in this README. @@ -1213,37 +1368,82 @@ Some helper functions in `XLSX.utils` generate different views of the sheets: - `XLSX.utils.sheet_to_json` generates an array of objects - `XLSX.utils.sheet_to_formulae` generates a list of formulae -## Writing Workbooks +## Packaging and Releasing Data -For writing, the first step is to generate output data. The helper functions -`write` and `writeFile` will produce the data in various formats suitable for -dissemination. The second step is to actual share the data with the end point. -Assuming `workbook` is a workbook object: +### Writing Workbooks -
- nodejs write a file (click to show) +**API** -`XLSX.writeFile` uses `fs.writeFileSync` in server environments: +_Generate spreadsheet bytes (file) from data_ ```js -if(typeof require !== 'undefined') XLSX = require('xlsx'); +var data = XLSX.write(workbook, opts); +``` + +The `write` method attempts to package data from the workbook into a file in +memory. By default, XLSX files are generated, but that can be controlled with +the `bookType` property of the `opts` argument. Based on the `type` option, +the data can be stored as a "binary string", JS string, `Uint8Array` or Buffer. + +The second `opts` argument is required. ["Writing Options"](#writing-options) +covers the supported properties and behaviors. + +_Generate and attempt to save file_ + +```js +XLSX.writeFile(workbook, filename, opts); +``` + +The `writeFile` method packages the data and attempts to save the new file. The +export file format is determined by the extension of `filename` (`SheetJS.xlsx` +signals XLSX export, `SheetJS.xlsb` signals XLSB export, etc). + +The `writeFile` method uses platform-specific APIs to initiate the file save. In +NodeJS, `fs.writeFileSync` can create a file. In the web browser, a download is +attempted using the HTML5 `download` attribute, with fallbacks for IE. + +The third `opts` argument is optional. ["Writing Options"](#writing-options) +covers the supported properties and behaviors. + +**Examples** + +
+ Local file in a NodeJS server (click to show) + +`writeFile` uses `fs.writeFileSync` in server environments: + +```js +var XLSX = require("xlsx"); + /* output format determined by filename */ -XLSX.writeFile(workbook, 'out.xlsb'); -/* at this point, out.xlsb is a file that you can distribute */ +XLSX.writeFile(workbook, "out.xlsb"); +``` + +For Node ESM, the `writeFile` helper is not enabled. Instead, `XLSX.write` +should be used to generate the file data as a `Buffer` for use with `fs.writeFileSync`: + +```js +import { writeFileSync } from "fs"; +import { write } from "xlsx/xlsx.mjs"; + +const buf = write(workbook, {type: "buffer", bookType: "xlsb"}); +/* buf is a Buffer */ +writeFileSync("out.xlsb", buf); ``` 
- Photoshop ExtendScript write a file (click to show) + Local file in a PhotoShop or InDesign plugin (click to show) `writeFile` wraps the `File` logic in Photoshop and other ExtendScript targets. The specified path should be an absolute path: ```js #include "xlsx.extendscript.js" + /* output format determined by filename */ -XLSX.writeFile(workbook, 'out.xlsx'); +XLSX.writeFile(workbook, "out.xlsx"); /* at this point, out.xlsx is a file that you can distribute */ ``` @@ -1252,44 +1452,7 @@ The [`extendscript` demo](demos/extendscript/) includes a more complex example.
- Browser add TABLE element to page (click to show) - -The `sheet_to_html` utility function generates HTML code that can be added to -any DOM element. - -```js -var worksheet = workbook.Sheets[workbook.SheetNames[0]]; -var container = document.getElementById('tableau'); -container.innerHTML = XLSX.utils.sheet_to_html(worksheet); -``` - -
- -
- Browser upload file (ajax) (click to show) - -A complete example using XHR is [included in the XHR demo](demos/xhr/), along -with examples for fetch and wrapper libraries. This example assumes the server -can handle Base64-encoded files (see the demo for a basic nodejs server): - -```js -/* in this example, send a base64 string to the server */ -var wopts = { bookType:'xlsx', bookSST:false, type:'base64' }; - -var wbout = XLSX.write(workbook,wopts); - -var req = new XMLHttpRequest(); -req.open("POST", "/upload", true); -var formdata = new FormData(); -formdata.append('file', 'test.xlsx'); // <-- server expects `file` to hold name -formdata.append('data', wbout); // <-- `data` holds the base64-encoded data -req.send(formdata); -``` - -
- -
- Browser save file (click to show) + Download a file in the browser to the user machine (click to show) `XLSX.writeFile` wraps a few techniques for triggering a file save: @@ -1303,14 +1466,14 @@ There is no standard way to determine if the actual file has been downloaded. ```js /* output format determined by filename */ -XLSX.writeFile(workbook, 'out.xlsb'); +XLSX.writeFile(workbook, "out.xlsb"); /* at this point, out.xlsb will have been downloaded */ ```
- Browser save file (compatibility) (click to show) + Download a file in legacy browsers (click to show) `XLSX.writeFile` techniques work for most modern browsers as well as older IE. For much older browsers, there are workarounds implemented by wrapper libraries. @@ -1320,7 +1483,7 @@ Note: `XLSX.writeFile` will automatically call `saveAs` if available. ```js /* bookType can be any supported output type */ -var wopts = { bookType:'xlsx', bookSST:false, type:'array' }; +var wopts = { bookType:"xlsx", bookSST:false, type:"array" }; var wbout = XLSX.write(workbook,wopts); @@ -1333,11 +1496,11 @@ to generate local files, suitable for environments where ActiveX is unavailable: ```js Downloadify.create(id,{ - /* other options are required! read the downloadify docs for more info */ - filename: "test.xlsx", - data: function() { return XLSX.write(wb, {bookType:"xlsx", type:'base64'}); }, - append: false, - dataType: 'base64' + /* other options are required! read the downloadify docs for more info */ + filename: "test.xlsx", + data: function() { return XLSX.write(wb, {bookType:"xlsx", type:"base64"}); }, + append: false, + dataType: "base64" }); ``` @@ -1345,6 +1508,54 @@ The [`oldie` demo](demos/oldie/) shows an IE-compatible fallback scenario.
+
+ Browser upload file (ajax) (click to show) + +A complete example using XHR is [included in the XHR demo](demos/xhr/), along +with examples for fetch and wrapper libraries. This example assumes the server +can handle Base64-encoded files (see the demo for a basic nodejs server): + +```js +/* in this example, send a base64 string to the server */ +var wopts = { bookType:"xlsx", bookSST:false, type:"base64" }; + +var wbout = XLSX.write(workbook,wopts); + +var req = new XMLHttpRequest(); +req.open("POST", "/upload", true); +var formdata = new FormData(); +formdata.append("file", "test.xlsx"); // <-- server expects `file` to hold name +formdata.append("data", wbout); // <-- `data` holds the base64-encoded data +req.send(formdata); +``` + +
+ +
+ PhantomJS (Headless Webkit) File Generation (click to show) + +The [`headless` demo](demos/headless/) includes a complete demo to convert HTML +files to XLSB workbooks using [PhantomJS](https://phantomjs.org/). PhantomJS +`fs.write` supports writing files from the main process but has a different +interface from the NodeJS `fs` module: + +```js +var XLSX = require('xlsx'); +var fs = require('fs'); + +/* generate a binary string */ +var bin = XLSX.write(workbook, { type:"binary", bookType: "xlsx" }); +/* write to file */ +fs.write("test.xlsx", bin, "wb"); +``` + +Note: The section ["Processing HTML Tables"](#processing-html-tables) shows how +to generate a workbook from HTML tables in a page in "Headless WebKit". + +
+ + + The [included demos](demos/) cover mobile apps and other special deployments. ### Writing Examples @@ -1391,6 +1602,223 @@ stream.pipe(conv); conv.pipe(process.stdout); pipes write streams to nodejs response. +### Generating JSON and JS Data + +JSON and JS data tend to represent single worksheets. The utility functions in +this section work with single worksheets. + +The ["Common Spreadsheet Format"](#common-spreadsheet-format) section describes +the object structure in more detail. `workbook.SheetNames` is an ordered list +of the worksheet names. `workbook.Sheets` is an object whose keys are sheet +names and whose values are worksheet objects. + +The "first worksheet" is stored at `workbook.Sheets[workbook.SheetNames[0]]`. + +**API** + +_Create an array of JS objects from a worksheet_ + +```js +var jsa = XLSX.utils.sheet_to_json(worksheet, opts); +``` + +_Create an array of arrays of JS values from a worksheet_ + +```js +var aoa = XLSX.utils.sheet_to_json(worksheet, {...opts, header: 1}); +``` + +The `sheet_to_json` utility function walks a workbook in row-major order, +generating an array of objects. The second `opts` argument controls a number of +export decisions including the type of values (JS values or formatted text). The +["JSON"](#json) section describes the argument in more detail. + +By default, `sheet_to_json` scans the first row and uses the values as headers. +With the `header: 1` option, the function exports an array of arrays of values. + +**Examples** + +[`x-spreadsheet`](https://github.com/myliang/x-spreadsheet) is an interactive +data grid for previewing and modifying structured data in the web browser. The +[`xspreadsheet` demo](/demos/xspreadsheet) includes a sample script with the +`stox` function for converting from a workbook to x-spreadsheet data object. + is a live demo. + +
+ Populating a database (SQL or no-SQL) (click to show) + +The [`database` demo](/demos/database/) includes examples of working with +databases and query results. + +
+ +
+ Numerical Computations with TensorFlow.js (click to show) + +[`@tensorflow/tfjs`](@tensorflow/tfjs) and other libraries expect data in simple +arrays, well-suited for worksheets where each column is a data vector. That is +the transpose of how most people use spreadsheets, where each row is a vector. + +A single `Array#map` can pull individual named columns from `sheet_to_json` export: + +```js +const XLSX = require("xlsx"); +const tf = require('@tensorflow/tfjs'); + +const key = "age"; // this is the field we want to pull +const ages = XLSX.utils.sheet_to_json(worksheet).map(r => r[key]); +const tf_data = tf.tensor1d(ages); +``` + +All fields can be processed at once using a transpose of the 2D tensor generated +with the `sheet_to_json` export with `header: 1`. The first row, if it contains +header labels, should be removed with a slice: + +```js +const XLSX = require("xlsx"); +const tf = require('@tensorflow/tfjs'); + +/* array of arrays of the data starting on the second row */ +const aoa = XLSX.utils.sheet_to_json(worksheet, {header: 1}).slice(1); +/* dataset in the "correct orientation" */ +const tf_dataset = tf.tensor2d(aoa).transpose(); +/* pull out each dataset with a slice */ +const tf_field0 = tf_dataset.slice([0,0], [1,tf_dataset.shape[1]]).flatten(); +const tf_field1 = tf_dataset.slice([1,0], [1,tf_dataset.shape[1]]).flatten(); +``` + +The [`array` demo](demos/array/) shows a complete example. + +
+ + +### Generating HTML Tables + +**API** + +_Generate HTML Table from Worksheet_ + +```js +var html = XLSX.utils.sheet_to_html(worksheet); +``` + +The `sheet_to_html` utility function generates HTML code based on the worksheet +data. Each cell in the worksheet is mapped to a `` element. Merged cells +in the worksheet are serialized by setting `colspan` and `rowspan` attributes. + +**Examples** + +The `sheet_to_html` utility function generates HTML code that can be added to +any DOM element by setting the `innerHTML`: + +```js +var container = document.getElementById("tavolo"); +container.innerHTML = XLSX.utils.sheet_to_html(worksheet); +``` + +Combining with `fetch`, constructing a site from a workbook is straightforward: + +
+ Vanilla JS + HTML fetch workbook and generate table previews (click to show) + +```html + + +
+ + + +``` + +
+ +
+ React fetch workbook and generate HTML table previews (click to show) + +It is generally recommended to use a React-friendly workflow, but it is possible +to generate HTML and use it in React with `dangerouslySetInnerHTML`: + +```jsx +function Tabeller(props) { + /* the workbook object is the state */ + const [workbook, setWorkbook] = React.useState(XLSX.utils.book_new()); + + /* fetch and update the workbook once, when the component mounts */ + React.useEffect(() => { (async() => { + /* fetch and parse workbook -- see the fetch example for details */ + const wb = XLSX.read(await (await fetch("sheetjs.xlsx")).arrayBuffer()); + setWorkbook(wb); + })(); }, []); + + return workbook.SheetNames.map(name => (<> +

name

+
+ )); +} +``` + +The [`react` demo](demos/react) includes more React examples. + +
+ +
+ VueJS fetch workbook and generate HTML table previews (click to show) + +It is generally recommended to use a VueJS-friendly workflow, but it is possible +to generate HTML and use it in VueJS with the `v-html` directive: + +```jsx +import { read, utils } from 'xlsx'; +import { reactive } from 'vue'; + +const S5SComponent = { + mounted() { (async() => { + /* fetch and parse workbook -- see the fetch example for details */ + const workbook = read(await (await fetch("sheetjs.xlsx")).arrayBuffer()); + /* loop through the worksheet names in order */ + workbook.SheetNames.forEach(name => { + /* generate HTML from the corresponding worksheets */ + const html = utils.sheet_to_html(workbook.Sheets[name]); + /* add to state */ + this.wb.wb.push({ name, html }); + }); + })(); }, + /* this state mantra is required for array updates to work */ + setup() { return { wb: reactive({ wb: [] }) }; }, + template: ` +
+

{{ ws.name }}

+
+
` +}; +``` + +The [`vuejs` demo](demos/vue) includes more VueJS examples. + +
+ ## Interface `XLSX` is the exposed variable in the browser and the exported node variable diff --git a/bits/28_binstructs.js b/bits/28_binstructs.js index dbed6fb..beef60c 100644 --- a/bits/28_binstructs.js +++ b/bits/28_binstructs.js @@ -132,8 +132,7 @@ function parse_RkNumber(data)/*:number*/ { var b = data.slice(data.l, data.l + 4); var fX100 = (b[0] & 1), fInt = (b[0] & 2); data.l += 4; - b[0] &= 0xFC; // b[0] &= ~3; - var RK = fInt === 0 ? __double([0, 0, 0, 0, b[0], b[1], b[2], b[3]], 0) : __readInt32LE(b, 0) >> 2; + var RK = fInt === 0 ? __double([0, 0, 0, 0, (b[0] & 0xFC), b[1], b[2], b[3]], 0) : __readInt32LE(b, 0) >> 2; return fX100 ? (RK / 100) : RK; } function write_RkNumber(data/*:number*/, o) { diff --git a/bits/30_ctype.js b/bits/30_ctype.js index 93b0120..976c1eb 100644 --- a/bits/30_ctype.js +++ b/bits/30_ctype.js @@ -111,7 +111,7 @@ var ct2type/*{[string]:string}*/ = ({ /* VBA */ "application/vnd.ms-office.vbaProject": "vba", - "application/vnd.ms-office.vbaProjectSignature": "vba", + "application/vnd.ms-office.vbaProjectSignature": "TODO", /* Volatile Dependencies */ "application/vnd.ms-office.volatileDependencies": "TODO", diff --git a/bits/68_wsbin.js b/bits/68_wsbin.js index 0adf921..901f7bf 100644 --- a/bits/68_wsbin.js +++ b/bits/68_wsbin.js @@ -244,6 +244,12 @@ function write_BrtShortRk(cell, ncell, o) { return o; } +/* [MS-XLSB] 2.4.323 BrtCellRString */ +function parse_BrtCellRString(data) { + var cell = parse_XLSBCell(data); + var value = parse_RichStr(data); + return [cell, value, 'is']; +} /* [MS-XLSB] 2.4.317 BrtCellSt */ function parse_BrtCellSt(data) { @@ -564,6 +570,7 @@ function parse_ws_bin(data, _opts, idx, rels, wb/*:WBWBProps*/, themes, styles)/ case 0x0010: /* 'BrtShortReal' */ case 0x0011: /* 'BrtShortSt' */ case 0x0012: /* 'BrtShortIsst' */ + case 0x003E: /* 'BrtCellRString' */ p = ({t:val[2]}/*:any*/); switch(val[2]) { case 'n': p.v = val[1]; break; @@ -571,6 +578,7 @@ function parse_ws_bin(data, _opts, idx, rels, 
wb/*:WBWBProps*/, themes, styles)/ case 'b': p.v = val[1] ? true : false; break; case 'e': p.v = val[1]; if(opts.cellText !== false) p.w = BErr[p.v]; break; case 'str': p.t = 's'; p.v = val[1]; break; + case 'is': p.t = 's'; p.v = val[1].t; break; } if((cf = styles.CellXf[val[0].iStyleRef])) safe_format(p,cf.numFmtId,null,opts, themes, styles); C = val[0].c == -1 ? C + 1 : val[0].c; diff --git a/bits/77_parsetab.js b/bits/77_parsetab.js index 24fd18c..0e9553d 100644 --- a/bits/77_parsetab.js +++ b/bits/77_parsetab.js @@ -60,7 +60,7 @@ var XLSBRecordEnum = { /*::[*/0x003A/*::]*/: { n:"BrtMdxMbrIstr" }, /*::[*/0x003B/*::]*/: { n:"BrtStr" }, /*::[*/0x003C/*::]*/: { n:"BrtColInfo", f:parse_ColInfo }, - /*::[*/0x003E/*::]*/: { n:"BrtCellRString" }, + /*::[*/0x003E/*::]*/: { n:"BrtCellRString", f:parse_BrtCellRString }, /*::[*/0x003F/*::]*/: { n:"BrtCalcChainItem$", f:parse_BrtCalcChainItem$ }, /*::[*/0x0040/*::]*/: { n:"BrtDVal", f:parse_BrtDVal }, /*::[*/0x0041/*::]*/: { n:"BrtSxvcellNum" }, diff --git a/bits/79_html.js b/bits/79_html.js index b8849cf..92b1884 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -139,6 +139,7 @@ function sheet_add_dom(ws/*:Worksheet*/, table/*:HTMLElement*/, _opts/*:?any*/)/ or_R = _origin.r; or_C = _origin.c; } } + var rows/*:HTMLCollection*/ = table.getElementsByTagName('tr'); var sheetRows = Math.min(opts.sheetRows||10000000, rows.length); var range/*:Range*/ = {s:{r:0,c:0},e:{r:or_R,c:or_C}}; @@ -223,7 +224,7 @@ function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ { var display/*:string*/ = ''; var get_computed_style/*:?function*/ = get_get_computed_style_function(element); if(get_computed_style) display = get_computed_style(element).getPropertyValue('display'); - if(!display) display = element.style.display; // Fallback for cases when getComputedStyle is not available (e.g. an old browser or some Node.js environments) or doesn't work (e.g. 
if the element is not inserted to a document) + if(!display) display = element.style && element.style.display; return display === 'none'; } diff --git a/demos/headless/.eslintrc b/demos/headless/.eslintrc new file mode 100644 index 0000000..4d91234 --- /dev/null +++ b/demos/headless/.eslintrc @@ -0,0 +1,11 @@ +{ + + "env": { "node":true }, + "parserOptions": { + "ecmaVersion": 8 + }, + "rules": { + "no-var": 0, + "semi": [ 2, "always" ] + } +} diff --git a/demos/headless/README.md b/demos/headless/README.md index d26ef0f..9a5b3b6 100644 --- a/demos/headless/README.md +++ b/demos/headless/README.md @@ -3,30 +3,42 @@ The library, eschewing unstable and nascent ECMAScript features, plays nicely with most headless browsers. This demo shows a few common headless scenarios. -## PhantomJS +NodeJS does not ship with its own layout engine. For advanced HTML exports, a +headless browser is generally indistinguishable from a browser process. -This was tested in PhantomJS 2.1.1, installed using the node module: +## Chromium Automation with Puppeteer + +[Puppeteer](https://npm.im/puppeteer) enables headless Chromium automation. + +[`html.js`](./html.js) shows a dedicated script for converting an HTML file to +XLSB using puppeteer. The first argument is the path to the HTML file. The +script writes to `output.xlsb`: + +```bash +# read from test.html and write to output.xlsb +$ node html.js test.html +``` + +The script pulls up the webpage using headless Chromium and adds a script tag +reference to the standalone browser build. That will make the `XLSX` variable +available to future scripts added in the page! The browser context is not able +to save the file using `writeFile`, so the demo generates the XLSB spreadsheet +bytes with the `base64` type, sends the string back to the main process, and +uses `fs.writeFileSync` to write the file. 
+ +## WebKit Automation with PhantomJS + +This was tested using [PhantomJS 2.1.1](https://phantomjs.org/download.html) ```bash -$ npm install -g phantomjs $ phantomjs phantomjs.js ``` -## Chrome Automation - -This was tested in puppeteer 0.9.0 (Chromium revision 494755) and `chromeless`: - -```bash -$ npm install puppeteer -$ node puppeteer.js - -$ npm install -g chromeless -$ node chromeless.js -``` - -Since the main process is node, the read and write features should be placed in -the webpage. The `dist` versions are suitable for web pages. +The flow is similar to the Puppeteer flow (scrape table and generate workbook in +website context, copy string back, write string to file from main process). +The `binary` type generates strings that can be written in PhantomJS using the +`fs.write` method with mode `"wb"`. ## wkhtmltopdf @@ -36,13 +48,5 @@ This was tested in wkhtmltopdf 0.12.4, installed using the official binaries: $ wkhtmltopdf --javascript-delay 20000 http://oss.sheetjs.com/sheetjs/tests/ test.pdf ``` -## SlimerJS - -This was tested in SlimerJS 0.10.3 and FF 52.0, installed using `brew` on OSX: - -```bash -$ brew install slimerjs -$ slimerjs slimerjs.js -``` [![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/js-xlsx?pixel)](https://github.com/SheetJS/js-xlsx) diff --git a/demos/headless/chromeless.js b/demos/headless/chromeless.js deleted file mode 100644 index ed63b76..0000000 --- a/demos/headless/chromeless.js +++ /dev/null @@ -1,9 +0,0 @@ -const { Chromeless } = require('chromeless'); -const TEST = 'http://localhost:8000', TIME = 30 * 1000; -(async() => { - const browser = new Chromeless(); - const pth = await browser.goto(TEST).wait(TIME).screenshot(); - console.log(pth); - await browser.end(); -})().catch(e=>{ console.error(e); }); - diff --git a/demos/headless/html.js b/demos/headless/html.js new file mode 100755 index 0000000..4abec89 --- /dev/null +++ b/demos/headless/html.js @@ -0,0 +1,51 @@ +#!/usr/bin/env node +/* xlsx.js (C) 
2013-present SheetJS -- http://sheetjs.com */ +const puppeteer = require("puppeteer"); +const path = require("path"); +const fs = require("fs"); + +/* inf is the path to the html file -> url is a file URL */ +let inf = process.argv[2] || "test.html"; +let htmlpath = path.join(__dirname, inf); +if(!fs.existsSync(htmlpath)) htmlpath = path.join(process.cwd(), inf); +if(!fs.existsSync(htmlpath)) htmlpath = path.resolve(inf); +if(!fs.existsSync(htmlpath)) { console.error(`Could not find a valid file for \`${inf}\``); process.exit(4); } +console.error(`Reading from ${htmlpath}`); +const url = `file://${htmlpath}`; + +/* get the standalone build source (e.g. node_modules/xlsx/dist/xlsx.full.min.js) */ +// const websrc = fs.readFileSync(require.resolve("xlsx/dist/xlsx.full.min.js"), "utf8"); +const get_lib = (jspath) => fs.readFileSync(path.resolve(__dirname, jspath)).toString(); +const websrc = get_lib("xlsx.full.min.js"); + +(async() => { + /* start browser and go to web page */ + const browser = await puppeteer.launch(); + const page = await browser.newPage(); + page.on("console", msg => console.log("PAGE LOG:", msg.text())); + await page.setViewport({width: 1920, height: 1080}); + await page.goto(url, {waitUntil: "networkidle2"}); + + /* inject library */ + await page.addScriptTag({content: websrc}); + + /* this function `s5s` will be called by the script below, receiving the Base64-encoded file */ + await page.exposeFunction("s5s", async(b64) => { + fs.writeFileSync("output.xlsb", b64, {encoding: "base64"}); + }); + + /* generate XLSB file in webpage context and send back a Base64-encoded string */ + await page.addScriptTag({content: ` + /* call table_to_book on first table */ + var wb = XLSX.utils.table_to_book(document.getElementsByTagName("TABLE")[0]); + + /* generate XLSB file */ + var b64 = XLSX.write(wb, {type: "base64", bookType: "xlsb"}); + + /* call "s5s" hook exposed from the node process */ + window.s5s(b64); + `}); + + /* cleanup */ + await 
browser.close(); +})(); diff --git a/demos/headless/phantomjs.js b/demos/headless/phantomjs.js index 0281d48..25bf7df 100644 --- a/demos/headless/phantomjs.js +++ b/demos/headless/phantomjs.js @@ -1,15 +1,35 @@ /* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */ -var fs = require('fs'); -var xlsx = require('../../xlsx'); +/* eslint-env phantomjs */ +var XLSX = require('xlsx'); + var page = require('webpage').create(); +page.onConsoleMessage = function(msg) { console.log(msg); }; -page.open('http://oss.sheetjs.com/sheetjs/tests/', function(status) { +/* this code will be run in the page */ +var code = [ "function(){", + /* call table_to_book on first table */ + "var wb = XLSX.utils.table_to_book(document.body.getElementsByTagName('table')[0]);", - var data = fs.read('sheetjs.xlsx', {mode: 'rb', charset: 'utf8'}); - var workbook = xlsx.read(data, {type: 'binary'}); - data = xlsx.utils.sheet_to_csv(workbook.Sheets['SheetJS']); - console.log("Data: " + data); + /* generate XLSB file and return binary string */ + "return XLSX.write(wb, {type: 'binary', bookType: 'xlsb'});", +"}" ].join(""); - phantom.exit(); +page.open('https://sheetjs.com/demos/table', function() { + console.log("Page Loaded"); + /* Load the browser script from the UNPKG CDN */ + page.includeJs("https://unpkg.com/xlsx/dist/xlsx.full.min.js", function() { + /* Verify the page is loaded by logging the version number */ + var version = "function(){ console.log('Library Version:' + window.XLSX.version); }"; + page.evaluateJavaScript(version); + + /* The code will return a binary string */ + var bin = page.evaluateJavaScript(code); + var workbook = XLSX.read(bin, {type: "binary"}); + console.log(XLSX.utils.sheet_to_csv(workbook.Sheets[workbook.SheetNames[0]])); + + /* XLSX.writeFile will not work here -- have to write manually */ + require("fs").write("phantomjs.xlsb", bin, "wb"); + phantom.exit(); + }); }); diff --git a/demos/headless/puppeteer.js b/demos/headless/puppeteer.js deleted file mode 
100644 index 0958297..0000000 --- a/demos/headless/puppeteer.js +++ /dev/null @@ -1,14 +0,0 @@ -/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */ -const puppeteer = require('puppeteer'); - -(async () => { - - const browser = await puppeteer.launch(); - const page = await browser.newPage(); - await page.goto('http://oss.sheetjs.com/sheetjs/tests/', {waitUntil: 'load'}); - await page.waitFor(30*1000); - await page.pdf({path: 'test.pdf', format: 'A4'}); - - browser.close(); -})(); - diff --git a/demos/headless/slimerjs.js b/demos/headless/slimerjs.js deleted file mode 100644 index 1d1fe16..0000000 --- a/demos/headless/slimerjs.js +++ /dev/null @@ -1,15 +0,0 @@ -/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */ -var fs = require('fs'); -var xlsx = require('../../dist/xlsx.full.min'); -var page = require('webpage').create(); - -page.open('http://oss.sheetjs.com/sheetjs/tests/', function(status) { - - var data = fs.read('sheetjs.xlsx', {mode: 'rb', charset: 'utf8'}); - var workbook = xlsx.read(data, {type: 'binary'}); - data = xlsx.utils.sheet_to_csv(workbook.Sheets['SheetJS']); - console.log("Data: " + data); - - phantom.exit(); -}); - diff --git a/demos/headless/test.html b/demos/headless/test.html new file mode 100644 index 0000000..2a6f24c --- /dev/null +++ b/demos/headless/test.html @@ -0,0 +1,35 @@ + + + + + SheetJS Table Export + + + + + + + + + + + + + + + + + + + + + + + + + + + +
SheetJSTableExportTest
வணக்கம்สวัสดี你好가지마
1234
Clicktoeditcells
+ + diff --git a/demos/headless/xlsx.full.min.js b/demos/headless/xlsx.full.min.js new file mode 120000 index 0000000..dbca48d --- /dev/null +++ b/demos/headless/xlsx.full.min.js @@ -0,0 +1 @@ +../../dist/xlsx.full.min.js \ No newline at end of file diff --git a/docbits/13_usage.md b/docbits/13_usage.md index 979cdf4..5a621d1 100644 --- a/docbits/13_usage.md +++ b/docbits/13_usage.md @@ -48,7 +48,6 @@ and approaches for steps 1 and 5. Utility functions help with step 3. - ### The Zen of SheetJS _Data processing should fit in any workflow_ diff --git a/docbits/20_import.md b/docbits/20_import.md index 5c0bbac..33a879c 100644 --- a/docbits/20_import.md +++ b/docbits/20_import.md @@ -2,7 +2,7 @@ ### Parsing Workbooks -#### API +**API** _Extract data from spreadsheet bytes_ @@ -27,7 +27,7 @@ security risk), and attempts to read files in this way will throw an error. The second `opts` argument is optional. ["Parsing Options"](#parsing-options) covers the supported properties and behaviors. -#### Examples +**Examples** Here are a few common scenarios (click on each subtitle to see the code): @@ -42,7 +42,7 @@ var XLSX = require("xlsx"); var workbook = XLSX.readFile("test.xlsx"); ``` -For Node ESM, the `readFile` helper is not enabled. Instead, `fs.readFileSync` +For Node ESM, the `readFile` helper is not enabled. Instead, `fs.readFileSync` should be used to read the file data as a `Buffer` for use with `XLSX.read`: ```js @@ -423,5 +423,3 @@ const workbook = XLSX.read(data);
More detailed examples are covered in the [included demos](demos/) - - diff --git a/docbits/22_ingress.md b/docbits/22_ingress.md index 14550ce..379b3ed 100644 --- a/docbits/22_ingress.md +++ b/docbits/22_ingress.md @@ -23,7 +23,7 @@ The third argument specifies the desired worksheet name. Multiple worksheets can be added to a workbook by calling the function multiple times. -#### API +**API** _Create a worksheet from an array of arrays of JS values_ @@ -61,17 +61,68 @@ control the column order and header output. ["Array of Objects Input"](#array-of-arrays-input) describes the function and the optional `opts` argument in more detail. -#### Examples +**Examples** ["Zen of SheetJS"](#the-zen-of-sheetjs) contains a detailed example "Get Data from a JSON Endpoint and Generate a Workbook" + +[`x-spreadsheet`](https://github.com/myliang/x-spreadsheet) is an interactive +data grid for previewing and modifying structured data in the web browser. The +[`xspreadsheet` demo](/demos/xspreadsheet) includes a sample script with the +`xtos` function for converting from x-spreadsheet data object to a workbook. + is a live demo. + +
+ Records from a database query (SQL or no-SQL) (click to show) + The [`database` demo](/demos/database/) includes examples of working with databases and query results. +
+ + +
+ Numerical Computations with TensorFlow.js (click to show) + +[`@tensorflow/tfjs`](https://www.npmjs.com/package/@tensorflow/tfjs) and other libraries expect data in simple +arrays, well-suited for worksheets where each column is a data vector. That is +the transpose of how most people use spreadsheets, where each row is a vector. + +When recovering data from `tfjs`, the returned data points are stored in a typed +array. An array of arrays can be constructed with loops. `Array#unshift` can +prepend a title row before the conversion: + +```js +const XLSX = require("xlsx"); +const tf = require('@tensorflow/tfjs'); + +/* suppose xs and ys are vectors (1D tensors) -> tfarr will be a typed array */ +const tfdata = tf.stack([xs, ys]).transpose(); +const shape = tfdata.shape; +const tfarr = tfdata.dataSync(); + +/* construct the array of arrays */ +const aoa = []; +for(let j = 0; j < shape[0]; ++j) { + aoa[j] = []; + for(let i = 0; i < shape[1]; ++i) aoa[j][i] = tfarr[j * shape[1] + i]; +} +/* add headers to the top */ +aoa.unshift(["x", "y"]); + +/* generate worksheet */ +const worksheet = XLSX.utils.aoa_to_sheet(aoa); +``` + +The [`array` demo](demos/array/) shows a complete example. + +
+ + ### Processing HTML Tables -#### API +**API** _Create a worksheet by scraping an HTML TABLE in the page_ @@ -99,7 +150,7 @@ The options argument supports the same options as `table_to_sheet`, with the addition of a `sheet` property to control the worksheet name. If the property is missing or no options are specified, the default name `Sheet1` is used. -#### Examples +**Examples** Here are a few common scenarios (click on each subtitle to see the code): @@ -180,3 +231,113 @@ chrome.runtime.onMessage.addListener(function(msg, sender, cb) { +
+ Server-Side HTML Tables with Headless Chrome (click to show) + +The [`headless` demo](demos/headless/) includes a complete demo to convert HTML +files to XLSB workbooks. The core idea is to add the script to the page, parse +the table in the page context, generate a `base64` workbook and send it back +for further processing: + +```js +const XLSX = require("xlsx"); +const { readFileSync } = require("fs"), puppeteer = require("puppeteer"); + +const url = `https://sheetjs.com/demos/table`; + +/* get the standalone build source (node_modules/xlsx/dist/xlsx.full.min.js) */ +const lib = readFileSync(require.resolve("xlsx/dist/xlsx.full.min.js"), "utf8"); + +(async() => { + /* start browser and go to web page */ + const browser = await puppeteer.launch(); + const page = await browser.newPage(); + await page.goto(url, {waitUntil: "networkidle2"}); + + /* inject library */ + await page.addScriptTag({content: lib}); + + /* this function `s5s` will be called by the script below, receiving the Base64-encoded file */ + await page.exposeFunction("s5s", async(b64) => { + const workbook = XLSX.read(b64, {type: "base64" }); + + /* DO SOMETHING WITH workbook HERE */ + }); + + /* generate XLSB file in webpage context and send back result */ + await page.addScriptTag({content: ` + /* call table_to_book on first table */ + var workbook = XLSX.utils.table_to_book(document.querySelector("TABLE")); + + /* generate XLSB file */ + var b64 = XLSX.write(workbook, {type: "base64", bookType: "xlsb"}); + + /* call "s5s" hook exposed from the node process */ + window.s5s(b64); + `}); + + /* cleanup */ + await browser.close(); +})(); +``` + +
+ +
+ Server-Side HTML Tables with Headless WebKit (click to show) + +The [`headless` demo](demos/headless/) includes a complete demo to convert HTML +files to XLSB workbooks using [PhantomJS](https://phantomjs.org/). The core idea +is to add the script to the page, parse the table in the page context, generate +a `binary` workbook and send it back for further processing: + +```js +var XLSX = require('xlsx'); +var page = require('webpage').create(); + +/* this code will be run in the page */ +var code = [ "function(){", + /* call table_to_book on first table */ + "var wb = XLSX.utils.table_to_book(document.body.getElementsByTagName('table')[0]);", + + /* generate XLSB file and return binary string */ + "return XLSX.write(wb, {type: 'binary', bookType: 'xlsb'});", +"}" ].join(""); + +page.open('https://sheetjs.com/demos/table', function() { + /* Load the browser script from the UNPKG CDN */ + page.includeJs("https://unpkg.com/xlsx/dist/xlsx.full.min.js", function() { + /* The code will return an XLSB file encoded as binary string */ + var bin = page.evaluateJavaScript(code); + + var workbook = XLSX.read(bin, {type: "binary"}); + /* DO SOMETHING WITH workbook HERE */ + + phantom.exit(); + }); +}); +``` + +
+ +
+ NodeJS HTML Tables without a browser (click to show) + +NodeJS does not include a DOM implementation and Puppeteer requires a hefty +Chromium build. [`jsdom`](https://npm.im/jsdom) is a lightweight alternative: + +```js +const XLSX = require("xlsx"); +const { readFileSync } = require("fs"); +const { JSDOM } = require("jsdom"); + +/* obtain HTML string. This example reads from test.html */ +const html_str = readFileSync("test.html", "utf8"); +/* get first TABLE element */ +const doc = new JSDOM(html_str).window.document.querySelector("table"); +/* generate workbook */ +const workbook = XLSX.utils.table_to_book(doc); +``` + +
+ diff --git a/docbits/30_export.md b/docbits/30_export.md index a3d9cb8..d0ca55c 100644 --- a/docbits/30_export.md +++ b/docbits/30_export.md @@ -1,34 +1,79 @@ -## Writing Workbooks +## Packaging and Releasing Data -For writing, the first step is to generate output data. The helper functions -`write` and `writeFile` will produce the data in various formats suitable for -dissemination. The second step is to actual share the data with the end point. -Assuming `workbook` is a workbook object: +### Writing Workbooks -
- nodejs write a file (click to show) +**API** -`XLSX.writeFile` uses `fs.writeFileSync` in server environments: +_Generate spreadsheet bytes (file) from data_ ```js -if(typeof require !== 'undefined') XLSX = require('xlsx'); +var data = XLSX.write(workbook, opts); +``` + +The `write` method attempts to package data from the workbook into a file in +memory. By default, XLSX files are generated, but that can be controlled with +the `bookType` property of the `opts` argument. Based on the `type` option, +the data can be stored as a "binary string", JS string, `Uint8Array` or Buffer. + +The second `opts` argument is required. ["Writing Options"](#writing-options) +covers the supported properties and behaviors. + +_Generate and attempt to save file_ + +```js +XLSX.writeFile(workbook, filename, opts); +``` + +The `writeFile` method packages the data and attempts to save the new file. The +export file format is determined by the extension of `filename` (`SheetJS.xlsx` +signals XLSX export, `SheetJS.xlsb` signals XLSB export, etc). + +The `writeFile` method uses platform-specific APIs to initiate the file save. In +NodeJS, `fs.writeFileSync` can create a file. In the web browser, a download is +attempted using the HTML5 `download` attribute, with fallbacks for IE. + +The third `opts` argument is optional. ["Writing Options"](#writing-options) +covers the supported properties and behaviors. + +**Examples** + +
+ Local file in a NodeJS server (click to show) + +`writeFile` uses `fs.writeFileSync` in server environments: + +```js +var XLSX = require("xlsx"); + /* output format determined by filename */ -XLSX.writeFile(workbook, 'out.xlsb'); -/* at this point, out.xlsb is a file that you can distribute */ +XLSX.writeFile(workbook, "out.xlsb"); +``` + +For Node ESM, the `writeFile` helper is not enabled. Instead, `fs.writeFileSync` +should be used to write the `Buffer` generated by `XLSX.write` to a file: + +```js +import { writeFileSync } from "fs"; +import { write } from "xlsx/xlsx.mjs"; + +const buf = write(workbook, {type: "buffer", bookType: "xlsb"}); +/* buf is a Buffer */ +writeFileSync("out.xlsb", buf); ``` 
- Photoshop ExtendScript write a file (click to show) + Local file in a PhotoShop or InDesign plugin (click to show) `writeFile` wraps the `File` logic in Photoshop and other ExtendScript targets. The specified path should be an absolute path: ```js #include "xlsx.extendscript.js" + /* output format determined by filename */ -XLSX.writeFile(workbook, 'out.xlsx'); +XLSX.writeFile(workbook, "out.xlsx"); /* at this point, out.xlsx is a file that you can distribute */ ``` @@ -37,44 +82,7 @@ The [`extendscript` demo](demos/extendscript/) includes a more complex example.
- Browser add TABLE element to page (click to show) - -The `sheet_to_html` utility function generates HTML code that can be added to -any DOM element. - -```js -var worksheet = workbook.Sheets[workbook.SheetNames[0]]; -var container = document.getElementById('tableau'); -container.innerHTML = XLSX.utils.sheet_to_html(worksheet); -``` - -
- -
- Browser upload file (ajax) (click to show) - -A complete example using XHR is [included in the XHR demo](demos/xhr/), along -with examples for fetch and wrapper libraries. This example assumes the server -can handle Base64-encoded files (see the demo for a basic nodejs server): - -```js -/* in this example, send a base64 string to the server */ -var wopts = { bookType:'xlsx', bookSST:false, type:'base64' }; - -var wbout = XLSX.write(workbook,wopts); - -var req = new XMLHttpRequest(); -req.open("POST", "/upload", true); -var formdata = new FormData(); -formdata.append('file', 'test.xlsx'); // <-- server expects `file` to hold name -formdata.append('data', wbout); // <-- `data` holds the base64-encoded data -req.send(formdata); -``` - -
- -
- Browser save file (click to show) + Download a file in the browser to the user machine (click to show) `XLSX.writeFile` wraps a few techniques for triggering a file save: @@ -88,14 +96,14 @@ There is no standard way to determine if the actual file has been downloaded. ```js /* output format determined by filename */ -XLSX.writeFile(workbook, 'out.xlsb'); +XLSX.writeFile(workbook, "out.xlsb"); /* at this point, out.xlsb will have been downloaded */ ```
- Browser save file (compatibility) (click to show) + Download a file in legacy browsers (click to show) `XLSX.writeFile` techniques work for most modern browsers as well as older IE. For much older browsers, there are workarounds implemented by wrapper libraries. @@ -105,7 +113,7 @@ Note: `XLSX.writeFile` will automatically call `saveAs` if available. ```js /* bookType can be any supported output type */ -var wopts = { bookType:'xlsx', bookSST:false, type:'array' }; +var wopts = { bookType:"xlsx", bookSST:false, type:"array" }; var wbout = XLSX.write(workbook,wopts); @@ -118,11 +126,11 @@ to generate local files, suitable for environments where ActiveX is unavailable: ```js Downloadify.create(id,{ - /* other options are required! read the downloadify docs for more info */ - filename: "test.xlsx", - data: function() { return XLSX.write(wb, {bookType:"xlsx", type:'base64'}); }, - append: false, - dataType: 'base64' + /* other options are required! read the downloadify docs for more info */ + filename: "test.xlsx", + data: function() { return XLSX.write(wb, {bookType:"xlsx", type:"base64"}); }, + append: false, + dataType: "base64" }); ``` @@ -130,6 +138,54 @@ The [`oldie` demo](demos/oldie/) shows an IE-compatible fallback scenario.
+
+ Browser upload file (ajax) (click to show) + +A complete example using XHR is [included in the XHR demo](demos/xhr/), along +with examples for fetch and wrapper libraries. This example assumes the server +can handle Base64-encoded files (see the demo for a basic nodejs server): + +```js +/* in this example, send a base64 string to the server */ +var wopts = { bookType:"xlsx", bookSST:false, type:"base64" }; + +var wbout = XLSX.write(workbook,wopts); + +var req = new XMLHttpRequest(); +req.open("POST", "/upload", true); +var formdata = new FormData(); +formdata.append("file", "test.xlsx"); // <-- server expects `file` to hold name +formdata.append("data", wbout); // <-- `data` holds the base64-encoded data +req.send(formdata); +``` + +
+ +
+ PhantomJS (Headless WebKit) File Generation (click to show) + +The [`headless` demo](demos/headless/) includes a complete demo to convert HTML +files to XLSB workbooks using [PhantomJS](https://phantomjs.org/). PhantomJS +`fs.write` supports writing files from the main process but has a different +interface from the NodeJS `fs` module: + +```js +var XLSX = require('xlsx'); +var fs = require('fs'); + +/* generate a binary string */ +var bin = XLSX.write(workbook, { type:"binary", bookType: "xlsx" }); +/* write to file */ +fs.write("test.xlsx", bin, "wb"); +``` + +Note: The section ["Processing HTML Tables"](#processing-html-tables) shows how +to generate a workbook from HTML tables in a page in "Headless WebKit". + +
+ + + The [included demos](demos/) cover mobile apps and other special deployments. ### Writing Examples @@ -137,3 +193,42 @@ The [included demos](demos/) cover mobile apps and other special deployments. - exporting an HTML table - generates a simple file +### Streaming Write + +The streaming write functions are available in the `XLSX.stream` object. They +take the same arguments as the normal write functions but return a Readable +Stream. They are only exposed in NodeJS. + +- `XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`. +- `XLSX.stream.to_html` is the streaming version of `XLSX.utils.sheet_to_html`. +- `XLSX.stream.to_json` is the streaming version of `XLSX.utils.sheet_to_json`. + +
+ nodejs convert to CSV and write file (click to show) + +```js +var output_file_name = "out.csv"; +var stream = XLSX.stream.to_csv(worksheet); +stream.pipe(fs.createWriteStream(output_file_name)); +``` + +
+ +
+ nodejs write JSON stream to screen (click to show) + +```js +/* to_json returns an object-mode stream */ +var stream = XLSX.stream.to_json(worksheet, {raw:true}); + +/* the following stream converts JS objects to text via JSON.stringify */ +var conv = new Transform({writableObjectMode:true}); +conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); }; + +stream.pipe(conv); conv.pipe(process.stdout); +``` + +
+ + pipes write streams to nodejs response. + diff --git a/docbits/31_writestream.md b/docbits/31_writestream.md deleted file mode 100644 index defbee4..0000000 --- a/docbits/31_writestream.md +++ /dev/null @@ -1,39 +0,0 @@ -### Streaming Write - -The streaming write functions are available in the `XLSX.stream` object. They -take the same arguments as the normal write functions but return a Readable -Stream. They are only exposed in NodeJS. - -- `XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`. -- `XLSX.stream.to_html` is the streaming version of `XLSX.utils.sheet_to_html`. -- `XLSX.stream.to_json` is the streaming version of `XLSX.utils.sheet_to_json`. - -
- nodejs convert to CSV and write file (click to show) - -```js -var output_file_name = "out.csv"; -var stream = XLSX.stream.to_csv(worksheet); -stream.pipe(fs.createWriteStream(output_file_name)); -``` - -
- -
- nodejs write JSON stream to screen (click to show) - -```js -/* to_json returns an object-mode stream */ -var stream = XLSX.stream.to_json(worksheet, {raw:true}); - -/* the following stream converts JS objects to text via JSON.stringify */ -var conv = new Transform({writableObjectMode:true}); -conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); }; - -stream.pipe(conv); conv.pipe(process.stdout); -``` - -
- - pipes write streams to nodejs response. - diff --git a/docbits/32_egress.md b/docbits/32_egress.md new file mode 100644 index 0000000..562942d --- /dev/null +++ b/docbits/32_egress.md @@ -0,0 +1,217 @@ +### Generating JSON and JS Data + +JSON and JS data tend to represent single worksheets. The utility functions in +this section work with single worksheets. + +The ["Common Spreadsheet Format"](#common-spreadsheet-format) section describes +the object structure in more detail. `workbook.SheetNames` is an ordered list +of the worksheet names. `workbook.Sheets` is an object whose keys are sheet +names and whose values are worksheet objects. + +The "first worksheet" is stored at `workbook.Sheets[workbook.SheetNames[0]]`. + +**API** + +_Create an array of JS objects from a worksheet_ + +```js +var jsa = XLSX.utils.sheet_to_json(worksheet, opts); +``` + +_Create an array of arrays of JS values from a worksheet_ + +```js +var aoa = XLSX.utils.sheet_to_json(worksheet, {...opts, header: 1}); +``` + +The `sheet_to_json` utility function walks a worksheet in row-major order, +generating an array of objects. The second `opts` argument controls a number of +export decisions including the type of values (JS values or formatted text). The +["JSON"](#json) section describes the argument in more detail. + +By default, `sheet_to_json` scans the first row and uses the values as headers. +With the `header: 1` option, the function exports an array of arrays of values. + +**Examples** + +[`x-spreadsheet`](https://github.com/myliang/x-spreadsheet) is an interactive +data grid for previewing and modifying structured data in the web browser. The +[`xspreadsheet` demo](/demos/xspreadsheet) includes a sample script with the +`stox` function for converting from a workbook to x-spreadsheet data object. + is a live demo. + +
+ Populating a database (SQL or no-SQL) (click to show) + +The [`database` demo](/demos/database/) includes examples of working with +databases and query results. + +
+ +
+ Numerical Computations with TensorFlow.js (click to show) + +[`@tensorflow/tfjs`](https://www.npmjs.com/package/@tensorflow/tfjs) and other libraries expect data in simple +arrays, well-suited for worksheets where each column is a data vector. That is +the transpose of how most people use spreadsheets, where each row is a vector. + +A single `Array#map` can pull individual named fields from `sheet_to_json` export: + +```js +const XLSX = require("xlsx"); +const tf = require('@tensorflow/tfjs'); + +const key = "age"; // this is the field we want to pull +const ages = XLSX.utils.sheet_to_json(worksheet).map(r => r[key]); +const tf_data = tf.tensor1d(ages); +``` + +All fields can be processed at once using a transpose of the 2D tensor generated +with the `sheet_to_json` export with `header: 1`. The first row, if it contains +header labels, should be removed with a slice: + +```js +const XLSX = require("xlsx"); +const tf = require('@tensorflow/tfjs'); + +/* array of arrays of the data starting on the second row */ +const aoa = XLSX.utils.sheet_to_json(worksheet, {header: 1}).slice(1); +/* dataset in the "correct orientation" */ +const tf_dataset = tf.tensor2d(aoa).transpose(); +/* pull out each dataset with a slice */ +const tf_field0 = tf_dataset.slice([0,0], [1,tf_dataset.shape[1]]).flatten(); +const tf_field1 = tf_dataset.slice([1,0], [1,tf_dataset.shape[1]]).flatten(); +``` + +The [`array` demo](demos/array/) shows a complete example. + +
+ + +### Generating HTML Tables + +**API** + +_Generate HTML Table from Worksheet_ + +```js +var html = XLSX.utils.sheet_to_html(worksheet); +``` + +The `sheet_to_html` utility function generates HTML code based on the worksheet +data. Each cell in the worksheet is mapped to a `<TD>` element. Merged cells +in the worksheet are serialized by setting `colspan` and `rowspan` attributes. + +**Examples** + +The `sheet_to_html` utility function generates HTML code that can be added to +any DOM element by setting the `innerHTML`: + +```js +var container = document.getElementById("tavolo"); +container.innerHTML = XLSX.utils.sheet_to_html(worksheet); +``` + +Combining with `fetch`, constructing a site from a workbook is straightforward: + +
+ Vanilla JS + HTML fetch workbook and generate table previews (click to show) + +```html + + +
+ + + +``` + +
+ +
+ React fetch workbook and generate HTML table previews (click to show) + +It is generally recommended to use a React-friendly workflow, but it is possible +to generate HTML and use it in React with `dangerouslySetInnerHTML`: + +```jsx +function Tabeller(props) { + /* the workbook object is the state */ + const [workbook, setWorkbook] = React.useState(XLSX.utils.book_new()); + + /* fetch and update the workbook with an effect (run once, on mount) */ + React.useEffect(() => { (async() => { + /* fetch and parse workbook -- see the fetch example for details */ + const wb = XLSX.read(await (await fetch("sheetjs.xlsx")).arrayBuffer()); + setWorkbook(wb); + })(); }, []); + + return workbook.SheetNames.map(name => (<> +

name

+
+ )); +} +``` + +The [`react` demo](demos/react) includes more React examples. + +
+ +
+ VueJS fetch workbook and generate HTML table previews (click to show) + +It is generally recommended to use a VueJS-friendly workflow, but it is possible +to generate HTML and use it in VueJS with the `v-html` directive: + +```jsx +import { read, utils } from 'xlsx'; +import { reactive } from 'vue'; + +const S5SComponent = { + mounted() { (async() => { + /* fetch and parse workbook -- see the fetch example for details */ + const workbook = read(await (await fetch("sheetjs.xlsx")).arrayBuffer()); + /* loop through the worksheet names in order */ + workbook.SheetNames.forEach(name => { + /* generate HTML from the corresponding worksheets */ + const html = utils.sheet_to_html(workbook.Sheets[name]); + /* add to state */ + this.wb.wb.push({ name, html }); + }); + })(); }, + /* this state mantra is required for array updates to work */ + setup() { return { wb: reactive({ wb: [] }) }; }, + template: ` +
+

{{ ws.name }}

+
+
` +}; +``` + +The [`vuejs` demo](demos/vue) includes more VueJS examples. + +
+ diff --git a/misc/docs/README.md b/misc/docs/README.md index cb54f59..332265e 100644 --- a/misc/docs/README.md +++ b/misc/docs/README.md @@ -40,19 +40,16 @@ port calculations to web apps; automate common spreadsheet tasks, and much more! * [JS Ecosystem Demos](#js-ecosystem-demos) - [Acquiring and Extracting Data](#acquiring-and-extracting-data) * [Parsing Workbooks](#parsing-workbooks) - + [API](#api) - + [Examples](#examples) * [Processing JSON and JS Data](#processing-json-and-js-data) - + [API](#api-1) - + [Examples](#examples-1) * [Processing HTML Tables](#processing-html-tables) - + [API](#api-2) - + [Examples](#examples-2) - [Working with the Workbook](#working-with-the-workbook) * [Parsing and Writing Examples](#parsing-and-writing-examples) -- [Writing Workbooks](#writing-workbooks) +- [Packaging and Releasing Data](#packaging-and-releasing-data) + * [Writing Workbooks](#writing-workbooks) * [Writing Examples](#writing-examples) * [Streaming Write](#streaming-write) + * [Generating JSON and JS Data](#generating-json-and-js-data) + * [Generating HTML Tables](#generating-html-tables) - [Interface](#interface) * [Parsing functions](#parsing-functions) * [Writing functions](#writing-functions) @@ -249,7 +246,6 @@ and approaches for steps 1 and 5. Utility functions help with step 3. - ### The Zen of SheetJS _Data processing should fit in any workflow_ @@ -508,7 +504,7 @@ Other examples are included in the [showcase](demos/showcase/). ### Parsing Workbooks -#### API +**API** _Extract data from spreadsheet bytes_ @@ -533,7 +529,7 @@ security risk), and attempts to read files in this way will throw an error. The second `opts` argument is optional. ["Parsing Options"](#parsing-options) covers the supported properties and behaviors. 
-#### Examples +**Examples** Here are a few common scenarios (click on each subtitle to see the code): @@ -546,7 +542,7 @@ var XLSX = require("xlsx"); var workbook = XLSX.readFile("test.xlsx"); ``` -For Node ESM, the `readFile` helper is not enabled. Instead, `fs.readFileSync` +For Node ESM, the `readFile` helper is not enabled. Instead, `fs.readFileSync` should be used to read the file data as a `Buffer` for use with `XLSX.read`: ```js @@ -893,8 +889,6 @@ const workbook = XLSX.read(data); More detailed examples are covered in the [included demos](demos/) - - ### Processing JSON and JS Data JSON and JS data tend to represent single worksheets. This section will use a @@ -920,7 +914,7 @@ The third argument specifies the desired worksheet name. Multiple worksheets can be added to a workbook by calling the function multiple times. -#### API +**API** _Create a worksheet from an array of arrays of JS values_ @@ -958,17 +952,62 @@ control the column order and header output. ["Array of Objects Input"](#array-of-arrays-input) describes the function and the optional `opts` argument in more detail. -#### Examples +**Examples** ["Zen of SheetJS"](#the-zen-of-sheetjs) contains a detailed example "Get Data from a JSON Endpoint and Generate a Workbook" + +[`x-spreadsheet`](https://github.com/myliang/x-spreadsheet) is an interactive +data grid for previewing and modifying structured data in the web browser. The +[`xspreadsheet` demo](/demos/xspreadsheet) includes a sample script with the +`xtos` function for converting from x-spreadsheet data object to a workbook. + is a live demo. + + The [`database` demo](/demos/database/) includes examples of working with databases and query results. + + + +[`@tensorflow/tfjs`](@tensorflow/tfjs) and other libraries expect data in simple +arrays, well-suited for worksheets where each column is a data vector. That is +the transpose of how most people use spreadsheets, where each row is a vector. 
+ +When recovering data from `tfjs`, the returned data points are stored in a typed +array. An array of arrays can be constructed with loops. `Array#unshift` can +prepend a title row before the conversion: + +```js +const XLSX = require("xlsx"); +const tf = require('@tensorflow/tfjs'); + +/* suppose xs and ys are vectors (1D tensors) -> tfarr will be a typed array */ +const tfdata = tf.stack([xs, ys]).transpose(); +const shape = tfdata.shape; +const tfarr = tfdata.dataSync(); + +/* construct the array of arrays */ +const aoa = []; +for(let j = 0; j < shape[0]; ++j) { + aoa[j] = []; + for(let i = 0; i < shape[1]; ++i) aoa[j][i] = tfarr[j * shape[1] + i]; +} +/* add headers to the top */ +aoa.unshift(["x", "y"]); + +/* generate worksheet */ +const worksheet = XLSX.utils.aoa_to_sheet(aoa); +``` + +The [`array` demo](demos/array/) shows a complete example. + + + ### Processing HTML Tables -#### API +**API** _Create a worksheet by scraping an HTML TABLE in the page_ @@ -996,7 +1035,7 @@ The options argument supports the same options as `table_to_sheet`, with the addition of a `sheet` property to control the worksheet name. If the property is missing or no options are specified, the default name `Sheet1` is used. -#### Examples +**Examples** Here are a few common scenarios (click on each subtitle to see the code): @@ -1071,6 +1110,107 @@ chrome.runtime.onMessage.addListener(function(msg, sender, cb) { ``` + +The [`headless` demo](demos/headless/) includes a complete demo to convert HTML +files to XLSB workbooks. 
The core idea is to add the script to the page, parse +the table in the page context, generate a `base64` workbook and send it back +for further processing: + +```js +const XLSX = require("xlsx"); +const { readFileSync } = require("fs"), puppeteer = require("puppeteer"); + +const url = `https://sheetjs.com/demos/table`; + +/* get the standalone build source (node_modules/xlsx/dist/xlsx.full.min.js) */ +const lib = readFileSync(require.resolve("xlsx/dist/xlsx.full.min.js"), "utf8"); + +(async() => { + /* start browser and go to web page */ + const browser = await puppeteer.launch(); + const page = await browser.newPage(); + await page.goto(url, {waitUntil: "networkidle2"}); + + /* inject library */ + await page.addScriptTag({content: lib}); + + /* this function `s5s` will be called by the script below, receiving the Base64-encoded file */ + await page.exposeFunction("s5s", async(b64) => { + const workbook = XLSX.read(b64, {type: "base64" }); + + /* DO SOMETHING WITH workbook HERE */ + }); + + /* generate XLSB file in webpage context and send back result */ + await page.addScriptTag({content: ` + /* call table_to_book on first table */ + var workbook = XLSX.utils.table_to_book(document.querySelector("TABLE")); + + /* generate XLSX file */ + var b64 = XLSX.write(workbook, {type: "base64", bookType: "xlsb"}); + + /* call "s5s" hook exposed from the node process */ + window.s5s(b64); + `}); + + /* cleanup */ + await browser.close(); +})(); +``` + + + +The [`headless` demo](demos/headless/) includes a complete demo to convert HTML +files to XLSB workbooks using [PhantomJS](https://phantomjs.org/). 
The core idea +is to add the script to the page, parse the table in the page context, generate +a `binary` workbook and send it back for further processing: + +```js +var XLSX = require('xlsx'); +var page = require('webpage').create(); + +/* this code will be run in the page */ +var code = [ "function(){", + /* call table_to_book on first table */ + "var wb = XLSX.utils.table_to_book(document.body.getElementsByTagName('table')[0]);", + + /* generate XLSB file and return binary string */ + "return XLSX.write(wb, {type: 'binary', bookType: 'xlsb'});", +"}" ].join(""); + +page.open('https://sheetjs.com/demos/table', function() { + /* Load the browser script from the UNPKG CDN */ + page.includeJs("https://unpkg.com/xlsx/dist/xlsx.full.min.js", function() { + /* The code will return an XLSB file encoded as binary string */ + var bin = page.evaluateJavaScript(code); + + var workbook = XLSX.read(bin, {type: "binary"}); + /* DO SOMETHING WITH workbook HERE */ + + phantom.exit(); + }); +}); +``` + + + +NodeJS does not include a DOM implementation and Puppeteer requires a hefty +Chromium build. [`jsdom`](https://npm.im/jsdom) is a lightweight alternative: + +```js +const XLSX = require("xlsx"); +const { readFileSync } = require("fs"); +const { JSDOM } = require("jsdom"); + +/* obtain HTML string. This example reads from test.html */ +const html_str = readFileSync("test.html", "utf8"); +/* get first TABLE element */ +const doc = new JSDOM(html_str).window.document.querySelector("table"); +/* generate workbook */ +const workbook = XLSX.utils.table_to_book(doc); +``` + + ## Working with the Workbook The full object format is described later in this README. 
@@ -1145,21 +1285,65 @@ Some helper functions in `XLSX.utils` generate different views of the sheets: - `XLSX.utils.sheet_to_json` generates an array of objects - `XLSX.utils.sheet_to_formulae` generates a list of formulae -## Writing Workbooks +## Packaging and Releasing Data -For writing, the first step is to generate output data. The helper functions -`write` and `writeFile` will produce the data in various formats suitable for -dissemination. The second step is to actual share the data with the end point. -Assuming `workbook` is a workbook object: +### Writing Workbooks +**API** -`XLSX.writeFile` uses `fs.writeFileSync` in server environments: +_Generate spreadsheet bytes (file) from data_ ```js -if(typeof require !== 'undefined') XLSX = require('xlsx'); +var data = XLSX.write(workbook, opts); +``` + +The `write` method attempts to package data from the workbook into a file in +memory. By default, XLSX files are generated, but that can be controlled with +the `bookType` property of the `opts` argument. Based on the `type` option, +the data can be stored as a "binary string", JS string, `Uint8Array` or Buffer. + +The second `opts` argument is required. ["Writing Options"](#writing-options) +covers the supported properties and behaviors. + +_Generate and attempt to save file_ + +```js +XLSX.writeFile(workbook, filename, opts); +``` + +The `writeFile` method packages the data and attempts to save the new file. The +export file format is determined by the extension of `filename` (`SheetJS.xlsx` +signals XLSX export, `SheetJS.xlsb` signals XLSB export, etc). + +The `writeFile` method uses platform-specific APIs to initiate the file save. In +NodeJS, `fs.writeFileSync` can create a file. In the web browser, a download is +attempted using the HTML5 `download` attribute, with fallbacks for IE. + +The third `opts` argument is optional. ["Writing Options"](#writing-options) +covers the supported properties and behaviors. 
+ +**Examples** + + +`writeFile` uses `fs.writeFileSync` in server environments: + +```js +var XLSX = require("xlsx"); + /* output format determined by filename */ -XLSX.writeFile(workbook, 'out.xlsb'); -/* at this point, out.xlsb is a file that you can distribute */ +XLSX.writeFile(workbook, "out.xlsb"); +``` + +For Node ESM, the `writeFile` helper is not enabled. Instead, `fs.writeFileSync` +should be used to write the `Buffer` generated by `XLSX.write` to a file: + +```js +import { writeFileSync } from "fs"; +import { write } from "xlsx/xlsx.mjs"; + +const buf = write(workbook, {type: "buffer", bookType: "xlsb"}); +/* buf is a Buffer */ +writeFileSync("out.xlsb", buf); ``` @@ -1169,8 +1353,9 @@ The specified path should be an absolute path: ```js #include "xlsx.extendscript.js" + /* output format determined by filename */ -XLSX.writeFile(workbook, 'out.xlsx'); +XLSX.writeFile(workbook, "out.xlsx"); /* at this point, out.xlsx is a file that you can distribute */ ``` @@ -1178,37 +1363,6 @@ The [`extendscript` demo](demos/extendscript/) includes a more complex example. -The `sheet_to_html` utility function generates HTML code that can be added to -any DOM element. - -```js -var worksheet = workbook.Sheets[workbook.SheetNames[0]]; -var container = document.getElementById('tableau'); -container.innerHTML = XLSX.utils.sheet_to_html(worksheet); -``` - - - -A complete example using XHR is [included in the XHR demo](demos/xhr/), along -with examples for fetch and wrapper libraries. 
This example assumes the server -can handle Base64-encoded files (see the demo for a basic nodejs server): - -```js -/* in this example, send a base64 string to the server */ -var wopts = { bookType:'xlsx', bookSST:false, type:'base64' }; - -var wbout = XLSX.write(workbook,wopts); - -var req = new XMLHttpRequest(); -req.open("POST", "/upload", true); -var formdata = new FormData(); -formdata.append('file', 'test.xlsx'); // <-- server expects `file` to hold name -formdata.append('data', wbout); // <-- `data` holds the base64-encoded data -req.send(formdata); -``` - - - `XLSX.writeFile` wraps a few techniques for triggering a file save: - `URL` browser API creates an object URL for the file, which the library uses @@ -1221,7 +1375,7 @@ There is no standard way to determine if the actual file has been downloaded. ```js /* output format determined by filename */ -XLSX.writeFile(workbook, 'out.xlsb'); +XLSX.writeFile(workbook, "out.xlsb"); /* at this point, out.xlsb will have been downloaded */ ``` @@ -1235,7 +1389,7 @@ Note: `XLSX.writeFile` will automatically call `saveAs` if available. ```js /* bookType can be any supported output type */ -var wopts = { bookType:'xlsx', bookSST:false, type:'array' }; +var wopts = { bookType:"xlsx", bookSST:false, type:"array" }; var wbout = XLSX.write(workbook,wopts); @@ -1248,17 +1402,59 @@ to generate local files, suitable for environments where ActiveX is unavailable: ```js Downloadify.create(id,{ - /* other options are required! read the downloadify docs for more info */ - filename: "test.xlsx", - data: function() { return XLSX.write(wb, {bookType:"xlsx", type:'base64'}); }, - append: false, - dataType: 'base64' + /* other options are required! read the downloadify docs for more info */ + filename: "test.xlsx", + data: function() { return XLSX.write(wb, {bookType:"xlsx", type:"base64"}); }, + append: false, + dataType: "base64" }); ``` The [`oldie` demo](demos/oldie/) shows an IE-compatible fallback scenario. 
+ +A complete example using XHR is [included in the XHR demo](demos/xhr/), along +with examples for fetch and wrapper libraries. This example assumes the server +can handle Base64-encoded files (see the demo for a basic nodejs server): + +```js +/* in this example, send a base64 string to the server */ +var wopts = { bookType:"xlsx", bookSST:false, type:"base64" }; + +var wbout = XLSX.write(workbook,wopts); + +var req = new XMLHttpRequest(); +req.open("POST", "/upload", true); +var formdata = new FormData(); +formdata.append("file", "test.xlsx"); // <-- server expects `file` to hold name +formdata.append("data", wbout); // <-- `data` holds the base64-encoded data +req.send(formdata); +``` + + + +The [`headless` demo](demos/headless/) includes a complete demo to convert HTML +files to XLSB workbooks using [PhantomJS](https://phantomjs.org/). PhantomJS +`fs.write` supports writing files from the main process but has a different +interface from the NodeJS `fs` module: + +```js +var XLSX = require('xlsx'); +var fs = require('fs'); + +/* generate a binary string */ +var bin = XLSX.write(workbook, { type:"binary", bookType: "xlsx" }); +/* write to file */ +fs.write("test.xlsx", bin, "wb"); +``` + +Note: The section ["Processing HTML Tables"](#processing-html-tables) shows how +to generate a workbook from HTML tables in a page in "Headless WebKit". + + + + The [included demos](demos/) cover mobile apps and other special deployments. ### Writing Examples @@ -1299,6 +1495,208 @@ stream.pipe(conv); conv.pipe(process.stdout); pipes write streams to nodejs response. +### Generating JSON and JS Data + +JSON and JS data tend to represent single worksheets. The utility functions in +this section work with single worksheets. + +The ["Common Spreadsheet Format"](#common-spreadsheet-format) section describes +the object structure in more detail. `workbook.SheetNames` is an ordered list +of the worksheet names. 
`workbook.Sheets` is an object whose keys are sheet +names and whose values are worksheet objects. + +The "first worksheet" is stored at `workbook.Sheets[workbook.SheetNames[0]]`. + +**API** + +_Create an array of JS objects from a worksheet_ + +```js +var jsa = XLSX.utils.sheet_to_json(worksheet, opts); +``` + +_Create an array of arrays of JS values from a worksheet_ + +```js +var aoa = XLSX.utils.sheet_to_json(worksheet, {...opts, header: 1}); +``` + +The `sheet_to_json` utility function walks a worksheet in row-major order, +generating an array of objects. The second `opts` argument controls a number of +export decisions including the type of values (JS values or formatted text). The +["JSON"](#json) section describes the argument in more detail. + +By default, `sheet_to_json` scans the first row and uses the values as headers. +With the `header: 1` option, the function exports an array of arrays of values. + +**Examples** + +[`x-spreadsheet`](https://github.com/myliang/x-spreadsheet) is an interactive +data grid for previewing and modifying structured data in the web browser. The +[`xspreadsheet` demo](/demos/xspreadsheet) includes a sample script with the +`stox` function for converting from a workbook to x-spreadsheet data object. + is a live demo. + + +The [`database` demo](/demos/database/) includes examples of working with +databases and query results. + + + +[`@tensorflow/tfjs`](https://www.npmjs.com/package/@tensorflow/tfjs) and other libraries expect data in simple +arrays, well-suited for worksheets where each column is a data vector. That is +the transpose of how most people use spreadsheets, where each row is a vector. 
+ +A single `Array#map` can pull individual named fields from `sheet_to_json` export: + +```js +const XLSX = require("xlsx"); +const tf = require('@tensorflow/tfjs'); + +const key = "age"; // this is the field we want to pull +const ages = XLSX.utils.sheet_to_json(worksheet).map(r => r[key]); +const tf_data = tf.tensor1d(ages); +``` + +All fields can be processed at once using a transpose of the 2D tensor generated +with the `sheet_to_json` export with `header: 1`. The first row, if it contains +header labels, should be removed with a slice: + +```js +const XLSX = require("xlsx"); +const tf = require('@tensorflow/tfjs'); + +/* array of arrays of the data starting on the second row */ +const aoa = XLSX.utils.sheet_to_json(worksheet, {header: 1}).slice(1); +/* dataset in the "correct orientation" */ +const tf_dataset = tf.tensor2d(aoa).transpose(); +/* pull out each dataset with a slice */ +const tf_field0 = tf_dataset.slice([0,0], [1,tf_dataset.shape[1]]).flatten(); +const tf_field1 = tf_dataset.slice([1,0], [1,tf_dataset.shape[1]]).flatten(); +``` + +The [`array` demo](demos/array/) shows a complete example. + + + +### Generating HTML Tables + +**API** + +_Generate HTML Table from Worksheet_ + +```js +var html = XLSX.utils.sheet_to_html(worksheet); +``` + +The `sheet_to_html` utility function generates HTML code based on the worksheet +data. Each cell in the worksheet is mapped to a `<TD>` element. Merged cells +in the worksheet are serialized by setting `colspan` and `rowspan` attributes. + +**Examples** + +The `sheet_to_html` utility function generates HTML code that can be added to +any DOM element by setting the `innerHTML`: + +```js +var container = document.getElementById("tavolo"); +container.innerHTML = XLSX.utils.sheet_to_html(worksheet); +``` + +Combining with `fetch`, constructing a site from a workbook is straightforward: + + +```html + + +
+ + + +``` + + + +It is generally recommended to use a React-friendly workflow, but it is possible +to generate HTML and use it in React with `dangerouslySetInnerHTML`: + +```jsx +function Tabeller(props) { + /* the workbook object is the state */ + const [workbook, setWorkbook] = React.useState(XLSX.utils.book_new()); + + /* fetch and update the workbook with an effect */ + React.useEffect(() => { (async() => { + /* fetch and parse workbook -- see the fetch example for details */ + const wb = XLSX.read(await (await fetch("sheetjs.xlsx")).arrayBuffer()); + setWorkbook(wb); + })(); }); + + return workbook.SheetNames.map(name => (<> +

name

+
+ )); +} +``` + +The [`react` demo](demos/react) includes more React examples. + + + +It is generally recommended to use a VueJS-friendly workflow, but it is possible +to generate HTML and use it in VueJS with the `v-html` directive: + +```jsx +import { read, utils } from 'xlsx'; +import { reactive } from 'vue'; + +const S5SComponent = { + mounted() { (async() => { + /* fetch and parse workbook -- see the fetch example for details */ + const workbook = read(await (await fetch("sheetjs.xlsx")).arrayBuffer()); + /* loop through the worksheet names in order */ + workbook.SheetNames.forEach(name => { + /* generate HTML from the corresponding worksheets */ + const html = utils.sheet_to_html(workbook.Sheets[name]); + /* add to state */ + this.wb.wb.push({ name, html }); + }); + })(); }, + /* this state mantra is required for array updates to work */ + setup() { return { wb: reactive({ wb: [] }) }; }, + template: ` +
+

{{ ws.name }}

+
+
` +}; +``` + +The [`vuejs` demo](demos/vue) includes more VueJS examples. + + ## Interface `XLSX` is the exposed variable in the browser and the exported node variable diff --git a/misc/docs/SUMMARY.md b/misc/docs/SUMMARY.md index 186a8ab..5bed99b 100644 --- a/misc/docs/SUMMARY.md +++ b/misc/docs/SUMMARY.md @@ -8,19 +8,16 @@ * [JS Ecosystem Demos](README.md#js-ecosystem-demos) - [Acquiring and Extracting Data](README.md#acquiring-and-extracting-data) * [Parsing Workbooks](README.md#parsing-workbooks) - + [API](README.md#api) - + [Examples](README.md#examples) * [Processing JSON and JS Data](README.md#processing-json-and-js-data) - + [API](README.md#api-1) - + [Examples](README.md#examples-1) * [Processing HTML Tables](README.md#processing-html-tables) - + [API](README.md#api-2) - + [Examples](README.md#examples-2) - [Working with the Workbook](README.md#working-with-the-workbook) * [Parsing and Writing Examples](README.md#parsing-and-writing-examples) -- [Writing Workbooks](README.md#writing-workbooks) +- [Packaging and Releasing Data](README.md#packaging-and-releasing-data) + * [Writing Workbooks](README.md#writing-workbooks) * [Writing Examples](README.md#writing-examples) * [Streaming Write](README.md#streaming-write) + * [Generating JSON and JS Data](README.md#generating-json-and-js-data) + * [Generating HTML Tables](README.md#generating-html-tables) - [Interface](README.md#interface) * [Parsing functions](README.md#parsing-functions) * [Writing functions](README.md#writing-functions) diff --git a/types/index.d.ts b/types/index.d.ts index 080de11..d2801b4 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -22,7 +22,7 @@ export function writeFile(data: WorkBook, filename: string, opts?: WritingOption type CBFunc = () => void; export function writeFileAsync(filename: string, data: WorkBook, opts: WritingOptions | CBFunc, cb?: CBFunc): any; /** Attempts to write the workbook data */ -export function write(data: WorkBook, opts?: WritingOptions): any; +export 
function write(data: WorkBook, opts: WritingOptions): any; /** Utility Functions */ export const utils: XLSX$Utils; @@ -261,7 +261,7 @@ export interface WorkBook { Props?: FullProperties; /** Custom workbook Properties */ - Custprops?: any; + Custprops?: object; Workbook?: WBProps;