From a36dee9eeb854cb129de5989e5b6b171f335c032 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Thu, 18 May 2023 18:41:23 -0400 Subject: [PATCH] synthetic-dom --- .../03-net/{09-headless.md => 08-headless.md} | 0 docz/docs/03-demos/03-net/09-dom.md | 186 +++++++++ docz/docs/06-solutions/01-input.md | 23 +- docz/docs/07-csf/07-features/06-nf.md | 64 ++- docz/docs/08-api/07-utilities/07-html.md | 388 ++++++++++++++++++ docz/docs/08-api/07-utilities/index.md | 180 +------- docz/docs/08-api/index.md | 10 +- docz/static/dom/SheetJSCheerio.js | 23 ++ docz/static/dom/SheetJSTable.html | 46 +++ 9 files changed, 701 insertions(+), 219 deletions(-) rename docz/docs/03-demos/03-net/{09-headless.md => 08-headless.md} (100%) create mode 100644 docz/docs/03-demos/03-net/09-dom.md create mode 100644 docz/docs/08-api/07-utilities/07-html.md create mode 100644 docz/static/dom/SheetJSCheerio.js create mode 100644 docz/static/dom/SheetJSTable.html diff --git a/docz/docs/03-demos/03-net/09-headless.md b/docz/docs/03-demos/03-net/08-headless.md similarity index 100% rename from docz/docs/03-demos/03-net/09-headless.md rename to docz/docs/03-demos/03-net/08-headless.md diff --git a/docz/docs/03-demos/03-net/09-dom.md b/docz/docs/03-demos/03-net/09-dom.md new file mode 100644 index 0000000..4a9b00e --- /dev/null +++ b/docz/docs/03-demos/03-net/09-dom.md @@ -0,0 +1,186 @@ +--- +title: Synthetic DOM +--- + +import current from '/version.js'; +import CodeBlock from '@theme/CodeBlock'; + +`table_to_book` / `table_to_sheet` / `sheet_add_dom` act on HTML DOM elements. +Traditionally there is no DOM in server-side environments. + +:::note + +The most robust approach for server-side processing is to automate a headless +web browser. ["Browser Automation"](/docs/demos/net/headless) includes demos. + +::: + +This demo covers synthetic DOM implementations for non-browser platforms. + +## NodeJS + +### JSDOM + +JSDOM is a DOM implementation for NodeJS. 
Given an HTML string, a reference to +the table element plays nice with the SheetJS DOM methods: + +```js +const XLSX = require("xlsx"); +const { JSDOM } = require("jsdom"); + +/* parse HTML */ +const dom = new JSDOM(html_string); +/* get first TABLE element */ +const tbl = dom.window.document.querySelector("table"); +/* generate workbook */ +const workbook = XLSX.utils.table_to_book(tbl); +XLSX.writeFile(workbook, "SheetJSDOM.xlsx"); +``` + +
Complete Demo (click to hide) + +:::note + +This demo was last tested on 2023 May 18 against JSDOM `22.0.0` + +::: + +1) Install SheetJS and JSDOM libraries: + +{`\ +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz jsdom@22.0.0`} + + +2) Save the following script to `SheetJSDOM.js`: + +```js title="SheetJSDOM.js" +const XLSX = require("xlsx"); +const { readFileSync } = require("fs"); +const { JSDOM } = require("jsdom"); + +/* obtain HTML string. This example reads from SheetJSTable.html */ +const html_str = readFileSync("SheetJSTable.html", "utf8"); +/* get first TABLE element */ +const doc = new JSDOM(html_str).window.document.querySelector("table"); +/* generate workbook */ +const workbook = XLSX.utils.table_to_book(doc); +XLSX.writeFile(workbook, "SheetJSDOM.xlsx"); +``` + +3) Download [the sample `SheetJSTable.html`](pathname:///dom/SheetJSTable.html): + +```bash +curl -LO https://docs.sheetjs.com/dom/SheetJSTable.html +``` + +4) Run the script: + +```bash +node SheetJSDOM.js +``` + +The script will create a file `SheetJSDOM.xlsx` that can be opened. + +
+ +### CheerioJS + +:::caution + +Cheerio does not support a number of fundamental properties out of the box. They +can be shimmed, but it is strongly recommended to use a more compliant library. + +::: + +CheerioJS provides a DOM-like framework for NodeJS. Given an HTML string, a +reference to the table element works with the SheetJS DOM methods with some +prototype fixes. [`SheetJSCheerio.js`](pathname:///dom/SheetJSCheerio.js) is a +complete script. + +
Complete Demo (click to show) + +:::note + +This demo was last tested on 2023 May 18 against Cheerio `1.0.0-rc.12` + +::: + +1) Install SheetJS and CheerioJS libraries: + +{`\ +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz cheerio@1.0.0-rc.12`} + + +2) Download [the sample script `SheetJSCheerio.js`](pathname:///dom/SheetJSCheerio.js): + +```bash +curl -LO https://docs.sheetjs.com/dom/SheetJSCheerio.js +``` + +3) Download [the sample `SheetJSTable.html`](pathname:///dom/SheetJSTable.html): + +```bash +curl -LO https://docs.sheetjs.com/dom/SheetJSTable.html +``` + +4) Run the script: + +```bash +node SheetJSCheerio.js +``` + +The script will create a file `SheetJSCheerio.xlsx` that can be opened. + +
+ +## Other Platforms + +### DenoDOM + +DenoDOM provides a DOM framework for Deno. Given an HTML string, a reference to +the table element works with the SheetJS DOM methods after patching the object. + +This example fetches [a sample table](pathname:///dom/SheetJSTable.html): + +```ts title="SheetJSDenoDOM.ts" +// @deno-types="https://cdn.sheetjs.com/xlsx-0.19.3/package/types/index.d.ts" +import * as XLSX from 'https://cdn.sheetjs.com/xlsx-0.19.3/package/xlsx.mjs'; + +import { DOMParser } from 'https://deno.land/x/deno_dom@v0.1.38/deno-dom-wasm.ts'; + +const doc = new DOMParser().parseFromString( + await (await fetch('https://docs.sheetjs.com/dom/SheetJSTable.html')).text(), + "text/html", +)!; +// highlight-start +const tbl = doc.querySelector("table"); + +/* patch DenoDOM element */ +tbl.rows = tbl.querySelectorAll("tr"); +tbl.rows.forEach(row => row.cells = row.querySelectorAll("td, th")) + +/* generate workbook */ +const workbook = XLSX.utils.table_to_book(tbl); +// highlight-end +XLSX.writeFile(workbook, "SheetJSDenoDOM.xlsx"); +``` + +
Complete Demo (click to hide) + +:::note + +This demo was last tested on 2023 May 18 against DenoDOM `0.1.38` + +::: + +1) Save the previous codeblock to `SheetJSDenoDOM.ts`. + +2) Run the script with `--allow-net` and `--allow-write` entitlements: + +```bash +deno run --allow-net --allow-write SheetJSDenoDOM.ts +``` + +The script will create a file `SheetJSDenoDOM.xlsx` that can be opened. + +
\ No newline at end of file diff --git a/docz/docs/06-solutions/01-input.md b/docz/docs/06-solutions/01-input.md index e170852..9c9d235 100644 --- a/docz/docs/06-solutions/01-input.md +++ b/docz/docs/06-solutions/01-input.md @@ -702,8 +702,8 @@ var worksheet = XLSX.utils.aoa_to_sheet([ ]); ``` -["Array of Arrays Input"](/docs/api/utilities#array-of-arrays-input) describes the function and the -optional `opts` argument in more detail. +["Array of Arrays Input"](/docs/api/utilities#array-of-arrays-input) describes +the function and the optional `opts` argument in more detail. _Create a worksheet from an array of JS objects_ @@ -752,7 +752,8 @@ var worksheet = XLSX.utils.table_to_sheet(dom_element, opts); The `table_to_sheet` utility function takes a DOM TABLE element and iterates through the rows to generate a worksheet. The `opts` argument is optional. -["HTML Table Input"](/docs/api/utilities#html-table-input) describes the function in more detail. +["HTML Table Input"](/docs/api/utilities/html#html-table-input) describes the +function in more detail. @@ -860,19 +861,7 @@ chrome.runtime.onMessage.addListener(function(msg, sender, cb) { NodeJS HTML Tables without a browser (click to show) NodeJS does not include a DOM implementation and Puppeteer requires a hefty -Chromium build. **`jsdom`** is a lightweight alternative: - -```js -const XLSX = require("xlsx"); -const { readFileSync } = require("fs"); -const { JSDOM } = require("jsdom"); - -/* obtain HTML string. This example reads from test.html */ -const html_str = fs.readFileSync("test.html", "utf8"); -/* get first TABLE element */ -const doc = new JSDOM(html_str).window.document.querySelector("table"); -/* generate workbook */ -const workbook = XLSX.utils.table_to_book(doc); -``` +Chromium build. The ["Synthetic DOM"](/docs/demos/net/dom) demo includes +examples for NodeJS. 
diff --git a/docz/docs/07-csf/07-features/06-nf.md b/docz/docs/07-csf/07-features/06-nf.md index 162b870..b76fbc3 100644 --- a/docz/docs/07-csf/07-features/06-nf.md +++ b/docz/docs/07-csf/07-features/06-nf.md @@ -23,41 +23,60 @@ formats, the library will guess the number format. | WK\* | | Binary encoding | | WQ\* / WB\* / QPW | | Binary encoding | | DBF | | Implied by field types | +| HTML | * | Special override | | CSV | * | N/A | | PRN | * | N/A | | DIF | * | N/A | | RTF | * | N/A | -Asterisks (*) mark formats that mix content and presentation. Synthetic number -formats may be generated for special values. +Asterisks (*) mark formats that mix content and presentation. Writers will use +formatted values if cell objects include formatted text or number formats. +Parsers may guess number formats for special values. The letter R (R) marks features parsed but not written in the format. -The following example generates a file with some common number formats: +This example generates a worksheet with common number formats. `sheet_to_html` +uses the number formats in generating the HTML table. The "Export" button +generates workbooks with number formatting. 
```jsx live function SheetJSSimpleNF(props) { - const xport = React.useCallback(async () => { + const [ws, setWS] = React.useState(); + const fmt = React.useRef(null); + + /* when the page is loaded, create worksheet and show table */ + React.useEffect(() => { /* Create worksheet from simple data */ const ws = XLSX.utils.aoa_to_sheet([ - ["Currency", 3.5], - ["Thousands", 7262], - ["Percent", 0.0219] + ["General", 54337 ], + ["Currency", 3.5 ], + ["Thousands", 7262 ], + ["Percent", 0.0219 ], ]); - /* assign number formats */ - ws["B1"].z = '"$"#,##0_);\\("$"#,##0\\)'; - ws["B2"].z = '#,##0'; - ws["B3"].z = "0.00%"; + /* assign number formats */ + ws["B2"].z = '"$"#,##0.00_);\\("$"#,##0.00\\)'; + ws["B3"].z = '#,##0'; + ws["B4"].z = "0.00%"; + + setWS(ws); + }, []); + + const xport = (fmt) => { /* Export to file (start a download) */ const wb = XLSX.utils.book_new(); XLSX.utils.book_append_sheet(wb, ws, "Formats"); - XLSX.writeFile(wb, "SheetJSSimpleNF.xlsx"); - }); + XLSX.writeFile(wb, `SheetJSSimpleNF.${fmt}`); + }; - return ( ); + const fmts = ["xlsx", "xls", "csv", "xlsb", "html", "ods"]; + return ( <> + + +
+ ); } ``` @@ -70,9 +89,8 @@ To simplify editing, the applications will store the underlying values and the number formats separately. For example, `$3.50` will be represented as the value `3.5` with a number format that mandates a `$` sigil and 2 decimal places. -Some file formats like CSV only support the formatted text. Native formats for -spreadsheet applications including Lotus 1-2-3 and Excel will store the value -and number format separately. +CSV and other formats only support the formatted text. Applications reading CSV +files are expected to interpret the values as numbers or dates. ### Dates and Times @@ -115,18 +133,17 @@ function SheetJSExtractNF(props) { return ( <> { /* parse workbook with cellNF: true */ - const file = e.target.files[0]; - const data = await file.arrayBuffer(); - const wb = XLSX.read(data, {cellNF: true}); + const wb = XLSX.read(await e.target.files[0].arrayBuffer(), {cellNF: true}); + /* look at each cell in each worksheet */ const formats = {}; wb.SheetNames.forEach(n => { var ws = wb.Sheets[n]; if(!ws || !ws["!ref"]) return; var ref = XLSX.utils.decode_range(ws["!ref"]); for(var R = 0; R <= ref.e.r; ++R) for(var C = 0; C <= ref.e.c; ++C) { var addr = XLSX.utils.encode_cell({r:R,c:C}); - if(!ws[addr] || !ws[addr].z) continue; - if(formats[ws[addr].z]) continue; + if(!ws[addr] || !ws[addr].z || formats[ws[addr].z]) continue; + /* when a new format is found, save the address */ formats[ws[addr].z] = `'${n}'!${addr}`; setRows(Object.entries(formats)); } @@ -216,3 +233,6 @@ set of formats as "Accounting". 
The exact formats in `en-US` are listed below: For other locales, the formats can be discovered by creating a file with the desired format and testing with [the Number Format Strings demo](#number-format-strings) +### HTML Override + +[**This feature is discussed in the HTML utilities section**](/docs/api/utilities/html#value-override) \ No newline at end of file diff --git a/docz/docs/08-api/07-utilities/07-html.md b/docz/docs/08-api/07-utilities/07-html.md new file mode 100644 index 0000000..4f0b08e --- /dev/null +++ b/docz/docs/08-api/07-utilities/07-html.md @@ -0,0 +1,388 @@ +--- +sidebar_position: 7 +title: HTML +--- + +HTML is a common format for presenting data on the web. While the general read +functions (`XLSX.read` and `XLSX.readFile`) can parse HTML strings and the write +functions (`XLSX.write` and `XLSX.writeFile`) can generate HTML strings, the +utility functions in this section can use DOM features. + +:::note + +SheetJS CE primarily focuses on data and number formatting. + +[SheetJS Pro](https://sheetjs.com/pro) supports CSS text and cell styles in the +HTML format and HTML table utilities. + +::: + +## HTML Table Input + +### Create New Sheet + +**Create a worksheet or workbook from a TABLE element** + +```js +var ws = XLSX.utils.table_to_sheet(elt, opts); +var wb = XLSX.utils.table_to_book(elt, opts); +``` + +`XLSX.utils.table_to_sheet` takes a table DOM element and returns a worksheet +resembling the input table. Numbers are parsed. All other data will be stored +as strings. + +`XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet. 
+ +Both functions accept options arguments: + +| Option Name | Default | Description | +| :---------- | :------: | :-------------------------------------------------- | +|`raw` | | If true, every cell will hold raw strings | +|`dateNF` | FMT 14 | Use specified date format in string output | +|`cellDates` | false | Store dates as type `d` (default is `n`) | +|`sheetRows` | 0 | If >0, read the first `sheetRows` rows of the table | +|`display` | false | If true, hidden rows and cells will not be parsed | + +Exporting a table to a spreadsheet file in the web browser involves 3 steps: +"find the table", "generate a workbook object", and "export to file". + +For example, if the HTML table has `id` attribute set to `sheetjs`: + +```html + + + + + +
NameIndex
Barack Obama44
Donald Trump45
Joseph Biden46
+``` + +`document.getElementById("sheetjs")` is a live reference to the table. + +```js +/* find the table element in the page */ +var tbl = document.getElementById('sheetjs'); +/* create a workbook */ +var wb = XLSX.utils.table_to_book(tbl); +/* export to file */ +XLSX.writeFile(wb, "SheetJSTable.xlsx"); +``` + +
Demo (click to hide) + +This HTML table has id set to `sheetjs`: + + + + + + +
NameIndex
Barack Obama44
Donald Trump45
Joseph Biden46
+ +```jsx live +function SheetJSExportTable() { return ( ); } +``` +
+ +### Add to Sheet + +**Add data from a TABLE element to an existing worksheet** + +```js +XLSX.utils.sheet_add_dom(ws, elt, opts); +``` + +`XLSX.utils.sheet_add_dom` takes a table DOM element and updates an existing +worksheet object. It follows the same process as `table_to_sheet` and accepts +an options argument: + +| Option Name | Default | Description | +| :---------- | :------: | :-------------------------------------------------- | +|`raw` | | If true, every cell will hold raw strings | +|`dateNF` | FMT 14 | Use specified date format in string output | +|`cellDates` | false | Store dates as type `d` (default is `n`) | +|`sheetRows` | 0 | If >0, read the first `sheetRows` rows of the table | +|`display` | false | If true, hidden rows and cells will not be parsed | + +`origin` is expected to be one of: + +| `origin` | Description | +| :--------------- | :-------------------------------------------------------- | +| (cell object) | Use specified cell (cell object) | +| (string) | Use specified cell (A1-Style cell) | +| (number >= 0) | Start from the first column at specified row (0-indexed) | +| -1 | Append to bottom of worksheet starting on first column | +| (default) | Start from cell `A1` | + + +A common use case for `sheet_add_dom` involves adding multiple tables to a +single worksheet, usually with a few blank rows in between each table: + +```js +/* get "table1" and create worksheet */ +const table1 = document.getElementById('table1'); +const ws = XLSX.utils.table_to_sheet(table1); + +/* get "table2" and append to the worksheet */ +const table2 = document.getElementById('table2'); +// highlight-next-line +XLSX.utils.sheet_add_dom(ws, table2, {origin: -1}); +``` + +
Multi-table Export Example (click to show) + +This demo creates a worksheet that should look like the screenshot below: + +![Multi-Table Export in Excel](pathname:///files/multitable.png) + +The `create_gap_rows` helper function expands the worksheet range, adding blank +rows between the data tables. + +```jsx live +function MultiTable() { + const headers = ["Table 1", "Table2", "Table 3"]; + + /* Callback invoked when the button is clicked */ + const xport = React.useCallback(async () => { + /* This function creates gap rows */ + function create_gap_rows(ws, nrows) { + var ref = XLSX.utils.decode_range(ws["!ref"]); // get original range + ref.e.r += nrows; // add to ending row + ws["!ref"] = XLSX.utils.encode_range(ref); // reassign row + } + + /* first table */ + const ws = XLSX.utils.aoa_to_sheet([[headers[0]]]); + XLSX.utils.sheet_add_dom(ws, document.getElementById('table1'), {origin: -1}); + create_gap_rows(ws, 1); // one row gap after first table + + /* second table */ + XLSX.utils.sheet_add_aoa(ws, [[headers[1]]], {origin: -1}); + XLSX.utils.sheet_add_dom(ws, document.getElementById('table2'), {origin: -1}); + create_gap_rows(ws, 2); // two rows gap after second table + + /* third table */ + XLSX.utils.sheet_add_aoa(ws, [[headers[2]]], {origin: -1}); + XLSX.utils.sheet_add_dom(ws, document.getElementById('table3'), {origin: -1}); + + /* create workbook and export */ + const wb = XLSX.utils.book_new(); + XLSX.utils.book_append_sheet(wb, ws, "Export"); + XLSX.writeFile(wb, "SheetJSMultiTablexport.xlsx"); + }); + + return ( <> +

+ {headers[0]}
+ + + +
A2B2
A3B3
+ {headers[1]}
+ + + +
A6B6C6
A7B7C7
+
+ {headers[2]}
+ + + +
A11B11
A12B12
+ ); +} +``` + +
+ +### HTML Strings + +**Create a worksheet or workbook from HTML string** + +`table_to_book` / `table_to_sheet` / `sheet_add_dom` act on HTML DOM elements. +Starting from an HTML string, there are two parsing approaches: + +A) Table Phantasm: create a DIV whose `innerHTML` is set to the HTML string, +generate worksheet using the DOM element, then remove the DIV: + +```js +/* create element from the source */ +var elt = document.createElement("div"); +elt.innerHTML = html_source; +document.body.appendChild(elt); + +/* generate worksheet */ +var ws = XLSX.utils.table_to_sheet(elt.getElementsByTagName("TABLE")[0]); + +/* remove element */ +document.body.removeChild(elt); +``` + +
Phantasm Demo (click to show) + +The `html` variable in the demo is an editable HTML string + +```jsx live +function SheetJSTablePhantasm() { + /* HTML stored as a string */ + const html = `\ + + + + + +
NameIndex
Barack Obama44
Donald Trump45
Joseph Biden46
+`; + return ( <> + +
HTML:
{html}
+ ); +} +``` + +
+ +B) Raw HTML: use `XLSX.read` to read the text in the same manner as CSV. + +```js +var wb = XLSX.read(html_source, { type: "string" }); +var ws = wb.Sheets[wb.SheetNames[0]]; +``` + +
Raw HTML Demo (click to show) + +The `html` variable in the demo is an editable HTML string + +```jsx live +function SheetJSRawHTMLToXLSX() { + /* HTML stored as a string */ + const html = `\ + + + + + +
NameIndex
Barack Obama44
Donald Trump45
Joseph Biden46
+`; + return ( <> + +
HTML:
{html}
+ ); +} +``` + +
+ +### Value Override + +When the `raw: true` option is specified, the parser will generate text cells. +When the option is not specified or when it is set to false, the parser will +try to interpret the text of each TD element. + +To override the conversion for a specific cell, the following data attributes +can be added to the individual TD elements: + +| Attribute | Description | +|:----------|:-------------------------------------------------------------| +| `data-t` | Override [Cell Type](/docs/csf/cell#data-types) | +| `data-v` | Override Cell Value | +| `data-z` | Override [Number Format](/docs/csf/features/nf) | + +For example: + +```html + +2012-12-03 + + +2012-12-03 + + +2012-12-03 + + +2012-12-03 +``` + +
HTML Value Examples (click to hide) + +```jsx live +function SheetJSHTMLValueOverride() { + /* HTML stored as a string */ + const html = `\ + + + + + + +
Celldata-tdata-vdata-z
2012-12-03
2012-12-03s
2012-12-03n41246
2012-12-03n41246yyyy-mm-dd
+`; + return ( <> + +
HTML String:
{html}
TABLE:
+
+ ); +} +``` + +
+ +### Synthetic DOM + +`table_to_book` / `table_to_sheet` / `sheet_add_dom` act on HTML DOM elements. +Traditionally there is no DOM in server-side environments including NodeJS. + +:::note + +The simplest approach for server-side processing is to automate a headless web +browser. ["Browser Automation"](/docs/demos/net/headless) covers some browsers. + +::: + +Some ecosystems provide DOM-like frameworks that are compatible with SheetJS. +Examples are included in the ["Synthetic DOM"](/docs/demos/net/dom) demo diff --git a/docz/docs/08-api/07-utilities/index.md b/docz/docs/08-api/07-utilities/index.md index a19fff0..5d4008d 100644 --- a/docz/docs/08-api/07-utilities/index.md +++ b/docz/docs/08-api/07-utilities/index.md @@ -276,187 +276,11 @@ function SheetJSHeaderOrder() { ## HTML Table Input -**Create a worksheet or workbook from a TABLE element** - -```js -var ws = XLSX.utils.table_to_sheet(elt, opts); -var wb = XLSX.utils.table_to_book(elt, opts); -``` - -`XLSX.utils.table_to_sheet` takes a table DOM element and returns a worksheet -resembling the input table. Numbers are parsed. All other data will be stored -as strings. - -`XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet. - -Both functions accept options arguments: - -| Option Name | Default | Description | -| :---------- | :------: | :-------------------------------------------------- | -|`raw` | | If true, every cell will hold raw strings | -|`dateNF` | FMT 14 | Use specified date format in string output | -|`cellDates` | false | Store dates as type `d` (default is `n`) | -|`sheetRows` | 0 | If >0, read the first `sheetRows` rows of the table | -|`display` | false | If true, hidden rows and cells will not be parsed | - - -To generate the example sheet, assuming the table has ID `sheetjs`: - -```js -var tbl = document.getElementById('sheetjs'); -var ws = XLSX.utils.table_to_sheet(tbl); -``` - -:::note - -`table_to_book` and `table_to_sheet` act on HTML DOM elements. 
Starting from -an HTML string, there are two parsing approaches: - -A) Table Phantasm: create a DIV with the desired HTML. - -```js -/* create element from the source */ -var elt = document.createElement("div"); -elt.innerHTML = html_source; -document.body.appendChild(elt); - -/* generate worksheet */ -var ws = XLSX.utils.table_to_sheet(elt.getElementsByTagName("TABLE")[0]); - -/* remove element */ -document.body.removeChild(elt); -``` - -B) Raw HTML: use `XLSX.read` to read the text in the same manner as CSV. - -```js -var wb = XLSX.read(html_source, { type: "string" }); -var ws = wb.Sheets[wb.SheetNames[0]]; -``` - -::: - -**Add data from a TABLE element to an existing worksheet** - -```js -XLSX.utils.sheet_add_dom(ws, elt, opts); -``` - -`XLSX.utils.sheet_add_dom` takes a table DOM element and updates an existing -worksheet object. It follows the same process as `table_to_sheet` and accepts -an options argument: - -| Option Name | Default | Description | -| :---------- | :------: | :-------------------------------------------------- | -|`raw` | | If true, every cell will hold raw strings | -|`dateNF` | FMT 14 | Use specified date format in string output | -|`cellDates` | false | Store dates as type `d` (default is `n`) | -|`sheetRows` | 0 | If >0, read the first `sheetRows` rows of the table | -|`display` | false | If true, hidden rows and cells will not be parsed | - -`origin` is expected to be one of: - -| `origin` | Description | -| :--------------- | :-------------------------------------------------------- | -| (cell object) | Use specified cell (cell object) | -| (string) | Use specified cell (A1-Style cell) | -| (number >= 0) | Start from the first column at specified row (0-indexed) | -| -1 | Append to bottom of worksheet starting on first column | -| (default) | Start from cell `A1` | - - -A common use case for `sheet_add_dom` involves adding multiple tables to a -single worksheet, usually with a few blank rows in between each table: - -![Multi-Table 
Export in Excel](pathname:///files/multitable.png) - -```jsx live -function MultiTable() { - const headers = ["Table 1", "Table2", "Table 3"]; - - /* Callback invoked when the button is clicked */ - const xport = React.useCallback(async () => { - /* This function creates gap rows */ - function create_gap_rows(ws, nrows) { - var ref = XLSX.utils.decode_range(ws["!ref"]); // get original range - ref.e.r += nrows; // add to ending row - ws["!ref"] = XLSX.utils.encode_range(ref); // reassign row - } - - /* first table */ - const ws = XLSX.utils.aoa_to_sheet([[headers[0]]]); - XLSX.utils.sheet_add_dom(ws, document.getElementById('table1'), {origin: -1}); - create_gap_rows(ws, 1); // one row gap after first table - - /* second table */ - XLSX.utils.sheet_add_aoa(ws, [[headers[1]]], {origin: -1}); - XLSX.utils.sheet_add_dom(ws, document.getElementById('table2'), {origin: -1}); - create_gap_rows(ws, 2); // two rows gap after second table - - /* third table */ - XLSX.utils.sheet_add_aoa(ws, [[headers[2]]], {origin: -1}); - XLSX.utils.sheet_add_dom(ws, document.getElementById('table3'), {origin: -1}); - - /* create workbook and export */ - const wb = XLSX.utils.book_new(); - XLSX.utils.book_append_sheet(wb, ws, "Export"); - XLSX.writeFile(wb, "SheetJSMultiTablexport.xlsx"); - }); - - return ( - <> -

- {headers[0]}
- - - -
A2B2
A3B3
- {headers[1]}
- - - -
A6B6C6
A7B7C7
-
- {headers[2]}
- - - -
A11B11
A12B12
- - ); -} -``` +[**This has been moved to a separate page**](/docs/api/utilities/html#html-table-input) ### Value Override -When the `raw: true` option is specified, the parser will generate text cells. -When the option is not specified or when it is set to false, the parser will -try to interpret the text of each TD element. - -To override the conversion for a specific cell, the following data attributes -can be added to the individual TD elements: - -| Attribute | Description | -|:----------|:-------------------------------------------------------------| -| `data-t` | Override [Cell Type](/docs/csf/cell#data-types) | -| `data-v` | Override Cell Value | -| `data-z` | Override [Number Format](/docs/csf/features/nf) | - -For example: - -```html - -2012-12-03 - - -2012-12-03 - - -2012-12-03 - - -2012-12-03 -``` +[**This has been moved to a separate page**](/docs/api/utilities/html#value-override) ## Delimiter-Separated Output diff --git a/docz/docs/08-api/index.md b/docz/docs/08-api/index.md index 4b110cd..b8c8ca6 100644 --- a/docz/docs/08-api/index.md +++ b/docz/docs/08-api/index.md @@ -51,6 +51,14 @@ The following are described in [`A1` Utilities](/docs/csf/general#utilities) - `encode_cell / decode_cell` converts cell addresses. - `encode_range / decode_range` converts cell ranges. +The following are described in the ["HTML" section of "Utility Functions"](/docs/api/utilities/html): + +**Reading from HTML:** + +- `table_to_sheet` converts a DOM TABLE element to a worksheet. +- `table_to_book` converts a DOM TABLE element to a workbook. +- `sheet_add_dom` adds data from a DOM TABLE element to an existing worksheet. + The following are described in the [Utility Functions](/docs/api/utilities): **Constructing:** @@ -62,10 +70,8 @@ The following are described in the [Utility Functions](/docs/api/utilities): - `aoa_to_sheet` converts an array of arrays of JS data to a worksheet. - `json_to_sheet` converts an array of JS objects to a worksheet. 
-- `table_to_sheet` converts a DOM TABLE element to a worksheet. - `sheet_add_aoa` adds an array of arrays of JS data to an existing worksheet. - `sheet_add_json` adds an array of JS objects to an existing worksheet. -- `sheet_add_dom` adds data from a DOM TABLE element to an existing worksheet. **Exporting:** diff --git a/docz/static/dom/SheetJSCheerio.js b/docz/static/dom/SheetJSCheerio.js new file mode 100644 index 0000000..5bac5f0 --- /dev/null +++ b/docz/static/dom/SheetJSCheerio.js @@ -0,0 +1,23 @@ +const XLSX = require("xlsx"); +const { readFileSync } = require("fs"); +const cheerio = require("cheerio"); + +/* obtain HTML string. This example reads from SheetJSTable.html */ +const html_str = readFileSync("SheetJSTable.html", "utf8"); +/* get first TABLE element */ +const $ = cheerio.load(html_str); +const doc = $("TABLE").first()[0]; + +/* FIX THE CHEERIO LIBRARY */ +Object.defineProperty(doc.__proto__, "tagName", { get: function() { return Object.entries(this).find(r => r[0] == "tagName" || r[0] == "name")[1].toUpperCase(); }}); +Object.defineProperty(doc.__proto__, "rows", { get: function() { return $(this).children("tbody").children("tr"); }}); +Object.defineProperty(doc.__proto__, "cells", { get: function() { return $(this).children("td, th"); }}); +Object.defineProperty(doc.__proto__, "ownerDocument", { get: function() { return {}; }}); +doc.__proto__.hasAttribute = function(name) { return Object.hasOwnProperty.call(this.attribs, name); } +doc.__proto__.getAttribute = function(name) { return this.attribs[name]; } +Object.defineProperty(doc.__proto__, "innerHTML", { get: function() { return $(this).prop('innerHTML'); }}); +doc.__proto__.getElementsByTagName = function(name) { return ($(this).children(name))} + +/* generate workbook */ +const workbook = XLSX.utils.table_to_book(doc); +XLSX.writeFile(workbook, "SheetJSCheerio.xlsx"); diff --git a/docz/static/dom/SheetJSTable.html b/docz/static/dom/SheetJSTable.html new file mode 100644 index 0000000..ad01ee1 --- 
/dev/null +++ b/docz/static/dom/SheetJSTable.html @@ -0,0 +1,46 @@ + + + + + + + + + SheetJS Table Example + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
ThisisaTest
வணக்கம்สวัสดี你好가지마
1234
Clicktoeditcells
Generated by SheetJS
+ + + \ No newline at end of file