From 65f1c7e58b33fbfa0d7c84b55e7d903988685266 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Wed, 29 Mar 2017 15:14:15 -0400 Subject: [PATCH] HTML DOM Element read - DOM element parsing (fixes #576 h/t @axolo) - removed InterfaceHdr check (fixes #209 h/t @Batistleman) - gitbook docs --- .flowconfig | 1 + .gitignore | 1 + CHANGELOG.md | 5 +++- Makefile | 5 ++++ README.md | 55 ++++++++++++++++++++++++++++++++++++++++++- bits/39_xlsbiff.js | 2 +- bits/79_html.js | 32 +++++++++++++++++++++++++ bits/90_utils.js | 2 ++ book.json | 27 +++++++++++++++++++++ docbits/20_import.md | 7 ++++++ docbits/82_util.md | 27 +++++++++++++++++++++ misc/docs/README.md | 1 + misc/docs/SUMMARY.md | 55 +++++++++++++++++++++++++++++++++++++++++++ misc/docs/formats.png | 1 + misc/docs/style.css | 19 +++++++++++++++ xlsx.flow.js | 36 +++++++++++++++++++++++++++- xlsx.js | 36 +++++++++++++++++++++++++++- 17 files changed, 307 insertions(+), 5 deletions(-) create mode 100644 book.json create mode 120000 misc/docs/README.md create mode 100644 misc/docs/SUMMARY.md create mode 120000 misc/docs/formats.png create mode 100644 misc/docs/style.css diff --git a/.flowconfig b/.flowconfig index 8462e3d..5f936d4 100644 --- a/.flowconfig +++ b/.flowconfig @@ -8,6 +8,7 @@ .*/ctest/.* .*/misc/.* .*/perf/.* +.*/_book/.* .*/demo/browser.js .*/shim.js diff --git a/.gitignore b/.gitignore index ca3f28a..e4ab83c 100644 --- a/.gitignore +++ b/.gitignore @@ -1,5 +1,6 @@ node_modules *.tgz +_book/ misc/coverage.html misc/prof.js v8.log diff --git a/CHANGELOG.md b/CHANGELOG.md index e7d7cbe..e91f3dd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,9 @@ changes may not be included if they are not expected to break existing code. ## Unreleased + +## 0.9.7 (2017-03-28) + * XLS legacy `!range` field removed * Hyperlink tooltip is stored in the `Tooltip` field @@ -15,7 +18,7 @@ changes may not be included if they are not expected to break existing code. 
* `sheet_to_json` now passes `null` values when `raw` is set to `true` * `sheet_to_json` treats `null` stub cells as values in conjunction with `raw` -## 0.9.5 (2017-03-22) +## 0.9.5 (2017-03-22) * `cellDates` affects parsing in non-XLSX formats diff --git a/Makefile b/Makefile index d46769e..097624a 100644 --- a/Makefile +++ b/Makefile @@ -173,6 +173,11 @@ README.md: $(READEPS) readme: README.md ## Update README Table of Contents markdown-toc -i README.md +.PHONY: book +book: README.md ## Update summary for documentation + printf "# Summary\n\n- [xlsx](README.md#xlsx)\n" > misc/docs/SUMMARY.md + markdown-toc README.md | sed 's/(#/(README.md#/g'>> misc/docs/SUMMARY.md + .PHONY: help help: @grep -hE '(^[a-zA-Z_-][ a-zA-Z_-]*:.*?|^#[#*])' $(MAKEFILE_LIST) | bash misc/help.sh diff --git a/README.md b/README.md index 9798654..8800732 100644 --- a/README.md +++ b/README.md @@ -43,6 +43,7 @@ with a unified JS representation, and ES3/ES5 browser compatibility back to IE6. * [Document Features](#document-features) + [Formulae](#formulae) + [Column Properties](#column-properties) + + [Hyperlinks](#hyperlinks) - [Parsing Options](#parsing-options) * [Input Type](#input-type) * [Guessing File Type](#guessing-file-type) @@ -51,6 +52,7 @@ with a unified JS representation, and ES3/ES5 browser compatibility back to IE6. 
* [Output Type](#output-type) - [Utility Functions](#utility-functions) * [Array of Arrays Input](#array-of-arrays-input) + * [HTML Table Input](#html-table-input) * [Formulae Output](#formulae-output) * [CSV and general DSV Output](#csv-and-general-dsv-output) * [JSON](#json) @@ -161,6 +163,13 @@ var workbook = XLSX.readFile('test.xlsx'); /* DO SOMETHING WITH workbook HERE */ ``` +- Browser DOM Table element: + +```js +var workbook = XLSX.utils.table_to_book(document.getElementById('tableau')); +/* DO SOMETHING WITH workbook HERE */ +``` + - ajax (for a more complete example that works in older browsers, check the demo at ): @@ -449,7 +458,7 @@ for(var R = range.s.r; R <= range.e.r; ++R) { | `h` | HTML rendering of the rich text (if applicable) | | `c` | comments associated with the cell | | `z` | number format string associated with the cell (if requested) | -| `l` | cell hyperlink object (.Target holds link, .tooltip is tooltip) | +| `l` | cell hyperlink object (.Target holds link, .Tooltip is tooltip) | | `s` | the style/theme of the cell (if applicable) | Built-in export utilities (such as the CSV exporter) will use the `w` text if it @@ -688,6 +697,23 @@ follow the priority order: 2) use `wpx` pixel width if available 3) use `wch` character count if available +#### Hyperlinks + +Hyperlinks are stored in the `l` key of cell objects. The `Target` field of the +hyperlink object is the target of the link, including the URI fragment. Tooltips +are stored in the `Tooltip` field and are displayed when you move your mouse +over the text. + +For example, the following snippet creates a link from cell `A3` to +<http://sheetjs.com> with the tip `"Find us @ SheetJS.com!"`: + +```js +ws['A3'].l = { Target:"http://sheetjs.com", Tooltip:"Find us @ SheetJS.com!" }; +``` + +Note that Excel does not automatically style hyperlinks -- they will generally +be displayed as normal text. 
+ ## Parsing Options The exported `read` and `readFile` functions accept an options argument: @@ -852,6 +878,33 @@ var ws = XLSX.utils.aoa_to_sheet([ ]); ``` +### HTML Table Input + +`XLSX.utils.table_to_sheet` takes a table DOM element and returns a worksheet +resembling the input table. Numbers are parsed. All other data will be stored +as strings. + +`XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet. + +To generate the example sheet, start with the HTML table: + +```html +
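<table id="sheetjs"> +<tr><td>S</td><td>h</td><td>e</td><td>e</td><td>t</td><td>J</td><td>S</td></tr> +<tr><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td></tr> +<tr><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td></tr> +</table>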
+``` + +To process the table: + +```js +var tbl = document.getElementById('sheetjs'); +var wb = XLSX.utils.table_to_book(tbl); +``` + +Note: `XLSX.read` can handle HTML represented as strings. + ### Formulae Output `XLSX.utils.sheet_to_formulae` generates an array of commands that represent diff --git a/bits/39_xlsbiff.js b/bits/39_xlsbiff.js index 2845200..fa81378 100644 --- a/bits/39_xlsbiff.js +++ b/bits/39_xlsbiff.js @@ -158,7 +158,7 @@ function parse_BOF(blob, length) { function parse_InterfaceHdr(blob, length) { if(length === 0) return 0x04b0; var q; - if((q=blob.read_shift(2))!==0x04b0) throw new Error("InterfaceHdr codePage " + q); + if((q=blob.read_shift(2))!==0x04b0){} return 0x04b0; } diff --git a/bits/79_html.js b/bits/79_html.js index 2fb79fd..97a43c7 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -33,3 +33,35 @@ function parse_html(str/*:string*/, opts)/*:Workbook*/ { ws['!ref'] = encode_range(range); return o; } + +function parse_dom_table(table/*:HTMLElement*/, opts/*:?any*/)/*:Worksheet*/ { + var ws/*:Worksheet*/ = ({}/*:any*/); + var rows = table.getElementsByTagName('tr'); + var range = {s:{r:0,c:0},e:{r:rows.length - 1,c:0}}; + var merges = [], midx = 0; + var R = 0, _C = 0, C = 0, RS = 0, CS = 0; + for(; R < rows.length; ++R) { + var row = rows[R]; + var elts = row.children; + for(_C = C = 0; _C < elts.length; ++_C) { + var elt = elts[_C], v = elts[_C].innerText; + for(midx = 0; midx < merges.length; ++midx) { + var m = merges[midx]; + if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } + } + /* TODO: figure out how to extract nonstandard mso- style */ + CS = +elt.getAttribute("colspan") || 1; + if((RS = +elt.getAttribute("rowspan"))>0) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}}); + var o = {t:'s', v:v}; + if(!isNaN(Number(v))) o = {t:'n', v:Number(v)}; + ws[encode_cell({c:C, r:R})] = o; + C += CS; + } + } + ws['!merges'] = merges; + return ws; +} + +function table_to_book(table/*:HTMLElement*/, 
opts/*:?any*/)/*:Workbook*/ { + return sheet_to_workbook(parse_dom_table(table, opts), opts); +} diff --git a/bits/90_utils.js b/bits/90_utils.js index 0e88e9d..8f0f6ee 100644 --- a/bits/90_utils.js +++ b/bits/90_utils.js @@ -240,6 +240,8 @@ var utils = { make_json: sheet_to_json, make_formulae: sheet_to_formulae, aoa_to_sheet: aoa_to_sheet, + table_to_sheet: parse_dom_table, + table_to_book: table_to_book, sheet_to_csv: sheet_to_csv, sheet_to_json: sheet_to_json, sheet_to_formulae: sheet_to_formulae, diff --git a/book.json b/book.json new file mode 100644 index 0000000..f1e3a11 --- /dev/null +++ b/book.json @@ -0,0 +1,27 @@ +{ + "root": "./misc/docs", + "title": "SheetJS js-xlsx", + "author": "sheetjs", + "gitbook": "3.2.2", + "plugins": ["anchorjs", "ga", "sidebar-ad", "-sharing", "advanced-emoji"], + "pluginsConfig": { + "anchorjs": { + "icon": "#", + "placement": "left", + "visible": "always" + }, + "ga": { + "token": "UA-36810333-1" + }, + "sidebar-ad": { + "imageUrl": "http://oss.sheetjs.com/assets/img/logo.png", + "url": "http://sheetjs.com" + }, + "theme-default": { + "showLevel": false, + "styles": { + "website": "style.css" + } + } + } +} diff --git a/docbits/20_import.md b/docbits/20_import.md index 05d8b7e..26ac49f 100644 --- a/docbits/20_import.md +++ b/docbits/20_import.md @@ -11,6 +11,13 @@ var workbook = XLSX.readFile('test.xlsx'); /* DO SOMETHING WITH workbook HERE */ ``` +- Browser DOM Table element: + +```js +var workbook = XLSX.utils.table_to_book(document.getElementById('tableau')); +/* DO SOMETHING WITH workbook HERE */ +``` + - ajax (for a more complete example that works in older browsers, check the demo at ): diff --git a/docbits/82_util.md b/docbits/82_util.md index 2dd456c..38199ce 100644 --- a/docbits/82_util.md +++ b/docbits/82_util.md @@ -38,6 +38,33 @@ var ws = XLSX.utils.aoa_to_sheet([ ]); ``` +### HTML Table Input + +`XLSX.utils.table_to_sheet` takes a table DOM element and returns a worksheet +resembling the input table. 
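Numbers are parsed. All other data will be stored +as strings. + +`XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet. + +To generate the example sheet, start with the HTML table: + +```html +<table id="sheetjs"> +<tr><td>S</td><td>h</td><td>e</td><td>e</td><td>t</td><td>J</td><td>S</td></tr> +<tr><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td></tr> +<tr><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td></tr> +</table>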
+``` + +To process the table: + +```js +var tbl = document.getElementById('sheetjs'); +var wb = XLSX.utils.table_to_book(tbl); +``` + +Note: `XLSX.read` can handle HTML represented as strings. + ### Formulae Output `XLSX.utils.sheet_to_formulae` generates an array of commands that represent diff --git a/misc/docs/README.md b/misc/docs/README.md new file mode 120000 index 0000000..fe84005 --- /dev/null +++ b/misc/docs/README.md @@ -0,0 +1 @@ +../../README.md \ No newline at end of file diff --git a/misc/docs/SUMMARY.md b/misc/docs/SUMMARY.md new file mode 100644 index 0000000..c1df885 --- /dev/null +++ b/misc/docs/SUMMARY.md @@ -0,0 +1,55 @@ +# Summary + +- [xlsx](README.md#xlsx) +- [Installation](README.md#installation) + * [JS Ecosystem Demos](README.md#js-ecosystem-demos) + * [Optional Modules](README.md#optional-modules) + * [ECMAScript 5 Compatibility](README.md#ecmascript-5-compatibility) +- [Parsing Workbooks](README.md#parsing-workbooks) +- [Working with the Workbook](README.md#working-with-the-workbook) +- [Writing Workbooks](README.md#writing-workbooks) +- [Interface](README.md#interface) + * [Parsing functions](README.md#parsing-functions) + * [Writing functions](README.md#writing-functions) + * [Utilities](README.md#utilities) +- [Workbook / Worksheet / Cell Object Description](README.md#workbook--worksheet--cell-object-description) + * [General Structures](README.md#general-structures) + * [Cell Object](README.md#cell-object) + + [Data Types](README.md#data-types) + + [Dates](README.md#dates) + * [Worksheet Object](README.md#worksheet-object) + * [Chartsheet Object](README.md#chartsheet-object) + * [Workbook Object](README.md#workbook-object) + * [Document Features](README.md#document-features) + + [Formulae](README.md#formulae) + + [Column Properties](README.md#column-properties) + + [Hyperlinks](README.md#hyperlinks) +- [Parsing Options](README.md#parsing-options) + * [Input Type](README.md#input-type) + * [Guessing File 
Type](README.md#guessing-file-type) +- [Writing Options](README.md#writing-options) + * [Supported Output Formats](README.md#supported-output-formats) + * [Output Type](README.md#output-type) +- [Utility Functions](README.md#utility-functions) + * [Array of Arrays Input](README.md#array-of-arrays-input) + * [HTML Table Input](README.md#html-table-input) + * [Formulae Output](README.md#formulae-output) + * [CSV and general DSV Output](README.md#csv-and-general-dsv-output) + * [JSON](README.md#json) +- [File Formats](README.md#file-formats) + * [Excel 2007+ XML (XLSX/XLSM)](README.md#excel-2007-xml-xlsxxlsm) + * [Excel 2.0-95 (BIFF2/BIFF3/BIFF4/BIFF5)](README.md#excel-20-95-biff2biff3biff4biff5) + * [Excel 97-2004 Binary (BIFF8)](README.md#excel-97-2004-binary-biff8) + * [Excel 2003-2004 (SpreadsheetML)](README.md#excel-2003-2004-spreadsheetml) + * [Excel 2007+ Binary (XLSB, BIFF12)](README.md#excel-2007-binary-xlsb-biff12) + * [OpenDocument Spreadsheet (ODS/FODS) and Uniform Office Spreadsheet (UOS1/2)](README.md#opendocument-spreadsheet-odsfods-and-uniform-office-spreadsheet-uos12) + * [dBASE and Visual FoxPro (DBF)](README.md#dbase-and-visual-foxpro-dbf) + * [Comma-Separated Values](README.md#comma-separated-values) + * [HTML](README.md#html) +- [Testing](README.md#testing) + * [Tested Environments](README.md#tested-environments) + * [Test Files](README.md#test-files) +- [Contributing](README.md#contributing) +- [License](README.md#license) +- [References](README.md#references) +- [Badges](README.md#badges) diff --git a/misc/docs/formats.png b/misc/docs/formats.png new file mode 120000 index 0000000..2a0a4df --- /dev/null +++ b/misc/docs/formats.png @@ -0,0 +1 @@ +../../formats.png \ No newline at end of file diff --git a/misc/docs/style.css b/misc/docs/style.css new file mode 100644 index 0000000..ba1f718 --- /dev/null +++ b/misc/docs/style.css @@ -0,0 +1,19 @@ +a.anchorjs-link { + color: rgba(65, 131, 196, 0.1); + font-weight: 400; + text-decoration: none; + 
transition: color 100ms ease-out; + z-index: 999; +} + +a.anchorjs-link:hover { + color: rgba(65, 131, 196, 1); +} + +.gitbook-link { + display: none !important; +} + +#book-search-input { + display: none !important; +} diff --git a/xlsx.flow.js b/xlsx.flow.js index 4d53198..bda8f26 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -3859,7 +3859,7 @@ function parse_BOF(blob, length) { function parse_InterfaceHdr(blob, length) { if(length === 0) return 0x04b0; var q; - if((q=blob.read_shift(2))!==0x04b0) throw new Error("InterfaceHdr codePage " + q); + if((q=blob.read_shift(2))!==0x04b0){} return 0x04b0; } @@ -13545,6 +13545,38 @@ function parse_html(str/*:string*/, opts)/*:Workbook*/ { ws['!ref'] = encode_range(range); return o; } + +function parse_dom_table(table/*:HTMLElement*/, opts/*:?any*/)/*:Worksheet*/ { + var ws/*:Worksheet*/ = ({}/*:any*/); + var rows = table.getElementsByTagName('tr'); + var range = {s:{r:0,c:0},e:{r:rows.length - 1,c:0}}; + var merges = [], midx = 0; + var R = 0, _C = 0, C = 0, RS = 0, CS = 0; + for(; R < rows.length; ++R) { + var row = rows[R]; + var elts = row.children; + for(_C = C = 0; _C < elts.length; ++_C) { + var elt = elts[_C], v = elts[_C].innerText; + for(midx = 0; midx < merges.length; ++midx) { + var m = merges[midx]; + if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } + } + /* TODO: figure out how to extract nonstandard mso- style */ + CS = +elt.getAttribute("colspan") || 1; + if((RS = +elt.getAttribute("rowspan"))>0) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}}); + var o = {t:'s', v:v}; + if(!isNaN(Number(v))) o = {t:'n', v:Number(v)}; + ws[encode_cell({c:C, r:R})] = o; + C += CS; + } + } + ws['!merges'] = merges; + return ws; +} + +function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ { + return sheet_to_workbook(parse_dom_table(table, opts), opts); +} var parse_content_xml = (function() { var parse_text_p = function(text, tag) { @@ -14762,6 +14794,8 @@ var utils = { 
make_json: sheet_to_json, make_formulae: sheet_to_formulae, aoa_to_sheet: aoa_to_sheet, + table_to_sheet: parse_dom_table, + table_to_book: table_to_book, sheet_to_csv: sheet_to_csv, sheet_to_json: sheet_to_json, sheet_to_formulae: sheet_to_formulae, diff --git a/xlsx.js b/xlsx.js index e7a6a5f..0b124e5 100644 --- a/xlsx.js +++ b/xlsx.js @@ -3805,7 +3805,7 @@ function parse_BOF(blob, length) { function parse_InterfaceHdr(blob, length) { if(length === 0) return 0x04b0; var q; - if((q=blob.read_shift(2))!==0x04b0) throw new Error("InterfaceHdr codePage " + q); + if((q=blob.read_shift(2))!==0x04b0){} return 0x04b0; } @@ -13486,6 +13486,38 @@ function parse_html(str, opts) { ws['!ref'] = encode_range(range); return o; } + +function parse_dom_table(table, opts) { + var ws = ({}); + var rows = table.getElementsByTagName('tr'); + var range = {s:{r:0,c:0},e:{r:rows.length - 1,c:0}}; + var merges = [], midx = 0; + var R = 0, _C = 0, C = 0, RS = 0, CS = 0; + for(; R < rows.length; ++R) { + var row = rows[R]; + var elts = row.children; + for(_C = C = 0; _C < elts.length; ++_C) { + var elt = elts[_C], v = elts[_C].innerText; + for(midx = 0; midx < merges.length; ++midx) { + var m = merges[midx]; + if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } + } + /* TODO: figure out how to extract nonstandard mso- style */ + CS = +elt.getAttribute("colspan") || 1; + if((RS = +elt.getAttribute("rowspan"))>0) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}}); + var o = {t:'s', v:v}; + if(!isNaN(Number(v))) o = {t:'n', v:Number(v)}; + ws[encode_cell({c:C, r:R})] = o; + C += CS; + } + } + ws['!merges'] = merges; + return ws; +} + +function table_to_book(table, opts) { + return sheet_to_workbook(parse_dom_table(table, opts), opts); +} var parse_content_xml = (function() { var parse_text_p = function(text, tag) { @@ -14694,6 +14726,8 @@ var utils = { make_json: sheet_to_json, make_formulae: sheet_to_formulae, aoa_to_sheet: aoa_to_sheet, + table_to_sheet: 
parse_dom_table, + table_to_book: table_to_book, sheet_to_csv: sheet_to_csv, sheet_to_json: sheet_to_json, sheet_to_formulae: sheet_to_formulae,