HTML DOM Element read

- DOM <table> element parsing (fixes #576 h/t @axolo)
- removed InterfaceHdr check (fixes #209 h/t @Batistleman)
- gitbook docs
This commit is contained in:
SheetJS 2017-03-29 15:14:15 -04:00
parent b89a876076
commit 65f1c7e58b
17 changed files with 307 additions and 5 deletions

@ -8,6 +8,7 @@
.*/ctest/.*
.*/misc/.*
.*/perf/.*
.*/_book/.*
.*/demo/browser.js
.*/shim.js

1
.gitignore vendored

@ -1,5 +1,6 @@
node_modules
*.tgz
_book/
misc/coverage.html
misc/prof.js
v8.log

@ -7,6 +7,9 @@ changes may not be included if they are not expected to break existing code.
## Unreleased
## 0.9.7 (2017-03-28)
* XLS legacy `!range` field removed
* Hyperlink tooltip is stored in the `Tooltip` field
@ -15,7 +18,7 @@ changes may not be included if they are not expected to break existing code.
* `sheet_to_json` now passes `null` values when `raw` is set to `true`
* `sheet_to_json` treats `null` stub cells as values in conjunction with `raw`
## 0.9.5 (2017-03-22)
## 0.9.5 (2017-03-22)
* `cellDates` affects parsing in non-XLSX formats

@ -173,6 +173,11 @@ README.md: $(READEPS)
readme: README.md ## Update README Table of Contents
markdown-toc -i README.md
.PHONY: book
book: README.md ## Update summary for documentation
printf "# Summary\n\n- [xlsx](README.md#xlsx)\n" > misc/docs/SUMMARY.md
markdown-toc README.md | sed 's/(#/(README.md#/g'>> misc/docs/SUMMARY.md
.PHONY: help
help:
@grep -hE '(^[a-zA-Z_-][ a-zA-Z_-]*:.*?|^#[#*])' $(MAKEFILE_LIST) | bash misc/help.sh

@ -43,6 +43,7 @@ with a unified JS representation, and ES3/ES5 browser compatibility back to IE6.
* [Document Features](#document-features)
+ [Formulae](#formulae)
+ [Column Properties](#column-properties)
+ [Hyperlinks](#hyperlinks)
- [Parsing Options](#parsing-options)
* [Input Type](#input-type)
* [Guessing File Type](#guessing-file-type)
@ -51,6 +52,7 @@ with a unified JS representation, and ES3/ES5 browser compatibility back to IE6.
* [Output Type](#output-type)
- [Utility Functions](#utility-functions)
* [Array of Arrays Input](#array-of-arrays-input)
* [HTML Table Input](#html-table-input)
* [Formulae Output](#formulae-output)
* [CSV and general DSV Output](#csv-and-general-dsv-output)
* [JSON](#json)
@ -161,6 +163,13 @@ var workbook = XLSX.readFile('test.xlsx');
/* DO SOMETHING WITH workbook HERE */
```
- Browser DOM Table element:
```js
var workbook = XLSX.utils.table_to_book(document.getElementById('tableau'));
/* DO SOMETHING WITH workbook HERE */
```
- ajax (for a more complete example that works in older browsers, check the demo
at <http://oss.sheetjs.com/js-xlsx/ajax.html>):
@ -449,7 +458,7 @@ for(var R = range.s.r; R <= range.e.r; ++R) {
| `h` | HTML rendering of the rich text (if applicable) |
| `c` | comments associated with the cell |
| `z` | number format string associated with the cell (if requested) |
| `l` | cell hyperlink object (.Target holds link, .tooltip is tooltip) |
| `l` | cell hyperlink object (.Target holds link, .Tooltip is tooltip) |
| `s` | the style/theme of the cell (if applicable) |
Built-in export utilities (such as the CSV exporter) will use the `w` text if it
@ -688,6 +697,23 @@ follow the priority order:
2) use `wpx` pixel width if available
3) use `wch` character count if available
#### Hyperlinks
Hyperlinks are stored in the `l` key of cell objects. The `Target` field of the
hyperlink object is the target of the link, including the URI fragment. Tooltips
are stored in the `Tooltip` field and are displayed when you move your mouse
over the text.
For example, the following snippet creates a link from cell `A3` to
<http://sheetjs.com> with the tip `"Find us @ SheetJS.com!"`:
```js
ws['A3'].l = { Target:"http://sheetjs.com", Tooltip:"Find us @ SheetJS.com!" };
```
Note that Excel does not automatically style hyperlinks -- they will generally
be displayed as normal text.
## Parsing Options
The exported `read` and `readFile` functions accept an options argument:
@ -852,6 +878,33 @@ var ws = XLSX.utils.aoa_to_sheet([
]);
```
### HTML Table Input
`XLSX.utils.table_to_sheet` takes a table DOM element and returns a worksheet
resembling the input table. Numbers are parsed. All other data will be stored
as strings.
`XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet.
To generate the example sheet, start with the HTML table:
```html
<table id="sheetjs">
<tr><td>S</td><td>h</td><td>e</td><td>e</td><td>t</td><td>J</td><td>S</td></tr>
<tr><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td></tr>
<tr><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td></tr>
</table>
```
To process the table:
```js
var tbl = document.getElementById('sheetjs');
var wb = XLSX.utils.table_to_book(tbl);
```
Note: `XLSX.read` can handle HTML represented as strings.
### Formulae Output
`XLSX.utils.sheet_to_formulae` generates an array of commands that represent

@ -158,7 +158,7 @@ function parse_BOF(blob, length) {
function parse_InterfaceHdr(blob, length) {
if(length === 0) return 0x04b0;
var q;
if((q=blob.read_shift(2))!==0x04b0) throw new Error("InterfaceHdr codePage " + q);
if((q=blob.read_shift(2))!==0x04b0){}
return 0x04b0;
}

@ -33,3 +33,35 @@ function parse_html(str/*:string*/, opts)/*:Workbook*/ {
ws['!ref'] = encode_range(range);
return o;
}
/*
 * Build a worksheet object from a DOM <table> element.
 *
 * Every <tr> found under `table` becomes one worksheet row and every child
 * element of a row becomes one cell.  Text that converts to a number is stored
 * as a numeric cell (t:'n'); everything else, including empty or
 * whitespace-only cells, is stored as a string cell (t:'s').  Cells spanning
 * more than one row or column are recorded in ws['!merges'], and the slots
 * they cover are skipped when placing cells of later rows.
 *
 * table: object exposing getElementsByTagName('tr'); each row must expose
 *        `children` whose items provide innerText/textContent and getAttribute
 * opts:  accepted for signature compatibility; not read here
 * returns the worksheet; '!merges' is always set, '!ref' is set whenever the
 *         table has at least one row
 */
function parse_dom_table(table/*:HTMLElement*/, opts/*:?any*/)/*:Worksheet*/ {
	var ws/*:Worksheet*/ = ({}/*:any*/);
	/* NOTE: getElementsByTagName also returns rows of nested tables */
	var rows = table.getElementsByTagName('tr');
	var range = {s:{r:0,c:0},e:{r:rows.length - 1,c:0}};
	var merges = [], midx = 0;
	var R = 0, _C = 0, C = 0, RS = 0, CS = 0;
	for(; R < rows.length; ++R) {
		var row = rows[R];
		var elts = row.children;
		for(_C = C = 0; _C < elts.length; ++_C) {
			var elt = elts[_C];
			/* innerText is missing in some DOM implementations; fall back to textContent */
			var v = elt.innerText;
			if(v == null) v = elt.textContent;
			if(v == null) v = "";
			/* skip slots covered by a merge; restart the scan after every jump since the new column may hit another merge */
			for(midx = 0; midx < merges.length; ++midx) {
				var m = merges[midx];
				if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
			}
			/* TODO: figure out how to extract nonstandard mso- style */
			/* missing, zero, negative or non-numeric spans fall back to 1 */
			CS = parseInt(elt.getAttribute("colspan"), 10); if(!(CS >= 1)) CS = 1;
			RS = parseInt(elt.getAttribute("rowspan"), 10); if(!(RS >= 1)) RS = 1;
			/* record a merge for colspan-only cells too, but never a 1x1 merge */
			if(RS > 1 || CS > 1) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}});
			var o = {t:'s', v:v};
			/* Number("") and Number("  ") are 0, so require visible text before treating as numeric */
			if(/\S/.test(v) && !isNaN(Number(v))) o = {t:'n', v:Number(v)};
			ws[encode_cell({c:C, r:R})] = o;
			/* grow the sheet range so that it covers the cell and its whole span */
			if(range.e.c < C + CS - 1) range.e.c = C + CS - 1;
			if(range.e.r < R + RS - 1) range.e.r = R + RS - 1;
			C += CS;
		}
	}
	ws['!merges'] = merges;
	/* an empty table has no meaningful range (e.r would be -1) */
	if(rows.length > 0) ws['!ref'] = encode_range(range);
	return ws;
}
/* Convert a DOM table to a worksheet, then wrap that worksheet in a workbook; `opts` is forwarded to both steps. */
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
	var sheet = parse_dom_table(table, opts);
	return sheet_to_workbook(sheet, opts);
}

@ -240,6 +240,8 @@ var utils = {
make_json: sheet_to_json,
make_formulae: sheet_to_formulae,
aoa_to_sheet: aoa_to_sheet,
table_to_sheet: parse_dom_table,
table_to_book: table_to_book,
sheet_to_csv: sheet_to_csv,
sheet_to_json: sheet_to_json,
sheet_to_formulae: sheet_to_formulae,

27
book.json Normal file

@ -0,0 +1,27 @@
{
"root": "./misc/docs",
"title": "SheetJS js-xlsx",
"author": "sheetjs",
"gitbook": "3.2.2",
"plugins": ["anchorjs", "ga", "sidebar-ad", "-sharing", "advanced-emoji"],
"pluginsConfig": {
"anchorjs": {
"icon": "#",
"placement": "left",
"visible": "always"
},
"ga": {
"token": "UA-36810333-1"
},
"sidebar-ad": {
"imageUrl": "http://oss.sheetjs.com/assets/img/logo.png",
"url": "http://sheetjs.com"
},
"theme-default": {
"showLevel": false,
"styles": {
"website": "style.css"
}
}
}
}

@ -11,6 +11,13 @@ var workbook = XLSX.readFile('test.xlsx');
/* DO SOMETHING WITH workbook HERE */
```
- Browser DOM Table element:
```js
var workbook = XLSX.utils.table_to_book(document.getElementById('tableau'));
/* DO SOMETHING WITH workbook HERE */
```
- ajax (for a more complete example that works in older browsers, check the demo
at <http://oss.sheetjs.com/js-xlsx/ajax.html>):

@ -38,6 +38,33 @@ var ws = XLSX.utils.aoa_to_sheet([
]);
```
### HTML Table Input
`XLSX.utils.table_to_sheet` takes a table DOM element and returns a worksheet
resembling the input table. Numbers are parsed. All other data will be stored
as strings.
`XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet.
To generate the example sheet, start with the HTML table:
```html
<table id="sheetjs">
<tr><td>S</td><td>h</td><td>e</td><td>e</td><td>t</td><td>J</td><td>S</td></tr>
<tr><td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td></tr>
<tr><td>2</td><td>3</td><td>4</td><td>5</td><td>6</td><td>7</td><td>8</td></tr>
</table>
```
To process the table:
```js
var tbl = document.getElementById('sheetjs');
var wb = XLSX.utils.table_to_book(tbl);
```
Note: `XLSX.read` can handle HTML represented as strings.
### Formulae Output
`XLSX.utils.sheet_to_formulae` generates an array of commands that represent

1
misc/docs/README.md Symbolic link

@ -0,0 +1 @@
../../README.md

55
misc/docs/SUMMARY.md Normal file

@ -0,0 +1,55 @@
# Summary
- [xlsx](README.md#xlsx)
- [Installation](README.md#installation)
* [JS Ecosystem Demos](README.md#js-ecosystem-demos)
* [Optional Modules](README.md#optional-modules)
* [ECMAScript 5 Compatibility](README.md#ecmascript-5-compatibility)
- [Parsing Workbooks](README.md#parsing-workbooks)
- [Working with the Workbook](README.md#working-with-the-workbook)
- [Writing Workbooks](README.md#writing-workbooks)
- [Interface](README.md#interface)
* [Parsing functions](README.md#parsing-functions)
* [Writing functions](README.md#writing-functions)
* [Utilities](README.md#utilities)
- [Workbook / Worksheet / Cell Object Description](README.md#workbook--worksheet--cell-object-description)
* [General Structures](README.md#general-structures)
* [Cell Object](README.md#cell-object)
+ [Data Types](README.md#data-types)
+ [Dates](README.md#dates)
* [Worksheet Object](README.md#worksheet-object)
* [Chartsheet Object](README.md#chartsheet-object)
* [Workbook Object](README.md#workbook-object)
* [Document Features](README.md#document-features)
+ [Formulae](README.md#formulae)
+ [Column Properties](README.md#column-properties)
+ [Hyperlinks](README.md#hyperlinks)
- [Parsing Options](README.md#parsing-options)
* [Input Type](README.md#input-type)
* [Guessing File Type](README.md#guessing-file-type)
- [Writing Options](README.md#writing-options)
* [Supported Output Formats](README.md#supported-output-formats)
* [Output Type](README.md#output-type)
- [Utility Functions](README.md#utility-functions)
* [Array of Arrays Input](README.md#array-of-arrays-input)
* [HTML Table Input](README.md#html-table-input)
* [Formulae Output](README.md#formulae-output)
* [CSV and general DSV Output](README.md#csv-and-general-dsv-output)
* [JSON](README.md#json)
- [File Formats](README.md#file-formats)
* [Excel 2007+ XML (XLSX/XLSM)](README.md#excel-2007-xml-xlsxxlsm)
* [Excel 2.0-95 (BIFF2/BIFF3/BIFF4/BIFF5)](README.md#excel-20-95-biff2biff3biff4biff5)
* [Excel 97-2004 Binary (BIFF8)](README.md#excel-97-2004-binary-biff8)
* [Excel 2003-2004 (SpreadsheetML)](README.md#excel-2003-2004-spreadsheetml)
* [Excel 2007+ Binary (XLSB, BIFF12)](README.md#excel-2007-binary-xlsb-biff12)
* [OpenDocument Spreadsheet (ODS/FODS) and Uniform Office Spreadsheet (UOS1/2)](README.md#opendocument-spreadsheet-odsfods-and-uniform-office-spreadsheet-uos12)
* [dBASE and Visual FoxPro (DBF)](README.md#dbase-and-visual-foxpro-dbf)
* [Comma-Separated Values](README.md#comma-separated-values)
* [HTML](README.md#html)
- [Testing](README.md#testing)
* [Tested Environments](README.md#tested-environments)
* [Test Files](README.md#test-files)
- [Contributing](README.md#contributing)
- [License](README.md#license)
- [References](README.md#references)
- [Badges](README.md#badges)

1
misc/docs/formats.png Symbolic link

@ -0,0 +1 @@
../../formats.png

19
misc/docs/style.css Normal file

@ -0,0 +1,19 @@
a.anchorjs-link {
color: rgba(65, 131, 196, 0.1);
font-weight: 400;
text-decoration: none;
transition: color 100ms ease-out;
z-index: 999;
}
a.anchorjs-link:hover {
color: rgba(65, 131, 196, 1);
}
.gitbook-link {
display: none !important;
}
#book-search-input {
display: none !important;
}

@ -3859,7 +3859,7 @@ function parse_BOF(blob, length) {
function parse_InterfaceHdr(blob, length) {
if(length === 0) return 0x04b0;
var q;
if((q=blob.read_shift(2))!==0x04b0) throw new Error("InterfaceHdr codePage " + q);
if((q=blob.read_shift(2))!==0x04b0){}
return 0x04b0;
}
@ -13545,6 +13545,38 @@ function parse_html(str/*:string*/, opts)/*:Workbook*/ {
ws['!ref'] = encode_range(range);
return o;
}
/*
 * Build a worksheet object from a DOM <table> element.
 *
 * Every <tr> found under `table` becomes one worksheet row and every child
 * element of a row becomes one cell.  Text that converts to a number is stored
 * as a numeric cell (t:'n'); everything else, including empty or
 * whitespace-only cells, is stored as a string cell (t:'s').  Cells spanning
 * more than one row or column are recorded in ws['!merges'], and the slots
 * they cover are skipped when placing cells of later rows.
 *
 * table: object exposing getElementsByTagName('tr'); each row must expose
 *        `children` whose items provide innerText/textContent and getAttribute
 * opts:  accepted for signature compatibility; not read here
 * returns the worksheet; '!merges' is always set, '!ref' is set whenever the
 *         table has at least one row
 */
function parse_dom_table(table/*:HTMLElement*/, opts/*:?any*/)/*:Worksheet*/ {
	var ws/*:Worksheet*/ = ({}/*:any*/);
	/* NOTE: getElementsByTagName also returns rows of nested tables */
	var rows = table.getElementsByTagName('tr');
	var range = {s:{r:0,c:0},e:{r:rows.length - 1,c:0}};
	var merges = [], midx = 0;
	var R = 0, _C = 0, C = 0, RS = 0, CS = 0;
	for(; R < rows.length; ++R) {
		var row = rows[R];
		var elts = row.children;
		for(_C = C = 0; _C < elts.length; ++_C) {
			var elt = elts[_C];
			/* innerText is missing in some DOM implementations; fall back to textContent */
			var v = elt.innerText;
			if(v == null) v = elt.textContent;
			if(v == null) v = "";
			/* skip slots covered by a merge; restart the scan after every jump since the new column may hit another merge */
			for(midx = 0; midx < merges.length; ++midx) {
				var m = merges[midx];
				if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
			}
			/* TODO: figure out how to extract nonstandard mso- style */
			/* missing, zero, negative or non-numeric spans fall back to 1 */
			CS = parseInt(elt.getAttribute("colspan"), 10); if(!(CS >= 1)) CS = 1;
			RS = parseInt(elt.getAttribute("rowspan"), 10); if(!(RS >= 1)) RS = 1;
			/* record a merge for colspan-only cells too, but never a 1x1 merge */
			if(RS > 1 || CS > 1) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}});
			var o = {t:'s', v:v};
			/* Number("") and Number("  ") are 0, so require visible text before treating as numeric */
			if(/\S/.test(v) && !isNaN(Number(v))) o = {t:'n', v:Number(v)};
			ws[encode_cell({c:C, r:R})] = o;
			/* grow the sheet range so that it covers the cell and its whole span */
			if(range.e.c < C + CS - 1) range.e.c = C + CS - 1;
			if(range.e.r < R + RS - 1) range.e.r = R + RS - 1;
			C += CS;
		}
	}
	ws['!merges'] = merges;
	/* an empty table has no meaningful range (e.r would be -1) */
	if(rows.length > 0) ws['!ref'] = encode_range(range);
	return ws;
}
/* Convert a DOM table to a worksheet, then wrap that worksheet in a workbook; `opts` is forwarded to both steps. */
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
	var sheet = parse_dom_table(table, opts);
	return sheet_to_workbook(sheet, opts);
}
var parse_content_xml = (function() {
var parse_text_p = function(text, tag) {
@ -14762,6 +14794,8 @@ var utils = {
make_json: sheet_to_json,
make_formulae: sheet_to_formulae,
aoa_to_sheet: aoa_to_sheet,
table_to_sheet: parse_dom_table,
table_to_book: table_to_book,
sheet_to_csv: sheet_to_csv,
sheet_to_json: sheet_to_json,
sheet_to_formulae: sheet_to_formulae,

36
xlsx.js

@ -3805,7 +3805,7 @@ function parse_BOF(blob, length) {
function parse_InterfaceHdr(blob, length) {
if(length === 0) return 0x04b0;
var q;
if((q=blob.read_shift(2))!==0x04b0) throw new Error("InterfaceHdr codePage " + q);
if((q=blob.read_shift(2))!==0x04b0){}
return 0x04b0;
}
@ -13486,6 +13486,38 @@ function parse_html(str, opts) {
ws['!ref'] = encode_range(range);
return o;
}
/*
 * Build a worksheet object from a DOM <table> element.
 *
 * Every <tr> found under `table` becomes one worksheet row and every child
 * element of a row becomes one cell.  Text that converts to a number is stored
 * as a numeric cell (t:'n'); everything else, including empty or
 * whitespace-only cells, is stored as a string cell (t:'s').  Cells spanning
 * more than one row or column are recorded in ws['!merges'], and the slots
 * they cover are skipped when placing cells of later rows.
 *
 * table: object exposing getElementsByTagName('tr'); each row must expose
 *        `children` whose items provide innerText/textContent and getAttribute
 * opts:  accepted for signature compatibility; not read here
 * returns the worksheet; '!merges' is always set, '!ref' is set whenever the
 *         table has at least one row
 */
function parse_dom_table(table, opts) {
	var ws = ({});
	/* NOTE: getElementsByTagName also returns rows of nested tables */
	var rows = table.getElementsByTagName('tr');
	var range = {s:{r:0,c:0},e:{r:rows.length - 1,c:0}};
	var merges = [], midx = 0;
	var R = 0, _C = 0, C = 0, RS = 0, CS = 0;
	for(; R < rows.length; ++R) {
		var row = rows[R];
		var elts = row.children;
		for(_C = C = 0; _C < elts.length; ++_C) {
			var elt = elts[_C];
			/* innerText is missing in some DOM implementations; fall back to textContent */
			var v = elt.innerText;
			if(v == null) v = elt.textContent;
			if(v == null) v = "";
			/* skip slots covered by a merge; restart the scan after every jump since the new column may hit another merge */
			for(midx = 0; midx < merges.length; ++midx) {
				var m = merges[midx];
				if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
			}
			/* TODO: figure out how to extract nonstandard mso- style */
			/* missing, zero, negative or non-numeric spans fall back to 1 */
			CS = parseInt(elt.getAttribute("colspan"), 10); if(!(CS >= 1)) CS = 1;
			RS = parseInt(elt.getAttribute("rowspan"), 10); if(!(RS >= 1)) RS = 1;
			/* record a merge for colspan-only cells too, but never a 1x1 merge */
			if(RS > 1 || CS > 1) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}});
			var o = {t:'s', v:v};
			/* Number("") and Number("  ") are 0, so require visible text before treating as numeric */
			if(/\S/.test(v) && !isNaN(Number(v))) o = {t:'n', v:Number(v)};
			ws[encode_cell({c:C, r:R})] = o;
			/* grow the sheet range so that it covers the cell and its whole span */
			if(range.e.c < C + CS - 1) range.e.c = C + CS - 1;
			if(range.e.r < R + RS - 1) range.e.r = R + RS - 1;
			C += CS;
		}
	}
	ws['!merges'] = merges;
	/* an empty table has no meaningful range (e.r would be -1) */
	if(rows.length > 0) ws['!ref'] = encode_range(range);
	return ws;
}
/* Convert a DOM table to a worksheet, then wrap that worksheet in a workbook; `opts` is forwarded to both steps. */
function table_to_book(table, opts) {
	var sheet = parse_dom_table(table, opts);
	return sheet_to_workbook(sheet, opts);
}
var parse_content_xml = (function() {
var parse_text_p = function(text, tag) {
@ -14694,6 +14726,8 @@ var utils = {
make_json: sheet_to_json,
make_formulae: sheet_to_formulae,
aoa_to_sheet: aoa_to_sheet,
table_to_sheet: parse_dom_table,
table_to_book: table_to_book,
sheet_to_csv: sheet_to_csv,
sheet_to_json: sheet_to_json,
sheet_to_formulae: sheet_to_formulae,