DOM TABLE parse element visibility

- Hidden rows are marked as hidden by default (mimics Excel behavior).
- The `display` option for `table_to_*` skips hidden rows and cells, so the output replicates the table as it appears.

Fixes #1115
This commit is contained in:
Surgie Finesse 2018-05-27 17:27:47 +10:00
parent 4737d80db4
commit 2918185249
9 changed files with 174 additions and 25 deletions

@ -2055,6 +2055,7 @@ Both functions accept options arguments:
|`dateNF` | FMT 14 | Use specified date format in string output |
|`cellDates` | false | Store dates as type `d` (default is `n`) |
|`sheetRows` | 0 | If >0, read the first `sheetRows` rows of the table |
|`display` | false | If true, hidden rows and cells will not be parsed |
<details>

@ -119,15 +119,22 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
if(DENSE != null) opts.dense = DENSE;
var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/);
var rows/*:HTMLCollection<HTMLTableRowElement>*/ = table.getElementsByTagName('tr');
var sheetRows = Math.min(opts.sheetRows||10000000, rows.length);
var range/*:Range*/ = {s:{r:0,c:0},e:{r:sheetRows - 1,c:0}};
var sheetRows = opts.sheetRows || 10000000;
var range/*:Range*/ = {s:{r:0,c:0},e:{r:0,c:0}};
var merges/*:Array<Range>*/ = [], midx = 0;
var R = 0, _C = 0, C = 0, RS = 0, CS = 0;
for(; R < sheetRows; ++R) {
var row/*:HTMLTableRowElement*/ = rows[R];
var rowinfo/*:Array<RowInfo>*/ = [];
var _R = 0, R = 0, _C, C, RS, CS;
for(; _R < rows.length && R < sheetRows; ++_R) {
var row/*:HTMLTableRowElement*/ = rows[_R];
if (is_dom_element_hidden(row)) {
if (opts.display) continue;
rowinfo[R] = {hidden: true};
}
var elts/*:HTMLCollection<HTMLTableCellElement>*/ = (row.children/*:any*/);
for(_C = C = 0; _C < elts.length; ++_C) {
var elt/*:HTMLTableCellElement*/ = elts[_C], v = htmldecode(elts[_C].innerHTML);
var elt/*:HTMLTableCellElement*/ = elts[_C];
if (opts.display && is_dom_element_hidden(elt)) continue;
var v/*:string*/ = htmldecode(elt.innerHTML);
for(midx = 0; midx < merges.length; ++midx) {
var m/*:Range*/ = merges[midx];
if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
@ -154,13 +161,33 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
if(range.e.c < C) range.e.c = C;
C += CS;
}
++R;
}
if(merges.length) ws['!merges'] = merges;
if(rowinfo.length) ws['!rows'] = rowinfo;
range.e.r = R - 1;
ws['!ref'] = encode_range(range);
if(sheetRows < rows.length) ws['!fullref'] = encode_range((range.e.r = rows.length-1,range));
if(R >= sheetRows) ws['!fullref'] = encode_range((range.e.r = rows.length-_R+R-1,range)); // We can count the real number of rows to parse but we don't to improve the performance
return ws;
}
/* Build a workbook from a DOM table: the table is parsed into a worksheet, then wrapped; `opts` is handed to both steps */
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
	var ws/*:Worksheet*/ = parse_dom_table(table, opts);
	return sheet_to_workbook(ws, opts);
}
/*
 * Report whether a DOM element is hidden, i.e. whether its resolved CSS `display` value is 'none'.
 * The computed style is consulted first; the inline `style.display` value is used when no
 * computed value could be obtained.
 * Returns true only when the resolved value is exactly 'none'.
 */
function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ {
	var display/*:string*/ = '';
	var get_computed_style/*:?function*/ = get_get_computed_style_function(element);
	if(get_computed_style) {
		// Guard against a null/undefined result so a missing style declaration falls through to the inline-style fallback
		var computed = get_computed_style(element);
		if(computed) display = computed.getPropertyValue('display');
	}
	// Fallback for cases when getComputedStyle is not available (e.g. an old browser or some Node.js environments) or doesn't work (e.g. if the element is not inserted to a document).
	// The `element.style` check keeps DOM-like objects without a `style` property from throwing.
	if(!display && element.style) display = element.style.display;
	return display === 'none';
}
/* global getComputedStyle */
/*
 * Pick the getComputedStyle implementation to use for `element`.
 * Returns the function found, or null when none is available.
 */
function get_get_computed_style_function(element/*:HTMLElement*/)/*:?function*/ {
	// The proper getComputedStyle implementation is the one defined in the element window.
	// `ownerDocument` may be missing on DOM-like objects, so it is checked before being dereferenced.
	var doc = element.ownerDocument;
	var view = doc && doc.defaultView;
	if(view && typeof view.getComputedStyle === 'function') return view.getComputedStyle;
	// If it is not available, try to get one from the global namespace
	if(typeof getComputedStyle === 'function') return getComputedStyle;
	return null;
}

@ -216,6 +216,7 @@ Both functions accept options arguments:
|`dateNF` | FMT 14 | Use specified date format in string output |
|`cellDates` | false | Store dates as type `d` (default is `n`) |
|`sheetRows` | 0 | If >0, read the first `sheetRows` rows of the table |
|`display` | false | If true, hidden rows and cells will not be parsed |
<details>

@ -147,3 +147,8 @@ var IE_LoadFile = (function() { try {
}
return function(filename) { return fix_data(IE_LoadFile_Impl(filename)); };
} catch(e) { return void 0; }})();
// getComputedStyle polyfill from https://gist.github.com/8HNHoFtE/5891086
// Installed only when a `window` global exists but does not provide a `getComputedStyle` function.
// The replacement's `getPropertyValue` reads from the element's `currentStyle` object:
//   - "float" is looked up as "styleFloat"
//   - hyphenated names are converted to camelCase (e.g. "margin-top" becomes "marginTop") before the lookup
//   - a falsy `currentStyle` entry is reported as null
// NOTE(review): the function stores `el` and `getPropertyValue` on `this` and returns `this`; presumably it is meant to be invoked as a plain function call -- confirm
if(typeof window !== 'undefined' && typeof window.getComputedStyle !== 'function') {
window.getComputedStyle = function(e,t){return this.el=e,this.getPropertyValue=function(t){var n=/(\-([a-z]){1})/g;return t=="float"&&(t="styleFloat"),n.test(t)&&(t=t.replace(n,function(){return arguments[2].toUpperCase()})),e.currentStyle[t]?e.currentStyle[t]:null},this}
}

33
test.js

@ -2025,19 +2025,29 @@ if(fs.existsSync(dir + 'dbf/d11.dbf')) describe('dbf', function() {
var JSDOM = null;
// $FlowIgnore
var domtest = browser || (function(){try{return !!(JSDOM=require('jsdom').JSDOM);}catch(e){return 0;}})();
var inserted_dom_elements = [];
function get_dom_element(html) {
if(browser) {
var domelt = document.createElement('div');
domelt.innerHTML = html;
return domelt;
document.body.appendChild(domelt);
inserted_dom_elements.push(domelt);
return domelt.children[0];
}
if(!JSDOM) throw new Error("Browser test fail");
return new JSDOM(html).window.document.body.children[0];
}
describe('HTML', function() {
describe('input string', function(){
afterEach(function () {
// Remove the DOM elements inserted to the page by get_dom_element
inserted_dom_elements.forEach(function (element) {
if(element.parentNode) element.parentNode.removeChild(element);
});
inserted_dom_elements = [];
});
describe('input string', function() {
it('should interpret values by default', function() { plaintext_test(X.read(html_bstr, {type:"binary"}), false); });
it('should generate strings if raw option is passed', function() { plaintext_test(X.read(html_bstr, {type:"binary", raw:true}), true); });
it('should handle "string" type', function() { plaintext_test(X.read(html_str, {type:"string"}), false); });
@ -2071,6 +2081,7 @@ describe('HTML', function() {
});
if(domtest) it('should honor sheetRows', function() {
var html = X.utils.sheet_to_html(X.utils.aoa_to_sheet([[1,2],[3,4],[5,6]]));
html = /<body[^>]*>([\s\S]*)<\/body>/i.exec(html)[1];
var ws = X.utils.table_to_sheet(get_dom_element(html));
assert.equal(ws['!ref'], "A1:B3");
ws = X.utils.table_to_sheet(get_dom_element(html), {sheetRows:1});
@ -2080,6 +2091,24 @@ describe('HTML', function() {
assert.equal(ws['!ref'], "A1:B2");
assert.equal(ws['!fullref'], "A1:B3");
});
if(domtest) it('should hide hidden rows', function() {
var html = "<table><tr style='display: none;'><td>Foo</td></tr><tr><td style='display: none;'>Bar</td></tr><tr class='hidden'><td>Baz</td></tr></table><style>.hidden {display: none}</style>";
var ws = X.utils.table_to_sheet(get_dom_element(html));
var expected_rows = [];
expected_rows[0] = expected_rows[2] = {hidden: true};
assert.equal(ws['!ref'], "A1:A3");
assert.deepEqual(ws['!rows'], expected_rows);
assert.equal(get_cell(ws, "A1").v, "Foo");
assert.equal(get_cell(ws, "A2").v, "Bar");
assert.equal(get_cell(ws, "A3").v, "Baz");
});
if(domtest) it('should ignore hidden rows and cells when the `display` option is on', function() {
var html = "<table><tr style='display: none;'><td>1</td><td>2</td><td>3</td></tr><tr><td class='hidden'>Foo</td><td>Bar</td><td style='display: none;'>Baz</td></tr></table><style>.hidden {display: none}</style>";
var ws = X.utils.table_to_sheet(get_dom_element(html), {display: true});
assert.equal(ws['!ref'], "A1");
assert.equal(ws.hasOwnProperty('!rows'), false);
assert.equal(get_cell(ws, "A1").v, "Bar");
});
describe('type override', function() {
function chk(ws) {
assert.equal(get_cell(ws, "A1").t, "s");

33
tests/core.js generated

@ -2025,19 +2025,29 @@ if(fs.existsSync(dir + 'dbf/d11.dbf')) describe('dbf', function() {
var JSDOM = null;
// $FlowIgnore
var domtest = browser || (function(){try{return !!(JSDOM=require('jsdom').JSDOM);}catch(e){return 0;}})();
var inserted_dom_elements = [];
function get_dom_element(html) {
if(browser) {
var domelt = document.createElement('div');
domelt.innerHTML = html;
return domelt;
document.body.appendChild(domelt);
inserted_dom_elements.push(domelt);
return domelt.children[0];
}
if(!JSDOM) throw new Error("Browser test fail");
return new JSDOM(html).window.document.body.children[0];
}
describe('HTML', function() {
describe('input string', function(){
afterEach(function () {
// Remove the DOM elements inserted to the page by get_dom_element
inserted_dom_elements.forEach(function (element) {
if(element.parentNode) element.parentNode.removeChild(element);
});
inserted_dom_elements = [];
});
describe('input string', function() {
it('should interpret values by default', function() { plaintext_test(X.read(html_bstr, {type:"binary"}), false); });
it('should generate strings if raw option is passed', function() { plaintext_test(X.read(html_bstr, {type:"binary", raw:true}), true); });
it('should handle "string" type', function() { plaintext_test(X.read(html_str, {type:"string"}), false); });
@ -2071,6 +2081,7 @@ describe('HTML', function() {
});
if(domtest) it('should honor sheetRows', function() {
var html = X.utils.sheet_to_html(X.utils.aoa_to_sheet([[1,2],[3,4],[5,6]]));
html = /<body[^>]*>([\s\S]*)<\/body>/i.exec(html)[1];
var ws = X.utils.table_to_sheet(get_dom_element(html));
assert.equal(ws['!ref'], "A1:B3");
ws = X.utils.table_to_sheet(get_dom_element(html), {sheetRows:1});
@ -2080,6 +2091,24 @@ describe('HTML', function() {
assert.equal(ws['!ref'], "A1:B2");
assert.equal(ws['!fullref'], "A1:B3");
});
if(domtest) it('should hide hidden rows', function() {
var html = "<table><tr style='display: none;'><td>Foo</td></tr><tr><td style='display: none;'>Bar</td></tr><tr class='hidden'><td>Baz</td></tr></table><style>.hidden {display: none}</style>";
var ws = X.utils.table_to_sheet(get_dom_element(html));
var expected_rows = [];
expected_rows[0] = expected_rows[2] = {hidden: true};
assert.equal(ws['!ref'], "A1:A3");
assert.deepEqual(ws['!rows'], expected_rows);
assert.equal(get_cell(ws, "A1").v, "Foo");
assert.equal(get_cell(ws, "A2").v, "Bar");
assert.equal(get_cell(ws, "A3").v, "Baz");
});
if(domtest) it('should ignore hidden rows and cells when the `display` option is on', function() {
var html = "<table><tr style='display: none;'><td>1</td><td>2</td><td>3</td></tr><tr><td class='hidden'>Foo</td><td>Bar</td><td style='display: none;'>Baz</td></tr></table><style>.hidden {display: none}</style>";
var ws = X.utils.table_to_sheet(get_dom_element(html), {display: true});
assert.equal(ws['!ref'], "A1");
assert.equal(ws.hasOwnProperty('!rows'), false);
assert.equal(get_cell(ws, "A1").v, "Bar");
});
describe('type override', function() {
function chk(ws) {
assert.equal(get_cell(ws, "A1").t, "s");

3
types/index.d.ts vendored

@ -663,6 +663,9 @@ export interface Table2SheetOpts extends CommonOptions, DateNFOption {
* @default 0
*/
sheetRows?: number;
/** If true, hidden rows and cells will not be parsed */
display?: boolean;
}
/** General utilities */

@ -18281,15 +18281,22 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
if(DENSE != null) opts.dense = DENSE;
var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/);
var rows/*:HTMLCollection<HTMLTableRowElement>*/ = table.getElementsByTagName('tr');
var sheetRows = Math.min(opts.sheetRows||10000000, rows.length);
var range/*:Range*/ = {s:{r:0,c:0},e:{r:sheetRows - 1,c:0}};
var sheetRows = opts.sheetRows || 10000000;
var range/*:Range*/ = {s:{r:0,c:0},e:{r:0,c:0}};
var merges/*:Array<Range>*/ = [], midx = 0;
var R = 0, _C = 0, C = 0, RS = 0, CS = 0;
for(; R < sheetRows; ++R) {
var row/*:HTMLTableRowElement*/ = rows[R];
var rowinfo/*:Array<RowInfo>*/ = [];
var _R = 0, R = 0, _C, C, RS, CS;
for(; _R < rows.length && R < sheetRows; ++_R) {
var row/*:HTMLTableRowElement*/ = rows[_R];
if (is_dom_element_hidden(row)) {
if (opts.display) continue;
rowinfo[R] = {hidden: true};
}
var elts/*:HTMLCollection<HTMLTableCellElement>*/ = (row.children/*:any*/);
for(_C = C = 0; _C < elts.length; ++_C) {
var elt/*:HTMLTableCellElement*/ = elts[_C], v = htmldecode(elts[_C].innerHTML);
var elt/*:HTMLTableCellElement*/ = elts[_C];
if (opts.display && is_dom_element_hidden(elt)) continue;
var v/*:string*/ = htmldecode(elt.innerHTML);
for(midx = 0; midx < merges.length; ++midx) {
var m/*:Range*/ = merges[midx];
if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
@ -18316,16 +18323,36 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
if(range.e.c < C) range.e.c = C;
C += CS;
}
++R;
}
if(merges.length) ws['!merges'] = merges;
if(rowinfo.length) ws['!rows'] = rowinfo;
range.e.r = R - 1;
ws['!ref'] = encode_range(range);
if(sheetRows < rows.length) ws['!fullref'] = encode_range((range.e.r = rows.length-1,range));
if(R >= sheetRows) ws['!fullref'] = encode_range((range.e.r = rows.length-_R+R-1,range)); // We can count the real number of rows to parse but we don't to improve the performance
return ws;
}
/* Build a workbook from a DOM table: the table is parsed into a worksheet, then wrapped; `opts` is handed to both steps */
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
	var ws/*:Worksheet*/ = parse_dom_table(table, opts);
	return sheet_to_workbook(ws, opts);
}
/*
 * Report whether a DOM element is hidden, i.e. whether its resolved CSS `display` value is 'none'.
 * The computed style is consulted first; the inline `style.display` value is used when no
 * computed value could be obtained.
 * Returns true only when the resolved value is exactly 'none'.
 */
function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ {
	var display/*:string*/ = '';
	var get_computed_style/*:?function*/ = get_get_computed_style_function(element);
	if(get_computed_style) {
		// Guard against a null/undefined result so a missing style declaration falls through to the inline-style fallback
		var computed = get_computed_style(element);
		if(computed) display = computed.getPropertyValue('display');
	}
	// Fallback for cases when getComputedStyle is not available (e.g. an old browser or some Node.js environments) or doesn't work (e.g. if the element is not inserted to a document).
	// The `element.style` check keeps DOM-like objects without a `style` property from throwing.
	if(!display && element.style) display = element.style.display;
	return display === 'none';
}
/* global getComputedStyle */
/*
 * Pick the getComputedStyle implementation to use for `element`.
 * Returns the function found, or null when none is available.
 */
function get_get_computed_style_function(element/*:HTMLElement*/)/*:?function*/ {
	// The proper getComputedStyle implementation is the one defined in the element window.
	// `ownerDocument` may be missing on DOM-like objects, so it is checked before being dereferenced.
	var doc = element.ownerDocument;
	var view = doc && doc.defaultView;
	if(view && typeof view.getComputedStyle === 'function') return view.getComputedStyle;
	// If it is not available, try to get one from the global namespace
	if(typeof getComputedStyle === 'function') return getComputedStyle;
	return null;
}
/* OpenDocument */
var parse_content_xml = (function() {

41
xlsx.js generated

@ -18168,15 +18168,22 @@ function parse_dom_table(table, _opts) {
if(DENSE != null) opts.dense = DENSE;
var ws = opts.dense ? ([]) : ({});
var rows = table.getElementsByTagName('tr');
var sheetRows = Math.min(opts.sheetRows||10000000, rows.length);
var range = {s:{r:0,c:0},e:{r:sheetRows - 1,c:0}};
var sheetRows = opts.sheetRows || 10000000;
var range = {s:{r:0,c:0},e:{r:0,c:0}};
var merges = [], midx = 0;
var R = 0, _C = 0, C = 0, RS = 0, CS = 0;
for(; R < sheetRows; ++R) {
var row = rows[R];
var rowinfo = [];
var _R = 0, R = 0, _C, C, RS, CS;
for(; _R < rows.length && R < sheetRows; ++_R) {
var row = rows[_R];
if (is_dom_element_hidden(row)) {
if (opts.display) continue;
rowinfo[R] = {hidden: true};
}
var elts = (row.children);
for(_C = C = 0; _C < elts.length; ++_C) {
var elt = elts[_C], v = htmldecode(elts[_C].innerHTML);
var elt = elts[_C];
if (opts.display && is_dom_element_hidden(elt)) continue;
var v = htmldecode(elt.innerHTML);
for(midx = 0; midx < merges.length; ++midx) {
var m = merges[midx];
if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
@ -18203,16 +18210,36 @@ function parse_dom_table(table, _opts) {
if(range.e.c < C) range.e.c = C;
C += CS;
}
++R;
}
if(merges.length) ws['!merges'] = merges;
if(rowinfo.length) ws['!rows'] = rowinfo;
range.e.r = R - 1;
ws['!ref'] = encode_range(range);
if(sheetRows < rows.length) ws['!fullref'] = encode_range((range.e.r = rows.length-1,range));
if(R >= sheetRows) ws['!fullref'] = encode_range((range.e.r = rows.length-_R+R-1,range)); // We can count the real number of rows to parse but we don't to improve the performance
return ws;
}
/* Build a workbook from a DOM table: the table is parsed into a worksheet, then wrapped; `opts` is handed to both steps */
function table_to_book(table, opts) {
	var ws = parse_dom_table(table, opts);
	return sheet_to_workbook(ws, opts);
}
/*
 * Report whether a DOM element is hidden, i.e. whether its resolved CSS `display` value is 'none'.
 * The computed style is consulted first; the inline `style.display` value is used when no
 * computed value could be obtained.
 * Returns true only when the resolved value is exactly 'none'.
 */
function is_dom_element_hidden(element) {
	var display = '';
	var get_computed_style = get_get_computed_style_function(element);
	if(get_computed_style) {
		// Guard against a null/undefined result so a missing style declaration falls through to the inline-style fallback
		var computed = get_computed_style(element);
		if(computed) display = computed.getPropertyValue('display');
	}
	// Fallback for cases when getComputedStyle is not available (e.g. an old browser or some Node.js environments) or doesn't work (e.g. if the element is not inserted to a document).
	// The `element.style` check keeps DOM-like objects without a `style` property from throwing.
	if(!display && element.style) display = element.style.display;
	return display === 'none';
}
/* global getComputedStyle */
/*
 * Pick the getComputedStyle implementation to use for `element`.
 * Returns the function found, or null when none is available.
 */
function get_get_computed_style_function(element) {
	// The proper getComputedStyle implementation is the one defined in the element window.
	// `ownerDocument` may be missing on DOM-like objects, so it is checked before being dereferenced.
	var doc = element.ownerDocument;
	var view = doc && doc.defaultView;
	if(view && typeof view.getComputedStyle === 'function') return view.getComputedStyle;
	// If it is not available, try to get one from the global namespace
	if(typeof getComputedStyle === 'function') return getComputedStyle;
	return null;
}
/* OpenDocument */
var parse_content_xml = (function() {