From 291818524953f0f180d6dc6c275cbf8fbae2ae21 Mon Sep 17 00:00:00 2001 From: Surgie Finesse Date: Sun, 27 May 2018 17:27:47 +1000 Subject: [PATCH] DOM TABLE parse element visibility - Hidden rows are marked as hidden by default (mimics Excel behavior). - `display` option for `table_to_*` replicates table as it appears. Fixes #1115 --- README.md | 1 + bits/79_html.js | 41 ++++++++++++++++++++++++++++++++++------- docbits/82_util.md | 1 + shim.js | 5 +++++ test.js | 33 +++++++++++++++++++++++++++++++-- tests/core.js | 33 +++++++++++++++++++++++++++++++-- types/index.d.ts | 3 +++ xlsx.flow.js | 41 ++++++++++++++++++++++++++++++++++------- xlsx.js | 41 ++++++++++++++++++++++++++++++++++------- 9 files changed, 174 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index c455874..cff83dc 100644 --- a/README.md +++ b/README.md @@ -2055,6 +2055,7 @@ Both functions accept options arguments: |`dateNF` | FMT 14 | Use specified date format in string output | |`cellDates` | false | Store dates as type `d` (default is `n`) | |`sheetRows` | 0 | If >0, read the first `sheetRows` rows of the table | +|`display` | false | If true, hidden rows and cells will not be parsed |
diff --git a/bits/79_html.js b/bits/79_html.js index e9a519f..4e7c2be 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -119,15 +119,22 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { if(DENSE != null) opts.dense = DENSE; var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/); var rows/*:HTMLCollection*/ = table.getElementsByTagName('tr'); - var sheetRows = Math.min(opts.sheetRows||10000000, rows.length); - var range/*:Range*/ = {s:{r:0,c:0},e:{r:sheetRows - 1,c:0}}; + var sheetRows = opts.sheetRows || 10000000; + var range/*:Range*/ = {s:{r:0,c:0},e:{r:0,c:0}}; var merges/*:Array*/ = [], midx = 0; - var R = 0, _C = 0, C = 0, RS = 0, CS = 0; - for(; R < sheetRows; ++R) { - var row/*:HTMLTableRowElement*/ = rows[R]; + var rowinfo/*:Array*/ = []; + var _R = 0, R = 0, _C, C, RS, CS; + for(; _R < rows.length && R < sheetRows; ++_R) { + var row/*:HTMLTableRowElement*/ = rows[_R]; + if (is_dom_element_hidden(row)) { + if (opts.display) continue; + rowinfo[R] = {hidden: true}; + } var elts/*:HTMLCollection*/ = (row.children/*:any*/); for(_C = C = 0; _C < elts.length; ++_C) { - var elt/*:HTMLTableCellElement*/ = elts[_C], v = htmldecode(elts[_C].innerHTML); + var elt/*:HTMLTableCellElement*/ = elts[_C]; + if (opts.display && is_dom_element_hidden(elt)) continue; + var v/*:string*/ = htmldecode(elt.innerHTML); for(midx = 0; midx < merges.length; ++midx) { var m/*:Range*/ = merges[midx]; if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } @@ -154,13 +161,33 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { if(range.e.c < C) range.e.c = C; C += CS; } + ++R; } if(merges.length) ws['!merges'] = merges; + if(rowinfo.length) ws['!rows'] = rowinfo; + range.e.r = R - 1; ws['!ref'] = encode_range(range); - if(sheetRows < rows.length) ws['!fullref'] = encode_range((range.e.r = rows.length-1,range)); + if(R >= sheetRows) ws['!fullref'] = encode_range((range.e.r = rows.length-_R+R-1,range)); // We can count the real number of rows to parse but we don't to improve the performance return ws; } function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ { return sheet_to_workbook(parse_dom_table(table, opts), opts); } + +function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ { + var display/*:string*/ = ''; + var get_computed_style/*:?function*/ = get_get_computed_style_function(element); + if(get_computed_style) display = get_computed_style(element).getPropertyValue('display'); + if(!display) display = element.style.display; // Fallback for cases when getComputedStyle is not available (e.g. an old browser or some Node.js environments) or doesn't work (e.g. if the element is not inserted to a document) + return display === 'none'; +} + +/* global getComputedStyle */ +function get_get_computed_style_function(element/*:HTMLElement*/)/*:?function*/ { + // The proper getComputedStyle implementation is the one defined in the element window + if(element.ownerDocument.defaultView && typeof element.ownerDocument.defaultView.getComputedStyle === 'function') return element.ownerDocument.defaultView.getComputedStyle; + // If it is not available, try to get one from the global namespace + if(typeof getComputedStyle === 'function') return getComputedStyle; + return null; +} diff --git a/docbits/82_util.md b/docbits/82_util.md index fd3096a..f7404fd 100644 --- a/docbits/82_util.md +++ b/docbits/82_util.md @@ -216,6 +216,7 @@ Both functions accept options arguments: |`dateNF` | FMT 14 | Use specified date format in string output | |`cellDates` | false | Store dates as type `d` (default is `n`) | |`sheetRows` | 0 | If >0, read the first `sheetRows` rows of the table | +|`display` | false | If true, hidden rows and cells will not be parsed |
diff --git a/shim.js b/shim.js index 47ed31d..c1f5c7a 100644 --- a/shim.js +++ b/shim.js @@ -147,3 +147,8 @@ var IE_LoadFile = (function() { try { } return function(filename) { return fix_data(IE_LoadFile_Impl(filename)); }; } catch(e) { return void 0; }})(); + +// getComputedStyle polyfill from https://gist.github.com/8HNHoFtE/5891086 +if(typeof window !== 'undefined' && typeof window.getComputedStyle !== 'function') { + window.getComputedStyle = function(e,t){return this.el=e,this.getPropertyValue=function(t){var n=/(\-([a-z]){1})/g;return t=="float"&&(t="styleFloat"),n.test(t)&&(t=t.replace(n,function(){return arguments[2].toUpperCase()})),e.currentStyle[t]?e.currentStyle[t]:null},this} +} diff --git a/test.js b/test.js index 5e4fbfa..7c2a200 100644 --- a/test.js +++ b/test.js @@ -2025,19 +2025,29 @@ if(fs.existsSync(dir + 'dbf/d11.dbf')) describe('dbf', function() { var JSDOM = null; // $FlowIgnore var domtest = browser || (function(){try{return !!(JSDOM=require('jsdom').JSDOM);}catch(e){return 0;}})(); +var inserted_dom_elements = []; function get_dom_element(html) { if(browser) { var domelt = document.createElement('div'); domelt.innerHTML = html; - return domelt; + document.body.appendChild(domelt); + inserted_dom_elements.push(domelt); + return domelt.children[0]; } if(!JSDOM) throw new Error("Browser test fail"); return new JSDOM(html).window.document.body.children[0]; } describe('HTML', function() { - describe('input string', function(){ + afterEach(function () { + // Remove the DOM elements inserted to the page by get_dom_element + inserted_dom_elements.forEach(function (element) { + if(element.parentNode) element.parentNode.removeChild(element); + }); + inserted_dom_elements = []; + }); + describe('input string', function() { it('should interpret values by default', function() { plaintext_test(X.read(html_bstr, {type:"binary"}), false); }); it('should generate strings if raw option is passed', function() { plaintext_test(X.read(html_bstr, {type:"binary", raw:true}), true); }); it('should handle "string" type', function() { plaintext_test(X.read(html_str, {type:"string"}), false); }); @@ -2071,6 +2081,7 @@ describe('HTML', function() { }); if(domtest) it('should honor sheetRows', function() { var html = X.utils.sheet_to_html(X.utils.aoa_to_sheet([[1,2],[3,4],[5,6]])); + html = /]*>([\s\S]*)<\/body>/i.exec(html)[1]; var ws = X.utils.table_to_sheet(get_dom_element(html)); assert.equal(ws['!ref'], "A1:B3"); ws = X.utils.table_to_sheet(get_dom_element(html), {sheetRows:1}); @@ -2080,6 +2091,24 @@ describe('HTML', function() { assert.equal(ws['!ref'], "A1:B2"); assert.equal(ws['!fullref'], "A1:B3"); }); + if(domtest) it('should hide hidden rows', function() { + var html = "
Foo
Bar
"; + var ws = X.utils.table_to_sheet(get_dom_element(html)); + var expected_rows = []; + expected_rows[0] = expected_rows[2] = {hidden: true}; + assert.equal(ws['!ref'], "A1:A3"); + assert.deepEqual(ws['!rows'], expected_rows); + assert.equal(get_cell(ws, "A1").v, "Foo"); + assert.equal(get_cell(ws, "A2").v, "Bar"); + assert.equal(get_cell(ws, "A3").v, "Baz"); + }); + if(domtest) it('should ignore hidden rows and cells when the `display` option is on', function() { + var html = "
123
BarBaz
"; + var ws = X.utils.table_to_sheet(get_dom_element(html), {display: true}); + assert.equal(ws['!ref'], "A1"); + assert.equal(ws.hasOwnProperty('!rows'), false); + assert.equal(get_cell(ws, "A1").v, "Bar"); + }); describe('type override', function() { function chk(ws) { assert.equal(get_cell(ws, "A1").t, "s"); diff --git a/tests/core.js b/tests/core.js index 5e4fbfa..7c2a200 100644 --- a/tests/core.js +++ b/tests/core.js @@ -2025,19 +2025,29 @@ if(fs.existsSync(dir + 'dbf/d11.dbf')) describe('dbf', function() { var JSDOM = null; // $FlowIgnore var domtest = browser || (function(){try{return !!(JSDOM=require('jsdom').JSDOM);}catch(e){return 0;}})(); +var inserted_dom_elements = []; function get_dom_element(html) { if(browser) { var domelt = document.createElement('div'); domelt.innerHTML = html; - return domelt; + document.body.appendChild(domelt); + inserted_dom_elements.push(domelt); + return domelt.children[0]; } if(!JSDOM) throw new Error("Browser test fail"); return new JSDOM(html).window.document.body.children[0]; } describe('HTML', function() { - describe('input string', function(){ + afterEach(function () { + // Remove the DOM elements inserted to the page by get_dom_element + inserted_dom_elements.forEach(function (element) { + if(element.parentNode) element.parentNode.removeChild(element); + }); + inserted_dom_elements = []; + }); + describe('input string', function() { it('should interpret values by default', function() { plaintext_test(X.read(html_bstr, {type:"binary"}), false); }); it('should generate strings if raw option is passed', function() { plaintext_test(X.read(html_bstr, {type:"binary", raw:true}), true); }); it('should handle "string" type', function() { plaintext_test(X.read(html_str, {type:"string"}), false); }); @@ -2071,6 +2081,7 @@ describe('HTML', function() { }); if(domtest) it('should honor sheetRows', function() { var html = X.utils.sheet_to_html(X.utils.aoa_to_sheet([[1,2],[3,4],[5,6]])); + html = /]*>([\s\S]*)<\/body>/i.exec(html)[1]; var ws = X.utils.table_to_sheet(get_dom_element(html)); assert.equal(ws['!ref'], "A1:B3"); ws = X.utils.table_to_sheet(get_dom_element(html), {sheetRows:1}); @@ -2080,6 +2091,24 @@ describe('HTML', function() { assert.equal(ws['!ref'], "A1:B2"); assert.equal(ws['!fullref'], "A1:B3"); }); + if(domtest) it('should hide hidden rows', function() { + var html = "
Foo
Bar
"; + var ws = X.utils.table_to_sheet(get_dom_element(html)); + var expected_rows = []; + expected_rows[0] = expected_rows[2] = {hidden: true}; + assert.equal(ws['!ref'], "A1:A3"); + assert.deepEqual(ws['!rows'], expected_rows); + assert.equal(get_cell(ws, "A1").v, "Foo"); + assert.equal(get_cell(ws, "A2").v, "Bar"); + assert.equal(get_cell(ws, "A3").v, "Baz"); + }); + if(domtest) it('should ignore hidden rows and cells when the `display` option is on', function() { + var html = "
123
BarBaz
"; + var ws = X.utils.table_to_sheet(get_dom_element(html), {display: true}); + assert.equal(ws['!ref'], "A1"); + assert.equal(ws.hasOwnProperty('!rows'), false); + assert.equal(get_cell(ws, "A1").v, "Bar"); + }); describe('type override', function() { function chk(ws) { assert.equal(get_cell(ws, "A1").t, "s"); diff --git a/types/index.d.ts b/types/index.d.ts index d81eb56..d09a1ca 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -663,6 +663,9 @@ export interface Table2SheetOpts extends CommonOptions, DateNFOption { * @default 0 */ sheetRows?: number; + + /** If true, hidden rows and cells will not be parsed */ + display?: boolean; } /** General utilities */ diff --git a/xlsx.flow.js b/xlsx.flow.js index b9bb8a9..8cb49cc 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -18281,15 +18281,22 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { if(DENSE != null) opts.dense = DENSE; var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/); var rows/*:HTMLCollection*/ = table.getElementsByTagName('tr'); - var sheetRows = Math.min(opts.sheetRows||10000000, rows.length); - var range/*:Range*/ = {s:{r:0,c:0},e:{r:sheetRows - 1,c:0}}; + var sheetRows = opts.sheetRows || 10000000; + var range/*:Range*/ = {s:{r:0,c:0},e:{r:0,c:0}}; var merges/*:Array*/ = [], midx = 0; - var R = 0, _C = 0, C = 0, RS = 0, CS = 0; - for(; R < sheetRows; ++R) { - var row/*:HTMLTableRowElement*/ = rows[R]; + var rowinfo/*:Array*/ = []; + var _R = 0, R = 0, _C, C, RS, CS; + for(; _R < rows.length && R < sheetRows; ++_R) { + var row/*:HTMLTableRowElement*/ = rows[_R]; + if (is_dom_element_hidden(row)) { + if (opts.display) continue; + rowinfo[R] = {hidden: true}; + } var elts/*:HTMLCollection*/ = (row.children/*:any*/); for(_C = C = 0; _C < elts.length; ++_C) { - var elt/*:HTMLTableCellElement*/ = elts[_C], v = htmldecode(elts[_C].innerHTML); + var elt/*:HTMLTableCellElement*/ = elts[_C]; + if (opts.display && is_dom_element_hidden(elt)) continue; + var v/*:string*/ = htmldecode(elt.innerHTML); for(midx = 0; midx < merges.length; ++midx) { var m/*:Range*/ = merges[midx]; if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } @@ -18316,16 +18323,36 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { if(range.e.c < C) range.e.c = C; C += CS; } + ++R; } if(merges.length) ws['!merges'] = merges; + if(rowinfo.length) ws['!rows'] = rowinfo; + range.e.r = R - 1; ws['!ref'] = encode_range(range); - if(sheetRows < rows.length) ws['!fullref'] = encode_range((range.e.r = rows.length-1,range)); + if(R >= sheetRows) ws['!fullref'] = encode_range((range.e.r = rows.length-_R+R-1,range)); // We can count the real number of rows to parse but we don't to improve the performance return ws; } function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ { return sheet_to_workbook(parse_dom_table(table, opts), opts); } + +function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ { + var display/*:string*/ = ''; + var get_computed_style/*:?function*/ = get_get_computed_style_function(element); + if(get_computed_style) display = get_computed_style(element).getPropertyValue('display'); + if(!display) display = element.style.display; // Fallback for cases when getComputedStyle is not available (e.g. an old browser or some Node.js environments) or doesn't work (e.g. if the element is not inserted to a document) + return display === 'none'; +} + +/* global getComputedStyle */ +function get_get_computed_style_function(element/*:HTMLElement*/)/*:?function*/ { + // The proper getComputedStyle implementation is the one defined in the element window + if(element.ownerDocument.defaultView && typeof element.ownerDocument.defaultView.getComputedStyle === 'function') return element.ownerDocument.defaultView.getComputedStyle; + // If it is not available, try to get one from the global namespace + if(typeof getComputedStyle === 'function') return getComputedStyle; + return null; +} /* OpenDocument */ var parse_content_xml = (function() { diff --git a/xlsx.js b/xlsx.js index 49a705c..e69797d 100644 --- a/xlsx.js +++ b/xlsx.js @@ -18168,15 +18168,22 @@ function parse_dom_table(table, _opts) { if(DENSE != null) opts.dense = DENSE; var ws = opts.dense ? ([]) : ({}); var rows = table.getElementsByTagName('tr'); - var sheetRows = Math.min(opts.sheetRows||10000000, rows.length); - var range = {s:{r:0,c:0},e:{r:sheetRows - 1,c:0}}; + var sheetRows = opts.sheetRows || 10000000; + var range = {s:{r:0,c:0},e:{r:0,c:0}}; var merges = [], midx = 0; - var R = 0, _C = 0, C = 0, RS = 0, CS = 0; - for(; R < sheetRows; ++R) { - var row = rows[R]; + var rowinfo = []; + var _R = 0, R = 0, _C, C, RS, CS; + for(; _R < rows.length && R < sheetRows; ++_R) { + var row = rows[_R]; + if (is_dom_element_hidden(row)) { + if (opts.display) continue; + rowinfo[R] = {hidden: true}; + } var elts = (row.children); for(_C = C = 0; _C < elts.length; ++_C) { - var elt = elts[_C], v = htmldecode(elts[_C].innerHTML); + var elt = elts[_C]; + if (opts.display && is_dom_element_hidden(elt)) continue; + var v = htmldecode(elt.innerHTML); for(midx = 0; midx < merges.length; ++midx) { var m = merges[midx]; if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; } @@ -18203,16 +18210,36 @@ function parse_dom_table(table, _opts) { if(range.e.c < C) range.e.c = C; C += CS; } + ++R; } if(merges.length) ws['!merges'] = merges; + if(rowinfo.length) ws['!rows'] = rowinfo; + range.e.r = R - 1; ws['!ref'] = encode_range(range); - if(sheetRows < rows.length) ws['!fullref'] = encode_range((range.e.r = rows.length-1,range)); + if(R >= sheetRows) ws['!fullref'] = encode_range((range.e.r = rows.length-_R+R-1,range)); // We can count the real number of rows to parse but we don't to improve the performance return ws; } function table_to_book(table, opts) { return sheet_to_workbook(parse_dom_table(table, opts), opts); } + +function is_dom_element_hidden(element) { + var display = ''; + var get_computed_style = get_get_computed_style_function(element); + if(get_computed_style) display = get_computed_style(element).getPropertyValue('display'); + if(!display) display = element.style.display; // Fallback for cases when getComputedStyle is not available (e.g. an old browser or some Node.js environments) or doesn't work (e.g. if the element is not inserted to a document) + return display === 'none'; +} + +/* global getComputedStyle */ +function get_get_computed_style_function(element) { + // The proper getComputedStyle implementation is the one defined in the element window + if(element.ownerDocument.defaultView && typeof element.ownerDocument.defaultView.getComputedStyle === 'function') return element.ownerDocument.defaultView.getComputedStyle; + // If it is not available, try to get one from the global namespace + if(typeof getComputedStyle === 'function') return getComputedStyle; + return null; +} /* OpenDocument */ var parse_content_xml = (function() {