From eafc07926bba158f679e660236f017455b32e761 Mon Sep 17 00:00:00 2001
From: ThomasChan
Date: Thu, 19 Sep 2019 20:23:39 +0800
Subject: [PATCH] improve parse_dom_table merge cell logic performance

---
Reviewer notes (ignored by git-am; not part of the commit message):
 * The decoded-text cache is now looked up with an own-property check, so
   cell text such as "constructor" or "toString" can no longer resolve to an
   inherited Object.prototype member. xlsx.js and xlsx.mini.js previously
   used `v = h || ...`, which skipped htmldecode for every non-empty cell.
 * The merge scan is kept as the original start-column scan with restart.
   The proposed replacement compared C against merge END columns and jumped
   past the maximum end column of every merge on the row, which misplaces
   cells when a row has several merges or a merge spans several columns.
 * elt.rowSpan defaults to 1 on a DOM cell (getAttribute returned null -> 0),
   so the merge test is now `> 1`; `> 0` recorded every cell as a 1x1 merge.
 * The index hashes below predate these review edits.

 xlsx.flow.js      | 23 ++++++++++++-----------
 xlsx.js           | 23 ++++++++++++-----------
 xlsx.mini.flow.js | 23 ++++++++++++-----------
 xlsx.mini.js      | 23 ++++++++++++-----------
 4 files changed, 48 insertions(+), 44 deletions(-)

diff --git a/xlsx.flow.js b/xlsx.flow.js
index f8bbb79..18e3d81 100644
--- a/xlsx.flow.js
+++ b/xlsx.flow.js
@@ -19160,29 +19160,30 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
 	var rows/*:HTMLCollection*/ = table.getElementsByTagName('tr');
 	var sheetRows = opts.sheetRows || 10000000;
 	var range/*:Range*/ = {s:{r:0,c:0},e:{r:0,c:0}};
-	var merges/*:Array*/ = [], midx = 0;
+	var merges/*:Array*/ = [], midx = 0, m, cache/*:Object*/ = {};
 	var rowinfo/*:Array*/ = [];
-	var _R = 0, R = 0, _C, C, RS, CS;
+	var _R = 0, R = 0, _C, C, RS, CS, row, elts, elt, h, v, o, _t;
 	for(; _R < rows.length && R < sheetRows; ++_R) {
-		var row/*:HTMLTableRowElement*/ = rows[_R];
+		row/*:HTMLTableRowElement*/ = rows[_R];
 		if (is_dom_element_hidden(row)) {
 			if (opts.display) continue;
 			rowinfo[R] = {hidden: true};
 		}
-		var elts/*:HTMLCollection*/ = (row.children/*:any*/);
+		elts/*:HTMLCollection*/ = (row.children/*:any*/);
 		for(_C = C = 0; _C < elts.length; ++_C) {
-			var elt/*:HTMLTableCellElement*/ = elts[_C];
+			elt/*:HTMLTableCellElement*/ = elts[_C];
 			if (opts.display && is_dom_element_hidden(elt)) continue;
-			var v/*:string*/ = htmldecode(elt.innerHTML);
+			h/*:string*/ = elt.innerHTML;
+			v/*:string*/ = Object.prototype.hasOwnProperty.call(cache, h) ? cache[h] : (cache[h] = htmldecode(h));
 			for(midx = 0; midx < merges.length; ++midx) {
-				var m/*:Range*/ = merges[midx];
+				m/*:Range*/ = merges[midx];
 				if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
 			}
 			/* TODO: figure out how to extract nonstandard mso- style */
-			CS = +elt.getAttribute("colspan") || 1;
-			if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
-			var o/*:Cell*/ = {t:'s', v:v};
-			var _t/*:string*/ = elt.getAttribute("t") || "";
+			CS = +elt.colSpan || 1;
+			if((RS = +elt.rowSpan)>1 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
+			o/*:Cell*/ = {t:'s', v:v};
+			_t/*:string*/ = elt.getAttribute("t") || "";
 			if(v != null) {
 				if(v.length == 0) o.t = _t || 'z';
 				else if(opts.raw || v.trim().length == 0 || _t == "s"){}
diff --git a/xlsx.js b/xlsx.js
index 5c4744e..004bf20 100644
--- a/xlsx.js
+++ b/xlsx.js
@@ -19044,29 +19044,30 @@ function parse_dom_table(table, _opts) {
 	var rows = table.getElementsByTagName('tr');
 	var sheetRows = opts.sheetRows || 10000000;
 	var range = {s:{r:0,c:0},e:{r:0,c:0}};
-	var merges = [], midx = 0;
+	var merges = [], midx = 0, m, cache = {};
 	var rowinfo = [];
-	var _R = 0, R = 0, _C, C, RS, CS;
+	var _R = 0, R = 0, _C, C, RS, CS, row, elts, elt, h, v, o, _t;
 	for(; _R < rows.length && R < sheetRows; ++_R) {
-		var row = rows[_R];
+		row = rows[_R];
 		if (is_dom_element_hidden(row)) {
 			if (opts.display) continue;
 			rowinfo[R] = {hidden: true};
 		}
-		var elts = (row.children);
+		elts = (row.children);
 		for(_C = C = 0; _C < elts.length; ++_C) {
-			var elt = elts[_C];
+			elt = elts[_C];
 			if (opts.display && is_dom_element_hidden(elt)) continue;
-			var v = htmldecode(elt.innerHTML);
+			h = elt.innerHTML;
+			v = Object.prototype.hasOwnProperty.call(cache, h) ? cache[h] : (cache[h] = htmldecode(h));
 			for(midx = 0; midx < merges.length; ++midx) {
-				var m = merges[midx];
+				m = merges[midx];
 				if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
 			}
 			/* TODO: figure out how to extract nonstandard mso- style */
-			CS = +elt.getAttribute("colspan") || 1;
-			if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
-			var o = {t:'s', v:v};
-			var _t = elt.getAttribute("t") || "";
+			CS = +elt.colSpan || 1;
+			if((RS = +elt.rowSpan)>1 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
+			o = {t:'s', v:v};
+			_t = elt.getAttribute("t") || "";
 			if(v != null) {
 				if(v.length == 0) o.t = _t || 'z';
 				else if(opts.raw || v.trim().length == 0 || _t == "s"){}
diff --git a/xlsx.mini.flow.js b/xlsx.mini.flow.js
index 9fe0d0a..3fb78c6 100644
--- a/xlsx.mini.flow.js
+++ b/xlsx.mini.flow.js
@@ -7379,29 +7379,30 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
 	var rows/*:HTMLCollection*/ = table.getElementsByTagName('tr');
 	var sheetRows = opts.sheetRows || 10000000;
 	var range/*:Range*/ = {s:{r:0,c:0},e:{r:0,c:0}};
-	var merges/*:Array*/ = [], midx = 0;
+	var merges/*:Array*/ = [], midx = 0, m, cache/*:Object*/ = {};
 	var rowinfo/*:Array*/ = [];
-	var _R = 0, R = 0, _C, C, RS, CS;
+	var _R = 0, R = 0, _C, C, RS, CS, row, elts, elt, h, v, o, _t;
 	for(; _R < rows.length && R < sheetRows; ++_R) {
-		var row/*:HTMLTableRowElement*/ = rows[_R];
+		row/*:HTMLTableRowElement*/ = rows[_R];
 		if (is_dom_element_hidden(row)) {
 			if (opts.display) continue;
 			rowinfo[R] = {hidden: true};
 		}
-		var elts/*:HTMLCollection*/ = (row.children/*:any*/);
+		elts/*:HTMLCollection*/ = (row.children/*:any*/);
 		for(_C = C = 0; _C < elts.length; ++_C) {
-			var elt/*:HTMLTableCellElement*/ = elts[_C];
+			elt/*:HTMLTableCellElement*/ = elts[_C];
 			if (opts.display && is_dom_element_hidden(elt)) continue;
-			var v/*:string*/ = htmldecode(elt.innerHTML);
+			h/*:string*/ = elt.innerHTML;
+			v/*:string*/ = Object.prototype.hasOwnProperty.call(cache, h) ? cache[h] : (cache[h] = htmldecode(h));
 			for(midx = 0; midx < merges.length; ++midx) {
-				var m/*:Range*/ = merges[midx];
+				m/*:Range*/ = merges[midx];
 				if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
 			}
 			/* TODO: figure out how to extract nonstandard mso- style */
-			CS = +elt.getAttribute("colspan") || 1;
-			if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
-			var o/*:Cell*/ = {t:'s', v:v};
-			var _t/*:string*/ = elt.getAttribute("t") || "";
+			CS = +elt.colSpan || 1;
+			if((RS = +elt.rowSpan)>1 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
+			o/*:Cell*/ = {t:'s', v:v};
+			_t/*:string*/ = elt.getAttribute("t") || "";
 			if(v != null) {
 				if(v.length == 0) o.t = _t || 'z';
 				else if(opts.raw || v.trim().length == 0 || _t == "s"){}
diff --git a/xlsx.mini.js b/xlsx.mini.js
index 021ffc2..18e0617 100644
--- a/xlsx.mini.js
+++ b/xlsx.mini.js
@@ -7288,29 +7288,30 @@ function parse_dom_table(table, _opts) {
 	var rows = table.getElementsByTagName('tr');
 	var sheetRows = opts.sheetRows || 10000000;
 	var range = {s:{r:0,c:0},e:{r:0,c:0}};
-	var merges = [], midx = 0;
+	var merges = [], midx = 0, m, cache = {};
 	var rowinfo = [];
-	var _R = 0, R = 0, _C, C, RS, CS;
+	var _R = 0, R = 0, _C, C, RS, CS, row, elts, elt, h, v, o, _t;
 	for(; _R < rows.length && R < sheetRows; ++_R) {
-		var row = rows[_R];
+		row = rows[_R];
 		if (is_dom_element_hidden(row)) {
 			if (opts.display) continue;
 			rowinfo[R] = {hidden: true};
 		}
-		var elts = (row.children);
+		elts = (row.children);
 		for(_C = C = 0; _C < elts.length; ++_C) {
-			var elt = elts[_C];
+			elt = elts[_C];
 			if (opts.display && is_dom_element_hidden(elt)) continue;
-			var v = htmldecode(elt.innerHTML);
+			h = elt.innerHTML;
+			v = Object.prototype.hasOwnProperty.call(cache, h) ? cache[h] : (cache[h] = htmldecode(h));
 			for(midx = 0; midx < merges.length; ++midx) {
-				var m = merges[midx];
+				m = merges[midx];
 				if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
 			}
 			/* TODO: figure out how to extract nonstandard mso- style */
-			CS = +elt.getAttribute("colspan") || 1;
-			if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
-			var o = {t:'s', v:v};
-			var _t = elt.getAttribute("t") || "";
+			CS = +elt.colSpan || 1;
+			if((RS = +elt.rowSpan)>1 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
+			o = {t:'s', v:v};
+			_t = elt.getAttribute("t") || "";
 			if(v != null) {
 				if(v.length == 0) o.t = _t || 'z';
 				else if(opts.raw || v.trim().length == 0 || _t == "s"){}