From 9368a85b5f6a4b4fd41f21413394c03ef9752978 Mon Sep 17 00:00:00 2001 From: SheetJS <dev@sheetjs.com> Date: Mon, 19 Aug 2024 12:43:37 -0400 Subject: [PATCH] XLSX encoded entities (fixes #3177) - HTML DOM ingress support formulae (`data-f`) - Sheet Visibility for ODS / FODS (fixes #3162) --- CHANGELOG.md | 4 ++++ bits/22_xmlutils.js | 2 +- bits/79_html.js | 3 +++ bits/80_parseods.js | 14 +++++++++++--- bits/81_writeods.js | 7 ++++++- package.json | 5 +---- test.js | 8 +++++--- 7 files changed, 31 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index c7c3612..350b601 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,10 @@ This log is intended to keep track of backwards-incompatible changes, including but not limited to API changes and file location changes. Minor behavioral changes may not be included if they are not expected to break existing code. +* Sheet Visibility for ODS / FODS (h/t @edemaine) +* HTML DOM ingress support formulae (`data-f`) +* Proper handling of XLSX encoded entities (h/t @inreoh) + ## v0.20.3 * Correct parsing of NUMBERS and ODS merge cells (h/t @s-ashwin) diff --git a/bits/22_xmlutils.js b/bits/22_xmlutils.js index bf0d9ea..fb4e5ef 100644 --- a/bits/22_xmlutils.js +++ b/bits/22_xmlutils.js @@ -67,7 +67,7 @@ var rencoding = /*#__PURE__*/evert(encodings); // TODO: CP remap (need to read file version to determine OS) var unescapexml/*:StringConv*/ = /*#__PURE__*/(function() { /* 22.4.2.4 bstr (Basic String) */ - var encregex = /&(?:quot|apos|gt|lt|amp|#x?([\da-fA-F]+));/ig, coderegex = /_x([\da-fA-F]{4})_/ig; + var encregex = /&(?:quot|apos|gt|lt|amp|#x?([\da-fA-F]+));/ig, coderegex = /_x([\da-fA-F]{4})_/g; function raw_unescapexml(text/*:string*/)/*:string*/ { var s = text + '', i = s.indexOf("<![CDATA["); if(i == -1) return s.replace(encregex, function($$, $1) { return encodings[$$]||String.fromCharCode(parseInt($1,$$.indexOf("x")>-1?16:10))||$$; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));}); diff --git a/bits/79_html.js b/bits/79_html.js index b271a84..5143a68 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -92,6 +92,7 @@ function make_html_row(ws/*:Worksheet*/, r/*:Range*/, R/*:number*/, o/*:Sheet2HT // note: data-v is unaffected by the timezone interpretation if(cell.v != null) sp["data-v"] = escapehtml(cell.v instanceof Date ? cell.v.toISOString() : cell.v); if(cell.z != null) sp["data-z"] = cell.z; + if(cell.f != null) sp["data-f"] = escapehtml(cell.f); if(cell.l && (cell.l.Target || "#").charAt(0) != "#") w = '<a href="' + escapehtml(cell.l.Target) +'">' + w + '</a>'; } sp.id = (o.id || "sjs") + "-" + coord; @@ -179,6 +180,7 @@ function sheet_add_dom(ws/*:Worksheet*/, table/*:HTMLElement*/, _opts/*:?any*/)/ if (opts.display && is_dom_element_hidden(elt)) continue; var v/*:?string*/ = elt.hasAttribute('data-v') ? elt.getAttribute('data-v') : elt.hasAttribute('v') ? elt.getAttribute('v') : htmldecode(elt.innerHTML); var z/*:?string*/ = elt.getAttribute('data-z') || elt.getAttribute('z'); + var f/*:?string*/ = elt.hasAttribute('data-f') ? elt.getAttribute('data-f') : elt.hasAttribute('f') ? elt.getAttribute('f') : null; for(midx = 0; midx < merges.length; ++midx) { var m/*:Range*/ = merges[midx]; if(m.s.c == C + or_C && m.s.r < R + or_R && R + or_R <= m.e.r) { C = m.e.c+1 - or_C; midx = -1; } @@ -210,6 +212,7 @@ function sheet_add_dom(ws/*:Worksheet*/, table/*:HTMLElement*/, _opts/*:?any*/)/ l = Aelts[Aelti].getAttribute("href"); if(l.charAt(0) != "#") break; } if(l && l.charAt(0) != "#" && l.slice(0, 11).toLowerCase() != 'javascript:') o.l = ({ Target: l }); + if(f != null) o.f = f; if(dense) { if(!ws["!data"][R + or_R]) ws["!data"][R + or_R] = []; ws["!data"][R + or_R][C + or_C] = o; } else ws[encode_cell({c:C + or_C, r:R + or_R})] = o; if(range.e.c < C + or_C) range.e.c = C + or_C; diff --git a/bits/80_parseods.js b/bits/80_parseods.js index 769d88e..ff1718b 100644 --- a/bits/80_parseods.js +++ b/bits/80_parseods.js @@ -252,11 +252,11 @@ function parse_content_xml(d/*:string*/, _opts, _nfm)/*:Workbook*/ { var textR = [], oldtextR = []; var R = -1, C = -1, range = {s: {r:1000000,c:10000000}, e: {r:0, c:0}}; var row_ol = 0; - var number_format_map = _nfm || {}, styles = {}; + var number_format_map = _nfm || {}, styles = {}, tstyles = {}; var merges/*:Array<Range>*/ = [], mrange = {}, mR = 0, mC = 0; var rowinfo/*:Array<RowInfo>*/ = [], rowpeat = 1, colpeat = 1; var arrayf/*:Array<[Range, string]>*/ = []; - var WB = {Names:[], WBProps:{}}; + var WB = {Names:[], WBProps:{}, Sheets:[]}; var atag = ({}/*:any*/); var _Ref/*:[string, string]*/ = ["", ""]; var comments/*:Array<Comment>*/ = [], comment/*:Comment*/ = ({}/*:any*/); @@ -282,6 +282,10 @@ function parse_content_xml(d/*:string*/, _opts, _nfm)/*:Workbook*/ { if(typeof JSON !== 'undefined') JSON.stringify(sheetag); SheetNames.push(sheetag.name); Sheets[sheetag.name] = ws; + WB.Sheets.push({ + /* TODO: CodeName */ + Hidden: (tstyles[sheetag["style-name"]] && tstyles[sheetag["style-name"]]["display"] ? (parsexmlbool(tstyles[sheetag["style-name"]]["display"]) ? 0 : 1) : 0) + }); intable = false; } else if(Rn[0].charAt(Rn[0].length-2) !== '/') { @@ -529,12 +533,16 @@ function parse_content_xml(d/*:string*/, _opts, _nfm)/*:Workbook*/ { case 'style': { // 16.2 <style:style> var styletag = parsexmltag(Rn[0], false); if(styletag["family"] == "table-cell" && number_format_map[styletag["data-style-name"]]) styles[styletag["name"]] = number_format_map[styletag["data-style-name"]]; + else if(styletag["family"] == "table") tstyles[styletag["name"]] = styletag; } break; case 'map': break; // 16.3 <style:map> case 'font-face': break; // 16.21 <style:font-face> case 'paragraph-properties': break; // 17.6 <style:paragraph-properties> - case 'table-properties': break; // 17.15 <style:table-properties> + case 'table-properties': { // 17.15 <style:table-properties> + var proptag = parsexmltag(Rn[0], false); + if(styletag && styletag.family == "table") styletag.display = proptag.display; + } break; case 'table-column-properties': break; // 17.16 <style:table-column-properties> case 'table-row-properties': break; // 17.17 <style:table-row-properties> case 'table-cell-properties': break; // 17.18 <style:table-cell-properties> diff --git a/bits/81_writeods.js b/bits/81_writeods.js index 997f42f..d6386db 100644 --- a/bits/81_writeods.js +++ b/bits/81_writeods.js @@ -214,7 +214,9 @@ var write_content_ods/*:{(wb:any, opts:any):string}*/ = /* @__PURE__ */(function var write_ws = function(ws, wb/*:Workbook*/, i/*:number*/, opts, nfs, date1904)/*:string*/ { /* Section 9 Tables */ var o/*:Array<string>*/ = []; - o.push(' <table:table table:name="' + escapexml(wb.SheetNames[i]) + '" table:style-name="ta1">\n'); + var tstyle = "ta1"; + if(((((wb||{}).Workbook||{}).Sheets||[])[i]||{}).Hidden) tstyle = "ta2"; + o.push(' <table:table table:name="' + escapexml(wb.SheetNames[i]) + '" table:style-name="' + tstyle + '">\n'); var R=0,C=0, range = decode_range(ws['!ref']||"A1"); var marr/*:Array<Range>*/ = ws['!merges'] || [], mi = 0; var dense = ws["!data"] != null; @@ -362,6 +364,9 @@ var write_content_ods/*:{(wb:any, opts:any):string}*/ = /* @__PURE__ */(function o.push(' <style:style style:name="ta1" style:family="table" style:master-page-name="mp1">\n'); o.push(' <style:table-properties table:display="true" style:writing-mode="lr-tb"/>\n'); o.push(' </style:style>\n'); + o.push(' <style:style style:name="ta2" style:family="table" style:master-page-name="mp1">\n'); + o.push(' <style:table-properties table:display="false" style:writing-mode="lr-tb"/>\n'); + o.push(' </style:style>\n'); o.push(' <number:date-style style:name="N37" number:automatic-order="true">\n'); o.push(' <number:month number:style="long"/>\n'); diff --git a/package.json b/package.json index 3d78bd0..d84c746 100644 --- a/package.json +++ b/package.json @@ -142,10 +142,7 @@ "url": "https://git.sheetjs.com/SheetJS/sheetjs" }, "scripts": { - "pretest": "npm run lint", - "test": "npm run tests-only", - "pretest-only": "git submodule init && git submodule update", - "tests-only": "make travis", + "test": "make travis", "build": "make", "lint": "make fullint", "dtslint": "dtslint types" diff --git a/test.js b/test.js index baca005..0a3853b 100644 --- a/test.js +++ b/test.js @@ -1772,8 +1772,8 @@ describe('roundtrip features', function() { ['xlsx', paths.svxlsx], ['xlsb', paths.svxlsb], ['xls', paths.svxls], - ['biff5', paths.svxls5] - // ['ods', paths.svods] + ['biff5', paths.svxls5], + ['ods', paths.svods] ].forEach(function(w) { it(w[0], function() { var wb1 = X.read(fs.readFileSync(w[1]), {type:TYPE}); @@ -1783,7 +1783,9 @@ describe('roundtrip features', function() { assert.equal(wbs1.length, wbs2.length); for(var i = 0; i < wbs1.length; ++i) { assert.equal(wbs1[i].name, wbs2[i].name); - assert.equal(wbs1[i].Hidden, wbs2[i].Hidden); + /* NOTE: ODS does not support the equivalent of "Very Hidden" */ + if(w[0] != "ods") assert.equal(wbs1[i].Hidden, wbs2[i].Hidden); + else assert.equal(!!wbs1[i].Hidden, !!wbs2[i].Hidden); } }); });