From 616d2e534fc8736040fbb4fbdeb5ff97a515aad4 Mon Sep 17 00:00:00 2001 From: Jonas Berlin Date: Thu, 13 Apr 2017 21:28:16 +0300 Subject: [PATCH] XLSX Proper HTML entity escaping for .h --- bits/22_xmlutils.js | 5 +++++ bits/42_sstxml.js | 2 +- bits/67_wsxml.js | 2 +- test.js | 19 +++++++++++++++++++ xlsx.flow.js | 9 +++++++-- xlsx.js | 9 +++++++-- 6 files changed, 40 insertions(+), 6 deletions(-) diff --git a/bits/22_xmlutils.js b/bits/22_xmlutils.js index 944faa8..201b0ee 100644 --- a/bits/22_xmlutils.js +++ b/bits/22_xmlutils.js @@ -54,6 +54,11 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{ } function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); } +function escapehtml(text){ + var s = text + ''; + return s.replace(decregex, function(y) { return rencoding[y]; }); +} + /* TODO: handle codepages */ var xlml_fixstr/*:StringConv*/ = (function() { var entregex = /&#(\d+);/g; diff --git a/bits/42_sstxml.js b/bits/42_sstxml.js index 5de5c4c..f3e2461 100644 --- a/bits/42_sstxml.js +++ b/bits/42_sstxml.js @@ -152,7 +152,7 @@ function parse_si(x, opts) { if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) { z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0])); z.r = utf8read(x); - if(html) z.h = z.t; + if(html) z.h = escapehtml(z.t); } /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((y = x.match(sirregex))) { diff --git a/bits/67_wsxml.js b/bits/67_wsxml.js index e25d10b..c58a3a1 100644 --- a/bits/67_wsxml.js +++ b/bits/67_wsxml.js @@ -318,7 +318,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) { case 'str': p.t = "s"; p.v = (p.v!=null) ? 
utf8read(p.v) : ''; - if(opts.cellHTML) p.h = p.v; + if(opts.cellHTML) p.h = escapehtml(p.v); break; case 'inlineStr': cref = d.match(isregex); diff --git a/test.js b/test.js index 3302a6d..2b8e928 100644 --- a/test.js +++ b/test.js @@ -956,6 +956,25 @@ describe('parse features', function() { }); }); }); + describe('HTML', function() { + var ws, wb; + var bef = (function() { + ws = X.utils.aoa_to_sheet([ + ["a","b","c"], + ["&","<",">"] + ]); + wb = {SheetNames:["Sheet1"],Sheets:{Sheet1:ws}}; + }); + if(typeof before != 'undefined') before(bef); + else it('before', bef); + ['xlsx'].forEach(function(m) { it(m, function() { + var wb2 = X.read(X.write(wb, {bookType:m, type:"binary"}),{type:"binary", cellHTML:true}); + assert.equal(get_cell(wb2.Sheets.Sheet1, "A2").h, "&amp;"); + assert.equal(get_cell(wb2.Sheets.Sheet1, "B2").h, "&lt;"); + assert.equal(get_cell(wb2.Sheets.Sheet1, "C2").h, "&gt;"); + }); }); + }); + describe('page margins', function() { function check_margin(margins, exp) { assert.equal(margins.left, exp[0]); diff --git a/xlsx.flow.js b/xlsx.flow.js index 8f5a4d7..a6ecf85 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -1605,6 +1605,11 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{ } function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); } +function escapehtml(text){ + var s = text + ''; + return s.replace(decregex, function(y) { return rencoding[y]; }); +} + /* TODO: handle codepages */ var xlml_fixstr/*:StringConv*/ = (function() { var entregex = /&#(\d+);/g; @@ -5774,7 +5779,7 @@ function parse_si(x, opts) { if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) { z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0])); z.r = utf8read(x); - if(html) z.h = z.t; + if(html) z.h = escapehtml(z.t); } /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((y = x.match(sirregex))) { @@ -10190,7 +10195,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) { case 'str': p.t = 
"s"; p.v = (p.v!=null) ? utf8read(p.v) : ''; - if(opts.cellHTML) p.h = p.v; + if(opts.cellHTML) p.h = escapehtml(p.v); break; case 'inlineStr': cref = d.match(isregex); diff --git a/xlsx.js b/xlsx.js index 8519e87..e56cf7c 100644 --- a/xlsx.js +++ b/xlsx.js @@ -1554,6 +1554,11 @@ function escapexml(text, xml){ } function escapexmltag(text){ return escapexml(text).replace(/ /g,"_x0020_"); } +function escapehtml(text){ + var s = text + ''; + return s.replace(decregex, function(y) { return rencoding[y]; }); +} + /* TODO: handle codepages */ var xlml_fixstr = (function() { var entregex = /&#(\d+);/g; @@ -5718,7 +5723,7 @@ function parse_si(x, opts) { if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) { z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0])); z.r = utf8read(x); - if(html) z.h = z.t; + if(html) z.h = escapehtml(z.t); } /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((y = x.match(sirregex))) { @@ -10133,7 +10138,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) { case 'str': p.t = "s"; p.v = (p.v!=null) ? utf8read(p.v) : ''; - if(opts.cellHTML) p.h = p.v; + if(opts.cellHTML) p.h = escapehtml(p.v); break; case 'inlineStr': cref = d.match(isregex);