XLSX: proper HTML entity escaping for the cell `.h` (HTML) field

This commit is contained in:
Jonas Berlin 2017-04-13 21:28:16 +03:00 committed by SheetJS
parent b3ace1e5a9
commit 616d2e534f
6 changed files with 40 additions and 6 deletions

@ -54,6 +54,11 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{
}
/* escapexmltag: pass `text` through escapexml, then encode every space as _x0020_ */
function escapexmltag(text/*:string*/)/*:string*/{
	var escaped = escapexml(text);
	return escaped.replace(/ /g,"_x0020_");
}
/* escapehtml: coerce `text` to a string and replace each decregex match with its rencoding entry */
/* NOTE(review): decregex / rencoding are defined outside this hunk; presumably the special-character pattern and the character -> entity table -- confirm */
function escapehtml(text){
	var encode = function(ch) { return rencoding[ch]; };
	return (text + '').replace(decregex, encode);
}
/* TODO: handle codepages */
var xlml_fixstr/*:StringConv*/ = (function() {
var entregex = /&#(\d+);/g;

@ -152,7 +152,7 @@ function parse_si(x, opts) {
if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) {
z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]));
z.r = utf8read(x);
if(html) z.h = z.t;
if(html) z.h = escapehtml(z.t);
}
/* 18.4.4 r CT_RElt (Rich Text Run) */
else if((y = x.match(sirregex))) {

@ -318,7 +318,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
case 'str':
p.t = "s";
p.v = (p.v!=null) ? utf8read(p.v) : '';
if(opts.cellHTML) p.h = p.v;
if(opts.cellHTML) p.h = escapehtml(p.v);
break;
case 'inlineStr':
cref = d.match(isregex);

19
test.js

@ -956,6 +956,25 @@ describe('parse features', function() {
}); });
});
/* HTML: a cell read back with cellHTML:true must expose entity-escaped text in .h */
describe('HTML', function() {
	var ws, wb;
	/* fixture: header row plus one row holding the three characters under test */
	var bef = (function() {
		var rows = [
			["a","b","c"],
			["&","<",">"]
		];
		ws = X.utils.aoa_to_sheet(rows);
		wb = {SheetNames:["Sheet1"],Sheets:{Sheet1:ws}};
	});
	/* use the `before` hook when the runner defines one; otherwise run the setup as a test case */
	if(typeof before == 'undefined') it('before', bef);
	else before(bef);
	/* cell address -> expected .h value, checked in this order */
	var expected = [["A2","&amp;"],["B2","&lt;"],["C2","&gt;"]];
	['xlsx'].forEach(function(fmt) { it(fmt, function() {
		var written = X.write(wb, {bookType:fmt, type:"binary"});
		var wb2 = X.read(written, {type:"binary", cellHTML:true});
		expected.forEach(function(pair) {
			assert.equal(get_cell(wb2.Sheets.Sheet1, pair[0]).h, pair[1]);
		});
	}); });
});
describe('page margins', function() {
function check_margin(margins, exp) {
assert.equal(margins.left, exp[0]);

@ -1605,6 +1605,11 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{
}
/* escapexmltag: pass `text` through escapexml, then encode every space as _x0020_ */
function escapexmltag(text/*:string*/)/*:string*/{
	var escaped = escapexml(text);
	return escaped.replace(/ /g,"_x0020_");
}
/* escapehtml: coerce `text` to a string and replace each decregex match with its rencoding entry */
/* NOTE(review): decregex / rencoding are defined outside this hunk; presumably the special-character pattern and the character -> entity table -- confirm */
function escapehtml(text){
	var encode = function(ch) { return rencoding[ch]; };
	return (text + '').replace(decregex, encode);
}
/* TODO: handle codepages */
var xlml_fixstr/*:StringConv*/ = (function() {
var entregex = /&#(\d+);/g;
@ -5774,7 +5779,7 @@ function parse_si(x, opts) {
if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) {
z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]));
z.r = utf8read(x);
if(html) z.h = z.t;
if(html) z.h = escapehtml(z.t);
}
/* 18.4.4 r CT_RElt (Rich Text Run) */
else if((y = x.match(sirregex))) {
@ -10190,7 +10195,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
case 'str':
p.t = "s";
p.v = (p.v!=null) ? utf8read(p.v) : '';
if(opts.cellHTML) p.h = p.v;
if(opts.cellHTML) p.h = escapehtml(p.v);
break;
case 'inlineStr':
cref = d.match(isregex);

@ -1554,6 +1554,11 @@ function escapexml(text, xml){
}
/* escapexmltag: pass `text` through escapexml, then encode every space as _x0020_ */
function escapexmltag(text){
	var escaped = escapexml(text);
	return escaped.replace(/ /g,"_x0020_");
}
/* escapehtml: coerce `text` to a string and replace each decregex match with its rencoding entry */
/* NOTE(review): decregex / rencoding are defined outside this hunk; presumably the special-character pattern and the character -> entity table -- confirm */
function escapehtml(text){
	var encode = function(ch) { return rencoding[ch]; };
	return (text + '').replace(decregex, encode);
}
/* TODO: handle codepages */
var xlml_fixstr = (function() {
var entregex = /&#(\d+);/g;
@ -5718,7 +5723,7 @@ function parse_si(x, opts) {
if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) {
z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]));
z.r = utf8read(x);
if(html) z.h = z.t;
if(html) z.h = escapehtml(z.t);
}
/* 18.4.4 r CT_RElt (Rich Text Run) */
else if((y = x.match(sirregex))) {
@ -10133,7 +10138,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
case 'str':
p.t = "s";
p.v = (p.v!=null) ? utf8read(p.v) : '';
if(opts.cellHTML) p.h = p.v;
if(opts.cellHTML) p.h = escapehtml(p.v);
break;
case 'inlineStr':
cref = d.match(isregex);