1
forked from sheetjs/sheetjs

XLSX Proper HTML entity escaping for .h

This commit is contained in:
Jonas Berlin 2017-04-13 21:28:16 +03:00 committed by SheetJS
parent b3ace1e5a9
commit 616d2e534f
6 changed files with 40 additions and 6 deletions

@ -54,6 +54,11 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{
} }
function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); } function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); }
/* Stringify `text`, then substitute every decregex match with its rencoding entry */
function escapehtml(text){
	/* replacer: look the matched substring up in the rencoding table */
	function to_entity(hit) { return rencoding[hit]; }
	return ('' + text).replace(decregex, to_entity);
}
/* TODO: handle codepages */ /* TODO: handle codepages */
var xlml_fixstr/*:StringConv*/ = (function() { var xlml_fixstr/*:StringConv*/ = (function() {
var entregex = /&#(\d+);/g; var entregex = /&#(\d+);/g;

@ -152,7 +152,7 @@ function parse_si(x, opts) {
if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) { if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) {
z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0])); z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]));
z.r = utf8read(x); z.r = utf8read(x);
if(html) z.h = z.t; if(html) z.h = escapehtml(z.t);
} }
/* 18.4.4 r CT_RElt (Rich Text Run) */ /* 18.4.4 r CT_RElt (Rich Text Run) */
else if((y = x.match(sirregex))) { else if((y = x.match(sirregex))) {

@ -318,7 +318,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
case 'str': case 'str':
p.t = "s"; p.t = "s";
p.v = (p.v!=null) ? utf8read(p.v) : ''; p.v = (p.v!=null) ? utf8read(p.v) : '';
if(opts.cellHTML) p.h = p.v; if(opts.cellHTML) p.h = escapehtml(p.v);
break; break;
case 'inlineStr': case 'inlineStr':
cref = d.match(isregex); cref = d.match(isregex);

19
test.js

@ -956,6 +956,25 @@ describe('parse features', function() {
}); }); }); });
}); });
describe('HTML', function() {
	/* workbook fixture: a header row plus one row of HTML-significant characters */
	var wb;
	function bef() {
		var ws = X.utils.aoa_to_sheet([
			["a","b","c"],
			["&","<",">"]
		]);
		wb = {SheetNames:["Sheet1"],Sheets:{Sheet1:ws}};
	}
	/* use the `before` hook when the runner defines one, otherwise run setup as a test */
	if(typeof before != 'undefined') before(bef);
	else it('before', bef);
	/* cell address -> expected value of the cell's `h` field after a write/read round trip */
	var expected = [
		["A2", "&amp;"],
		["B2", "&lt;"],
		["C2", "&gt;"]
	];
	['xlsx'].forEach(function(fmt) {
		it(fmt, function() {
			var payload = X.write(wb, {bookType:fmt, type:"binary"});
			var wb2 = X.read(payload, {type:"binary", cellHTML:true});
			expected.forEach(function(pair) {
				assert.equal(get_cell(wb2.Sheets.Sheet1, pair[0]).h, pair[1]);
			});
		});
	});
});
describe('page margins', function() { describe('page margins', function() {
function check_margin(margins, exp) { function check_margin(margins, exp) {
assert.equal(margins.left, exp[0]); assert.equal(margins.left, exp[0]);

@ -1605,6 +1605,11 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{
} }
function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); } function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); }
/* Stringify `text`, then substitute every decregex match with its rencoding entry */
function escapehtml(text){
	/* replacer: look the matched substring up in the rencoding table */
	function to_entity(hit) { return rencoding[hit]; }
	return ('' + text).replace(decregex, to_entity);
}
/* TODO: handle codepages */ /* TODO: handle codepages */
var xlml_fixstr/*:StringConv*/ = (function() { var xlml_fixstr/*:StringConv*/ = (function() {
var entregex = /&#(\d+);/g; var entregex = /&#(\d+);/g;
@ -5774,7 +5779,7 @@ function parse_si(x, opts) {
if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) { if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) {
z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0])); z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]));
z.r = utf8read(x); z.r = utf8read(x);
if(html) z.h = z.t; if(html) z.h = escapehtml(z.t);
} }
/* 18.4.4 r CT_RElt (Rich Text Run) */ /* 18.4.4 r CT_RElt (Rich Text Run) */
else if((y = x.match(sirregex))) { else if((y = x.match(sirregex))) {
@ -10190,7 +10195,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
case 'str': case 'str':
p.t = "s"; p.t = "s";
p.v = (p.v!=null) ? utf8read(p.v) : ''; p.v = (p.v!=null) ? utf8read(p.v) : '';
if(opts.cellHTML) p.h = p.v; if(opts.cellHTML) p.h = escapehtml(p.v);
break; break;
case 'inlineStr': case 'inlineStr':
cref = d.match(isregex); cref = d.match(isregex);

@ -1554,6 +1554,11 @@ function escapexml(text, xml){
} }
function escapexmltag(text){ return escapexml(text).replace(/ /g,"_x0020_"); } function escapexmltag(text){ return escapexml(text).replace(/ /g,"_x0020_"); }
/* Stringify `text`, then substitute every decregex match with its rencoding entry */
function escapehtml(text){
	/* replacer: look the matched substring up in the rencoding table */
	function to_entity(hit) { return rencoding[hit]; }
	return ('' + text).replace(decregex, to_entity);
}
/* TODO: handle codepages */ /* TODO: handle codepages */
var xlml_fixstr = (function() { var xlml_fixstr = (function() {
var entregex = /&#(\d+);/g; var entregex = /&#(\d+);/g;
@ -5718,7 +5723,7 @@ function parse_si(x, opts) {
if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) { if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) {
z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0])); z.t = utf8read(unescapexml(x.substr(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]));
z.r = utf8read(x); z.r = utf8read(x);
if(html) z.h = z.t; if(html) z.h = escapehtml(z.t);
} }
/* 18.4.4 r CT_RElt (Rich Text Run) */ /* 18.4.4 r CT_RElt (Rich Text Run) */
else if((y = x.match(sirregex))) { else if((y = x.match(sirregex))) {
@ -10133,7 +10138,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
case 'str': case 'str':
p.t = "s"; p.t = "s";
p.v = (p.v!=null) ? utf8read(p.v) : ''; p.v = (p.v!=null) ? utf8read(p.v) : '';
if(opts.cellHTML) p.h = p.v; if(opts.cellHTML) p.h = escapehtml(p.v);
break; break;
case 'inlineStr': case 'inlineStr':
cref = d.match(isregex); cref = d.match(isregex);