newline normalization

This commit is contained in:
SheetJS 2022-04-20 13:31:11 -04:00
parent e6b6f382c0
commit 8124fcbae0
11 changed files with 72 additions and 33 deletions

View File

@ -48,11 +48,15 @@ var rencoding = /*#__PURE__*/evert(encodings);
var unescapexml/*:StringConv*/ = /*#__PURE__*/(function() {
/* 22.4.2.4 bstr (Basic String) */
var encregex = /&(?:quot|apos|gt|lt|amp|#x?([\da-fA-F]+));/ig, coderegex = /_x([\da-fA-F]{4})_/ig;
return function unescapexml(text/*:string*/)/*:string*/ {
function raw_unescapexml(text/*:string*/)/*:string*/ {
var s = text + '', i = s.indexOf("<![CDATA[");
if(i == -1) return s.replace(encregex, function($$, $1) { return encodings[$$]||String.fromCharCode(parseInt($1,$$.indexOf("x")>-1?16:10))||$$; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));});
var j = s.indexOf("]]>");
return unescapexml(s.slice(0, i)) + s.slice(i+9,j) + unescapexml(s.slice(j+3));
return raw_unescapexml(s.slice(0, i)) + s.slice(i+9,j) + raw_unescapexml(s.slice(j+3));
}
return function unescapexml(text/*:string*/, xlsx/*:boolean*/) {
var out = raw_unescapexml(text);
return xlsx ? out.replace(/\r\n/g, "\n") : out;
};
})();

View File

@ -185,6 +185,7 @@ function parse_PropertySet(blob, PIDSI) {
if(fail) throw new Error("Read Error: Expected address " + Props[i][1] + ' at ' + blob.l + ' :' + i);
}
if(PIDSI) {
if(Props[i][0] == 0 && Props.length > i+1 && Props[i][1] == Props[i+1][1]) continue; // R9
var piddsi = PIDSI[Props[i][0]];
PropH[piddsi.n] = parse_TypedPropertyValue(blob, piddsi.t, {raw:true});
if(piddsi.p === 'version') PropH[piddsi.n] = String(PropH[piddsi.n] >> 16) + "." + ("0000" + String(PropH[piddsi.n] & 0xFFFF)).slice(-4);

View File

@ -905,10 +905,9 @@ var PRN = /*#__PURE__*/(function() {
else sep = guess_sep(str.slice(0,1024));
var R = 0, C = 0, v = 0;
var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0, startcc=str.charCodeAt(0);
str = str.replace(/\r\n/mg, "\n");
var _re/*:?RegExp*/ = o.dateNF != null ? dateNF_regex(o.dateNF) : null;
function finish_cell() {
var s = str.slice(start, end);
var s = str.slice(start, end); if(s.slice(-1) == "\r") s = s.slice(0, -1);
var cell = ({}/*:any*/);
if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"');
if(s.length === 0) cell.t = 'z';
@ -943,7 +942,11 @@ var PRN = /*#__PURE__*/(function() {
}
outer: for(;end < str.length;++end) switch((cc=str.charCodeAt(end))) {
case 0x22: if(startcc === 0x22) instr = !instr; break;
case sepcc: case 0x0a: case 0x0d: if(!instr && finish_cell()) break outer; break;
case 0x0d:
if(instr) break;
if(str.charCodeAt(end+1) == 0x0a) ++end;
/* falls through */
case sepcc: case 0x0a: if(!instr && finish_cell()) break outer; break;
default: break;
}
if(end - start > 0) finish_cell();

View File

@ -47,12 +47,17 @@ var WK_ = /*#__PURE__*/(function() {
o.vers = val;
if(val >= 0x1000) o.qpro = true;
break;
case 0xFF: /* BOF (works 3+) */
o.vers = val;
o.works = true;
break;
case 0x06: refguess = val; break; /* RANGE */
case 0xCC: if(val) next_n = val; break; /* SHEETNAMECS */
case 0xDE: next_n = val; break; /* SHEETNAMELP */
case 0x0F: /* LABEL */
case 0x33: /* STRING */
if(!o.qpro) val[1].v = val[1].v.slice(1);
if((!o.qpro && !o.works || RT == 0x33) && val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1);
if(o.works || o.works2) val[1].v = val[1].v.replace(/\r\n/g, "\n");
/* falls through */
case 0x0D: /* INTEGER */
case 0x0E: /* NUMBER */
@ -86,6 +91,7 @@ var WK_ = /*#__PURE__*/(function() {
s[val[0].r][val[0].c] = val[1];
} else s[encode_cell(val[0])] = val[1];
break;
case 0x5405: o.works2 = true; break;
default:
}}, o);
} else if(d[2] == 0x1A || d[2] == 0x0E) {
@ -94,7 +100,9 @@ var WK_ = /*#__PURE__*/(function() {
lotushopper(d, function(val, R, RT) { switch(RT) {
case 0xCC: n = val; break; /* SHEETNAMECS */
case 0x16: /* LABEL16 */
val[1].v = val[1].v.slice(1);
if(val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1);
// TODO: R9 appears to encode control codes this way -- verify against other versions
val[1].v = val[1].v.replace(/\x0F./g, function($$) { return String.fromCharCode($$.charCodeAt(1) - 0x20); }).replace(/\r\n/g, "\n");
/* falls through */
case 0x17: /* NUMBER17 */
case 0x18: /* NUMBER18 */
@ -289,6 +297,9 @@ var WK_ = /*#__PURE__*/(function() {
o[3] = blob.read_shift(1);
o[0].r = blob.read_shift(2);
blob.l+=2;
} else if(opts.works) { // TODO: verify with more complex works3-4 examples
o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2);
o[2] = blob.read_shift(2);
} else {
o[2] = blob.read_shift(1);
o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2);
@ -324,6 +335,18 @@ var WK_ = /*#__PURE__*/(function() {
o.write_shift(1, 0);
return o;
}
function parse_STRING(blob, length, opts) {
var tgt = blob.l + length;
var o = parse_cell(blob, length, opts);
o[1].t = 's';
if(opts.vers == 0x5120) {
var len = blob.read_shift(1);
o[1].v = blob.read_shift(len, 'utf8');
return o;
}
o[1].v = blob.read_shift(tgt - blob.l, 'cstr');
return o;
}
function parse_INTEGER(blob, length, opts) {
var o = parse_cell(blob, length, opts);
@ -382,6 +405,7 @@ var WK_ = /*#__PURE__*/(function() {
0x33: ["FALSE", 0],
0x34: ["TRUE", 0],
0x46: ["LEN", 1],
0x4A: ["CHAR", 1],
0x50: ["SUM", 69],
0x51: ["AVERAGEA", 69],
0x52: ["COUNTA", 69],
@ -572,8 +596,8 @@ var WK_ = /*#__PURE__*/(function() {
}
function parse_FORMULA_28(blob, length) {
var o = parse_NUMBER_27(blob, 14);
blob.l += length - 10; /* TODO: formula */
var o = parse_NUMBER_27(blob, 12);
blob.l += length - 12; /* TODO: formula */
return o;
}
@ -663,7 +687,7 @@ var WK_ = /*#__PURE__*/(function() {
/*::[*/0x0030/*::]*/: { n:"UNFORMATTED" },
/*::[*/0x0031/*::]*/: { n:"CURSORW12" },
/*::[*/0x0032/*::]*/: { n:"WINDOW" },
/*::[*/0x0033/*::]*/: { n:"STRING", f:parse_LABEL },
/*::[*/0x0033/*::]*/: { n:"STRING", f:parse_STRING },
/*::[*/0x0037/*::]*/: { n:"PASSWORD" },
/*::[*/0x0038/*::]*/: { n:"LOCKED" },
/*::[*/0x003C/*::]*/: { n:"QUERY" },
@ -687,6 +711,7 @@ var WK_ = /*#__PURE__*/(function() {
/*::[*/0x0069/*::]*/: { n:"MRANGES??" },
/*::[*/0x00CC/*::]*/: { n:"SHEETNAMECS", f:parse_SHEETNAMECS },
/*::[*/0x00DE/*::]*/: { n:"SHEETNAMELP", f:parse_SHEETNAMELP },
/*::[*/0x00FF/*::]*/: { n:"BOF", f:parseuint16 },
/*::[*/0xFFFF/*::]*/: { n:"" }
};

View File

@ -178,14 +178,14 @@ function parse_si(x, opts) {
/* 18.4.12 t ST_Xstring (Plaintext String) */
// TODO: is whitespace actually valid here?
if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) {
z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||""));
z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||""), true);
z.r = utf8read(x);
if(html) z.h = escapehtml(z.t);
}
/* 18.4.4 r CT_RElt (Rich Text Run) */
else if((/*y = */x.match(sirregex))) {
z.r = utf8read(x);
z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,"")));
z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,"")), true);
if(html) z.h = rs_to_html(parse_rs(z.r));
}
/* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */

View File

@ -14,27 +14,33 @@ var RTF = /*#__PURE__*/(function() {
var o = opts || {};
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var rows = str.match(/\\trowd.*?\\row\b/g);
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\\w+\b/g;
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data[0] == " ") data = data.slice(1);
++C;
if(data.length) {
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: data, t:"s"};
var cell = {v: payload.join(""), t:"s"};
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
@ -60,7 +66,7 @@ var RTF = /*#__PURE__*/(function() {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
o.push(" " + (cell.w || (format_cell(cell), cell.w)));
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");

View File

@ -388,7 +388,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th
if(opts.cellFormula) {
if((cref=d.match(match_f))!= null && /*::cref != null && */cref[1] !== '') {
/* TODO: match against XLSXFutureFunctions */
p.f=unescapexml(utf8read(cref[1])).replace(/\r\n/g, "\n");
p.f=unescapexml(utf8read(cref[1]), true);
if(!opts.xlfn) p.f = _xlfn(p.f);
if(/*::cref != null && cref[0] != null && */cref[0].indexOf('t="array"') > -1) {
p.F = (d.match(refregex)||[])[1];
@ -442,7 +442,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th
break;
case 'str':
p.t = "s";
p.v = (p.v!=null) ? utf8read(p.v) : '';
p.v = (p.v!=null) ? unescapexml(utf8read(p.v), true) : '';
if(opts.cellHTML) p.h = escapehtml(p.v);
break;
case 'inlineStr':

View File

@ -803,6 +803,8 @@ function parse_ws_bin(data, _opts, idx, rels, wb/*:WBWBProps*/, themes, styles)/
/* TODO: something useful -- this is a stub */
function write_ws_bin_cell(ba/*:BufArray*/, cell/*:Cell*/, R/*:number*/, C/*:number*/, opts, ws/*:Worksheet*/, last_seen/*:boolean*/)/*:boolean*/ {
var o/*:any*/ = ({r:R, c:C}/*:any*/);
if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]);
if(cell.v === undefined) return false;
var vv = "";
switch(cell.t) {
@ -816,11 +818,9 @@ function write_ws_bin_cell(ba/*:BufArray*/, cell/*:Cell*/, R/*:number*/, C/*:num
case 'n': case 'e': vv = ''+cell.v; break;
default: vv = cell.v; break;
}
var o/*:any*/ = ({r:R, c:C}/*:any*/);
/* TODO: cell style */
o.s = get_cell_style(opts.cellXfs, cell, opts);
if(cell.l) ws['!links'].push([encode_cell(o), cell.l]);
if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]);
switch(cell.t) {
case 's': case 'str':
if(opts.bookSST) {

View File

@ -13,8 +13,8 @@ function write_zip_xlsb(wb/*:Workbook*/, opts/*:WriteOpts*/)/*:ZIP*/ {
opts.Strings = /*::((*/[]/*:: :any):SST)*/; opts.Strings.Count = 0; opts.Strings.Unique = 0;
if(browser_has_Map) opts.revStrings = new Map();
else { opts.revStrings = {}; opts.revStrings.foo = []; delete opts.revStrings.foo; }
var wbext = opts.bookType == "xlsb" ? "bin" : "xml";
var vbafmt = VBAFMTS.indexOf(opts.bookType) > -1;
var wbext = "bin";
var vbafmt = true;
var ct = new_ct();
fix_write_opts(opts = opts || {});
var zip = zip_new();
@ -214,10 +214,10 @@ function write_zip_xlsx(wb/*:Workbook*/, opts/*:WriteOpts*/)/*:ZIP*/ {
carr[1].forEach(function(c) { if(c.T == true) needtc = true; });
});
if(needtc) {
cf = "xl/threadedComments/threadedComment" + rId + "." + wbext;
cf = "xl/threadedComments/threadedComment" + rId + ".xml";
zip_add_file(zip, cf, write_tcmnt_xml(comments, people, opts));
ct.threadedcomments.push(cf);
add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + "." + wbext, RELS.TCMNT);
add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + ".xml", RELS.TCMNT);
}
cf = "xl/comments" + rId + "." + wbext;

View File

@ -192,7 +192,7 @@ var do_file = (function() {
})();
(function() {
var dropZone = document.getElementById('drop-zone')
var dropZone = document.getElementById('drop-zone');
if(!dropZone.addEventListener && !window.addEventListener) return;
function handleDrop(e) {
@ -217,15 +217,15 @@ var do_file = (function() {
dropZone.style.zIndex = zIndex;
}
window.addEventListener('drop' , handleDrop)
window.addEventListener('dragover' , handleDragover)
window.addEventListener('drop' , handleDrop);
window.addEventListener('dragover' , handleDragover);
window.addEventListener('dragenter' , function(e){
dropZoneDisplay(e, true);
})
});
dropZone.addEventListener('dragleave' , function(e){
dropZoneDisplay(e, false);
})
});
})();
(function() {

@ -1 +1 @@
Subproject commit 1935dfc8b9edf59b00a3dc031ddfefa8add40179
Subproject commit 57645de9ec3abd7c5ffd94d2eeb26c3a1074e507