From 694cdcb75a77fc677dffcc9c84c4124ef4357e48 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Mon, 25 Apr 2022 05:02:14 -0400 Subject: [PATCH] QPW and newline tests --- README.md | 3 +- bits/41_lotus.js | 139 +++++++++++++++++++++++ docbits/20_import.md | 4 +- docbits/85_filetype.md | 1 + misc/docs/README.md | 7 +- test.js | 112 +++++++++++++++++++ test_files | 2 +- tests/core.js | 112 +++++++++++++++++++ tests/fixtures.lst | 35 ++++++ xlsx.flow.js | 248 +++++++++++++++++++++++++++++++++++------ xlsx.js | 248 +++++++++++++++++++++++++++++++++++------ 11 files changed, 836 insertions(+), 75 deletions(-) diff --git a/README.md b/README.md index 2aae214..901a9b4 100644 --- a/README.md +++ b/README.md @@ -1085,7 +1085,7 @@ async function process_RS(stream) { const data = await process_RS(stream); /* data is Uint8Array */ -const workbook = XLSX.read(data, {type: "array"}); +const workbook = XLSX.read(data, {type: 'array'}); ``` @@ -4092,6 +4092,7 @@ range limits will be silently truncated: | Excel 2007+ XML Formats (XLSX/XLSM) | XFD1048576 | 16384 | 1048576 | | Excel 2007+ Binary Format (XLSB BIFF12) | XFD1048576 | 16384 | 1048576 | | Numbers 12.0 (NUMBERS) | ALL1000000 | 1000 | 1000000 | +| Quattro Pro 9+ (QPW) | IV1000000 | 256 | 1000000 | | Excel 97-2004 (XLS BIFF8) | IV65536 | 256 | 65536 | | Excel 5.0/95 (XLS BIFF5) | IV16384 | 256 | 16384 | | Excel 4.0 (XLS BIFF4) | IV16384 | 256 | 16384 | diff --git a/bits/41_lotus.js b/bits/41_lotus.js index 4b58eb7..8581243 100644 --- a/bits/41_lotus.js +++ b/bits/41_lotus.js @@ -34,6 +34,7 @@ var WK_ = /*#__PURE__*/(function() { var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; var sheetRows = o.sheetRows || 0; + if(d[4] == 0x51 && d[5] == 0x50 && d[6] == 0x57) return qpw_to_workbook_buf(d, opts); if(d[2] == 0x00) { if(d[3] == 0x08 || d[3] == 0x09) { if(d.length >= 16 && d[14] == 0x05 && d[15] === 0x6c) throw new Error("Unsupported Works 3 for Mac file"); @@ -840,6 +841,144 @@ var WK_ = /*#__PURE__*/(function() { /*::[*/0x6F44/*::]*/: { n:"??" }, /*::[*/0xFFFF/*::]*/: { n:"" } }; + + /* QPW uses a different set of record types */ + function qpw_to_workbook_buf(d, opts)/*:Workbook*/ { + prep_blob(d, 0); + var o = opts || {}; + if(DENSE != null && o.dense == null) o.dense = DENSE; + var s/*:Worksheet*/ = ((o.dense ? [] : {})/*:any*/); + var SST = [], sname = "", formulae = []; + var range = {s:{r:-1,c:-1}, e:{r:-1,c:-1}}; + var cnt = 0, type = 0, C = 0, R = 0; + var wb = { SheetNames: [], Sheets: {} }; + outer: while(d.l < d.length) { + var RT = d.read_shift(2), length = d.read_shift(2); + var p = d.slice(d.l, d.l + length); + prep_blob(p, 0); + switch(RT) { + case 0x01: /* BOF */ + if(p.read_shift(4) != 0x39575051) throw "Bad QPW9 BOF!"; + break; + case 0x02: /* EOF */ break outer; + + /* TODO: The behavior here should be consistent with Numbers: QP Notebook ~ .TN.SheetArchive, QP Sheet ~ .TST.TSTable */ + case 0x0401: /* BON */ break; + case 0x0402: /* EON */ /* TODO: backfill missing sheets based on BON cnt */ break; + + case 0x0407: { /* SST */ + p.l += 12; + while(p.l < p.length) { + cnt = p.read_shift(2); + type = p.read_shift(1); + SST.push(p.read_shift(cnt, 'cstr')); + } + } break; + case 0x0408: { /* FORMULAE */ + //p.l += 12; + //while(p.l < p.length) { + // cnt = p.read_shift(2); + // formulae.push(p.slice(p.l, p.l + cnt + 1)); p.l += cnt + 1; + //} + } break; + + case 0x0601: { /* BOS */ + var sidx = p.read_shift(2); + s = ((o.dense ? [] : {})/*:any*/); + range.s.c = p.read_shift(2); + range.e.c = p.read_shift(2); + range.s.r = p.read_shift(4); + range.e.r = p.read_shift(4); + p.l += 4; + if(p.l + 2 < p.length) { + cnt = p.read_shift(2); + type = p.read_shift(1); + sname = cnt == 0 ? "" : p.read_shift(cnt, 'cstr'); + } + if(!sname) sname = XLSX.utils.encode_col(sidx); + /* TODO: backfill empty sheets */ + } break; + case 0x0602: { /* EOS */ + /* NOTE: QP valid range A1:IV1000000 */ + if(range.s.c > 0xFF || range.s.r > 999999) break; + if(range.e.c < range.s.c) range.e.c = range.s.c; + if(range.e.r < range.s.r) range.e.r = range.s.r; + s["!ref"] = encode_range(range); + book_append_sheet(wb, s, sname); // TODO: a barrel roll + } break; + + case 0x0A01: { /* COL (like XLS Row, modulo the layout transposition) */ + C = p.read_shift(2); + if(range.e.c < C) range.e.c = C; + if(range.s.c > C) range.s.c = C; + R = p.read_shift(4); + if(range.s.r > R) range.s.r = R; + R = p.read_shift(4); + if(range.e.r < R) range.e.r = R; + } break; + + case 0x0C01: { /* MulCells (like XLS MulRK, but takes advantage of common column data patterns) */ + R = p.read_shift(4), cnt = p.read_shift(4); + if(range.s.r > R) range.s.r = R; + if(range.e.r < R + cnt - 1) range.e.r = R + cnt - 1; + while(p.l < p.length) { + var cell = { t: "z" }; + var flags = p.read_shift(1); + if(flags & 0x80) p.l += 2; + var mul = (flags & 0x40) ? p.read_shift(2) - 1: 0; + switch(flags & 0x1F) { + case 1: break; + case 2: cell = { t: "n", v: p.read_shift(2) }; break; + case 3: cell = { t: "n", v: p.read_shift(2, 'i') }; break; + case 5: cell = { t: "n", v: p.read_shift(8, 'f') }; break; + case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break; + case 8: cell = { t: "n", v: p.read_shift(8, 'f') }; p.l += 2; /* cell.f = formulae[p.read_shift(4)]; */ p.l += 4; break; + default: throw "Unrecognized QPW cell type " + (flags & 0x1F); + } + var delta = 0; + if(flags & 0x20) switch(flags & 0x1F) { + case 2: delta = p.read_shift(2); break; + case 3: delta = p.read_shift(2, 'i'); break; + case 7: delta = p.read_shift(2); break; + default: throw "Unsupported delta for QPW cell type " + (flags & 0x1F); + } + if(!(!o.sheetStubs && cell.t == "z")) { + if(Array.isArray(s)) { + if(!s[R]) s[R] = []; + s[R][C] = cell; + } else s[encode_cell({r:R, c:C})] = cell; + } + ++R; --cnt; + while(mul-- > 0 && cnt >= 0) { + if(flags & 0x20) switch(flags & 0x1F) { + case 2: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; break; + case 3: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; if(cell.v > 0x7FFF) cell.v -= 0x10000; break; + case 7: cell = { t: "s", v: SST[type = (type + delta) >>> 0] }; break; + default: throw "Cannot apply delta for QPW cell type " + (flags & 0x1F); + } else switch(flags & 0x1F) { + case 1: cell = { t: "z" }; break; + case 2: cell = { t: "n", v: p.read_shift(2) }; break; + case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break; + default: throw "Cannot apply repeat for QPW cell type " + (flags & 0x1F); + } + if(!(!o.sheetStubs && cell.t == "z")) { + if(Array.isArray(s)) { + if(!s[R]) s[R] = []; + s[R][C] = cell; + } else s[encode_cell({r:R, c:C})] = cell; + } + ++R; --cnt; + } + } + } break; + + default: break; + } + d.l += length; + } + return wb; + } + return { sheet_to_wk1: sheet_to_wk1, book_to_wk3: book_to_wk3, diff --git a/docbits/20_import.md b/docbits/20_import.md index 8aa5504..68343be 100644 --- a/docbits/20_import.md +++ b/docbits/20_import.md @@ -424,7 +424,7 @@ async function process_RS(stream) { const out = new Uint8Array(buffers.reduce((acc, v) => acc + v.length, 0)); let off = 0; - for(const u8 of arr) { + for(const u8 of buffers) { out.set(u8, off); off += u8.length; } @@ -434,7 +434,7 @@ async function process_RS(stream) { const data = await process_RS(stream); /* data is Uint8Array */ -const workbook = XLSX.read(data); +const workbook = XLSX.read(data, {type: 'array'}); ``` diff --git a/docbits/85_filetype.md b/docbits/85_filetype.md index ac30878..ffa265b 100644 --- a/docbits/85_filetype.md +++ b/docbits/85_filetype.md @@ -43,6 +43,7 @@ range limits will be silently truncated: | Excel 2007+ XML Formats (XLSX/XLSM) | XFD1048576 | 16384 | 1048576 | | Excel 2007+ Binary Format (XLSB BIFF12) | XFD1048576 | 16384 | 1048576 | | Numbers 12.0 (NUMBERS) | ALL1000000 | 1000 | 1000000 | +| Quattro Pro 9+ (QPW) | IV1000000 | 256 | 1000000 | | Excel 97-2004 (XLS BIFF8) | IV65536 | 256 | 65536 | | Excel 5.0/95 (XLS BIFF5) | IV16384 | 256 | 16384 | | Excel 4.0 (XLS BIFF4) | IV16384 | 256 | 16384 | diff --git a/misc/docs/README.md b/misc/docs/README.md index 3f99ac6..9044b76 100644 --- a/misc/docs/README.md +++ b/misc/docs/README.md @@ -1025,7 +1025,7 @@ async function process_RS(stream) { const out = new Uint8Array(buffers.reduce((acc, v) => acc + v.length, 0)); let off = 0; - for(const u8 of arr) { + for(const u8 of buffers) { out.set(u8, off); off += u8.length; } @@ -1035,7 +1035,7 @@ async function process_RS(stream) { const data = await process_RS(stream); /* data is Uint8Array */ -const workbook = XLSX.read(data); +const workbook = XLSX.read(data, {type: 'array'}); ``` @@ -2079,7 +2079,7 @@ Parse options are described in the [Parsing Options](#parsing-options) section. `XLSX.writeFile(wb, filename, write_opts)` attempts to write `wb` to `filename`. In browser-based environments, it will attempt to force a client-side download. -`XLSX.writeFileAsync(wb, filename, o, cb)` attempts to write `wb` to `filename`. +`XLSX.writeFileAsync(filename, wb, o, cb)` attempts to write `wb` to `filename`. If `o` is omitted, the writer will use the third argument as the callback. `XLSX.stream` contains a set of streaming write functions. @@ -3867,6 +3867,7 @@ range limits will be silently truncated: | Excel 2007+ XML Formats (XLSX/XLSM) | XFD1048576 | 16384 | 1048576 | | Excel 2007+ Binary Format (XLSB BIFF12) | XFD1048576 | 16384 | 1048576 | | Numbers 12.0 (NUMBERS) | ALL1000000 | 1000 | 1000000 | +| Quattro Pro 9+ (QPW) | IV1000000 | 256 | 1000000 | | Excel 97-2004 (XLS BIFF8) | IV65536 | 256 | 65536 | | Excel 5.0/95 (XLS BIFF5) | IV16384 | 256 | 16384 | | Excel 4.0 (XLS BIFF4) | IV16384 | 256 | 16384 | diff --git a/test.js b/test.js index dc8f566..0f7f268 100644 --- a/test.js +++ b/test.js @@ -2522,6 +2522,118 @@ describe('corner cases', function() { }); }); }); + it('should handle \\r and \\n', function() { + var base = "./test_files/crlf/"; + [ + "CRLFR9.123", + "CRLFR9.WK1", + "CRLFR9.WK3", + "CRLFR9.WK4", + "CRLFR9.XLS", + "CRLFR9_4.XLS", + "CRLFR9_5.XLS", + "CRLFX5_2.XLS", + "CRLFX5_3.XLS", + "CRLFX5_4.XLS", + "CRLFX5_5.XLS", + "crlf.csv", + "crlf.fods", + "crlf.htm", + "crlf.numbers", + "crlf.ods", + "crlf.rtf", + "crlf.slk", + "crlf.xls", + "crlf.xlsb", + "crlf.xlsx", + "crlf.xml", + "crlf5.xls", + "crlfq9.qpw", + "crlfq9.wb1", + "crlfq9.wb2", + "crlfq9.wb3", + "crlfq9.wk1", + "crlfq9.wk3", + "crlfq9.wk4", + "crlfq9.wks", + "crlfq9.wq1", + "crlfw4_2.wks", + "crlfw4_3.wks", + "crlfw4_4.wks" + ].map(function(path) { return base + path; }).forEach(function(w) { + var wb = X.read(fs.readFileSync(w), {type:TYPE}); + var ws = wb.Sheets[wb.SheetNames[0]]; + var B1 = get_cell(ws, "B1"), B2 = get_cell(ws, "B2"); + var lio = w.match(/\.[^\.]*$/).index, stem = w.slice(0, lio).toLowerCase(), ext = w.slice(lio + 1).toLowerCase() + switch(ext) { + case 'fm3': break; + + case '123': + assert.equal(B1.v, "abc\ndef"); + // TODO: parse formula // assert.equal(B1.v, "abc\r\ndef"); + break; + case 'qpw': + case 'wb1': + case 'wb2': + case 'wb3': + case 'wk1': + case 'wk3': + case 'wk4': + case 'wq1': + assert(B1.v == "abcdef" || B1.v == "abc\ndef"); + // TODO: formula -> string values + if(B2 && B2.t != "e" && B2.v != "") assert(B2.v == "abcdef" || B2.v == "abc\r\ndef"); + break; + + case 'wks': + if(stem.match(/w4/)) { + assert.equal(B1.v, "abc\ndef"); + assert(!B2 || B2.t == "z"); // Works4 did not support CODE / CHAR + } else if(stem.match(/q9/)) { + assert.equal(B1.v, "abcdef"); + assert.equal(B2.v, "abc\r\ndef"); + } else { + assert.equal(B1.v, "abc\ndef"); + assert.equal(B2.v, "abc\r\ndef"); + } + break; + + case 'xls': + if(stem.match(/CRLFR9/i)) { + assert.equal(B1.v, "abc\r\ndef"); + } else { + assert.equal(B1.v, "abc\ndef"); + } + assert.equal(B2.v, "abc\r\ndef"); + break; + + case 'rtf': + case 'htm': + assert.equal(B1.v, "abc\ndef"); + assert.equal(B2.v, "abc\n\ndef"); + break; + + case 'xlsx': + case 'xlsb': + case 'xml': + case 'slk': + case 'csv': + assert.equal(B1.v, "abc\ndef"); + assert.equal(B2.v, "abc\r\ndef"); + break; + case 'fods': + case 'ods': + assert.equal(B1.v, "abc\nDef"); + assert.equal(B2.v, "abc\r\ndef"); + break; + case 'numbers': + assert.equal(B1.v, "abc\ndef"); + // TODO: B2 should be a formula error + break; + default: throw ext; + } + }); + }); }); describe('encryption', function() { diff --git a/test_files b/test_files index 57645de..59a8103 160000 --- a/test_files +++ b/test_files @@ -1 +1 @@ -Subproject commit 57645de9ec3abd7c5ffd94d2eeb26c3a1074e507 +Subproject commit 59a810302a68b26d6c9c3f9c4e7f499b0fdd6d37 diff --git a/tests/core.js b/tests/core.js index ad90261..b31b782 100644 --- a/tests/core.js +++ b/tests/core.js @@ -2522,6 +2522,118 @@ describe('corner cases', function() { }); }); }); + it('should handle \\r and \\n', function() { + var base = "./test_files/crlf/"; + [ + "CRLFR9.123", + "CRLFR9.WK1", + "CRLFR9.WK3", + "CRLFR9.WK4", + "CRLFR9.XLS", + "CRLFR9_4.XLS", + "CRLFR9_5.XLS", + "CRLFX5_2.XLS", + "CRLFX5_3.XLS", + "CRLFX5_4.XLS", + "CRLFX5_5.XLS", + "crlf.csv", + "crlf.fods", + "crlf.htm", + "crlf.numbers", + "crlf.ods", + "crlf.rtf", + "crlf.slk", + "crlf.xls", + "crlf.xlsb", + "crlf.xlsx", + "crlf.xml", + "crlf5.xls", + "crlfq9.qpw", + "crlfq9.wb1", + "crlfq9.wb2", + "crlfq9.wb3", + "crlfq9.wk1", + "crlfq9.wk3", + "crlfq9.wk4", + "crlfq9.wks", + "crlfq9.wq1", + "crlfw4_2.wks", + "crlfw4_3.wks", + "crlfw4_4.wks" + ].map(function(path) { return base + path; }).forEach(function(w) { + var wb = X.read(fs.readFileSync(w), {type:TYPE}); + var ws = wb.Sheets[wb.SheetNames[0]]; + var B1 = get_cell(ws, "B1"), B2 = get_cell(ws, "B2"); + var lio = w.match(/\.[^\.]*$/).index, stem = w.slice(0, lio).toLowerCase(), ext = w.slice(lio + 1).toLowerCase() + switch(ext) { + case 'fm3': break; + + case '123': + assert.equal(B1.v, "abc\ndef"); + // TODO: parse formula // assert.equal(B1.v, "abc\r\ndef"); + break; + case 'qpw': + case 'wb1': + case 'wb2': + case 'wb3': + case 'wk1': + case 'wk3': + case 'wk4': + case 'wq1': + assert(B1.v == "abcdef" || B1.v == "abc\ndef"); + // TODO: formula -> string values + if(B2 && B2.t != "e" && B2.v != "") assert(B2.v == "abcdef" || B2.v == "abc\r\ndef"); + break; + + case 'wks': + if(stem.match(/w4/)) { + assert.equal(B1.v, "abc\ndef"); + assert(!B2 || B2.t == "z"); // Works4 did not support CODE / CHAR + } else if(stem.match(/q9/)) { + assert.equal(B1.v, "abcdef"); + assert.equal(B2.v, "abc\r\ndef"); + } else { + assert.equal(B1.v, "abc\ndef"); + assert.equal(B2.v, "abc\r\ndef"); + } + break; + + case 'xls': + if(stem.match(/CRLFR9/i)) { + assert.equal(B1.v, "abc\r\ndef"); + } else { + assert.equal(B1.v, "abc\ndef"); + } + assert.equal(B2.v, "abc\r\ndef"); + break; + + case 'rtf': + case 'htm': + assert.equal(B1.v, "abc\ndef"); + assert.equal(B2.v, "abc\n\ndef"); + break; + + case 'xlsx': + case 'xlsb': + case 'xml': + case 'slk': + case 'csv': + assert.equal(B1.v, "abc\ndef"); + assert.equal(B2.v, "abc\r\ndef"); + break; + case 'fods': + case 'ods': + assert.equal(B1.v, "abc\nDef"); + assert.equal(B2.v, "abc\r\ndef"); + break; + case 'numbers': + assert.equal(B1.v, "abc\ndef"); + // TODO: B2 should be a formula error + break; + default: throw ext; + } + }); + }); }); describe('encryption', function() { diff --git a/tests/fixtures.lst b/tests/fixtures.lst index 72d8cf8..c106a70 100644 --- a/tests/fixtures.lst +++ b/tests/fixtures.lst @@ -175,3 +175,38 @@ ./test_files/numbers/types_61.numbers ./test_files/numbers/Untitled.key ./test_files/numbers/Untitled.pages +./test_files/crlf/CRLFR9.123 +./test_files/crlf/CRLFR9.WK1 +./test_files/crlf/CRLFR9.WK3 +./test_files/crlf/CRLFR9.WK4 +./test_files/crlf/CRLFR9.XLS +./test_files/crlf/CRLFR9_4.XLS +./test_files/crlf/CRLFR9_5.XLS +./test_files/crlf/CRLFX5_2.XLS +./test_files/crlf/CRLFX5_3.XLS +./test_files/crlf/CRLFX5_4.XLS +./test_files/crlf/CRLFX5_5.XLS +./test_files/crlf/crlf.csv +./test_files/crlf/crlf.fods +./test_files/crlf/crlf.htm +./test_files/crlf/crlf.numbers +./test_files/crlf/crlf.ods +./test_files/crlf/crlf.rtf +./test_files/crlf/crlf.slk +./test_files/crlf/crlf.xls +./test_files/crlf/crlf.xlsb +./test_files/crlf/crlf.xlsx +./test_files/crlf/crlf.xml +./test_files/crlf/crlf5.xls +./test_files/crlf/crlfq9.qpw +./test_files/crlf/crlfq9.wb1 +./test_files/crlf/crlfq9.wb2 +./test_files/crlf/crlfq9.wb3 +./test_files/crlf/crlfq9.wk1 +./test_files/crlf/crlfq9.wk3 +./test_files/crlf/crlfq9.wk4 +./test_files/crlf/crlfq9.wks +./test_files/crlf/crlfq9.wq1 +./test_files/crlf/crlfw4_2.wks +./test_files/crlf/crlfw4_3.wks +./test_files/crlf/crlfw4_4.wks diff --git a/xlsx.flow.js b/xlsx.flow.js index c394c82..4f24482 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -3592,15 +3592,19 @@ var rencoding = /*#__PURE__*/evert(encodings); var unescapexml/*:StringConv*/ = /*#__PURE__*/(function() { /* 22.4.2.4 bstr (Basic String) */ var encregex = /&(?:quot|apos|gt|lt|amp|#x?([\da-fA-F]+));/ig, coderegex = /_x([\da-fA-F]{4})_/ig; - return function unescapexml(text/*:string*/)/*:string*/ { + function raw_unescapexml(text/*:string*/)/*:string*/ { var s = text + '', i = s.indexOf("-1?16:10))||$$; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));}); var j = s.indexOf("]]>"); - return unescapexml(s.slice(0, i)) + s.slice(i+9,j) + unescapexml(s.slice(j+3)); + return raw_unescapexml(s.slice(0, i)) + s.slice(i+9,j) + raw_unescapexml(s.slice(j+3)); + } + return function unescapexml(text/*:string*/, xlsx/*:boolean*/) { + var out = raw_unescapexml(text); + return xlsx ? out.replace(/\r\n/g, "\n") : out; }; })(); -var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f]/g; +var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f\uFFFE-\uFFFF]/g; function escapexml(text/*:string*/)/*:string*/{ var s = text + ''; return s.replace(decregex, function(y) { return rencoding[y]; }).replace(charegex,function(s) { return "_x" + ("000"+s.charCodeAt(0).toString(16)).slice(-4) + "_";}); @@ -3626,12 +3630,14 @@ var xlml_fixstr/*:StringConv*/ = /*#__PURE__*/(function() { })(); function xlml_unfixstr(str/*:string*/)/*:string*/ { return str.replace(/(\r\n|[\r\n])/g,"\ "); } +/* note: xsd:boolean valid values: true / 1 / false / 0 */ function parsexmlbool(value/*:any*/)/*:boolean*/ { switch(value) { - case 1: case true: case '1': case 'true': case 'TRUE': return true; - /* case '0': case 'false': case 'FALSE':*/ - default: return false; + case 1: case true: case '1': case 'true': return true; + case 0: case false: case '0': case 'false': return false; + //default: throw new Error("Invalid xsd:boolean " + value); } + return false; } function utf8reada(orig/*:string*/)/*:string*/ { @@ -5992,6 +5998,7 @@ function parse_PropertySet(blob, PIDSI) { if(fail) throw new Error("Read Error: Expected address " + Props[i][1] + ' at ' + blob.l + ' :' + i); } if(PIDSI) { + if(Props[i][0] == 0 && Props.length > i+1 && Props[i][1] == Props[i+1][1]) continue; // R9 var piddsi = PIDSI[Props[i][0]]; PropH[piddsi.n] = parse_TypedPropertyValue(blob, piddsi.t, {raw:true}); if(piddsi.p === 'version') PropH[piddsi.n] = String(PropH[piddsi.n] >> 16) + "." + ("0000" + String(PropH[piddsi.n] & 0xFFFF)).slice(-4); @@ -8449,10 +8456,9 @@ var PRN = /*#__PURE__*/(function() { else sep = guess_sep(str.slice(0,1024)); var R = 0, C = 0, v = 0; var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0, startcc=str.charCodeAt(0); - str = str.replace(/\r\n/mg, "\n"); var _re/*:?RegExp*/ = o.dateNF != null ? dateNF_regex(o.dateNF) : null; function finish_cell() { - var s = str.slice(start, end); + var s = str.slice(start, end); if(s.slice(-1) == "\r") s = s.slice(0, -1); var cell = ({}/*:any*/); if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"'); if(s.length === 0) cell.t = 'z'; @@ -8487,7 +8493,11 @@ var PRN = /*#__PURE__*/(function() { } outer: for(;end < str.length;++end) switch((cc=str.charCodeAt(end))) { case 0x22: if(startcc === 0x22) instr = !instr; break; - case sepcc: case 0x0a: case 0x0d: if(!instr && finish_cell()) break outer; break; + case 0x0d: + if(instr) break; + if(str.charCodeAt(end+1) == 0x0a) ++end; + /* falls through */ + case sepcc: case 0x0a: if(!instr && finish_cell()) break outer; break; default: break; } if(end - start > 0) finish_cell(); @@ -8603,6 +8613,7 @@ var WK_ = /*#__PURE__*/(function() { var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; var sheetRows = o.sheetRows || 0; + if(d[4] == 0x51 && d[5] == 0x50 && d[6] == 0x57) return qpw_to_workbook_buf(d, opts); if(d[2] == 0x00) { if(d[3] == 0x08 || d[3] == 0x09) { if(d.length >= 16 && d[14] == 0x05 && d[15] === 0x6c) throw new Error("Unsupported Works 3 for Mac file"); @@ -8616,12 +8627,17 @@ var WK_ = /*#__PURE__*/(function() { o.vers = val; if(val >= 0x1000) o.qpro = true; break; + case 0xFF: /* BOF (works 3+) */ + o.vers = val; + o.works = true; + break; case 0x06: refguess = val; break; /* RANGE */ case 0xCC: if(val) next_n = val; break; /* SHEETNAMECS */ case 0xDE: next_n = val; break; /* SHEETNAMELP */ case 0x0F: /* LABEL */ case 0x33: /* STRING */ - if(!o.qpro) val[1].v = val[1].v.slice(1); + if((!o.qpro && !o.works || RT == 0x33) && val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1); + if(o.works || o.works2) val[1].v = val[1].v.replace(/\r\n/g, "\n"); /* falls through */ case 0x0D: /* INTEGER */ case 0x0E: /* NUMBER */ @@ -8655,6 +8671,7 @@ var WK_ = /*#__PURE__*/(function() { s[val[0].r][val[0].c] = val[1]; } else s[encode_cell(val[0])] = val[1]; break; + case 0x5405: o.works2 = true; break; default: }}, o); } else if(d[2] == 0x1A || d[2] == 0x0E) { @@ -8663,7 +8680,9 @@ var WK_ = /*#__PURE__*/(function() { lotushopper(d, function(val, R, RT) { switch(RT) { case 0xCC: n = val; break; /* SHEETNAMECS */ case 0x16: /* LABEL16 */ - val[1].v = val[1].v.slice(1); + if(val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1); + // TODO: R9 appears to encode control codes this way -- verify against other versions + val[1].v = val[1].v.replace(/\x0F./g, function($$) { return String.fromCharCode($$.charCodeAt(1) - 0x20); }).replace(/\r\n/g, "\n"); /* falls through */ case 0x17: /* NUMBER17 */ case 0x18: /* NUMBER18 */ @@ -8858,6 +8877,9 @@ var WK_ = /*#__PURE__*/(function() { o[3] = blob.read_shift(1); o[0].r = blob.read_shift(2); blob.l+=2; + } else if(opts.works) { // TODO: verify with more complex works3-4 examples + o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2); + o[2] = blob.read_shift(2); } else { o[2] = blob.read_shift(1); o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2); @@ -8893,6 +8915,18 @@ var WK_ = /*#__PURE__*/(function() { o.write_shift(1, 0); return o; } + function parse_STRING(blob, length, opts) { + var tgt = blob.l + length; + var o = parse_cell(blob, length, opts); + o[1].t = 's'; + if(opts.vers == 0x5120) { + var len = blob.read_shift(1); + o[1].v = blob.read_shift(len, 'utf8'); + return o; + } + o[1].v = blob.read_shift(tgt - blob.l, 'cstr'); + return o; + } function parse_INTEGER(blob, length, opts) { var o = parse_cell(blob, length, opts); @@ -8951,6 +8985,7 @@ var WK_ = /*#__PURE__*/(function() { 0x33: ["FALSE", 0], 0x34: ["TRUE", 0], 0x46: ["LEN", 1], + 0x4A: ["CHAR", 1], 0x50: ["SUM", 69], 0x51: ["AVERAGEA", 69], 0x52: ["COUNTA", 69], @@ -9141,8 +9176,8 @@ var WK_ = /*#__PURE__*/(function() { } function parse_FORMULA_28(blob, length) { - var o = parse_NUMBER_27(blob, 14); - blob.l += length - 10; /* TODO: formula */ + var o = parse_NUMBER_27(blob, 12); + blob.l += length - 12; /* TODO: formula */ return o; } @@ -9232,7 +9267,7 @@ var WK_ = /*#__PURE__*/(function() { /*::[*/0x0030/*::]*/: { n:"UNFORMATTED" }, /*::[*/0x0031/*::]*/: { n:"CURSORW12" }, /*::[*/0x0032/*::]*/: { n:"WINDOW" }, - /*::[*/0x0033/*::]*/: { n:"STRING", f:parse_LABEL }, + /*::[*/0x0033/*::]*/: { n:"STRING", f:parse_STRING }, /*::[*/0x0037/*::]*/: { n:"PASSWORD" }, /*::[*/0x0038/*::]*/: { n:"LOCKED" }, /*::[*/0x003C/*::]*/: { n:"QUERY" }, @@ -9256,6 +9291,7 @@ var WK_ = /*#__PURE__*/(function() { /*::[*/0x0069/*::]*/: { n:"MRANGES??" }, /*::[*/0x00CC/*::]*/: { n:"SHEETNAMECS", f:parse_SHEETNAMECS }, /*::[*/0x00DE/*::]*/: { n:"SHEETNAMELP", f:parse_SHEETNAMELP }, + /*::[*/0x00FF/*::]*/: { n:"BOF", f:parseuint16 }, /*::[*/0xFFFF/*::]*/: { n:"" } }; @@ -9384,6 +9420,144 @@ var WK_ = /*#__PURE__*/(function() { /*::[*/0x6F44/*::]*/: { n:"??" }, /*::[*/0xFFFF/*::]*/: { n:"" } }; + + /* QPW uses a different set of record types */ + function qpw_to_workbook_buf(d, opts)/*:Workbook*/ { + prep_blob(d, 0); + var o = opts || {}; + if(DENSE != null && o.dense == null) o.dense = DENSE; + var s/*:Worksheet*/ = ((o.dense ? [] : {})/*:any*/); + var SST = [], sname = "", formulae = []; + var range = {s:{r:-1,c:-1}, e:{r:-1,c:-1}}; + var cnt = 0, type = 0, C = 0, R = 0; + var wb = { SheetNames: [], Sheets: {} }; + outer: while(d.l < d.length) { + var RT = d.read_shift(2), length = d.read_shift(2); + var p = d.slice(d.l, d.l + length); + prep_blob(p, 0); + switch(RT) { + case 0x01: /* BOF */ + if(p.read_shift(4) != 0x39575051) throw "Bad QPW9 BOF!"; + break; + case 0x02: /* EOF */ break outer; + + /* TODO: The behavior here should be consistent with Numbers: QP Notebook ~ .TN.SheetArchive, QP Sheet ~ .TST.TSTable */ + case 0x0401: /* BON */ break; + case 0x0402: /* EON */ /* TODO: backfill missing sheets based on BON cnt */ break; + + case 0x0407: { /* SST */ + p.l += 12; + while(p.l < p.length) { + cnt = p.read_shift(2); + type = p.read_shift(1); + SST.push(p.read_shift(cnt, 'cstr')); + } + } break; + case 0x0408: { /* FORMULAE */ + //p.l += 12; + //while(p.l < p.length) { + // cnt = p.read_shift(2); + // formulae.push(p.slice(p.l, p.l + cnt + 1)); p.l += cnt + 1; + //} + } break; + + case 0x0601: { /* BOS */ + var sidx = p.read_shift(2); + s = ((o.dense ? [] : {})/*:any*/); + range.s.c = p.read_shift(2); + range.e.c = p.read_shift(2); + range.s.r = p.read_shift(4); + range.e.r = p.read_shift(4); + p.l += 4; + if(p.l + 2 < p.length) { + cnt = p.read_shift(2); + type = p.read_shift(1); + sname = cnt == 0 ? "" : p.read_shift(cnt, 'cstr'); + } + if(!sname) sname = XLSX.utils.encode_col(sidx); + /* TODO: backfill empty sheets */ + } break; + case 0x0602: { /* EOS */ + /* NOTE: QP valid range A1:IV1000000 */ + if(range.s.c > 0xFF || range.s.r > 999999) break; + if(range.e.c < range.s.c) range.e.c = range.s.c; + if(range.e.r < range.s.r) range.e.r = range.s.r; + s["!ref"] = encode_range(range); + book_append_sheet(wb, s, sname); // TODO: a barrel roll + } break; + + case 0x0A01: { /* COL (like XLS Row, modulo the layout transposition) */ + C = p.read_shift(2); + if(range.e.c < C) range.e.c = C; + if(range.s.c > C) range.s.c = C; + R = p.read_shift(4); + if(range.s.r > R) range.s.r = R; + R = p.read_shift(4); + if(range.e.r < R) range.e.r = R; + } break; + + case 0x0C01: { /* MulCells (like XLS MulRK, but takes advantage of common column data patterns) */ + R = p.read_shift(4), cnt = p.read_shift(4); + if(range.s.r > R) range.s.r = R; + if(range.e.r < R + cnt - 1) range.e.r = R + cnt - 1; + while(p.l < p.length) { + var cell = { t: "z" }; + var flags = p.read_shift(1); + if(flags & 0x80) p.l += 2; + var mul = (flags & 0x40) ? p.read_shift(2) - 1: 0; + switch(flags & 0x1F) { + case 1: break; + case 2: cell = { t: "n", v: p.read_shift(2) }; break; + case 3: cell = { t: "n", v: p.read_shift(2, 'i') }; break; + case 5: cell = { t: "n", v: p.read_shift(8, 'f') }; break; + case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break; + case 8: cell = { t: "n", v: p.read_shift(8, 'f') }; p.l += 2; /* cell.f = formulae[p.read_shift(4)]; */ p.l += 4; break; + default: throw "Unrecognized QPW cell type " + (flags & 0x1F); + } + var delta = 0; + if(flags & 0x20) switch(flags & 0x1F) { + case 2: delta = p.read_shift(2); break; + case 3: delta = p.read_shift(2, 'i'); break; + case 7: delta = p.read_shift(2); break; + default: throw "Unsupported delta for QPW cell type " + (flags & 0x1F); + } + if(!(!o.sheetStubs && cell.t == "z")) { + if(Array.isArray(s)) { + if(!s[R]) s[R] = []; + s[R][C] = cell; + } else s[encode_cell({r:R, c:C})] = cell; + } + ++R; --cnt; + while(mul-- > 0 && cnt >= 0) { + if(flags & 0x20) switch(flags & 0x1F) { + case 2: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; break; + case 3: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; if(cell.v > 0x7FFF) cell.v -= 0x10000; break; + case 7: cell = { t: "s", v: SST[type = (type + delta) >>> 0] }; break; + default: throw "Cannot apply delta for QPW cell type " + (flags & 0x1F); + } else switch(flags & 0x1F) { + case 1: cell = { t: "z" }; break; + case 2: cell = { t: "n", v: p.read_shift(2) }; break; + case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break; + default: throw "Cannot apply repeat for QPW cell type " + (flags & 0x1F); + } + if(!(!o.sheetStubs && cell.t == "z")) { + if(Array.isArray(s)) { + if(!s[R]) s[R] = []; + s[R][C] = cell; + } else s[encode_cell({r:R, c:C})] = cell; + } + ++R; --cnt; + } + } + } break; + + default: break; + } + d.l += length; + } + return wb; + } + return { sheet_to_wk1: sheet_to_wk1, book_to_wk3: book_to_wk3, @@ -9570,14 +9744,14 @@ function parse_si(x, opts) { /* 18.4.12 t ST_Xstring (Plaintext String) */ // TODO: is whitespace actually valid here? if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) { - z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||"")); + z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||""), true); z.r = utf8read(x); if(html) z.h = escapehtml(z.t); } /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((/*y = */x.match(sirregex))) { z.r = utf8read(x); - z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,""))); + z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,"")), true); if(html) z.h = rs_to_html(parse_rs(z.r)); } /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ @@ -10010,27 +10184,33 @@ var RTF = /*#__PURE__*/(function() { var o = opts || {}; var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/); - var rows = str.match(/\\trowd.*?\\row\b/g); + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); if(!rows.length) throw new Error("RTF missing table"); var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); rows.forEach(function(rowtf, R) { if(Array.isArray(ws)) ws[R] = []; - var rtfre = /\\\w+\b/g; + var rtfre = /\\[\w\-]+\b/g; var last_index = 0; var res; var C = -1; + var payload = []; while((res = rtfre.exec(rowtf))) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if(data.charCodeAt(0) == 0x20) data = data.slice(1); + if(data.length) payload.push(data); switch(res[0]) { case "\\cell": - var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); - if(data[0] == " ") data = data.slice(1); ++C; - if(data.length) { + if(payload.length) { // TODO: value parsing, including codepage adjustments - var cell = {v: data, t:"s"}; + var cell = {v: payload.join(""), t:"s"}; if(Array.isArray(ws)) ws[R][C] = cell; else ws[encode_cell({r:R, c:C})] = cell; } + payload = []; + break; + case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" + payload.push("\n"); break; } last_index = rtfre.lastIndex; @@ -10056,7 +10236,7 @@ var RTF = /*#__PURE__*/(function() { var coord = encode_cell({r:R,c:C}); cell = dense ? (ws[R]||[])[C]: ws[coord]; if(!cell || cell.v == null && (!cell.f || cell.F)) continue; - o.push(" " + (cell.w || (format_cell(cell), cell.w))); + o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); o.push("\\cell"); } o.push("\\pard\\intbl\\row"); @@ -14861,7 +15041,7 @@ function parse_ws_xml_sheetviews(data, wb/*:WBWBProps*/) { // $FlowIgnore if(+tag.zoomScale) wb.Views[i].zoom = +tag.zoomScale; // $FlowIgnore - if(parsexmlbool(tag.rightToLeft)) wb.Views[i].RTL = true; + if(tag.rightToLeft && parsexmlbool(tag.rightToLeft)) wb.Views[i].RTL = true; }); } function write_ws_xml_sheetviews(ws, opts, idx, wb)/*:string*/ { @@ -14953,7 +15133,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th if(opts.sheetRows && opts.sheetRows < tagr) continue; rowobj = {}; rowrite = false; if(tag.ht) { rowrite = true; rowobj.hpt = parseFloat(tag.ht); rowobj.hpx = pt2px(rowobj.hpt); } - if(tag.hidden == "1") { rowrite = true; rowobj.hidden = true; } + if(tag.hidden && parsexmlbool(tag.hidden)) { rowrite = true; rowobj.hidden = true; } if(tag.outlineLevel != null) { rowrite = true; rowobj.level = +tag.outlineLevel; } if(rowrite) rows[tagr-1] = rowobj; } @@ -14970,7 +15150,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th if(opts && opts.cellStyles) { rowobj = {}; rowrite = false; if(tag.ht) { rowrite = true; rowobj.hpt = parseFloat(tag.ht); rowobj.hpx = pt2px(rowobj.hpt); } - if(tag.hidden == "1") { rowrite = true; rowobj.hidden = true; } + if(tag.hidden && parsexmlbool(tag.hidden)) { rowrite = true; rowobj.hidden = true; } if(tag.outlineLevel != null) { rowrite = true; rowobj.level = +tag.outlineLevel; } if(rowrite) rows[tagr-1] = rowobj; } @@ -15003,7 +15183,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th if(opts.cellFormula) { if((cref=d.match(match_f))!= null && /*::cref != null && */cref[1] !== '') { /* TODO: match against XLSXFutureFunctions */ - p.f=unescapexml(utf8read(cref[1])).replace(/\r\n/g, "\n"); + p.f=unescapexml(utf8read(cref[1]), true); if(!opts.xlfn) p.f = _xlfn(p.f); if(/*::cref != null && cref[0] != null && */cref[0].indexOf('t="array"') > -1) { p.F = (d.match(refregex)||[])[1]; @@ -15057,7 +15237,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th break; case 'str': p.t = "s"; - p.v = (p.v!=null) ? utf8read(p.v) : ''; + p.v = (p.v!=null) ? unescapexml(utf8read(p.v), true) : ''; if(opts.cellHTML) p.h = escapehtml(p.v); break; case 'inlineStr': @@ -16073,6 +16253,8 @@ function parse_ws_bin(data, _opts, idx, rels, wb/*:WBWBProps*/, themes, styles)/ /* TODO: something useful -- this is a stub */ function write_ws_bin_cell(ba/*:BufArray*/, cell/*:Cell*/, R/*:number*/, C/*:number*/, opts, ws/*:Worksheet*/, last_seen/*:boolean*/)/*:boolean*/ { + var o/*:any*/ = ({r:R, c:C}/*:any*/); + if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]); if(cell.v === undefined) return false; var vv = ""; switch(cell.t) { @@ -16086,11 +16268,9 @@ function write_ws_bin_cell(ba/*:BufArray*/, cell/*:Cell*/, R/*:number*/, C/*:num case 'n': case 'e': vv = ''+cell.v; break; default: vv = cell.v; break; } - var o/*:any*/ = ({r:R, c:C}/*:any*/); /* TODO: cell style */ o.s = get_cell_style(opts.cellXfs, cell, opts); if(cell.l) ws['!links'].push([encode_cell(o), cell.l]); - if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]); switch(cell.t) { case 's': case 'str': if(opts.bookSST) { @@ -23512,8 +23692,8 @@ function write_zip_xlsb(wb/*:Workbook*/, opts/*:WriteOpts*/)/*:ZIP*/ { opts.Strings = /*::((*/[]/*:: :any):SST)*/; opts.Strings.Count = 0; opts.Strings.Unique = 0; if(browser_has_Map) opts.revStrings = new Map(); else { opts.revStrings = {}; opts.revStrings.foo = []; delete opts.revStrings.foo; } - var wbext = opts.bookType == "xlsb" ? "bin" : "xml"; - var vbafmt = VBAFMTS.indexOf(opts.bookType) > -1; + var wbext = "bin"; + var vbafmt = true; var ct = new_ct(); fix_write_opts(opts = opts || {}); var zip = zip_new(); @@ -23713,10 +23893,10 @@ function write_zip_xlsx(wb/*:Workbook*/, opts/*:WriteOpts*/)/*:ZIP*/ { carr[1].forEach(function(c) { if(c.T == true) needtc = true; }); }); if(needtc) { - cf = "xl/threadedComments/threadedComment" + rId + "." + wbext; + cf = "xl/threadedComments/threadedComment" + rId + ".xml"; zip_add_file(zip, cf, write_tcmnt_xml(comments, people, opts)); ct.threadedcomments.push(cf); - add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + "." + wbext, RELS.TCMNT); + add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + ".xml", RELS.TCMNT); } cf = "xl/comments" + rId + "." + wbext; diff --git a/xlsx.js b/xlsx.js index 8c93ef1..e24b716 100644 --- a/xlsx.js +++ b/xlsx.js @@ -3518,15 +3518,19 @@ var rencoding = evert(encodings); var unescapexml = (function() { /* 22.4.2.4 bstr (Basic String) */ var encregex = /&(?:quot|apos|gt|lt|amp|#x?([\da-fA-F]+));/ig, coderegex = /_x([\da-fA-F]{4})_/ig; - return function unescapexml(text) { + function raw_unescapexml(text) { var s = text + '', i = s.indexOf("-1?16:10))||$$; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));}); var j = s.indexOf("]]>"); - return unescapexml(s.slice(0, i)) + s.slice(i+9,j) + unescapexml(s.slice(j+3)); + return raw_unescapexml(s.slice(0, i)) + s.slice(i+9,j) + raw_unescapexml(s.slice(j+3)); + } + return function unescapexml(text, xlsx) { + var out = raw_unescapexml(text); + return xlsx ? out.replace(/\r\n/g, "\n") : out; }; })(); -var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f]/g; +var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f\uFFFE-\uFFFF]/g; function escapexml(text){ var s = text + ''; return s.replace(decregex, function(y) { return rencoding[y]; }).replace(charegex,function(s) { return "_x" + ("000"+s.charCodeAt(0).toString(16)).slice(-4) + "_";}); @@ -3552,12 +3556,14 @@ var xlml_fixstr = (function() { })(); function xlml_unfixstr(str) { return str.replace(/(\r\n|[\r\n])/g,"\ "); } +/* note: xsd:boolean valid values: true / 1 / false / 0 */ function parsexmlbool(value) { switch(value) { - case 1: case true: case '1': case 'true': case 'TRUE': return true; - /* case '0': case 'false': case 'FALSE':*/ - default: return false; + case 1: case true: case '1': case 'true': return true; + case 0: case false: case '0': case 'false': return false; + //default: throw new Error("Invalid xsd:boolean " + value); } + return false; } function utf8reada(orig) { @@ -5904,6 +5910,7 @@ function parse_PropertySet(blob, PIDSI) { if(fail) throw new Error("Read Error: Expected address " + Props[i][1] + ' at ' + blob.l + ' :' + i); } if(PIDSI) { + if(Props[i][0] == 0 && Props.length > i+1 && Props[i][1] == Props[i+1][1]) continue; // R9 var piddsi = PIDSI[Props[i][0]]; PropH[piddsi.n] = parse_TypedPropertyValue(blob, piddsi.t, {raw:true}); if(piddsi.p === 'version') PropH[piddsi.n] = String(PropH[piddsi.n] >> 16) + "." + ("0000" + String(PropH[piddsi.n] & 0xFFFF)).slice(-4); @@ -8359,10 +8366,9 @@ var PRN = (function() { else sep = guess_sep(str.slice(0,1024)); var R = 0, C = 0, v = 0; var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0, startcc=str.charCodeAt(0); - str = str.replace(/\r\n/mg, "\n"); var _re = o.dateNF != null ? dateNF_regex(o.dateNF) : null; function finish_cell() { - var s = str.slice(start, end); + var s = str.slice(start, end); if(s.slice(-1) == "\r") s = s.slice(0, -1); var cell = ({}); if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"'); if(s.length === 0) cell.t = 'z'; @@ -8397,7 +8403,11 @@ var PRN = (function() { } outer: for(;end < str.length;++end) switch((cc=str.charCodeAt(end))) { case 0x22: if(startcc === 0x22) instr = !instr; break; - case sepcc: case 0x0a: case 0x0d: if(!instr && finish_cell()) break outer; break; + case 0x0d: + if(instr) break; + if(str.charCodeAt(end+1) == 0x0a) ++end; + /* falls through */ + case sepcc: case 0x0a: if(!instr && finish_cell()) break outer; break; default: break; } if(end - start > 0) finish_cell(); @@ -8513,6 +8523,7 @@ var WK_ = (function() { var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; var sheetRows = o.sheetRows || 0; + if(d[4] == 0x51 && d[5] == 0x50 && d[6] == 0x57) return qpw_to_workbook_buf(d, opts); if(d[2] == 0x00) { if(d[3] == 0x08 || d[3] == 0x09) { if(d.length >= 16 && d[14] == 0x05 && d[15] === 0x6c) throw new Error("Unsupported Works 3 for Mac file"); @@ -8526,12 +8537,17 @@ var WK_ = (function() { o.vers = val; if(val >= 0x1000) o.qpro = true; break; + case 0xFF: /* BOF (works 3+) */ + o.vers = val; + o.works = true; + break; case 0x06: refguess = val; break; /* RANGE */ case 0xCC: if(val) next_n = val; break; /* SHEETNAMECS */ case 0xDE: next_n = val; break; /* SHEETNAMELP */ case 0x0F: /* LABEL */ case 0x33: /* STRING */ - if(!o.qpro) val[1].v = val[1].v.slice(1); + if((!o.qpro && !o.works || RT == 0x33) && val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1); + if(o.works || o.works2) val[1].v = val[1].v.replace(/\r\n/g, "\n"); /* falls through */ case 0x0D: /* INTEGER */ case 0x0E: /* NUMBER */ @@ -8565,6 +8581,7 @@ var WK_ = (function() { s[val[0].r][val[0].c] = val[1]; } else s[encode_cell(val[0])] = val[1]; break; + case 0x5405: o.works2 = true; break; default: }}, o); } else if(d[2] == 0x1A || d[2] == 0x0E) { @@ -8573,7 +8590,9 @@ var WK_ = (function() { lotushopper(d, function(val, R, RT) { switch(RT) { case 0xCC: n = val; break; /* SHEETNAMECS */ case 0x16: /* LABEL16 */ - val[1].v = val[1].v.slice(1); + if(val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1); + // TODO: R9 appears to encode control codes this way -- verify against other versions + val[1].v = val[1].v.replace(/\x0F./g, function($$) { return String.fromCharCode($$.charCodeAt(1) - 0x20); }).replace(/\r\n/g, "\n"); /* falls through */ case 0x17: /* NUMBER17 */ case 0x18: /* NUMBER18 */ @@ -8768,6 +8787,9 @@ var WK_ = (function() { o[3] = blob.read_shift(1); o[0].r = blob.read_shift(2); blob.l+=2; + } else if(opts.works) { // TODO: verify with more complex works3-4 examples + o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2); + o[2] = blob.read_shift(2); } else { o[2] = blob.read_shift(1); o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2); @@ -8803,6 +8825,18 @@ var WK_ = (function() { o.write_shift(1, 0); return o; } + function parse_STRING(blob, length, opts) { + var tgt = blob.l + length; + var o = parse_cell(blob, length, opts); + o[1].t = 's'; + if(opts.vers == 0x5120) { + var len = blob.read_shift(1); + o[1].v = blob.read_shift(len, 'utf8'); + return o; + } + o[1].v = blob.read_shift(tgt - blob.l, 'cstr'); + return o; + } function parse_INTEGER(blob, length, opts) { var o = parse_cell(blob, length, opts); @@ -8861,6 +8895,7 @@ var WK_ = (function() { 0x33: ["FALSE", 0], 0x34: ["TRUE", 0], 0x46: ["LEN", 1], + 0x4A: ["CHAR", 1], 0x50: ["SUM", 69], 0x51: ["AVERAGEA", 69], 0x52: ["COUNTA", 69], @@ -9051,8 +9086,8 @@ var WK_ = (function() { } function parse_FORMULA_28(blob, length) { - var o = parse_NUMBER_27(blob, 14); - blob.l += length - 10; /* TODO: formula */ + var o = parse_NUMBER_27(blob, 12); + blob.l += length - 12; /* TODO: formula */ return o; } @@ -9142,7 +9177,7 @@ var WK_ = (function() { 0x0030: { n:"UNFORMATTED" }, 0x0031: { n:"CURSORW12" }, 0x0032: { n:"WINDOW" }, -0x0033: { n:"STRING", f:parse_LABEL }, +0x0033: { n:"STRING", f:parse_STRING }, 0x0037: { n:"PASSWORD" }, 0x0038: { n:"LOCKED" }, 0x003C: { n:"QUERY" }, @@ -9166,6 +9201,7 @@ var WK_ = (function() { 0x0069: { n:"MRANGES??" }, 0x00CC: { n:"SHEETNAMECS", f:parse_SHEETNAMECS }, 0x00DE: { n:"SHEETNAMELP", f:parse_SHEETNAMELP }, +0x00FF: { n:"BOF", f:parseuint16 }, 0xFFFF: { n:"" } }; @@ -9294,6 +9330,144 @@ var WK_ = (function() { 0x6F44: { n:"??" }, 0xFFFF: { n:"" } }; + + /* QPW uses a different set of record types */ + function qpw_to_workbook_buf(d, opts) { + prep_blob(d, 0); + var o = opts || {}; + if(DENSE != null && o.dense == null) o.dense = DENSE; + var s = ((o.dense ? [] : {})); + var SST = [], sname = "", formulae = []; + var range = {s:{r:-1,c:-1}, e:{r:-1,c:-1}}; + var cnt = 0, type = 0, C = 0, R = 0; + var wb = { SheetNames: [], Sheets: {} }; + outer: while(d.l < d.length) { + var RT = d.read_shift(2), length = d.read_shift(2); + var p = d.slice(d.l, d.l + length); + prep_blob(p, 0); + switch(RT) { + case 0x01: /* BOF */ + if(p.read_shift(4) != 0x39575051) throw "Bad QPW9 BOF!"; + break; + case 0x02: /* EOF */ break outer; + + /* TODO: The behavior here should be consistent with Numbers: QP Notebook ~ .TN.SheetArchive, QP Sheet ~ .TST.TSTable */ + case 0x0401: /* BON */ break; + case 0x0402: /* EON */ /* TODO: backfill missing sheets based on BON cnt */ break; + + case 0x0407: { /* SST */ + p.l += 12; + while(p.l < p.length) { + cnt = p.read_shift(2); + type = p.read_shift(1); + SST.push(p.read_shift(cnt, 'cstr')); + } + } break; + case 0x0408: { /* FORMULAE */ + //p.l += 12; + //while(p.l < p.length) { + // cnt = p.read_shift(2); + // formulae.push(p.slice(p.l, p.l + cnt + 1)); p.l += cnt + 1; + //} + } break; + + case 0x0601: { /* BOS */ + var sidx = p.read_shift(2); + s = ((o.dense ? [] : {})); + range.s.c = p.read_shift(2); + range.e.c = p.read_shift(2); + range.s.r = p.read_shift(4); + range.e.r = p.read_shift(4); + p.l += 4; + if(p.l + 2 < p.length) { + cnt = p.read_shift(2); + type = p.read_shift(1); + sname = cnt == 0 ? "" : p.read_shift(cnt, 'cstr'); + } + if(!sname) sname = XLSX.utils.encode_col(sidx); + /* TODO: backfill empty sheets */ + } break; + case 0x0602: { /* EOS */ + /* NOTE: QP valid range A1:IV1000000 */ + if(range.s.c > 0xFF || range.s.r > 999999) break; + if(range.e.c < range.s.c) range.e.c = range.s.c; + if(range.e.r < range.s.r) range.e.r = range.s.r; + s["!ref"] = encode_range(range); + book_append_sheet(wb, s, sname); // TODO: a barrel roll + } break; + + case 0x0A01: { /* COL (like XLS Row, modulo the layout transposition) */ + C = p.read_shift(2); + if(range.e.c < C) range.e.c = C; + if(range.s.c > C) range.s.c = C; + R = p.read_shift(4); + if(range.s.r > R) range.s.r = R; + R = p.read_shift(4); + if(range.e.r < R) range.e.r = R; + } break; + + case 0x0C01: { /* MulCells (like XLS MulRK, but takes advantage of common column data patterns) */ + R = p.read_shift(4), cnt = p.read_shift(4); + if(range.s.r > R) range.s.r = R; + if(range.e.r < R + cnt - 1) range.e.r = R + cnt - 1; + while(p.l < p.length) { + var cell = { t: "z" }; + var flags = p.read_shift(1); + if(flags & 0x80) p.l += 2; + var mul = (flags & 0x40) ? p.read_shift(2) - 1: 0; + switch(flags & 0x1F) { + case 1: break; + case 2: cell = { t: "n", v: p.read_shift(2) }; break; + case 3: cell = { t: "n", v: p.read_shift(2, 'i') }; break; + case 5: cell = { t: "n", v: p.read_shift(8, 'f') }; break; + case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break; + case 8: cell = { t: "n", v: p.read_shift(8, 'f') }; p.l += 2; /* cell.f = formulae[p.read_shift(4)]; */ p.l += 4; break; + default: throw "Unrecognized QPW cell type " + (flags & 0x1F); + } + var delta = 0; + if(flags & 0x20) switch(flags & 0x1F) { + case 2: delta = p.read_shift(2); break; + case 3: delta = p.read_shift(2, 'i'); break; + case 7: delta = p.read_shift(2); break; + default: throw "Unsupported delta for QPW cell type " + (flags & 0x1F); + } + if(!(!o.sheetStubs && cell.t == "z")) { + if(Array.isArray(s)) { + if(!s[R]) s[R] = []; + s[R][C] = cell; + } else s[encode_cell({r:R, c:C})] = cell; + } + ++R; --cnt; + while(mul-- > 0 && cnt >= 0) { + if(flags & 0x20) switch(flags & 0x1F) { + case 2: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; break; + case 3: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; if(cell.v > 0x7FFF) cell.v -= 0x10000; break; + case 7: cell = { t: "s", v: SST[type = (type + delta) >>> 0] }; break; + default: throw "Cannot apply delta for QPW cell type " + (flags & 0x1F); + } else switch(flags & 0x1F) { + case 1: cell = { t: "z" }; break; + case 2: cell = { t: "n", v: p.read_shift(2) }; break; + case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break; + default: throw "Cannot apply repeat for QPW cell type " + (flags & 0x1F); + } + if(!(!o.sheetStubs && cell.t == "z")) { + if(Array.isArray(s)) { + if(!s[R]) s[R] = []; + s[R][C] = cell; + } else s[encode_cell({r:R, c:C})] = cell; + } + ++R; --cnt; + } + } + } break; + + default: break; + } + d.l += length; + } + return wb; + } + return { sheet_to_wk1: sheet_to_wk1, book_to_wk3: book_to_wk3, @@ -9480,14 +9654,14 @@ function parse_si(x, opts) { /* 18.4.12 t ST_Xstring (Plaintext String) */ // TODO: is whitespace actually valid here? if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) { - z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||"")); + z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||""), true); z.r = utf8read(x); if(html) z.h = escapehtml(z.t); } /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((/*y = */x.match(sirregex))) { z.r = utf8read(x); - z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,""))); + z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,"")), true); if(html) z.h = rs_to_html(parse_rs(z.r)); } /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ @@ -9919,27 +10093,33 @@ var RTF = (function() { var o = opts || {}; var ws = o.dense ? ([]) : ({}); - var rows = str.match(/\\trowd.*?\\row\b/g); + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); if(!rows.length) throw new Error("RTF missing table"); var range = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}); rows.forEach(function(rowtf, R) { if(Array.isArray(ws)) ws[R] = []; - var rtfre = /\\\w+\b/g; + var rtfre = /\\[\w\-]+\b/g; var last_index = 0; var res; var C = -1; + var payload = []; while((res = rtfre.exec(rowtf))) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if(data.charCodeAt(0) == 0x20) data = data.slice(1); + if(data.length) payload.push(data); switch(res[0]) { case "\\cell": - var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); - if(data[0] == " ") data = data.slice(1); ++C; - if(data.length) { + if(payload.length) { // TODO: value parsing, including codepage adjustments - var cell = {v: data, t:"s"}; + var cell = {v: payload.join(""), t:"s"}; if(Array.isArray(ws)) ws[R][C] = cell; else ws[encode_cell({r:R, c:C})] = cell; } + payload = []; + break; + case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" + payload.push("\n"); break; } last_index = rtfre.lastIndex; @@ -9965,7 +10145,7 @@ var RTF = (function() { var coord = encode_cell({r:R,c:C}); cell = dense ? (ws[R]||[])[C]: ws[coord]; if(!cell || cell.v == null && (!cell.f || cell.F)) continue; - o.push(" " + (cell.w || (format_cell(cell), cell.w))); + o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); o.push("\\cell"); } o.push("\\pard\\intbl\\row"); @@ -14767,7 +14947,7 @@ function parse_ws_xml_sheetviews(data, wb) { // $FlowIgnore if(+tag.zoomScale) wb.Views[i].zoom = +tag.zoomScale; // $FlowIgnore - if(parsexmlbool(tag.rightToLeft)) wb.Views[i].RTL = true; + if(tag.rightToLeft && parsexmlbool(tag.rightToLeft)) wb.Views[i].RTL = true; }); } function write_ws_xml_sheetviews(ws, opts, idx, wb) { @@ -14859,7 +15039,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) { if(opts.sheetRows && opts.sheetRows < tagr) continue; rowobj = {}; rowrite = false; if(tag.ht) { rowrite = true; rowobj.hpt = parseFloat(tag.ht); rowobj.hpx = pt2px(rowobj.hpt); } - if(tag.hidden == "1") { rowrite = true; rowobj.hidden = true; } + if(tag.hidden && parsexmlbool(tag.hidden)) { rowrite = true; rowobj.hidden = true; } if(tag.outlineLevel != null) { rowrite = true; rowobj.level = +tag.outlineLevel; } if(rowrite) rows[tagr-1] = rowobj; } @@ -14876,7 +15056,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) { if(opts && opts.cellStyles) { rowobj = {}; rowrite = false; if(tag.ht) { rowrite = true; rowobj.hpt = parseFloat(tag.ht); rowobj.hpx = pt2px(rowobj.hpt); } - if(tag.hidden == "1") { rowrite = true; rowobj.hidden = true; } + if(tag.hidden && parsexmlbool(tag.hidden)) { rowrite = true; rowobj.hidden = true; } if(tag.outlineLevel != null) { rowrite = true; rowobj.level = +tag.outlineLevel; } if(rowrite) rows[tagr-1] = rowobj; } @@ -14909,7 +15089,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) { if(opts.cellFormula) { if((cref=d.match(match_f))!= null && cref[1] !== '') { /* TODO: match against XLSXFutureFunctions */ - p.f=unescapexml(utf8read(cref[1])).replace(/\r\n/g, "\n"); + p.f=unescapexml(utf8read(cref[1]), true); if(!opts.xlfn) p.f = _xlfn(p.f); if(cref[0].indexOf('t="array"') > -1) { p.F = (d.match(refregex)||[])[1]; @@ -14963,7 +15143,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) { break; case 'str': p.t = "s"; - p.v = (p.v!=null) ? utf8read(p.v) : ''; + p.v = (p.v!=null) ? unescapexml(utf8read(p.v), true) : ''; if(opts.cellHTML) p.h = escapehtml(p.v); break; case 'inlineStr': @@ -15978,6 +16158,8 @@ function parse_ws_bin(data, _opts, idx, rels, wb, themes, styles) { /* TODO: something useful -- this is a stub */ function write_ws_bin_cell(ba, cell, R, C, opts, ws, last_seen) { + var o = ({r:R, c:C}); + if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]); if(cell.v === undefined) return false; var vv = ""; switch(cell.t) { @@ -15991,11 +16173,9 @@ function write_ws_bin_cell(ba, cell, R, C, opts, ws, last_seen) { case 'n': case 'e': vv = ''+cell.v; break; default: vv = cell.v; break; } - var o = ({r:R, c:C}); /* TODO: cell style */ o.s = get_cell_style(opts.cellXfs, cell, opts); if(cell.l) ws['!links'].push([encode_cell(o), cell.l]); - if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]); switch(cell.t) { case 's': case 'str': if(opts.bookSST) { @@ -23400,8 +23580,8 @@ function write_zip_xlsb(wb, opts) { opts.Strings = []; opts.Strings.Count = 0; opts.Strings.Unique = 0; if(browser_has_Map) opts.revStrings = new Map(); else { opts.revStrings = {}; opts.revStrings.foo = []; delete opts.revStrings.foo; } - var wbext = opts.bookType == "xlsb" ? "bin" : "xml"; - var vbafmt = VBAFMTS.indexOf(opts.bookType) > -1; + var wbext = "bin"; + var vbafmt = true; var ct = new_ct(); fix_write_opts(opts = opts || {}); var zip = zip_new(); @@ -23599,10 +23779,10 @@ f = "docProps/app.xml"; carr[1].forEach(function(c) { if(c.T == true) needtc = true; }); }); if(needtc) { - cf = "xl/threadedComments/threadedComment" + rId + "." + wbext; + cf = "xl/threadedComments/threadedComment" + rId + ".xml"; zip_add_file(zip, cf, write_tcmnt_xml(comments, people, opts)); ct.threadedcomments.push(cf); - add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + "." + wbext, RELS.TCMNT); + add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + ".xml", RELS.TCMNT); } cf = "xl/comments" + rId + "." + wbext;