1
forked from sheetjs/sheetjs

QPW and newline tests

This commit is contained in:
SheetJS 2022-04-25 05:02:14 -04:00
parent fb85dfbedc
commit 694cdcb75a
11 changed files with 836 additions and 75 deletions

@ -1085,7 +1085,7 @@ async function process_RS(stream) {
const data = await process_RS(stream);
/* data is Uint8Array */
const workbook = XLSX.read(data, {type: "array"});
const workbook = XLSX.read(data, {type: 'array'});
```
</details>
@ -4092,6 +4092,7 @@ range limits will be silently truncated:
| Excel 2007+ XML Formats (XLSX/XLSM) | XFD1048576 | 16384 | 1048576 |
| Excel 2007+ Binary Format (XLSB BIFF12) | XFD1048576 | 16384 | 1048576 |
| Numbers 12.0 (NUMBERS) | ALL1000000 | 1000 | 1000000 |
| Quattro Pro 9+ (QPW) | IV1000000 | 256 | 1000000 |
| Excel 97-2004 (XLS BIFF8) | IV65536 | 256 | 65536 |
| Excel 5.0/95 (XLS BIFF5) | IV16384 | 256 | 16384 |
| Excel 4.0 (XLS BIFF4) | IV16384 | 256 | 16384 |

@ -34,6 +34,7 @@ var WK_ = /*#__PURE__*/(function() {
var refguess = {s: {r:0, c:0}, e: {r:0, c:0} };
var sheetRows = o.sheetRows || 0;
if(d[4] == 0x51 && d[5] == 0x50 && d[6] == 0x57) return qpw_to_workbook_buf(d, opts);
if(d[2] == 0x00) {
if(d[3] == 0x08 || d[3] == 0x09) {
if(d.length >= 16 && d[14] == 0x05 && d[15] === 0x6c) throw new Error("Unsupported Works 3 for Mac file");
@ -840,6 +841,144 @@ var WK_ = /*#__PURE__*/(function() {
/*::[*/0x6F44/*::]*/: { n:"??" },
/*::[*/0xFFFF/*::]*/: { n:"" }
};
/* QPW uses a different set of record types */
/**
 * Parse a Quattro Pro 9+ (QPW) byte stream into a workbook object.
 *
 * The buffer is consumed as a flat sequence of records.  Each record starts
 * with a 2-byte record type and a 2-byte payload length; the payload is sliced
 * out and parsed on its own cursor, after which the outer cursor is advanced
 * by the payload length (unknown record types are therefore skipped).
 *
 * Records handled here:
 *  - 0x0001 BOF      : payload must start with the 32-bit value 0x39575051
 *  - 0x0002 EOF      : stops parsing
 *  - 0x0407 SST      : shared string table (12 skipped bytes, then entries of
 *                      2-byte count + 1-byte type + string)
 *  - 0x0601 BOS      : begin sheet (index, column/row extents, optional name)
 *  - 0x0602 EOS      : end sheet; sets "!ref" and appends the sheet to the book
 *  - 0x0A01 COL      : selects the current column and widens the sheet range
 *  - 0x0C01 MulCells : run of cells stored down the current column
 *
 * @param d    byte buffer holding the file (prepared with prep_blob here)
 * @param opts parse options; `dense` and `sheetStubs` are the ones read here
 * @returns workbook with `SheetNames` and `Sheets` populated
 * @throws {Error} on a bad BOF signature or an unsupported cell type / delta / repeat
 */
function qpw_to_workbook_buf(d, opts)/*:Workbook*/ {
	prep_blob(d, 0);
	var o = opts || {};
	if(DENSE != null && o.dense == null) o.dense = DENSE;
	var s/*:Worksheet*/ = ((o.dense ? [] : {})/*:any*/);
	var SST = [], sname = "", formulae = [];
	var range = {s:{r:-1,c:-1}, e:{r:-1,c:-1}};
	var cnt = 0, type = 0, C = 0, R = 0;
	var wb = { SheetNames: [], Sheets: {} };
	outer: while(d.l < d.length) {
		/* record header: 2-byte type, 2-byte payload length */
		var RT = d.read_shift(2), length = d.read_shift(2);
		var p = d.slice(d.l, d.l + length);
		prep_blob(p, 0);
		switch(RT) {
			case 0x01: /* BOF */
				if(p.read_shift(4) != 0x39575051) throw new Error("Bad QPW9 BOF!");
				break;
			case 0x02: /* EOF */ break outer;

			/* TODO: The behavior here should be consistent with Numbers: QP Notebook ~ .TN.SheetArchive, QP Sheet ~ .TST.TSTable */
			case 0x0401: /* BON */ break;
			case 0x0402: /* EON */ /* TODO: backfill missing sheets based on BON cnt */ break;

			case 0x0407: { /* SST */
				p.l += 12;
				while(p.l < p.length) {
					cnt = p.read_shift(2);
					type = p.read_shift(1);
					SST.push(p.read_shift(cnt, 'cstr'));
				}
			} break;
			case 0x0408: { /* FORMULAE */
				//p.l += 12;
				//while(p.l < p.length) {
				//	cnt = p.read_shift(2);
				//	formulae.push(p.slice(p.l, p.l + cnt + 1)); p.l += cnt + 1;
				//}
			} break;

			case 0x0601: { /* BOS */
				var sidx = p.read_shift(2);
				s = ((o.dense ? [] : {})/*:any*/);
				/* drop the previous sheet's name: an unnamed sheet must fall back
				   to its index-based name instead of reusing (and colliding with) it */
				sname = "";
				range.s.c = p.read_shift(2);
				range.e.c = p.read_shift(2);
				range.s.r = p.read_shift(4);
				range.e.r = p.read_shift(4);
				p.l += 4;
				/* the name is optional: only present when bytes remain in the payload */
				if(p.l + 2 < p.length) {
					cnt = p.read_shift(2);
					type = p.read_shift(1);
					sname = cnt == 0 ? "" : p.read_shift(cnt, 'cstr');
				}
				/* call the helper directly (as with encode_cell / encode_range below)
				   rather than through a global XLSX object, which need not exist */
				if(!sname) sname = encode_col(sidx);
				/* TODO: backfill empty sheets */
			} break;
			case 0x0602: { /* EOS */
				/* NOTE: QP valid range A1:IV1000000 */
				if(range.s.c > 0xFF || range.s.r > 999999) break;
				if(range.e.c < range.s.c) range.e.c = range.s.c;
				if(range.e.r < range.s.r) range.e.r = range.s.r;
				s["!ref"] = encode_range(range);
				book_append_sheet(wb, s, sname); // TODO: a barrel roll
			} break;

			case 0x0A01: { /* COL (like XLS Row, modulo the layout transposition) */
				C = p.read_shift(2);
				if(range.e.c < C) range.e.c = C;
				if(range.s.c > C) range.s.c = C;
				R = p.read_shift(4);
				if(range.s.r > R) range.s.r = R;
				R = p.read_shift(4);
				if(range.e.r < R) range.e.r = R;
			} break;

			case 0x0C01: { /* MulCells (like XLS MulRK, but takes advantage of common column data patterns) */
				/* starting row and number of cells covered by this record */
				R = p.read_shift(4), cnt = p.read_shift(4);
				if(range.s.r > R) range.s.r = R;
				if(range.e.r < R + cnt - 1) range.e.r = R + cnt - 1;
				while(p.l < p.length) {
					var cell = { t: "z" };
					/* flags: 0x80 -> two extra bytes (skipped), 0x40 -> repeat count follows,
					   0x20 -> delta follows the value, low 5 bits -> cell type */
					var flags = p.read_shift(1);
					if(flags & 0x80) p.l += 2;
					var mul = (flags & 0x40) ? p.read_shift(2) - 1: 0;
					switch(flags & 0x1F) {
						case 1: break;
						case 2: cell = { t: "n", v: p.read_shift(2) }; break;
						case 3: cell = { t: "n", v: p.read_shift(2, 'i') }; break;
						case 5: cell = { t: "n", v: p.read_shift(8, 'f') }; break;
						case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break;
						case 8: cell = { t: "n", v: p.read_shift(8, 'f') }; p.l += 2; /* cell.f = formulae[p.read_shift(4)]; */ p.l += 4; break;
						default: throw new Error("Unrecognized QPW cell type " + (flags & 0x1F));
					}
					var delta = 0;
					if(flags & 0x20) switch(flags & 0x1F) {
						case 2: delta = p.read_shift(2); break;
						case 3: delta = p.read_shift(2, 'i'); break;
						case 7: delta = p.read_shift(2); break;
						default: throw new Error("Unsupported delta for QPW cell type " + (flags & 0x1F));
					}
					/* stub ("z") cells are only stored when sheetStubs is set */
					if(!(!o.sheetStubs && cell.t == "z")) {
						if(Array.isArray(s)) {
							if(!s[R]) s[R] = [];
							s[R][C] = cell;
						} else s[encode_cell({r:R, c:C})] = cell;
					}
					++R; --cnt;
					/* expand the remaining repetitions of this entry down the column */
					while(mul-- > 0 && cnt >= 0) {
						if(flags & 0x20) switch(flags & 0x1F) {
							/* delta mode: each cell is the previous value plus delta (16-bit wraparound) */
							case 2: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; break;
							case 3: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; if(cell.v > 0x7FFF) cell.v -= 0x10000; break;
							case 7: cell = { t: "s", v: SST[type = (type + delta) >>> 0] }; break;
							default: throw new Error("Cannot apply delta for QPW cell type " + (flags & 0x1F));
						} else switch(flags & 0x1F) {
							/* repeat mode: a fresh value of the same type is read for each cell */
							case 1: cell = { t: "z" }; break;
							case 2: cell = { t: "n", v: p.read_shift(2) }; break;
							case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break;
							default: throw new Error("Cannot apply repeat for QPW cell type " + (flags & 0x1F));
						}
						if(!(!o.sheetStubs && cell.t == "z")) {
							if(Array.isArray(s)) {
								if(!s[R]) s[R] = [];
								s[R][C] = cell;
							} else s[encode_cell({r:R, c:C})] = cell;
						}
						++R; --cnt;
					}
				}
			} break;

			default: break;
		}
		/* advance past the payload regardless of how much of it was parsed */
		d.l += length;
	}
	return wb;
}
return {
sheet_to_wk1: sheet_to_wk1,
book_to_wk3: book_to_wk3,

@ -424,7 +424,7 @@ async function process_RS(stream) {
const out = new Uint8Array(buffers.reduce((acc, v) => acc + v.length, 0));
let off = 0;
for(const u8 of arr) {
for(const u8 of buffers) {
out.set(u8, off);
off += u8.length;
}
@ -434,7 +434,7 @@ async function process_RS(stream) {
const data = await process_RS(stream);
/* data is Uint8Array */
const workbook = XLSX.read(data);
const workbook = XLSX.read(data, {type: 'array'});
```
</details>

@ -43,6 +43,7 @@ range limits will be silently truncated:
| Excel 2007+ XML Formats (XLSX/XLSM) | XFD1048576 | 16384 | 1048576 |
| Excel 2007+ Binary Format (XLSB BIFF12) | XFD1048576 | 16384 | 1048576 |
| Numbers 12.0 (NUMBERS) | ALL1000000 | 1000 | 1000000 |
| Quattro Pro 9+ (QPW) | IV1000000 | 256 | 1000000 |
| Excel 97-2004 (XLS BIFF8) | IV65536 | 256 | 65536 |
| Excel 5.0/95 (XLS BIFF5) | IV16384 | 256 | 16384 |
| Excel 4.0 (XLS BIFF4) | IV16384 | 256 | 16384 |

@ -1025,7 +1025,7 @@ async function process_RS(stream) {
const out = new Uint8Array(buffers.reduce((acc, v) => acc + v.length, 0));
let off = 0;
for(const u8 of arr) {
for(const u8 of buffers) {
out.set(u8, off);
off += u8.length;
}
@ -1035,7 +1035,7 @@ async function process_RS(stream) {
const data = await process_RS(stream);
/* data is Uint8Array */
const workbook = XLSX.read(data);
const workbook = XLSX.read(data, {type: 'array'});
```
@ -2079,7 +2079,7 @@ Parse options are described in the [Parsing Options](#parsing-options) section.
`XLSX.writeFile(wb, filename, write_opts)` attempts to write `wb` to `filename`.
In browser-based environments, it will attempt to force a client-side download.
`XLSX.writeFileAsync(wb, filename, o, cb)` attempts to write `wb` to `filename`.
`XLSX.writeFileAsync(filename, wb, o, cb)` attempts to write `wb` to `filename`.
If `o` is omitted, the writer will use the third argument as the callback.
`XLSX.stream` contains a set of streaming write functions.
@ -3867,6 +3867,7 @@ range limits will be silently truncated:
| Excel 2007+ XML Formats (XLSX/XLSM) | XFD1048576 | 16384 | 1048576 |
| Excel 2007+ Binary Format (XLSB BIFF12) | XFD1048576 | 16384 | 1048576 |
| Numbers 12.0 (NUMBERS) | ALL1000000 | 1000 | 1000000 |
| Quattro Pro 9+ (QPW) | IV1000000 | 256 | 1000000 |
| Excel 97-2004 (XLS BIFF8) | IV65536 | 256 | 65536 |
| Excel 5.0/95 (XLS BIFF5) | IV16384 | 256 | 16384 |
| Excel 4.0 (XLS BIFF4) | IV16384 | 256 | 16384 |

112
test.js

@ -2522,6 +2522,118 @@ describe('corner cases', function() {
});
});
});
/* Reads every fixture under ./test_files/crlf/ and checks the values parsed
 * for cells B1 and B2 of the first worksheet.  The expected newline rendering
 * differs by file format, so the assertions are selected by file extension
 * (and, for .wks / .xls, by the producing application encoded in the name). */
it('should handle \\r and \\n', function() {
	var base = "./test_files/crlf/";
	[
		"CRLFR9.123",
		"CRLFR9.WK1",
		"CRLFR9.WK3",
		"CRLFR9.WK4",
		"CRLFR9.XLS",
		"CRLFR9_4.XLS",
		"CRLFR9_5.XLS",
		"CRLFX5_2.XLS",
		"CRLFX5_3.XLS",
		"CRLFX5_4.XLS",
		"CRLFX5_5.XLS",
		"crlf.csv",
		"crlf.fods",
		"crlf.htm",
		"crlf.numbers",
		"crlf.ods",
		"crlf.rtf",
		"crlf.slk",
		"crlf.xls",
		"crlf.xlsb",
		"crlf.xlsx",
		"crlf.xml",
		"crlf5.xls",
		"crlfq9.qpw",
		"crlfq9.wb1",
		"crlfq9.wb2",
		"crlfq9.wb3",
		"crlfq9.wk1",
		"crlfq9.wk3",
		"crlfq9.wk4",
		"crlfq9.wks",
		"crlfq9.wq1",
		"crlfw4_2.wks",
		"crlfw4_3.wks",
		"crlfw4_4.wks"
	].map(function(path) { return base + path; }).forEach(function(w) {
		var wb = X.read(fs.readFileSync(w), {type:TYPE});
		var ws = wb.Sheets[wb.SheetNames[0]];
		var B1 = get_cell(ws, "B1"), B2 = get_cell(ws, "B2");
		/* split at the last "." -- both halves are lowercased before matching */
		var lio = w.match(/\.[^\.]*$/).index, stem = w.slice(0, lio).toLowerCase(), ext = w.slice(lio + 1).toLowerCase();
		switch(ext) {
			case 'fm3': break;
			case '123':
				assert.equal(B1.v, "abc\ndef");
				// TODO: parse formula // assert.equal(B1.v, "abc\r\ndef");
				break;
			case 'qpw':
			case 'wb1':
			case 'wb2':
			case 'wb3':
			case 'wk1':
			case 'wk3':
			case 'wk4':
			case 'wq1':
				assert(B1.v == "abcdef" || B1.v == "abc\ndef");
				// TODO: formula -> string values
				if(B2 && B2.t != "e" && B2.v != "") assert(B2.v == "abcdef" || B2.v == "abc\r\ndef");
				break;
			case 'wks':
				if(stem.match(/w4/)) {
					assert.equal(B1.v, "abc\ndef");
					assert(!B2 || B2.t == "z"); // Works4 did not support CODE / CHAR
				} else if(stem.match(/q9/)) {
					assert.equal(B1.v, "abcdef");
					assert.equal(B2.v, "abc\r\ndef");
				} else {
					assert.equal(B1.v, "abc\ndef");
					assert.equal(B2.v, "abc\r\ndef");
				}
				break;
			case 'xls':
				if(stem.match(/CRLFR9/i)) {
					assert.equal(B1.v, "abc\r\ndef");
				} else {
					assert.equal(B1.v, "abc\ndef");
				}
				assert.equal(B2.v, "abc\r\ndef");
				break;
			case 'rtf':
			case 'htm':
				assert.equal(B1.v, "abc\ndef");
				assert.equal(B2.v, "abc\n\ndef");
				break;
			case 'xlsx':
			case 'xlsb':
			case 'xml':
			case 'slk':
			case 'csv':
				assert.equal(B1.v, "abc\ndef");
				assert.equal(B2.v, "abc\r\ndef");
				break;
			case 'fods':
			case 'ods':
				assert.equal(B1.v, "abc\nDef");
				assert.equal(B2.v, "abc\r\ndef");
				break;
			case 'numbers':
				assert.equal(B1.v, "abc\ndef");
				// TODO: B2 should be a formula error
				break;
			/* a fixture was added without a matching expectation: fail with a
			   real Error so the report carries a message and a stack trace */
			default: throw new Error("No newline expectations for extension ." + ext + " (" + w + ")");
		}
	});
});
});
describe('encryption', function() {

@ -1 +1 @@
Subproject commit 57645de9ec3abd7c5ffd94d2eeb26c3a1074e507
Subproject commit 59a810302a68b26d6c9c3f9c4e7f499b0fdd6d37

112
tests/core.js generated

@ -2522,6 +2522,118 @@ describe('corner cases', function() {
});
});
});
/* Reads every fixture under ./test_files/crlf/ and checks the values parsed
 * for cells B1 and B2 of the first worksheet.  The expected newline rendering
 * differs by file format, so the assertions are selected by file extension
 * (and, for .wks / .xls, by the producing application encoded in the name). */
it('should handle \\r and \\n', function() {
	var base = "./test_files/crlf/";
	[
		"CRLFR9.123",
		"CRLFR9.WK1",
		"CRLFR9.WK3",
		"CRLFR9.WK4",
		"CRLFR9.XLS",
		"CRLFR9_4.XLS",
		"CRLFR9_5.XLS",
		"CRLFX5_2.XLS",
		"CRLFX5_3.XLS",
		"CRLFX5_4.XLS",
		"CRLFX5_5.XLS",
		"crlf.csv",
		"crlf.fods",
		"crlf.htm",
		"crlf.numbers",
		"crlf.ods",
		"crlf.rtf",
		"crlf.slk",
		"crlf.xls",
		"crlf.xlsb",
		"crlf.xlsx",
		"crlf.xml",
		"crlf5.xls",
		"crlfq9.qpw",
		"crlfq9.wb1",
		"crlfq9.wb2",
		"crlfq9.wb3",
		"crlfq9.wk1",
		"crlfq9.wk3",
		"crlfq9.wk4",
		"crlfq9.wks",
		"crlfq9.wq1",
		"crlfw4_2.wks",
		"crlfw4_3.wks",
		"crlfw4_4.wks"
	].map(function(path) { return base + path; }).forEach(function(w) {
		var wb = X.read(fs.readFileSync(w), {type:TYPE});
		var ws = wb.Sheets[wb.SheetNames[0]];
		var B1 = get_cell(ws, "B1"), B2 = get_cell(ws, "B2");
		/* split at the last "." -- both halves are lowercased before matching */
		var lio = w.match(/\.[^\.]*$/).index, stem = w.slice(0, lio).toLowerCase(), ext = w.slice(lio + 1).toLowerCase();
		switch(ext) {
			case 'fm3': break;
			case '123':
				assert.equal(B1.v, "abc\ndef");
				// TODO: parse formula // assert.equal(B1.v, "abc\r\ndef");
				break;
			case 'qpw':
			case 'wb1':
			case 'wb2':
			case 'wb3':
			case 'wk1':
			case 'wk3':
			case 'wk4':
			case 'wq1':
				assert(B1.v == "abcdef" || B1.v == "abc\ndef");
				// TODO: formula -> string values
				if(B2 && B2.t != "e" && B2.v != "") assert(B2.v == "abcdef" || B2.v == "abc\r\ndef");
				break;
			case 'wks':
				if(stem.match(/w4/)) {
					assert.equal(B1.v, "abc\ndef");
					assert(!B2 || B2.t == "z"); // Works4 did not support CODE / CHAR
				} else if(stem.match(/q9/)) {
					assert.equal(B1.v, "abcdef");
					assert.equal(B2.v, "abc\r\ndef");
				} else {
					assert.equal(B1.v, "abc\ndef");
					assert.equal(B2.v, "abc\r\ndef");
				}
				break;
			case 'xls':
				if(stem.match(/CRLFR9/i)) {
					assert.equal(B1.v, "abc\r\ndef");
				} else {
					assert.equal(B1.v, "abc\ndef");
				}
				assert.equal(B2.v, "abc\r\ndef");
				break;
			case 'rtf':
			case 'htm':
				assert.equal(B1.v, "abc\ndef");
				assert.equal(B2.v, "abc\n\ndef");
				break;
			case 'xlsx':
			case 'xlsb':
			case 'xml':
			case 'slk':
			case 'csv':
				assert.equal(B1.v, "abc\ndef");
				assert.equal(B2.v, "abc\r\ndef");
				break;
			case 'fods':
			case 'ods':
				assert.equal(B1.v, "abc\nDef");
				assert.equal(B2.v, "abc\r\ndef");
				break;
			case 'numbers':
				assert.equal(B1.v, "abc\ndef");
				// TODO: B2 should be a formula error
				break;
			/* a fixture was added without a matching expectation: fail with a
			   real Error so the report carries a message and a stack trace */
			default: throw new Error("No newline expectations for extension ." + ext + " (" + w + ")");
		}
	});
});
});
describe('encryption', function() {

@ -175,3 +175,38 @@
./test_files/numbers/types_61.numbers
./test_files/numbers/Untitled.key
./test_files/numbers/Untitled.pages
./test_files/crlf/CRLFR9.123
./test_files/crlf/CRLFR9.WK1
./test_files/crlf/CRLFR9.WK3
./test_files/crlf/CRLFR9.WK4
./test_files/crlf/CRLFR9.XLS
./test_files/crlf/CRLFR9_4.XLS
./test_files/crlf/CRLFR9_5.XLS
./test_files/crlf/CRLFX5_2.XLS
./test_files/crlf/CRLFX5_3.XLS
./test_files/crlf/CRLFX5_4.XLS
./test_files/crlf/CRLFX5_5.XLS
./test_files/crlf/crlf.csv
./test_files/crlf/crlf.fods
./test_files/crlf/crlf.htm
./test_files/crlf/crlf.numbers
./test_files/crlf/crlf.ods
./test_files/crlf/crlf.rtf
./test_files/crlf/crlf.slk
./test_files/crlf/crlf.xls
./test_files/crlf/crlf.xlsb
./test_files/crlf/crlf.xlsx
./test_files/crlf/crlf.xml
./test_files/crlf/crlf5.xls
./test_files/crlf/crlfq9.qpw
./test_files/crlf/crlfq9.wb1
./test_files/crlf/crlfq9.wb2
./test_files/crlf/crlfq9.wb3
./test_files/crlf/crlfq9.wk1
./test_files/crlf/crlfq9.wk3
./test_files/crlf/crlfq9.wk4
./test_files/crlf/crlfq9.wks
./test_files/crlf/crlfq9.wq1
./test_files/crlf/crlfw4_2.wks
./test_files/crlf/crlfw4_3.wks
./test_files/crlf/crlfw4_4.wks

@ -3592,15 +3592,19 @@ var rencoding = /*#__PURE__*/evert(encodings);
var unescapexml/*:StringConv*/ = /*#__PURE__*/(function() {
/* 22.4.2.4 bstr (Basic String) */
var encregex = /&(?:quot|apos|gt|lt|amp|#x?([\da-fA-F]+));/ig, coderegex = /_x([\da-fA-F]{4})_/ig;
return function unescapexml(text/*:string*/)/*:string*/ {
function raw_unescapexml(text/*:string*/)/*:string*/ {
var s = text + '', i = s.indexOf("<![CDATA[");
if(i == -1) return s.replace(encregex, function($$, $1) { return encodings[$$]||String.fromCharCode(parseInt($1,$$.indexOf("x")>-1?16:10))||$$; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));});
var j = s.indexOf("]]>");
return unescapexml(s.slice(0, i)) + s.slice(i+9,j) + unescapexml(s.slice(j+3));
return raw_unescapexml(s.slice(0, i)) + s.slice(i+9,j) + raw_unescapexml(s.slice(j+3));
}
return function unescapexml(text/*:string*/, xlsx/*:boolean*/) {
var out = raw_unescapexml(text);
return xlsx ? out.replace(/\r\n/g, "\n") : out;
};
})();
var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f]/g;
var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f\uFFFE-\uFFFF]/g;
function escapexml(text/*:string*/)/*:string*/{
var s = text + '';
return s.replace(decregex, function(y) { return rencoding[y]; }).replace(charegex,function(s) { return "_x" + ("000"+s.charCodeAt(0).toString(16)).slice(-4) + "_";});
@ -3626,12 +3630,14 @@ var xlml_fixstr/*:StringConv*/ = /*#__PURE__*/(function() {
})();
/* Replace every line break (CRLF pair, lone CR or lone LF) with the XML
 * character reference "&#10;"; a CRLF pair yields a single reference. */
function xlml_unfixstr(str/*:string*/)/*:string*/ {
	var linebreak = /\r\n|\r|\n/g;
	return str.replace(linebreak, "&#10;");
}
/* note: xsd:boolean valid values: true / 1 / false / 0 */
function parsexmlbool(value/*:any*/)/*:boolean*/ {
switch(value) {
case 1: case true: case '1': case 'true': case 'TRUE': return true;
/* case '0': case 'false': case 'FALSE':*/
default: return false;
case 1: case true: case '1': case 'true': return true;
case 0: case false: case '0': case 'false': return false;
//default: throw new Error("Invalid xsd:boolean " + value);
}
return false;
}
function utf8reada(orig/*:string*/)/*:string*/ {
@ -5992,6 +5998,7 @@ function parse_PropertySet(blob, PIDSI) {
if(fail) throw new Error("Read Error: Expected address " + Props[i][1] + ' at ' + blob.l + ' :' + i);
}
if(PIDSI) {
if(Props[i][0] == 0 && Props.length > i+1 && Props[i][1] == Props[i+1][1]) continue; // R9
var piddsi = PIDSI[Props[i][0]];
PropH[piddsi.n] = parse_TypedPropertyValue(blob, piddsi.t, {raw:true});
if(piddsi.p === 'version') PropH[piddsi.n] = String(PropH[piddsi.n] >> 16) + "." + ("0000" + String(PropH[piddsi.n] & 0xFFFF)).slice(-4);
@ -8449,10 +8456,9 @@ var PRN = /*#__PURE__*/(function() {
else sep = guess_sep(str.slice(0,1024));
var R = 0, C = 0, v = 0;
var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0, startcc=str.charCodeAt(0);
str = str.replace(/\r\n/mg, "\n");
var _re/*:?RegExp*/ = o.dateNF != null ? dateNF_regex(o.dateNF) : null;
function finish_cell() {
var s = str.slice(start, end);
var s = str.slice(start, end); if(s.slice(-1) == "\r") s = s.slice(0, -1);
var cell = ({}/*:any*/);
if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"');
if(s.length === 0) cell.t = 'z';
@ -8487,7 +8493,11 @@ var PRN = /*#__PURE__*/(function() {
}
outer: for(;end < str.length;++end) switch((cc=str.charCodeAt(end))) {
case 0x22: if(startcc === 0x22) instr = !instr; break;
case sepcc: case 0x0a: case 0x0d: if(!instr && finish_cell()) break outer; break;
case 0x0d:
if(instr) break;
if(str.charCodeAt(end+1) == 0x0a) ++end;
/* falls through */
case sepcc: case 0x0a: if(!instr && finish_cell()) break outer; break;
default: break;
}
if(end - start > 0) finish_cell();
@ -8603,6 +8613,7 @@ var WK_ = /*#__PURE__*/(function() {
var refguess = {s: {r:0, c:0}, e: {r:0, c:0} };
var sheetRows = o.sheetRows || 0;
if(d[4] == 0x51 && d[5] == 0x50 && d[6] == 0x57) return qpw_to_workbook_buf(d, opts);
if(d[2] == 0x00) {
if(d[3] == 0x08 || d[3] == 0x09) {
if(d.length >= 16 && d[14] == 0x05 && d[15] === 0x6c) throw new Error("Unsupported Works 3 for Mac file");
@ -8616,12 +8627,17 @@ var WK_ = /*#__PURE__*/(function() {
o.vers = val;
if(val >= 0x1000) o.qpro = true;
break;
case 0xFF: /* BOF (works 3+) */
o.vers = val;
o.works = true;
break;
case 0x06: refguess = val; break; /* RANGE */
case 0xCC: if(val) next_n = val; break; /* SHEETNAMECS */
case 0xDE: next_n = val; break; /* SHEETNAMELP */
case 0x0F: /* LABEL */
case 0x33: /* STRING */
if(!o.qpro) val[1].v = val[1].v.slice(1);
if((!o.qpro && !o.works || RT == 0x33) && val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1);
if(o.works || o.works2) val[1].v = val[1].v.replace(/\r\n/g, "\n");
/* falls through */
case 0x0D: /* INTEGER */
case 0x0E: /* NUMBER */
@ -8655,6 +8671,7 @@ var WK_ = /*#__PURE__*/(function() {
s[val[0].r][val[0].c] = val[1];
} else s[encode_cell(val[0])] = val[1];
break;
case 0x5405: o.works2 = true; break;
default:
}}, o);
} else if(d[2] == 0x1A || d[2] == 0x0E) {
@ -8663,7 +8680,9 @@ var WK_ = /*#__PURE__*/(function() {
lotushopper(d, function(val, R, RT) { switch(RT) {
case 0xCC: n = val; break; /* SHEETNAMECS */
case 0x16: /* LABEL16 */
val[1].v = val[1].v.slice(1);
if(val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1);
// TODO: R9 appears to encode control codes this way -- verify against other versions
val[1].v = val[1].v.replace(/\x0F./g, function($$) { return String.fromCharCode($$.charCodeAt(1) - 0x20); }).replace(/\r\n/g, "\n");
/* falls through */
case 0x17: /* NUMBER17 */
case 0x18: /* NUMBER18 */
@ -8858,6 +8877,9 @@ var WK_ = /*#__PURE__*/(function() {
o[3] = blob.read_shift(1);
o[0].r = blob.read_shift(2);
blob.l+=2;
} else if(opts.works) { // TODO: verify with more complex works3-4 examples
o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2);
o[2] = blob.read_shift(2);
} else {
o[2] = blob.read_shift(1);
o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2);
@ -8893,6 +8915,18 @@ var WK_ = /*#__PURE__*/(function() {
o.write_shift(1, 0);
return o;
}
/* Parse a STRING record: the cell header is read with parse_cell, the cell is
 * typed as a string, and the text is then read from the rest of the record.
 * When opts.vers is 0x5120 the text is stored as a 1-byte length followed by
 * that many bytes read as 'utf8'; otherwise everything up to the end of the
 * record is read as a 'cstr'.  Returns the array produced by parse_cell. */
function parse_STRING(blob, length, opts) {
	var end = blob.l + length;
	var rec = parse_cell(blob, length, opts);
	var cell = rec[1];
	cell.t = 's';
	if(opts.vers != 0x5120) {
		/* remaining bytes of the record hold the string */
		cell.v = blob.read_shift(end - blob.l, 'cstr');
	} else {
		/* length-prefixed string */
		var nbytes = blob.read_shift(1);
		cell.v = blob.read_shift(nbytes, 'utf8');
	}
	return rec;
}
function parse_INTEGER(blob, length, opts) {
var o = parse_cell(blob, length, opts);
@ -8951,6 +8985,7 @@ var WK_ = /*#__PURE__*/(function() {
0x33: ["FALSE", 0],
0x34: ["TRUE", 0],
0x46: ["LEN", 1],
0x4A: ["CHAR", 1],
0x50: ["SUM", 69],
0x51: ["AVERAGEA", 69],
0x52: ["COUNTA", 69],
@ -9141,8 +9176,8 @@ var WK_ = /*#__PURE__*/(function() {
}
function parse_FORMULA_28(blob, length) {
var o = parse_NUMBER_27(blob, 14);
blob.l += length - 10; /* TODO: formula */
var o = parse_NUMBER_27(blob, 12);
blob.l += length - 12; /* TODO: formula */
return o;
}
@ -9232,7 +9267,7 @@ var WK_ = /*#__PURE__*/(function() {
/*::[*/0x0030/*::]*/: { n:"UNFORMATTED" },
/*::[*/0x0031/*::]*/: { n:"CURSORW12" },
/*::[*/0x0032/*::]*/: { n:"WINDOW" },
/*::[*/0x0033/*::]*/: { n:"STRING", f:parse_LABEL },
/*::[*/0x0033/*::]*/: { n:"STRING", f:parse_STRING },
/*::[*/0x0037/*::]*/: { n:"PASSWORD" },
/*::[*/0x0038/*::]*/: { n:"LOCKED" },
/*::[*/0x003C/*::]*/: { n:"QUERY" },
@ -9256,6 +9291,7 @@ var WK_ = /*#__PURE__*/(function() {
/*::[*/0x0069/*::]*/: { n:"MRANGES??" },
/*::[*/0x00CC/*::]*/: { n:"SHEETNAMECS", f:parse_SHEETNAMECS },
/*::[*/0x00DE/*::]*/: { n:"SHEETNAMELP", f:parse_SHEETNAMELP },
/*::[*/0x00FF/*::]*/: { n:"BOF", f:parseuint16 },
/*::[*/0xFFFF/*::]*/: { n:"" }
};
@ -9384,6 +9420,144 @@ var WK_ = /*#__PURE__*/(function() {
/*::[*/0x6F44/*::]*/: { n:"??" },
/*::[*/0xFFFF/*::]*/: { n:"" }
};
/* QPW uses a different set of record types */
/**
 * Parse a Quattro Pro 9+ (QPW) byte stream into a workbook object.
 *
 * The buffer is consumed as a flat sequence of records.  Each record starts
 * with a 2-byte record type and a 2-byte payload length; the payload is sliced
 * out and parsed on its own cursor, after which the outer cursor is advanced
 * by the payload length (unknown record types are therefore skipped).
 *
 * Records handled here:
 *  - 0x0001 BOF      : payload must start with the 32-bit value 0x39575051
 *  - 0x0002 EOF      : stops parsing
 *  - 0x0407 SST      : shared string table (12 skipped bytes, then entries of
 *                      2-byte count + 1-byte type + string)
 *  - 0x0601 BOS      : begin sheet (index, column/row extents, optional name)
 *  - 0x0602 EOS      : end sheet; sets "!ref" and appends the sheet to the book
 *  - 0x0A01 COL      : selects the current column and widens the sheet range
 *  - 0x0C01 MulCells : run of cells stored down the current column
 *
 * @param d    byte buffer holding the file (prepared with prep_blob here)
 * @param opts parse options; `dense` and `sheetStubs` are the ones read here
 * @returns workbook with `SheetNames` and `Sheets` populated
 * @throws {Error} on a bad BOF signature or an unsupported cell type / delta / repeat
 */
function qpw_to_workbook_buf(d, opts)/*:Workbook*/ {
	prep_blob(d, 0);
	var o = opts || {};
	if(DENSE != null && o.dense == null) o.dense = DENSE;
	var s/*:Worksheet*/ = ((o.dense ? [] : {})/*:any*/);
	var SST = [], sname = "", formulae = [];
	var range = {s:{r:-1,c:-1}, e:{r:-1,c:-1}};
	var cnt = 0, type = 0, C = 0, R = 0;
	var wb = { SheetNames: [], Sheets: {} };
	outer: while(d.l < d.length) {
		/* record header: 2-byte type, 2-byte payload length */
		var RT = d.read_shift(2), length = d.read_shift(2);
		var p = d.slice(d.l, d.l + length);
		prep_blob(p, 0);
		switch(RT) {
			case 0x01: /* BOF */
				if(p.read_shift(4) != 0x39575051) throw new Error("Bad QPW9 BOF!");
				break;
			case 0x02: /* EOF */ break outer;

			/* TODO: The behavior here should be consistent with Numbers: QP Notebook ~ .TN.SheetArchive, QP Sheet ~ .TST.TSTable */
			case 0x0401: /* BON */ break;
			case 0x0402: /* EON */ /* TODO: backfill missing sheets based on BON cnt */ break;

			case 0x0407: { /* SST */
				p.l += 12;
				while(p.l < p.length) {
					cnt = p.read_shift(2);
					type = p.read_shift(1);
					SST.push(p.read_shift(cnt, 'cstr'));
				}
			} break;
			case 0x0408: { /* FORMULAE */
				//p.l += 12;
				//while(p.l < p.length) {
				//	cnt = p.read_shift(2);
				//	formulae.push(p.slice(p.l, p.l + cnt + 1)); p.l += cnt + 1;
				//}
			} break;

			case 0x0601: { /* BOS */
				var sidx = p.read_shift(2);
				s = ((o.dense ? [] : {})/*:any*/);
				/* drop the previous sheet's name: an unnamed sheet must fall back
				   to its index-based name instead of reusing (and colliding with) it */
				sname = "";
				range.s.c = p.read_shift(2);
				range.e.c = p.read_shift(2);
				range.s.r = p.read_shift(4);
				range.e.r = p.read_shift(4);
				p.l += 4;
				/* the name is optional: only present when bytes remain in the payload */
				if(p.l + 2 < p.length) {
					cnt = p.read_shift(2);
					type = p.read_shift(1);
					sname = cnt == 0 ? "" : p.read_shift(cnt, 'cstr');
				}
				/* call the helper directly (as with encode_cell / encode_range below)
				   rather than through a global XLSX object, which need not exist */
				if(!sname) sname = encode_col(sidx);
				/* TODO: backfill empty sheets */
			} break;
			case 0x0602: { /* EOS */
				/* NOTE: QP valid range A1:IV1000000 */
				if(range.s.c > 0xFF || range.s.r > 999999) break;
				if(range.e.c < range.s.c) range.e.c = range.s.c;
				if(range.e.r < range.s.r) range.e.r = range.s.r;
				s["!ref"] = encode_range(range);
				book_append_sheet(wb, s, sname); // TODO: a barrel roll
			} break;

			case 0x0A01: { /* COL (like XLS Row, modulo the layout transposition) */
				C = p.read_shift(2);
				if(range.e.c < C) range.e.c = C;
				if(range.s.c > C) range.s.c = C;
				R = p.read_shift(4);
				if(range.s.r > R) range.s.r = R;
				R = p.read_shift(4);
				if(range.e.r < R) range.e.r = R;
			} break;

			case 0x0C01: { /* MulCells (like XLS MulRK, but takes advantage of common column data patterns) */
				/* starting row and number of cells covered by this record */
				R = p.read_shift(4), cnt = p.read_shift(4);
				if(range.s.r > R) range.s.r = R;
				if(range.e.r < R + cnt - 1) range.e.r = R + cnt - 1;
				while(p.l < p.length) {
					var cell = { t: "z" };
					/* flags: 0x80 -> two extra bytes (skipped), 0x40 -> repeat count follows,
					   0x20 -> delta follows the value, low 5 bits -> cell type */
					var flags = p.read_shift(1);
					if(flags & 0x80) p.l += 2;
					var mul = (flags & 0x40) ? p.read_shift(2) - 1: 0;
					switch(flags & 0x1F) {
						case 1: break;
						case 2: cell = { t: "n", v: p.read_shift(2) }; break;
						case 3: cell = { t: "n", v: p.read_shift(2, 'i') }; break;
						case 5: cell = { t: "n", v: p.read_shift(8, 'f') }; break;
						case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break;
						case 8: cell = { t: "n", v: p.read_shift(8, 'f') }; p.l += 2; /* cell.f = formulae[p.read_shift(4)]; */ p.l += 4; break;
						default: throw new Error("Unrecognized QPW cell type " + (flags & 0x1F));
					}
					var delta = 0;
					if(flags & 0x20) switch(flags & 0x1F) {
						case 2: delta = p.read_shift(2); break;
						case 3: delta = p.read_shift(2, 'i'); break;
						case 7: delta = p.read_shift(2); break;
						default: throw new Error("Unsupported delta for QPW cell type " + (flags & 0x1F));
					}
					/* stub ("z") cells are only stored when sheetStubs is set */
					if(!(!o.sheetStubs && cell.t == "z")) {
						if(Array.isArray(s)) {
							if(!s[R]) s[R] = [];
							s[R][C] = cell;
						} else s[encode_cell({r:R, c:C})] = cell;
					}
					++R; --cnt;
					/* expand the remaining repetitions of this entry down the column */
					while(mul-- > 0 && cnt >= 0) {
						if(flags & 0x20) switch(flags & 0x1F) {
							/* delta mode: each cell is the previous value plus delta (16-bit wraparound) */
							case 2: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; break;
							case 3: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; if(cell.v > 0x7FFF) cell.v -= 0x10000; break;
							case 7: cell = { t: "s", v: SST[type = (type + delta) >>> 0] }; break;
							default: throw new Error("Cannot apply delta for QPW cell type " + (flags & 0x1F));
						} else switch(flags & 0x1F) {
							/* repeat mode: a fresh value of the same type is read for each cell */
							case 1: cell = { t: "z" }; break;
							case 2: cell = { t: "n", v: p.read_shift(2) }; break;
							case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break;
							default: throw new Error("Cannot apply repeat for QPW cell type " + (flags & 0x1F));
						}
						if(!(!o.sheetStubs && cell.t == "z")) {
							if(Array.isArray(s)) {
								if(!s[R]) s[R] = [];
								s[R][C] = cell;
							} else s[encode_cell({r:R, c:C})] = cell;
						}
						++R; --cnt;
					}
				}
			} break;

			default: break;
		}
		/* advance past the payload regardless of how much of it was parsed */
		d.l += length;
	}
	return wb;
}
return {
sheet_to_wk1: sheet_to_wk1,
book_to_wk3: book_to_wk3,
@ -9570,14 +9744,14 @@ function parse_si(x, opts) {
/* 18.4.12 t ST_Xstring (Plaintext String) */
// TODO: is whitespace actually valid here?
if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) {
z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||""));
z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||""), true);
z.r = utf8read(x);
if(html) z.h = escapehtml(z.t);
}
/* 18.4.4 r CT_RElt (Rich Text Run) */
else if((/*y = */x.match(sirregex))) {
z.r = utf8read(x);
z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,"")));
z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,"")), true);
if(html) z.h = rs_to_html(parse_rs(z.r));
}
/* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */
@ -10010,27 +10184,33 @@ var RTF = /*#__PURE__*/(function() {
var o = opts || {};
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var rows = str.match(/\\trowd.*?\\row\b/g);
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\\w+\b/g;
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data[0] == " ") data = data.slice(1);
++C;
if(data.length) {
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: data, t:"s"};
var cell = {v: payload.join(""), t:"s"};
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
@ -10056,7 +10236,7 @@ var RTF = /*#__PURE__*/(function() {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
o.push(" " + (cell.w || (format_cell(cell), cell.w)));
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
@ -14861,7 +15041,7 @@ function parse_ws_xml_sheetviews(data, wb/*:WBWBProps*/) {
// $FlowIgnore
if(+tag.zoomScale) wb.Views[i].zoom = +tag.zoomScale;
// $FlowIgnore
if(parsexmlbool(tag.rightToLeft)) wb.Views[i].RTL = true;
if(tag.rightToLeft && parsexmlbool(tag.rightToLeft)) wb.Views[i].RTL = true;
});
}
function write_ws_xml_sheetviews(ws, opts, idx, wb)/*:string*/ {
@ -14953,7 +15133,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th
if(opts.sheetRows && opts.sheetRows < tagr) continue;
rowobj = {}; rowrite = false;
if(tag.ht) { rowrite = true; rowobj.hpt = parseFloat(tag.ht); rowobj.hpx = pt2px(rowobj.hpt); }
if(tag.hidden == "1") { rowrite = true; rowobj.hidden = true; }
if(tag.hidden && parsexmlbool(tag.hidden)) { rowrite = true; rowobj.hidden = true; }
if(tag.outlineLevel != null) { rowrite = true; rowobj.level = +tag.outlineLevel; }
if(rowrite) rows[tagr-1] = rowobj;
}
@ -14970,7 +15150,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th
if(opts && opts.cellStyles) {
rowobj = {}; rowrite = false;
if(tag.ht) { rowrite = true; rowobj.hpt = parseFloat(tag.ht); rowobj.hpx = pt2px(rowobj.hpt); }
if(tag.hidden == "1") { rowrite = true; rowobj.hidden = true; }
if(tag.hidden && parsexmlbool(tag.hidden)) { rowrite = true; rowobj.hidden = true; }
if(tag.outlineLevel != null) { rowrite = true; rowobj.level = +tag.outlineLevel; }
if(rowrite) rows[tagr-1] = rowobj;
}
@ -15003,7 +15183,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th
if(opts.cellFormula) {
if((cref=d.match(match_f))!= null && /*::cref != null && */cref[1] !== '') {
/* TODO: match against XLSXFutureFunctions */
p.f=unescapexml(utf8read(cref[1])).replace(/\r\n/g, "\n");
p.f=unescapexml(utf8read(cref[1]), true);
if(!opts.xlfn) p.f = _xlfn(p.f);
if(/*::cref != null && cref[0] != null && */cref[0].indexOf('t="array"') > -1) {
p.F = (d.match(refregex)||[])[1];
@ -15057,7 +15237,7 @@ return function parse_ws_xml_data(sdata/*:string*/, s, opts, guess/*:Range*/, th
break;
case 'str':
p.t = "s";
p.v = (p.v!=null) ? utf8read(p.v) : '';
p.v = (p.v!=null) ? unescapexml(utf8read(p.v), true) : '';
if(opts.cellHTML) p.h = escapehtml(p.v);
break;
case 'inlineStr':
@ -16073,6 +16253,8 @@ function parse_ws_bin(data, _opts, idx, rels, wb/*:WBWBProps*/, themes, styles)/
/* TODO: something useful -- this is a stub */
function write_ws_bin_cell(ba/*:BufArray*/, cell/*:Cell*/, R/*:number*/, C/*:number*/, opts, ws/*:Worksheet*/, last_seen/*:boolean*/)/*:boolean*/ {
var o/*:any*/ = ({r:R, c:C}/*:any*/);
if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]);
if(cell.v === undefined) return false;
var vv = "";
switch(cell.t) {
@ -16086,11 +16268,9 @@ function write_ws_bin_cell(ba/*:BufArray*/, cell/*:Cell*/, R/*:number*/, C/*:num
case 'n': case 'e': vv = ''+cell.v; break;
default: vv = cell.v; break;
}
var o/*:any*/ = ({r:R, c:C}/*:any*/);
/* TODO: cell style */
o.s = get_cell_style(opts.cellXfs, cell, opts);
if(cell.l) ws['!links'].push([encode_cell(o), cell.l]);
if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]);
switch(cell.t) {
case 's': case 'str':
if(opts.bookSST) {
@ -23512,8 +23692,8 @@ function write_zip_xlsb(wb/*:Workbook*/, opts/*:WriteOpts*/)/*:ZIP*/ {
opts.Strings = /*::((*/[]/*:: :any):SST)*/; opts.Strings.Count = 0; opts.Strings.Unique = 0;
if(browser_has_Map) opts.revStrings = new Map();
else { opts.revStrings = {}; opts.revStrings.foo = []; delete opts.revStrings.foo; }
var wbext = opts.bookType == "xlsb" ? "bin" : "xml";
var vbafmt = VBAFMTS.indexOf(opts.bookType) > -1;
var wbext = "bin";
var vbafmt = true;
var ct = new_ct();
fix_write_opts(opts = opts || {});
var zip = zip_new();
@ -23713,10 +23893,10 @@ function write_zip_xlsx(wb/*:Workbook*/, opts/*:WriteOpts*/)/*:ZIP*/ {
carr[1].forEach(function(c) { if(c.T == true) needtc = true; });
});
if(needtc) {
cf = "xl/threadedComments/threadedComment" + rId + "." + wbext;
cf = "xl/threadedComments/threadedComment" + rId + ".xml";
zip_add_file(zip, cf, write_tcmnt_xml(comments, people, opts));
ct.threadedcomments.push(cf);
add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + "." + wbext, RELS.TCMNT);
add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + ".xml", RELS.TCMNT);
}
cf = "xl/comments" + rId + "." + wbext;

248
xlsx.js generated

@ -3518,15 +3518,19 @@ var rencoding = evert(encodings);
/* Build the XML text decoder.
 * unescapexml(text, xlsx):
 *   text - value to decode (coerced to a string)
 *   xlsx - when truthy, CRLF pairs in the decoded text are collapsed to LF
 * returns the decoded string */
var unescapexml = (function() {
	/* 22.4.2.4 bstr (Basic String) */
	var encregex = /&(?:quot|apos|gt|lt|amp|#x?([\da-fA-F]+));/ig, coderegex = /_x([\da-fA-F]{4})_/ig;
	/* decode named/numeric entities and _xHHHH_ escapes; CDATA payloads are copied verbatim */
	function raw_unescapexml(text) {
		var s = text + '', i = s.indexOf("<![CDATA[");
		if(i == -1) return s.replace(encregex, function($$, $1) { return encodings[$$]||String.fromCharCode(parseInt($1,$$.indexOf("x")>-1?16:10))||$$; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));});
		/* look for the terminator only after the opener so a stray "]]>" earlier in the text is not mistaken for it */
		var j = s.indexOf("]]>", i + 9);
		/* unterminated section: treat the remainder of the text as the CDATA payload */
		if(j == -1) return raw_unescapexml(s.slice(0, i)) + s.slice(i+9);
		return raw_unescapexml(s.slice(0, i)) + s.slice(i+9,j) + raw_unescapexml(s.slice(j+3));
	}
	return function unescapexml(text, xlsx) {
		var out = raw_unescapexml(text);
		return xlsx ? out.replace(/\r\n/g, "\n") : out;
	};
})();
var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f]/g;
var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f\uFFFE-\uFFFF]/g;
function escapexml(text){
var s = text + '';
return s.replace(decregex, function(y) { return rencoding[y]; }).replace(charegex,function(s) { return "_x" + ("000"+s.charCodeAt(0).toString(16)).slice(-4) + "_";});
@ -3552,12 +3556,14 @@ var xlml_fixstr = (function() {
})();
/* Replace every line break (a CRLF pair, a lone CR or a lone LF) with the character reference for LF */
function xlml_unfixstr(str) {
	var linebreak = /(\r\n|[\r\n])/g;
	var lfref = "\&#10;";
	return str.replace(linebreak, lfref);
}
/* note: xsd:boolean valid values: true / 1 / false / 0 */
/* Interpret an xsd:boolean value.
 * Accepts the numbers 1 / 0, the booleans true / false and the strings
 * '1' / 'true' / '0' / 'false'; every other value yields false. */
function parsexmlbool(value) {
	switch(value) {
		case 1: case true: case '1': case 'true': return true;
		case 0: case false: case '0': case 'false': return false;
		/* anything else is not a valid xsd:boolean; it is reported as false instead of throwing */
	}
	return false;
}
function utf8reada(orig) {
@ -5904,6 +5910,7 @@ function parse_PropertySet(blob, PIDSI) {
if(fail) throw new Error("Read Error: Expected address " + Props[i][1] + ' at ' + blob.l + ' :' + i);
}
if(PIDSI) {
if(Props[i][0] == 0 && Props.length > i+1 && Props[i][1] == Props[i+1][1]) continue; // R9
var piddsi = PIDSI[Props[i][0]];
PropH[piddsi.n] = parse_TypedPropertyValue(blob, piddsi.t, {raw:true});
if(piddsi.p === 'version') PropH[piddsi.n] = String(PropH[piddsi.n] >> 16) + "." + ("0000" + String(PropH[piddsi.n] & 0xFFFF)).slice(-4);
@ -8359,10 +8366,9 @@ var PRN = (function() {
else sep = guess_sep(str.slice(0,1024));
var R = 0, C = 0, v = 0;
var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0, startcc=str.charCodeAt(0);
str = str.replace(/\r\n/mg, "\n");
var _re = o.dateNF != null ? dateNF_regex(o.dateNF) : null;
function finish_cell() {
var s = str.slice(start, end);
var s = str.slice(start, end); if(s.slice(-1) == "\r") s = s.slice(0, -1);
var cell = ({});
if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"');
if(s.length === 0) cell.t = 'z';
@ -8397,7 +8403,11 @@ var PRN = (function() {
}
outer: for(;end < str.length;++end) switch((cc=str.charCodeAt(end))) {
case 0x22: if(startcc === 0x22) instr = !instr; break;
case sepcc: case 0x0a: case 0x0d: if(!instr && finish_cell()) break outer; break;
case 0x0d:
if(instr) break;
if(str.charCodeAt(end+1) == 0x0a) ++end;
/* falls through */
case sepcc: case 0x0a: if(!instr && finish_cell()) break outer; break;
default: break;
}
if(end - start > 0) finish_cell();
@ -8513,6 +8523,7 @@ var WK_ = (function() {
var refguess = {s: {r:0, c:0}, e: {r:0, c:0} };
var sheetRows = o.sheetRows || 0;
if(d[4] == 0x51 && d[5] == 0x50 && d[6] == 0x57) return qpw_to_workbook_buf(d, opts);
if(d[2] == 0x00) {
if(d[3] == 0x08 || d[3] == 0x09) {
if(d.length >= 16 && d[14] == 0x05 && d[15] === 0x6c) throw new Error("Unsupported Works 3 for Mac file");
@ -8526,12 +8537,17 @@ var WK_ = (function() {
o.vers = val;
if(val >= 0x1000) o.qpro = true;
break;
case 0xFF: /* BOF (works 3+) */
o.vers = val;
o.works = true;
break;
case 0x06: refguess = val; break; /* RANGE */
case 0xCC: if(val) next_n = val; break; /* SHEETNAMECS */
case 0xDE: next_n = val; break; /* SHEETNAMELP */
case 0x0F: /* LABEL */
case 0x33: /* STRING */
if(!o.qpro) val[1].v = val[1].v.slice(1);
if((!o.qpro && !o.works || RT == 0x33) && val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1);
if(o.works || o.works2) val[1].v = val[1].v.replace(/\r\n/g, "\n");
/* falls through */
case 0x0D: /* INTEGER */
case 0x0E: /* NUMBER */
@ -8565,6 +8581,7 @@ var WK_ = (function() {
s[val[0].r][val[0].c] = val[1];
} else s[encode_cell(val[0])] = val[1];
break;
case 0x5405: o.works2 = true; break;
default:
}}, o);
} else if(d[2] == 0x1A || d[2] == 0x0E) {
@ -8573,7 +8590,9 @@ var WK_ = (function() {
lotushopper(d, function(val, R, RT) { switch(RT) {
case 0xCC: n = val; break; /* SHEETNAMECS */
case 0x16: /* LABEL16 */
val[1].v = val[1].v.slice(1);
if(val[1].v.charCodeAt(0) < 0x30) val[1].v = val[1].v.slice(1);
// TODO: R9 appears to encode control codes this way -- verify against other versions
val[1].v = val[1].v.replace(/\x0F./g, function($$) { return String.fromCharCode($$.charCodeAt(1) - 0x20); }).replace(/\r\n/g, "\n");
/* falls through */
case 0x17: /* NUMBER17 */
case 0x18: /* NUMBER18 */
@ -8768,6 +8787,9 @@ var WK_ = (function() {
o[3] = blob.read_shift(1);
o[0].r = blob.read_shift(2);
blob.l+=2;
} else if(opts.works) { // TODO: verify with more complex works3-4 examples
o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2);
o[2] = blob.read_shift(2);
} else {
o[2] = blob.read_shift(1);
o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2);
@ -8803,6 +8825,18 @@ var WK_ = (function() {
o.write_shift(1, 0);
return o;
}
/* Parse a STRING record: the cell header is read by parse_cell and the text follows.
 * blob   - record payload with a read cursor `l` and a `read_shift` reader
 * length - payload size in bytes
 * opts   - parser state; `vers` selects the text layout
 * returns the array produced by parse_cell with element 1 turned into a string cell */
function parse_STRING(blob, length, opts) {
	var end = blob.l + length;
	var rec = parse_cell(blob, length, opts);
	var cell = rec[1];
	cell.t = 's';
	if(opts.vers != 0x5120) {
		/* text occupies whatever is left of the record */
		cell.v = blob.read_shift(end - blob.l, 'cstr');
	} else {
		/* version 0x5120: one length byte, then that many bytes of UTF-8 text */
		var nbytes = blob.read_shift(1);
		cell.v = blob.read_shift(nbytes, 'utf8');
	}
	return rec;
}
function parse_INTEGER(blob, length, opts) {
var o = parse_cell(blob, length, opts);
@ -8861,6 +8895,7 @@ var WK_ = (function() {
0x33: ["FALSE", 0],
0x34: ["TRUE", 0],
0x46: ["LEN", 1],
0x4A: ["CHAR", 1],
0x50: ["SUM", 69],
0x51: ["AVERAGEA", 69],
0x52: ["COUNTA", 69],
@ -9051,8 +9086,8 @@ var WK_ = (function() {
}
/* Parse a FORMULA_28 record: the leading 12 bytes are handed to parse_NUMBER_27
 * and the rest of the record (the formula itself) is skipped.
 * NOTE(review): assumes parse_NUMBER_27 advances the cursor by exactly 12 bytes -- confirm
 * blob   - record payload with a read cursor `l`
 * length - payload size in bytes
 * returns whatever parse_NUMBER_27 produced */
function parse_FORMULA_28(blob, length) {
	var o = parse_NUMBER_27(blob, 12);
	blob.l += length - 12; /* TODO: formula */
	return o;
}
@ -9142,7 +9177,7 @@ var WK_ = (function() {
0x0030: { n:"UNFORMATTED" },
0x0031: { n:"CURSORW12" },
0x0032: { n:"WINDOW" },
0x0033: { n:"STRING", f:parse_LABEL },
0x0033: { n:"STRING", f:parse_STRING },
0x0037: { n:"PASSWORD" },
0x0038: { n:"LOCKED" },
0x003C: { n:"QUERY" },
@ -9166,6 +9201,7 @@ var WK_ = (function() {
0x0069: { n:"MRANGES??" },
0x00CC: { n:"SHEETNAMECS", f:parse_SHEETNAMECS },
0x00DE: { n:"SHEETNAMELP", f:parse_SHEETNAMELP },
0x00FF: { n:"BOF", f:parseuint16 },
0xFFFF: { n:"" }
};
@ -9294,6 +9330,144 @@ var WK_ = (function() {
0x6F44: { n:"??" },
0xFFFF: { n:"" }
};
/* QPW uses a different set of record types */
/* Parse a Quattro Pro 9+ (QPW) byte container into a workbook.
 * d    - file bytes; prepared in place with prep_blob so it gains a read cursor
 * opts - parse options; `dense` and `sheetStubs` are honored here
 * returns { SheetNames, Sheets } with one worksheet per BOS/EOS pair
 * throws Error on a bad BOF signature or an unsupported cell encoding */
function qpw_to_workbook_buf(d, opts) {
	prep_blob(d, 0);
	var o = opts || {};
	if(DENSE != null && o.dense == null) o.dense = DENSE;
	var s = ((o.dense ? [] : {}));
	var SST = [], sname = "", formulae = [];
	var range = {s:{r:-1,c:-1}, e:{r:-1,c:-1}};
	var cnt = 0, type = 0, C = 0, R = 0;
	var wb = { SheetNames: [], Sheets: {} };
	/* every record is a 2-byte type and a 2-byte payload length followed by the payload */
	outer: while(d.l < d.length) {
		var RT = d.read_shift(2), length = d.read_shift(2);
		var p = d.slice(d.l, d.l + length);
		prep_blob(p, 0);
		switch(RT) {
			case 0x01: /* BOF */
				/* NOTE(review): 0x39575051 is the bytes "QPW9" assuming read_shift(4) is little-endian */
				if(p.read_shift(4) != 0x39575051) throw new Error("Bad QPW9 BOF!");
				break;
			case 0x02: /* EOF */ break outer;

			/* TODO: The behavior here should be consistent with Numbers: QP Notebook ~ .TN.SheetArchive, QP Sheet ~ .TST.TSTable */
			case 0x0401: /* BON */ break;
			case 0x0402: /* EON */ /* TODO: backfill missing sheets based on BON cnt */ break;

			case 0x0407: { /* SST */
				p.l += 12;
				while(p.l < p.length) {
					cnt = p.read_shift(2);
					type = p.read_shift(1);
					SST.push(p.read_shift(cnt, 'cstr'));
				}
			} break;
			case 0x0408: { /* FORMULAE */
				//p.l += 12;
				//while(p.l < p.length) {
				//	cnt = p.read_shift(2);
				//	formulae.push(p.slice(p.l, p.l + cnt + 1)); p.l += cnt + 1;
				//}
			} break;

			case 0x0601: { /* BOS */
				var sidx = p.read_shift(2);
				s = ((o.dense ? [] : {}));
				range.s.c = p.read_shift(2);
				range.e.c = p.read_shift(2);
				range.s.r = p.read_shift(4);
				range.e.r = p.read_shift(4);
				p.l += 4;
				/* forget the previous sheet's name so an unnamed sheet gets its own default below */
				sname = "";
				if(p.l + 2 < p.length) {
					cnt = p.read_shift(2);
					type = p.read_shift(1);
					sname = cnt == 0 ? "" : p.read_shift(cnt, 'cstr');
				}
				/* default name is the column-style label of the sheet index; use the in-scope helper, not a global XLSX */
				if(!sname) sname = encode_col(sidx);
				/* TODO: backfill empty sheets */
			} break;
			case 0x0602: { /* EOS */
				/* NOTE: QP valid range A1:IV1000000 */
				if(range.s.c > 0xFF || range.s.r > 999999) break;
				if(range.e.c < range.s.c) range.e.c = range.s.c;
				if(range.e.r < range.s.r) range.e.r = range.s.r;
				s["!ref"] = encode_range(range);
				book_append_sheet(wb, s, sname); // TODO: a barrel roll
			} break;

			case 0x0A01: { /* COL (like XLS Row, modulo the layout transposition) */
				C = p.read_shift(2);
				if(range.e.c < C) range.e.c = C;
				if(range.s.c > C) range.s.c = C;
				R = p.read_shift(4);
				if(range.s.r > R) range.s.r = R;
				R = p.read_shift(4);
				if(range.e.r < R) range.e.r = R;
			} break;

			case 0x0C01: { /* MulCells (like XLS MulRK, but takes advantage of common column data patterns) */
				R = p.read_shift(4), cnt = p.read_shift(4);
				if(range.s.r > R) range.s.r = R;
				if(range.e.r < R + cnt - 1) range.e.r = R + cnt - 1;
				while(p.l < p.length) {
					var cell = { t: "z" };
					/* low 5 bits: cell type; 0x20: delta follows; 0x40: repeat count follows; 0x80: two bytes to skip */
					var flags = p.read_shift(1);
					if(flags & 0x80) p.l += 2;
					var mul = (flags & 0x40) ? p.read_shift(2) - 1: 0;
					switch(flags & 0x1F) {
						case 1: break;
						case 2: cell = { t: "n", v: p.read_shift(2) }; break;
						case 3: cell = { t: "n", v: p.read_shift(2, 'i') }; break;
						case 5: cell = { t: "n", v: p.read_shift(8, 'f') }; break;
						case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break;
						case 8: cell = { t: "n", v: p.read_shift(8, 'f') }; p.l += 2; /* cell.f = formulae[p.read_shift(4)]; */ p.l += 4; break;
						default: throw new Error("Unrecognized QPW cell type " + (flags & 0x1F));
					}
					var delta = 0;
					if(flags & 0x20) switch(flags & 0x1F) {
						case 2: delta = p.read_shift(2); break;
						case 3: delta = p.read_shift(2, 'i'); break;
						case 7: delta = p.read_shift(2); break;
						default: throw new Error("Unsupported delta for QPW cell type " + (flags & 0x1F));
					}
					if(!(!o.sheetStubs && cell.t == "z")) {
						if(Array.isArray(s)) {
							if(!s[R]) s[R] = [];
							s[R][C] = cell;
						} else s[encode_cell({r:R, c:C})] = cell;
					}
					++R; --cnt;
					/* remaining cells of the run: either step the previous value by delta or read the next stored value */
					while(mul-- > 0 && cnt >= 0) {
						if(flags & 0x20) switch(flags & 0x1F) {
							case 2: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; break;
							case 3: cell = { t: "n", v: (cell.v + delta) & 0xFFFF }; if(cell.v > 0x7FFF) cell.v -= 0x10000; break;
							case 7: cell = { t: "s", v: SST[type = (type + delta) >>> 0] }; break;
							default: throw new Error("Cannot apply delta for QPW cell type " + (flags & 0x1F));
						} else switch(flags & 0x1F) {
							case 1: cell = { t: "z" }; break;
							case 2: cell = { t: "n", v: p.read_shift(2) }; break;
							case 7: cell = { t: "s", v: SST[type = p.read_shift(4) - 1] }; break;
							default: throw new Error("Cannot apply repeat for QPW cell type " + (flags & 0x1F));
						}
						if(!(!o.sheetStubs && cell.t == "z")) {
							if(Array.isArray(s)) {
								if(!s[R]) s[R] = [];
								s[R][C] = cell;
							} else s[encode_cell({r:R, c:C})] = cell;
						}
						++R; --cnt;
					}
				}
			} break;

			default: break;
		}
		/* p is a separate slice, so the outer cursor is advanced past the payload here */
		d.l += length;
	}
	return wb;
}
return {
sheet_to_wk1: sheet_to_wk1,
book_to_wk3: book_to_wk3,
@ -9480,14 +9654,14 @@ function parse_si(x, opts) {
/* 18.4.12 t ST_Xstring (Plaintext String) */
// TODO: is whitespace actually valid here?
if(x.match(/^\s*<(?:\w+:)?t[^>]*>/)) {
z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||""));
z.t = unescapexml(utf8read(x.slice(x.indexOf(">")+1).split(/<\/(?:\w+:)?t>/)[0]||""), true);
z.r = utf8read(x);
if(html) z.h = escapehtml(z.t);
}
/* 18.4.4 r CT_RElt (Rich Text Run) */
else if((/*y = */x.match(sirregex))) {
z.r = utf8read(x);
z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,"")));
z.t = unescapexml(utf8read((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,"")), true);
if(html) z.h = rs_to_html(parse_rs(z.r));
}
/* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */
@ -9919,27 +10093,33 @@ var RTF = (function() {
var o = opts || {};
var ws = o.dense ? ([]) : ({});
var rows = str.match(/\\trowd.*?\\row\b/g);
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows.length) throw new Error("RTF missing table");
var range = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}});
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\\w+\b/g;
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data[0] == " ") data = data.slice(1);
++C;
if(data.length) {
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: data, t:"s"};
var cell = {v: payload.join(""), t:"s"};
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
@ -9965,7 +10145,7 @@ var RTF = (function() {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
o.push(" " + (cell.w || (format_cell(cell), cell.w)));
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
@ -14767,7 +14947,7 @@ function parse_ws_xml_sheetviews(data, wb) {
// $FlowIgnore
if(+tag.zoomScale) wb.Views[i].zoom = +tag.zoomScale;
// $FlowIgnore
if(parsexmlbool(tag.rightToLeft)) wb.Views[i].RTL = true;
if(tag.rightToLeft && parsexmlbool(tag.rightToLeft)) wb.Views[i].RTL = true;
});
}
function write_ws_xml_sheetviews(ws, opts, idx, wb) {
@ -14859,7 +15039,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
if(opts.sheetRows && opts.sheetRows < tagr) continue;
rowobj = {}; rowrite = false;
if(tag.ht) { rowrite = true; rowobj.hpt = parseFloat(tag.ht); rowobj.hpx = pt2px(rowobj.hpt); }
if(tag.hidden == "1") { rowrite = true; rowobj.hidden = true; }
if(tag.hidden && parsexmlbool(tag.hidden)) { rowrite = true; rowobj.hidden = true; }
if(tag.outlineLevel != null) { rowrite = true; rowobj.level = +tag.outlineLevel; }
if(rowrite) rows[tagr-1] = rowobj;
}
@ -14876,7 +15056,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
if(opts && opts.cellStyles) {
rowobj = {}; rowrite = false;
if(tag.ht) { rowrite = true; rowobj.hpt = parseFloat(tag.ht); rowobj.hpx = pt2px(rowobj.hpt); }
if(tag.hidden == "1") { rowrite = true; rowobj.hidden = true; }
if(tag.hidden && parsexmlbool(tag.hidden)) { rowrite = true; rowobj.hidden = true; }
if(tag.outlineLevel != null) { rowrite = true; rowobj.level = +tag.outlineLevel; }
if(rowrite) rows[tagr-1] = rowobj;
}
@ -14909,7 +15089,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
if(opts.cellFormula) {
if((cref=d.match(match_f))!= null && cref[1] !== '') {
/* TODO: match against XLSXFutureFunctions */
p.f=unescapexml(utf8read(cref[1])).replace(/\r\n/g, "\n");
p.f=unescapexml(utf8read(cref[1]), true);
if(!opts.xlfn) p.f = _xlfn(p.f);
if(cref[0].indexOf('t="array"') > -1) {
p.F = (d.match(refregex)||[])[1];
@ -14963,7 +15143,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess, themes, styles) {
break;
case 'str':
p.t = "s";
p.v = (p.v!=null) ? utf8read(p.v) : '';
p.v = (p.v!=null) ? unescapexml(utf8read(p.v), true) : '';
if(opts.cellHTML) p.h = escapehtml(p.v);
break;
case 'inlineStr':
@ -15978,6 +16158,8 @@ function parse_ws_bin(data, _opts, idx, rels, wb, themes, styles) {
/* TODO: something useful -- this is a stub */
function write_ws_bin_cell(ba, cell, R, C, opts, ws, last_seen) {
var o = ({r:R, c:C});
if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]);
if(cell.v === undefined) return false;
var vv = "";
switch(cell.t) {
@ -15991,11 +16173,9 @@ function write_ws_bin_cell(ba, cell, R, C, opts, ws, last_seen) {
case 'n': case 'e': vv = ''+cell.v; break;
default: vv = cell.v; break;
}
var o = ({r:R, c:C});
/* TODO: cell style */
o.s = get_cell_style(opts.cellXfs, cell, opts);
if(cell.l) ws['!links'].push([encode_cell(o), cell.l]);
if(cell.c) ws['!comments'].push([encode_cell(o), cell.c]);
switch(cell.t) {
case 's': case 'str':
if(opts.bookSST) {
@ -23400,8 +23580,8 @@ function write_zip_xlsb(wb, opts) {
opts.Strings = []; opts.Strings.Count = 0; opts.Strings.Unique = 0;
if(browser_has_Map) opts.revStrings = new Map();
else { opts.revStrings = {}; opts.revStrings.foo = []; delete opts.revStrings.foo; }
var wbext = opts.bookType == "xlsb" ? "bin" : "xml";
var vbafmt = VBAFMTS.indexOf(opts.bookType) > -1;
var wbext = "bin";
var vbafmt = true;
var ct = new_ct();
fix_write_opts(opts = opts || {});
var zip = zip_new();
@ -23599,10 +23779,10 @@ f = "docProps/app.xml";
carr[1].forEach(function(c) { if(c.T == true) needtc = true; });
});
if(needtc) {
cf = "xl/threadedComments/threadedComment" + rId + "." + wbext;
cf = "xl/threadedComments/threadedComment" + rId + ".xml";
zip_add_file(zip, cf, write_tcmnt_xml(comments, people, opts));
ct.threadedcomments.push(cf);
add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + "." + wbext, RELS.TCMNT);
add_rels(wsrels, -1, "../threadedComments/threadedComment" + rId + ".xml", RELS.TCMNT);
}
cf = "xl/comments" + rId + "." + wbext;