/i);
for(j = 0; j < cells.length; ++j) {
var cell = cells[j].trim();
- if(cell.substr(0,3) != "")) > -1) m = m.slice(cc+1);
diff --git a/bits/80_parseods.js b/bits/80_parseods.js
index 3d1b808..096d660 100644
--- a/bits/80_parseods.js
+++ b/bits/80_parseods.js
@@ -289,9 +289,11 @@ var parse_content_xml = (function() {
case 'forms': break; // 12.25.2 13.2
case 'table-column': break; // 9.1.6
+ case 'table-header-rows': break; // 9.1.7
/* TODO: outline levels */
case 'table-row-group': break; // 9.1.9
case 'table-column-group': break; // 9.1.10
+ case 'table-header-columns': break; // 9.1.11
case 'null-date': break; // 9.4.2 TODO: date1904
@@ -299,14 +301,17 @@ var parse_content_xml = (function() {
case 'calculation-settings': break; // 9.4.1
case 'named-expressions': break; // 9.4.11
case 'named-range': break; // 9.4.12
+ case 'label-range': break; // 9.4.9
+ case 'label-ranges': break; // 9.4.10
case 'named-expression': break; // 9.4.13
case 'sort': break; // 9.4.19
case 'sort-by': break; // 9.4.20
case 'sort-groups': break; // 9.4.22
- case 'span': break; //
+ case 'tab': break; // 6.1.4
case 'line-break': break; // 6.1.5
- case 'p': case '文本串':
+ case 'span': break; // 6.1.7
+ case 'p': case '文本串': // 5.1.3
if(Rn[1]==='/') textp = (textp.length > 0 ? textp + "\n" : "") + parse_text_p(str.slice(textpidx,Rn.index), textptag);
else { textptag = parsexmltag(Rn[0], false); textpidx = Rn.index + Rn[0].length; }
break; //
@@ -326,11 +331,14 @@ var parse_content_xml = (function() {
case 'title': case '标题': break; // <*:title> OR
case 'desc': break; // <*:desc>
+ /* 9.2 Advanced Tables */
case 'table-source': break; // 9.2.6
+ case 'scenario': break; // 9.2.6
case 'iteration': break; // 9.4.3
case 'content-validations': break; // 9.4.4
case 'filter': break; // 9.5.2
@@ -385,6 +393,12 @@ var parse_content_xml = (function() {
case 'page-count': break; // TODO
case 'time': break; // TODO
+ /* 9.3 Advanced Table Cells */
+ case 'cell-range-source': break; // 9.3.1 */ {
case 'base64': x = Base64.decode(f.substr(0,24)); break;
case 'binary': x = f; break;
case 'array': return [f[0], f[1], f[2], f[3]];
- default: throw new Error("Unrecognized type " + (o ? o.type : "undefined"));
+ default: throw new Error("Unrecognized type " + (o && o.type || "undefined"));
}
return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)];
}
@@ -29,13 +29,35 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
return parse_zip(zip, o);
}
+function read_plaintext(data/*:string*/, o/*:ParseOpts*/)/*:Workbook*/ { // sniff a text payload: '<' after leading whitespace goes to parse_xlml, anything else to PRN.to_workbook
+ var i = 0; // scan position within data
+ main: while(i < data.length) switch(data.charCodeAt(i)) { // labelled so the default case can leave the loop, not just the switch
+ case 0x0A: case 0x0D: case 0x20: ++i; break; // skip LF, CR and space
+ case 0x3C: return parse_xlml(data.slice(i),o); // '<' seen first: hand the text from that character onward to parse_xlml
+ default: break main; // any other character ends the scan
+ }
+ return PRN.to_workbook(data, o); // fallback receives the full string, leading whitespace included
+}
+
+function read_plaintext_raw(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ { // turn raw input into a string according to o.type, then defer to read_plaintext
+ var str = "", bytes = firstbyte(data, o); // bytes: result of firstbyte, indexed below for the BOM test
+ switch(o.type) {
+ case 'base64': str = Base64.decode(data); break;
+ case 'binary': str = data; break; // data is used as-is
+ case 'buffer': str = data.toString('binary'); break;
+ case 'array': str = cc2str(data); break;
+ default: throw new Error("Unrecognized type " + o.type); // only the four types above are handled
+ }
+ if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str); // EF BB BF is the UTF-8 byte order mark; str is passed through utf8read only when it is present
+ return read_plaintext(str, o);
+}
+
function read_utf16(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ {
var d = data;
if(o.type == 'base64') d = Base64.decode(d);
- d = cptable.utils.decode(1200, d.slice(2));
+ d = cptable.utils.decode(1200, d.slice(2), 'str');
o.type = "binary";
- if(d.charCodeAt(0) == 0x3C) return parse_xlml(d,o);
- return PRN.to_workbook(d, o);
+ return read_plaintext(d, o);
}
function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
@@ -56,6 +78,8 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
case 0xFF: if(n[1] == 0xFE){ return read_utf16(d, o); } break;
case 0x00: if(n[1] == 0x00 && n[2] >= 0x02 && n[3] == 0x00) return WK_.to_workbook(d, o); break;
case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o);
+ case 0x7B: if(n[1] == 0x5C && n[2] == 0x72 && n[3] == 0x74) throw new Error("Unsupported RTF"); break;
+ case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o);
}
if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o);
if(0x20>n[0]||n[0]>0x7F) throw new Error("Unsupported file " + n.join("|"));
diff --git a/demos/systemjs/systemjs.html b/demos/systemjs/systemjs.html
new file mode 100644
index 0000000..2bf6504
--- /dev/null
+++ b/demos/systemjs/systemjs.html
@@ -0,0 +1,18 @@
+
+
+
diff --git a/docbits/80_parseopts.md b/docbits/80_parseopts.md
index 0797a6b..c4c4b1c 100644
--- a/docbits/80_parseopts.md
+++ b/docbits/80_parseopts.md
@@ -5,6 +5,7 @@ The exported `read` and `readFile` functions accept an options argument:
| Option Name | Default | Description |
| :---------- | ------: | :--------------------------------------------------- |
| type | | Input data encoding (see Input Type below) |
+| raw | | If true, plaintext parsing will not parse values ** |
| cellFormula | true | Save formulae to the .f field |
| cellHTML | true | Parse rich text and save HTML to the `.h` field |
| cellNF | false | Save number format string to the `.z` field |
@@ -24,6 +25,8 @@ The exported `read` and `readFile` functions accept an options argument:
- Even if `cellNF` is false, formatted text will be generated and saved to `.w`
- In some cases, sheets may be parsed even if `bookSheets` is false.
+- Excel aggressively tries to interpret values from CSV and other plaintext.
+ This leads to surprising behavior! The `raw` option suppresses value parsing.
- `bookSheets` and `bookProps` combine to give both sets of information
- `Deps` will be an empty object if `bookDeps` is falsy
- `bookFiles` behavior depends on file type:
@@ -70,8 +73,13 @@ file but Excel will know how to handle it. This library applies similar logic:
| `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plaintext |
| `0x49` | Plain Text | SYLK or plaintext |
| `0x54` | Plain Text | DIF or plaintext |
-| `0xFE` | UTF16 Encoded | SpreadsheetML or Flat ODS or UOS1 or plaintext |
+| `0xEF` | UTF8 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
+| `0xFF` | UTF16 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
| `0x00` | Record Stream | Lotus WK\* or Quattro Pro or plaintext |
+| `0x7B` | Plaintext | RTF or plaintext |
+| `0x0A` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
+| `0x0D` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
+| `0x20` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
DBF files are detected based on the first byte as well as the third and fourth
bytes (corresponding to month and day of the file date)
@@ -80,13 +88,17 @@ Plaintext format guessing follows the priority order:
| Format | Test |
|:-------|:--------------------------------------------------------------------|
-| HTML | starts with `
diff --git a/misc/docs/README.md b/misc/docs/README.md
index 1de47f4..39ed37e 100644
--- a/misc/docs/README.md
+++ b/misc/docs/README.md
@@ -1270,6 +1270,7 @@ The exported `read` and `readFile` functions accept an options argument:
| Option Name | Default | Description |
| :---------- | ------: | :--------------------------------------------------- |
| type | | Input data encoding (see Input Type below) |
+| raw | | If true, plaintext parsing will not parse values ** |
| cellFormula | true | Save formulae to the .f field |
| cellHTML | true | Parse rich text and save HTML to the `.h` field |
| cellNF | false | Save number format string to the `.z` field |
@@ -1289,6 +1290,8 @@ The exported `read` and `readFile` functions accept an options argument:
- Even if `cellNF` is false, formatted text will be generated and saved to `.w`
- In some cases, sheets may be parsed even if `bookSheets` is false.
+- Excel aggressively tries to interpret values from CSV and other plaintext.
+ This leads to surprising behavior! The `raw` option suppresses value parsing.
- `bookSheets` and `bookProps` combine to give both sets of information
- `Deps` will be an empty object if `bookDeps` is falsy
- `bookFiles` behavior depends on file type:
@@ -1333,8 +1336,13 @@ file but Excel will know how to handle it. This library applies similar logic:
| `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plaintext |
| `0x49` | Plain Text | SYLK or plaintext |
| `0x54` | Plain Text | DIF or plaintext |
-| `0xFE` | UTF16 Encoded | SpreadsheetML or Flat ODS or UOS1 or plaintext |
+| `0xEF` | UTF8 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
+| `0xFF` | UTF16 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
| `0x00` | Record Stream | Lotus WK\* or Quattro Pro or plaintext |
+| `0x7B` | Plaintext | RTF or plaintext |
+| `0x0A` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
+| `0x0D` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
+| `0x20` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext |
DBF files are detected based on the first byte as well as the third and fourth
bytes (corresponding to month and day of the file date)
@@ -1343,13 +1351,17 @@ Plaintext format guessing follows the priority order:
| Format | Test |
|:-------|:--------------------------------------------------------------------|
-| HTML | starts with ` 4 || vers.Major < 2) throw 'unrecognized major version code: ' + vers.Major;
+ if(vers.Minor != 2) throw new Error('unrecognized minor version code: ' + vers.Minor);
+ if(vers.Major > 4 || vers.Major < 2) throw new Error('unrecognized major version code: ' + vers.Major);
o.Flags = blob.read_shift(4); length -= 4;
var sz = blob.read_shift(4); length -= 4;
o.EncryptionHeader = parse_EncryptionHeader(blob, sz); length -= sz;
@@ -6735,7 +6747,7 @@ function parse_FilePassHeader(blob, length/*:number*/, oo) {
return o;
}
function parse_FilePass(blob, length/*:number*/, opts) {
- var o = { Type: blob.read_shift(2) }; /* wEncryptionType */
+ var o = { Type: opts.biff >= 8 ? blob.read_shift(2) : 0 }; /* wEncryptionType */
if(o.Type) parse_FilePassHeader(blob, length-2, o);
else parse_XORObfuscation(blob, length-2, opts, o);
return o;
@@ -6870,7 +6882,7 @@ function parse_borders(t, styles, themes, opts) {
case '': case '': break;
/* 18.8.4 border CT_Border */
- case '':
+ case '': case '':
border = {};
if (y.diagonalUp) { border.diagonalUp = y.diagonalUp; }
if (y.diagonalDown) { border.diagonalDown = y.diagonalDown; }
@@ -6942,6 +6954,7 @@ function parse_fills(t, styles, themes, opts) {
/* 18.8.24 gradientFill CT_GradientFill */
case '': break;
+ case '': styles.Fills.push(fill); fill = {}; break;
/* 18.8.32 patternFill CT_PatternFill */
@@ -7121,7 +7134,7 @@ function parse_numFmts(t, styles, opts) {
}
}
-function write_numFmts(NF/*:{[n:number]:string}*/, opts) {
+function write_numFmts(NF/*:{[n:number|string]:string}*/, opts) {
var o = [""];
[[5,8],[23,26],[41,44],[/*63*/50,/*66],[164,*/392]].forEach(function(r) {
for(var i = r[0]; i <= r[1]; ++i) if(NF[i] != null) o[o.length] = (writextag('numFmt',null,{numFmtId:i,formatCode:escapexml(NF[i])}));
@@ -11150,10 +11163,10 @@ function write_ws_xml_data(ws/*:Worksheet*/, opts, idx/*:number*/, wb/*:Workbook
}
if(rows) for(; R < rows.length; ++R) {
if(rows && rows[R]) {
- var params = ({r:R+1}/*:any*/);
- var row = rows[R];
+ params = ({r:R+1}/*:any*/);
+ row = rows[R];
if(row.hidden) params.hidden = 1;
- var height = -1;
+ height = -1;
if (row.hpx) height = px2pt(row.hpx);
else if (row.hpt) height = row.hpt;
if (height > -1) { params.ht = height; params.customHeight = 1; }
@@ -12412,13 +12425,13 @@ function parse_wb_xml(data, opts)/*:WorkbookFile*/ {
case '': break;
/* 18.2.1 bookViews CT_BookViews ? */
- case '': case '': break;
+ case '': case '': break;
/* 18.2.30 workbookView CT_BookView + */
case '': break;
/* 18.2.20 sheets CT_Sheets 1 */
- case '': case '': break; // aggregate sheet
+ case '': case '': break; // aggregate sheet
/* 18.2.19 sheet CT_Sheet + */
case '': case '': case '': break;
+ case '': case '': case '': break;
/* 18.2.7 ext CT_Extension + */
case '': pass=false; break;
@@ -13113,7 +13126,7 @@ function xlml_clean_comment(comment/*:any*/) {
}
function xlml_normalize(d)/*:string*/ {
- if(has_buf &&/*::typeof Buffer !== "undefined" && d != null &&*/ Buffer.isBuffer(d)) return d.toString('utf8');
+ if(has_buf &&/*::typeof Buffer !== "undefined" && d != null && d instanceof Buffer &&*/ Buffer.isBuffer(d)) return d.toString('utf8');
if(typeof d === 'string') return d;
throw new Error("Bad input format: expected Buffer or string");
}
@@ -13127,7 +13140,9 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ {
make_ssf(SSF);
var str = debom(xlml_normalize(d));
if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str));
- if(str.substr(0,1000).indexOf("= 0) return HTML_.to_workbook(str, opts);
+ var opening = str.slice(0, 1024).toLowerCase(), ishtml = false;
+ if(opening.indexOf("= 0) ishtml = true; });
+ if(ishtml) return HTML_.to_workbook(str, opts);
var Rn;
var state = [], tmp;
if(DENSE != null && opts.dense == null) opts.dense = DENSE;
@@ -14061,7 +14076,14 @@ function parse_compobj(obj) {
throw new Error("Unsupported Unicode Extension");
}
-/* 2.4.58 Continue logic */
+/*
+ Continue logic for:
+ - 2.4.58 Continue
+ - 2.4.59 ContinueBigName
+ - 2.4.60 ContinueFrt
+ - 2.4.61 ContinueFrt11
+ - 2.4.62 ContinueFrt12
+*/
function slurp(R, blob, length/*:number*/, opts) {
var l = length;
var bufs = [];
@@ -14075,9 +14097,13 @@ function slurp(R, blob, length/*:number*/, opts) {
bufs.push(d);
blob.l += l;
var next = (XLSRecordEnum[__readUInt16LE(blob,blob.l)]);
- while(next != null && next.n === 'Continue') {
+ var start = 0;
+ while(next != null && next.n.slice(0,8) === 'Continue') {
l = __readUInt16LE(blob,blob.l+2);
- bufs.push(blob.slice(blob.l+4,blob.l+4+l));
+ start = blob.l + 4;
+ if(next.n == 'ContinueFrt') start += 4;
+ else if(next.n.slice(0,11) == 'ContinueFrt') start += 12;
+ bufs.push(blob.slice(start,blob.l+4+l));
blob.l += 4+l;
next = (XLSRecordEnum[__readUInt16LE(blob, blob.l)]);
}
@@ -14314,6 +14340,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
if(val.rgce && val.rgce[0] && val.rgce[0][0] && val.rgce[0][0][0] == 'PtgArea3d')
FilterDatabases[val.itab - 1] = { ref: encode_range(val.rgce[0][0][1][2]) };
break;
+ case 'ExternCount': opts.ExternCount = val; break;
case 'ExternSheet':
if(supbooks.length == 0) { supbooks[0] = []; supbooks[0].XTI = []; }
supbooks[supbooks.length - 1].XTI = supbooks[supbooks.length - 1].XTI.concat(val); supbooks.XTI = supbooks.XTI.concat(val); break;
@@ -14593,6 +14620,9 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
case 'SXVI': break; // TODO
case 'SXVDEx': break; // TODO
case 'SxIvd': break; // TODO
+ case 'SXString': break; // TODO
+ case 'Sync': break;
+ case 'Addin': break;
case 'SXDI': break; // TODO
case 'SXLI': break; // TODO
case 'SXEx': break; // TODO
@@ -14699,6 +14729,9 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
case 'DbOrParamQry': break;
case 'DBQueryExt': break;
+ case 'OleDbConn': break;
+ case 'ExtString': break;
+
/* Formatting */
case 'IFmtRecord': break;
case 'CondFmt': case 'CF': case 'CF12': case 'CFEx': break;
@@ -14790,7 +14823,6 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
default: switch(R.n) { /* nested */
/* BIFF5 records */
- case 'ExternCount': break;
case 'TabIdConf': case 'Radar': case 'RadarArea': case 'DropBar': case 'Intl': case 'CoordList': case 'SerAuxErrBar': break;
/* BIFF2-4 records */
@@ -14802,6 +14834,10 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
case 'SCENARIO': case 'DConBin': case 'PicF': case 'DataLabExt':
case 'Lel': case 'BopPop': case 'BopPopCustom': case 'RealTimeData':
case 'Name': break;
+ case 'LHNGraph': case 'FnGroupName': case 'AddMenu': case 'LPr': break;
+ case 'ListObj': case 'ListField': break;
+ case 'RRSort': break;
+ case 'BigName': break;
default: if(options.WTF) throw 'Unrecognized Record ' + R.n;
}}}}
} else blob.l += length;
@@ -14854,7 +14890,7 @@ var CompObjP, SummaryP, WorkbookP/*:Workbook*/;
if(CompObj) CompObjP = parse_compobj(CompObj);
if(options.bookProps && !options.bookSheets) WorkbookP = ({}/*:any*/);
else {
- if(Workbook) WorkbookP = parse_workbook(Workbook.content, options, !!Workbook.find);
+ if(Workbook) WorkbookP = parse_workbook(Workbook.content, options);
/* Quattro Pro 7-8 */
else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options);
/* Quattro Pro 9 */
@@ -16239,20 +16275,23 @@ var HTML_ = (function() {
var opts = _opts || {};
if(DENSE != null && opts.dense == null) opts.dense = DENSE;
var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/);
- var i = str.indexOf(" pair");
- var rows = str.slice(i, j).split(/(:?]*>)/);
+ var mtch = str.match(/");
+ var mtch2 = str.match(/<\/table/i);
+ var i = mtch.index, j = mtch2 && mtch2.index || str.length;
+ var rows = str.slice(i, j).split(/(:?]*>)/i);
var R = -1, C = 0, RS = 0, CS = 0;
var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
var merges = [], midx = 0;
for(i = 0; i < rows.length; ++i) {
var row = rows[i].trim();
- if(row.substr(0,3) == " ");
+ var hd = row.substr(0,3).toLowerCase();
+ if(hd == " /i);
for(j = 0; j < cells.length; ++j) {
var cell = cells[j].trim();
- if(cell.substr(0,3) != "")) > -1) m = m.slice(cc+1);
@@ -16678,9 +16717,11 @@ var parse_content_xml = (function() {
case 'forms': break; // 12.25.2 13.2
case 'table-column': break; // 9.1.6
+ case 'table-header-rows': break; // 9.1.7
/* TODO: outline levels */
case 'table-row-group': break; // 9.1.9
case 'table-column-group': break; // 9.1.10
+ case 'table-header-columns': break; // 9.1.11
case 'null-date': break; // 9.4.2 TODO: date1904
@@ -16688,14 +16729,17 @@ var parse_content_xml = (function() {
case 'calculation-settings': break; // 9.4.1
case 'named-expressions': break; // 9.4.11
case 'named-range': break; // 9.4.12
+ case 'label-range': break; // 9.4.9
+ case 'label-ranges': break; // 9.4.10
case 'named-expression': break; // 9.4.13
case 'sort': break; // 9.4.19
case 'sort-by': break; // 9.4.20
case 'sort-groups': break; // 9.4.22
- case 'span': break; //
+ case 'tab': break; // 6.1.4
case 'line-break': break; // 6.1.5
- case 'p': case '文本串':
+ case 'span': break; // 6.1.7
+ case 'p': case '文本串': // 5.1.3
if(Rn[1]==='/') textp = (textp.length > 0 ? textp + "\n" : "") + parse_text_p(str.slice(textpidx,Rn.index), textptag);
else { textptag = parsexmltag(Rn[0], false); textpidx = Rn.index + Rn[0].length; }
break; //
@@ -16715,11 +16759,14 @@ var parse_content_xml = (function() {
case 'title': case '标题': break; // <*:title> OR
case 'desc': break; // <*:desc>
+ /* 9.2 Advanced Tables */
case 'table-source': break; // 9.2.6
+ case 'scenario': break; // 9.2.6
case 'iteration': break; // 9.4.3
case 'content-validations': break; // 9.4.4
case 'filter': break; // 9.5.2
@@ -16774,6 +16821,12 @@ var parse_content_xml = (function() {
case 'page-count': break; // TODO
case 'time': break; // TODO
+ /* 9.3 Advanced Table Cells */
+ case 'cell-range-source': break; // 9.3.1 */ {
case 'base64': x = Base64.decode(f.substr(0,24)); break;
case 'binary': x = f; break;
case 'array': return [f[0], f[1], f[2], f[3]];
- default: throw new Error("Unrecognized type " + (o ? o.type : "undefined"));
+ default: throw new Error("Unrecognized type " + (o && o.type || "undefined"));
}
return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)];
}
@@ -17470,13 +17524,35 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
return parse_zip(zip, o);
}
+function read_plaintext(data/*:string*/, o/*:ParseOpts*/)/*:Workbook*/ { // sniff a text payload: '<' after leading whitespace goes to parse_xlml, anything else to PRN.to_workbook
+ var i = 0; // scan position within data
+ main: while(i < data.length) switch(data.charCodeAt(i)) { // labelled so the default case can leave the loop, not just the switch
+ case 0x0A: case 0x0D: case 0x20: ++i; break; // skip LF, CR and space
+ case 0x3C: return parse_xlml(data.slice(i),o); // '<' seen first: hand the text from that character onward to parse_xlml
+ default: break main; // any other character ends the scan
+ }
+ return PRN.to_workbook(data, o); // fallback receives the full string, leading whitespace included
+}
+
+function read_plaintext_raw(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ { // turn raw input into a string according to o.type, then defer to read_plaintext
+ var str = "", bytes = firstbyte(data, o); // bytes: result of firstbyte, indexed below for the BOM test
+ switch(o.type) {
+ case 'base64': str = Base64.decode(data); break;
+ case 'binary': str = data; break; // data is used as-is
+ case 'buffer': str = data.toString('binary'); break;
+ case 'array': str = cc2str(data); break;
+ default: throw new Error("Unrecognized type " + o.type); // only the four types above are handled
+ }
+ if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str); // EF BB BF is the UTF-8 byte order mark; str is passed through utf8read only when it is present
+ return read_plaintext(str, o);
+}
+
function read_utf16(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ {
var d = data;
if(o.type == 'base64') d = Base64.decode(d);
- d = cptable.utils.decode(1200, d.slice(2));
+ d = cptable.utils.decode(1200, d.slice(2), 'str');
o.type = "binary";
- if(d.charCodeAt(0) == 0x3C) return parse_xlml(d,o);
- return PRN.to_workbook(d, o);
+ return read_plaintext(d, o);
}
function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
@@ -17497,6 +17573,8 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
case 0xFF: if(n[1] == 0xFE){ return read_utf16(d, o); } break;
case 0x00: if(n[1] == 0x00 && n[2] >= 0x02 && n[3] == 0x00) return WK_.to_workbook(d, o); break;
case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o);
+ case 0x7B: if(n[1] == 0x5C && n[2] == 0x72 && n[3] == 0x74) throw new Error("Unsupported RTF"); break;
+ case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o);
}
if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o);
if(0x20>n[0]||n[0]>0x7F) throw new Error("Unsupported file " + n.join("|"));
diff --git a/xlsx.js b/xlsx.js
index d88df54..a76bb54 100644
--- a/xlsx.js
+++ b/xlsx.js
@@ -95,6 +95,7 @@ var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' &
function new_raw_buf(len) {
/* jshint -W056 */
+ // $FlowIgnore
return new (has_buf ? Buffer : Array)(len);
/* jshint +W056 */
}
@@ -3366,6 +3367,7 @@ function parse_ext_props(data, p) {
break;
case "Named Ranges":
+ case "名前付き一覧":
case "Benannte Bereiche":
case "Navngivne områder":
p.NamedRanges = len;
@@ -3452,6 +3454,7 @@ function parse_cust_props(data, opts) {
p[name] = unescapexml(text);
break;
default:
+ if(type.slice(-1) == '/') break;
if(opts.WTF && typeof console !== 'undefined') console.warn('Unexpected', x, type, toks);
}
} else if(x.substr(0,2) === "") {/* empty */
@@ -3928,7 +3931,13 @@ function parse_XLUnicodeString2(blob, length, opts) {
}
/* [MS-XLS] 2.5.61 ControlInfo */
-var parse_ControlInfo = parsenoop;
+function parse_ControlInfo(blob, length, opts) { // returns [flags, accel]; the length and opts arguments are not used in this body
+ var flags = blob.read_shift(1); // presumably a 1-byte read that advances blob.l -- confirm against read_shift
+ blob.l++; // step over one byte without reading it
+ var accel = blob.read_shift(2); // presumably a 2-byte read -- confirm against read_shift
+ blob.l += 2; // step over two more bytes without reading them
+ return [flags, accel];
+}
/* [MS-OSHARED] 2.3.7.6 URLMoniker TODO: flags */
var parse_URLMoniker = function(blob) {
@@ -3991,7 +4000,7 @@ var parse_Hyperlink = function(blob, length) {
if((flags & 0x0101) === 0x0001) oleMoniker = parse_HyperlinkMoniker(blob, end - blob.l);
if(flags & 0x0008) location = parse_HyperlinkString(blob, end - blob.l);
if(flags & 0x0020) guid = blob.read_shift(16);
- if(flags & 0x0040) fileTime = parse_FILETIME(blob, 8);
+ if(flags & 0x0040) fileTime = parse_FILETIME(blob/*, 8*/);
blob.l = end;
var target = (targetFrameName||moniker||oleMoniker);
if(location) target+="#"+location;
@@ -5621,8 +5630,9 @@ var DIF = (function() {
})();
var PRN = (function() {
- function set_text_arr(data, arr, R, C) {
- if(data === 'TRUE') arr[R][C] = true;
+ function set_text_arr(data, arr, R, C, o) {
+ if(o.raw) arr[R][C] = data;
+ else if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
else if(data === ""){/* empty */}
else if(+data == +data) arr[R][C] = +data;
@@ -5631,6 +5641,7 @@ var PRN = (function() {
}
function prn_to_aoa_str(f, opts) {
+ var o = opts || {};
var arr = ([]);
if(!f || f.length === 0) return arr;
var lines = f.split(/[\r\n]/);
@@ -5647,9 +5658,9 @@ var PRN = (function() {
arr[R] = [];
/* TODO: confirm that widths are always 10 */
var C = 0;
- set_text_arr(lines[R].slice(0, start).trim(), arr, R, C);
+ set_text_arr(lines[R].slice(0, start).trim(), arr, R, C, o);
for(C = 1; C <= (lines[R].length - start)/10 + 1; ++C)
- set_text_arr(lines[R].slice(start+(C-1)*10,start+C*10).trim(),arr,R,C);
+ set_text_arr(lines[R].slice(start+(C-1)*10,start+C*10).trim(),arr,R,C,o);
}
return arr;
}
@@ -5766,6 +5777,7 @@ function read_wb_ID(d, opts) {
return PRN.to_workbook(d, opts);
}
}
+
var WK_ = (function() {
function lotushopper(data, cb, opts) {
if(!data) return;
@@ -6534,8 +6546,8 @@ function parse_EncInfoAgl(blob, vers) { throw new Error("File is password-protec
function parse_RC4CryptoHeader(blob, length) {
var o = {};
var vers = o.EncryptionVersionInfo = parse_CRYPTOVersion(blob, 4); length -= 4;
- if(vers.Minor != 2) throw 'unrecognized minor version code: ' + vers.Minor;
- if(vers.Major > 4 || vers.Major < 2) throw 'unrecognized major version code: ' + vers.Major;
+ if(vers.Minor != 2) throw new Error('unrecognized minor version code: ' + vers.Minor);
+ if(vers.Major > 4 || vers.Major < 2) throw new Error('unrecognized major version code: ' + vers.Major);
o.Flags = blob.read_shift(4); length -= 4;
var sz = blob.read_shift(4); length -= 4;
o.EncryptionHeader = parse_EncryptionHeader(blob, sz); length -= sz;
@@ -6672,7 +6684,7 @@ function parse_FilePassHeader(blob, length, oo) {
return o;
}
function parse_FilePass(blob, length, opts) {
- var o = { Type: blob.read_shift(2) }; /* wEncryptionType */
+ var o = { Type: opts.biff >= 8 ? blob.read_shift(2) : 0 }; /* wEncryptionType */
if(o.Type) parse_FilePassHeader(blob, length-2, o);
else parse_XORObfuscation(blob, length-2, opts, o);
return o;
@@ -6807,7 +6819,7 @@ function parse_borders(t, styles, themes, opts) {
case '': case '': break;
/* 18.8.4 border CT_Border */
- case '':
+ case '': case '':
border = {};
if (y.diagonalUp) { border.diagonalUp = y.diagonalUp; }
if (y.diagonalDown) { border.diagonalDown = y.diagonalDown; }
@@ -6879,6 +6891,7 @@ function parse_fills(t, styles, themes, opts) {
/* 18.8.24 gradientFill CT_GradientFill */
case '': break;
+ case '': styles.Fills.push(fill); fill = {}; break;
/* 18.8.32 patternFill CT_PatternFill */
@@ -11084,10 +11097,10 @@ function write_ws_xml_data(ws, opts, idx, wb, rels) {
}
if(rows) for(; R < rows.length; ++R) {
if(rows && rows[R]) {
- var params = ({r:R+1});
- var row = rows[R];
+ params = ({r:R+1});
+ row = rows[R];
if(row.hidden) params.hidden = 1;
- var height = -1;
+ height = -1;
if (row.hpx) height = px2pt(row.hpx);
else if (row.hpt) height = row.hpt;
if (height > -1) { params.ht = height; params.customHeight = 1; }
@@ -12345,13 +12358,13 @@ function parse_wb_xml(data, opts) {
case '': break;
/* 18.2.1 bookViews CT_BookViews ? */
- case '': case '': break;
+ case '': case '': break;
/* 18.2.30 workbookView CT_BookView + */
case '': break;
/* 18.2.20 sheets CT_Sheets 1 */
- case '': case '': break; // aggregate sheet
+ case '': case '': break; // aggregate sheet
/* 18.2.19 sheet CT_Sheet + */
case '': case '': case '': break;
+ case '': case '': case '': break;
/* 18.2.7 ext CT_Extension + */
case '': pass=false; break;
@@ -13057,7 +13070,9 @@ function parse_xlml_xml(d, _opts) {
make_ssf(SSF);
var str = debom(xlml_normalize(d));
if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str));
- if(str.substr(0,1000).indexOf("= 0) return HTML_.to_workbook(str, opts);
+ var opening = str.slice(0, 1024).toLowerCase(), ishtml = false;
+ if(opening.indexOf("= 0) ishtml = true; });
+ if(ishtml) return HTML_.to_workbook(str, opts);
var Rn;
var state = [], tmp;
if(DENSE != null && opts.dense == null) opts.dense = DENSE;
@@ -13988,7 +14003,14 @@ function parse_compobj(obj) {
throw new Error("Unsupported Unicode Extension");
}
-/* 2.4.58 Continue logic */
+/*
+ Continue logic for:
+ - 2.4.58 Continue
+ - 2.4.59 ContinueBigName
+ - 2.4.60 ContinueFrt
+ - 2.4.61 ContinueFrt11
+ - 2.4.62 ContinueFrt12
+*/
function slurp(R, blob, length, opts) {
var l = length;
var bufs = [];
@@ -14002,9 +14024,13 @@ function slurp(R, blob, length, opts) {
bufs.push(d);
blob.l += l;
var next = (XLSRecordEnum[__readUInt16LE(blob,blob.l)]);
- while(next != null && next.n === 'Continue') {
+ var start = 0;
+ while(next != null && next.n.slice(0,8) === 'Continue') {
l = __readUInt16LE(blob,blob.l+2);
- bufs.push(blob.slice(blob.l+4,blob.l+4+l));
+ start = blob.l + 4;
+ if(next.n == 'ContinueFrt') start += 4;
+ else if(next.n.slice(0,11) == 'ContinueFrt') start += 12;
+ bufs.push(blob.slice(start,blob.l+4+l));
blob.l += 4+l;
next = (XLSRecordEnum[__readUInt16LE(blob, blob.l)]);
}
@@ -14240,6 +14266,7 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
if(val.rgce && val.rgce[0] && val.rgce[0][0] && val.rgce[0][0][0] == 'PtgArea3d')
FilterDatabases[val.itab - 1] = { ref: encode_range(val.rgce[0][0][1][2]) };
break;
+ case 'ExternCount': opts.ExternCount = val; break;
case 'ExternSheet':
if(supbooks.length == 0) { supbooks[0] = []; supbooks[0].XTI = []; }
supbooks[supbooks.length - 1].XTI = supbooks[supbooks.length - 1].XTI.concat(val); supbooks.XTI = supbooks.XTI.concat(val); break;
@@ -14519,6 +14546,9 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
case 'SXVI': break; // TODO
case 'SXVDEx': break; // TODO
case 'SxIvd': break; // TODO
+ case 'SXString': break; // TODO
+ case 'Sync': break;
+ case 'Addin': break;
case 'SXDI': break; // TODO
case 'SXLI': break; // TODO
case 'SXEx': break; // TODO
@@ -14625,6 +14655,9 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
case 'DbOrParamQry': break;
case 'DBQueryExt': break;
+ case 'OleDbConn': break;
+ case 'ExtString': break;
+
/* Formatting */
case 'IFmtRecord': break;
case 'CondFmt': case 'CF': case 'CF12': case 'CFEx': break;
@@ -14716,7 +14749,6 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
default: switch(R.n) { /* nested */
/* BIFF5 records */
- case 'ExternCount': break;
case 'TabIdConf': case 'Radar': case 'RadarArea': case 'DropBar': case 'Intl': case 'CoordList': case 'SerAuxErrBar': break;
/* BIFF2-4 records */
@@ -14728,6 +14760,10 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
case 'SCENARIO': case 'DConBin': case 'PicF': case 'DataLabExt':
case 'Lel': case 'BopPop': case 'BopPopCustom': case 'RealTimeData':
case 'Name': break;
+ case 'LHNGraph': case 'FnGroupName': case 'AddMenu': case 'LPr': break;
+ case 'ListObj': case 'ListField': break;
+ case 'RRSort': break;
+ case 'BigName': break;
default: if(options.WTF) throw 'Unrecognized Record ' + R.n;
}}}}
} else blob.l += length;
@@ -14780,7 +14816,7 @@ var CompObjP, SummaryP, WorkbookP;
if(CompObj) CompObjP = parse_compobj(CompObj);
if(options.bookProps && !options.bookSheets) WorkbookP = ({});
else {
- if(Workbook) WorkbookP = parse_workbook(Workbook.content, options, !!Workbook.find);
+ if(Workbook) WorkbookP = parse_workbook(Workbook.content, options);
/* Quattro Pro 7-8 */
else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options);
/* Quattro Pro 9 */
@@ -16165,20 +16201,23 @@ var HTML_ = (function() {
var opts = _opts || {};
if(DENSE != null && opts.dense == null) opts.dense = DENSE;
var ws = opts.dense ? ([]) : ({});
- var i = str.indexOf(" pair");
- var rows = str.slice(i, j).split(/(:?]*>)/);
+ var mtch = str.match(/");
+ var mtch2 = str.match(/<\/table/i);
+ var i = mtch.index, j = mtch2 && mtch2.index || str.length;
+ var rows = str.slice(i, j).split(/(:?]*>)/i);
var R = -1, C = 0, RS = 0, CS = 0;
var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
var merges = [], midx = 0;
for(i = 0; i < rows.length; ++i) {
var row = rows[i].trim();
- if(row.substr(0,3) == " ");
+ var hd = row.substr(0,3).toLowerCase();
+ if(hd == " /i);
for(j = 0; j < cells.length; ++j) {
var cell = cells[j].trim();
- if(cell.substr(0,3) != "")) > -1) m = m.slice(cc+1);
@@ -16604,9 +16643,11 @@ var parse_content_xml = (function() {
case 'forms': break; // 12.25.2 13.2
case 'table-column': break; // 9.1.6
+ case 'table-header-rows': break; // 9.1.7
/* TODO: outline levels */
case 'table-row-group': break; // 9.1.9
case 'table-column-group': break; // 9.1.10
+ case 'table-header-columns': break; // 9.1.11
case 'null-date': break; // 9.4.2 TODO: date1904
@@ -16614,14 +16655,17 @@ var parse_content_xml = (function() {
case 'calculation-settings': break; // 9.4.1
case 'named-expressions': break; // 9.4.11
case 'named-range': break; // 9.4.12
+ case 'label-range': break; // 9.4.9
+ case 'label-ranges': break; // 9.4.10
case 'named-expression': break; // 9.4.13
case 'sort': break; // 9.4.19
case 'sort-by': break; // 9.4.20
case 'sort-groups': break; // 9.4.22
- case 'span': break; //
+ case 'tab': break; // 6.1.4
case 'line-break': break; // 6.1.5
- case 'p': case '文本串':
+ case 'span': break; // 6.1.7
+ case 'p': case '文本串': // 5.1.3
if(Rn[1]==='/') textp = (textp.length > 0 ? textp + "\n" : "") + parse_text_p(str.slice(textpidx,Rn.index), textptag);
else { textptag = parsexmltag(Rn[0], false); textpidx = Rn.index + Rn[0].length; }
break; //
@@ -16641,11 +16685,14 @@ var parse_content_xml = (function() {
case 'title': case '标题': break; // <*:title> OR
case 'desc': break; // <*:desc>
+ /* 9.2 Advanced Tables */
case 'table-source': break; // 9.2.6
+ case 'scenario': break; // 9.2.6
case 'iteration': break; // 9.4.3
case 'content-validations': break; // 9.4.4
case 'filter': break; // 9.5.2
@@ -16700,6 +16747,12 @@ var parse_content_xml = (function() {
case 'page-count': break; // TODO
case 'time': break; // TODO
+ /* 9.3 Advanced Table Cells */
+ case 'cell-range-source': break; // 9.3.1 = 0x02 && n[3] == 0x00) return WK_.to_workbook(d, o); break;
case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o);
+ case 0x7B: if(n[1] == 0x5C && n[2] == 0x72 && n[3] == 0x74) throw new Error("Unsupported RTF"); break;
+ case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o);
}
if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o);
if(0x20>n[0]||n[0]>0x7F) throw new Error("Unsupported file " + n.join("|"));
| | |