From 807eac273b47aa62ff64717f0d81590eb5eb4424 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Wed, 26 Jul 2017 04:35:28 -0400 Subject: [PATCH] miscellany - systemjs browser example - more precise file type resolution - small corner cases from test corpus - removed neq in tests (fixes #735 h/t @TeamworkGuy2) - package.json devDependencies versions (fixes #740 h/t @the-spyke) --- CHANGELOG.md | 5 ++ README.md | 16 +++- bits/05_buf.js | 1 + bits/34_extprops.js | 1 + bits/35_custprops.js | 1 + bits/38_xlstypes.js | 10 ++- bits/40_harb.js | 11 ++- bits/44_offcrypto.js | 8 +- bits/47_styxml.js | 5 +- bits/67_wsxml.js | 6 +- bits/72_wbxml.js | 6 +- bits/75_xlml.js | 6 +- bits/76_xls.js | 31 +++++-- bits/79_html.js | 17 ++-- bits/80_parseods.js | 20 ++++- bits/85_parsezip.js | 10 +-- bits/86_writezip.js | 1 - bits/87_read.js | 32 ++++++- demos/systemjs/systemjs.html | 18 ++++ docbits/80_parseopts.md | 16 +++- misc/docs/README.md | 16 +++- package.json | 15 ++-- test.js | 4 +- tests/core.js | 4 +- tests/write.js | 7 +- xlsx.flow.js | 166 +++++++++++++++++++++++++---------- xlsx.js | 160 ++++++++++++++++++++++++--------- 27 files changed, 444 insertions(+), 149 deletions(-) create mode 100644 demos/systemjs/systemjs.html diff --git a/CHANGELOG.md b/CHANGELOG.md index 1be9d7a..20e2cec 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,11 @@ but not limited to API changes and file location changes. Minor behavioral changes may not be included if they are not expected to break existing code. +## Unreleased (2017-??-??) + +* XLML/HTML resolution logic looks further into the data stream to decide type +* Errors thrown on suspected RTF files + ## 0.10.5 (2017-06-09) * HTML Table output header/footer should not include `` tag diff --git a/README.md b/README.md index 1a9417d..773f121 100644 --- a/README.md +++ b/README.md @@ -1381,6 +1381,7 @@ The exported `read` and `readFile` functions accept an options argument: | Option Name | Default | Description | | :---------- | ------: | :--------------------------------------------------- | | type | | Input data encoding (see Input Type below) | +| raw | | If true, plaintext parsing will not parse values ** | | cellFormula | true | Save formulae to the .f field | | cellHTML | true | Parse rich text and save HTML to the `.h` field | | cellNF | false | Save number format string to the `.z` field | @@ -1400,6 +1401,8 @@ The exported `read` and `readFile` functions accept an options argument: - Even if `cellNF` is false, formatted text will be generated and saved to `.w` - In some cases, sheets may be parsed even if `bookSheets` is false. +- Excel aggressively tries to interpret values from CSV and other plaintext. + This leads to surprising behavior! The `raw` option suppresses value parsing. - `bookSheets` and `bookProps` combine to give both sets of information - `Deps` will be an empty object if `bookDeps` is falsy - `bookFiles` behavior depends on file type: @@ -1446,8 +1449,13 @@ file but Excel will know how to handle it. This library applies similar logic: | `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plaintext | | `0x49` | Plain Text | SYLK or plaintext | | `0x54` | Plain Text | DIF or plaintext | -| `0xFE` | UTF16 Encoded | SpreadsheetML or Flat ODS or UOS1 or plaintext | +| `0xEF` | UTF8 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | +| `0xFF` | UTF16 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | | `0x00` | Record Stream | Lotus WK\* or Quattro Pro or plaintext | +| `0x0A` | Plaintext | RTF or plaintext | +| `0x0A` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | +| `0x0D` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | +| `0x20` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | DBF files are detected based on the first byte as well as the third and fourth bytes (corresponding to month and day of the file date) @@ -1456,13 +1464,17 @@ Plaintext format guessing follows the priority order: | Format | Test | |:-------|:--------------------------------------------------------------------| -| HTML | starts with `
diff --git a/bits/05_buf.js b/bits/05_buf.js index f6334a9..f08e398 100644 --- a/bits/05_buf.js +++ b/bits/05_buf.js @@ -2,6 +2,7 @@ var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' & function new_raw_buf(len/*:number*/) { /* jshint -W056 */ + // $FlowIgnore return new (has_buf ? Buffer : Array)(len); /* jshint +W056 */ } diff --git a/bits/34_extprops.js b/bits/34_extprops.js index 6f8be96..1462983 100644 --- a/bits/34_extprops.js +++ b/bits/34_extprops.js @@ -57,6 +57,7 @@ function parse_ext_props(data, p) { break; case "Named Ranges": + case "名前付き一覧": case "Benannte Bereiche": case "Navngivne områder": p.NamedRanges = len; diff --git a/bits/35_custprops.js b/bits/35_custprops.js index 8efcd1f..214ad4b 100644 --- a/bits/35_custprops.js +++ b/bits/35_custprops.js @@ -37,6 +37,7 @@ function parse_cust_props(data/*:string*/, opts) { p[name] = unescapexml(text); break; default: + if(type.slice(-1) == '/') break; if(opts.WTF && typeof console !== 'undefined') console.warn('Unexpected', x, type, toks); } } else if(x.substr(0,2) === " 4 || vers.Major < 2) throw 'unrecognized major version code: ' + vers.Major; + if(vers.Minor != 2) throw new Error('unrecognized minor version code: ' + vers.Minor); + if(vers.Major > 4 || vers.Major < 2) throw new Error('unrecognized major version code: ' + vers.Major); o.Flags = blob.read_shift(4); length -= 4; var sz = blob.read_shift(4); length -= 4; o.EncryptionHeader = parse_EncryptionHeader(blob, sz); length -= sz; @@ -294,7 +294,7 @@ function parse_FilePassHeader(blob, length/*:number*/, oo) { return o; } function parse_FilePass(blob, length/*:number*/, opts) { - var o = { Type: blob.read_shift(2) }; /* wEncryptionType */ + var o = { Type: opts.biff >= 8 ? blob.read_shift(2) : 0 }; /* wEncryptionType */ if(o.Type) parse_FilePassHeader(blob, length-2, o); else parse_XORObfuscation(blob, length-2, opts, o); return o; diff --git a/bits/47_styxml.js b/bits/47_styxml.js index 1ad777e..2b49e2e 100644 --- a/bits/47_styxml.js +++ b/bits/47_styxml.js @@ -8,7 +8,7 @@ function parse_borders(t, styles, themes, opts) { case '': case '': break; /* 18.8.4 border CT_Border */ - case '': + case '': case '': border = {}; if (y.diagonalUp) { border.diagonalUp = y.diagonalUp; } if (y.diagonalDown) { border.diagonalDown = y.diagonalDown; } @@ -80,6 +80,7 @@ function parse_fills(t, styles, themes, opts) { /* 18.8.24 gradientFill CT_GradientFill */ case '': break; + case '': styles.Fills.push(fill); fill = {}; break; /* 18.8.32 patternFill CT_PatternFill */ @@ -259,7 +260,7 @@ function parse_numFmts(t, styles, opts) { } } -function write_numFmts(NF/*:{[n:number]:string}*/, opts) { +function write_numFmts(NF/*:{[n:number|string]:string}*/, opts) { var o = [""]; [[5,8],[23,26],[41,44],[/*63*/50,/*66],[164,*/392]].forEach(function(r) { for(var i = r[0]; i <= r[1]; ++i) if(NF[i] != null) o[o.length] = (writextag('numFmt',null,{numFmtId:i,formatCode:escapexml(NF[i])})); diff --git a/bits/67_wsxml.js b/bits/67_wsxml.js index bdc81c5..327cd5c 100644 --- a/bits/67_wsxml.js +++ b/bits/67_wsxml.js @@ -410,10 +410,10 @@ function write_ws_xml_data(ws/*:Worksheet*/, opts, idx/*:number*/, wb/*:Workbook } if(rows) for(; R < rows.length; ++R) { if(rows && rows[R]) { - var params = ({r:R+1}/*:any*/); - var row = rows[R]; + params = ({r:R+1}/*:any*/); + row = rows[R]; if(row.hidden) params.hidden = 1; - var height = -1; + height = -1; if (row.hpx) height = px2pt(row.hpx); else if (row.hpt) height = row.hpt; if (height > -1) { params.ht = height; params.customHeight = 1; } diff --git a/bits/72_wbxml.js b/bits/72_wbxml.js index 95f5123..3953617 100644 --- a/bits/72_wbxml.js +++ b/bits/72_wbxml.js @@ -44,13 +44,13 @@ function parse_wb_xml(data, opts)/*:WorkbookFile*/ { case '': break; /* 18.2.1 bookViews CT_BookViews ? */ - case '': case '': break; + case '': case '': break; /* 18.2.30 workbookView CT_BookView + */ case '': break; /* 18.2.20 sheets CT_Sheets 1 */ - case '': case '': break; // aggregate sheet + case '': case '': break; // aggregate sheet /* 18.2.19 sheet CT_Sheet + */ case '': case '': case '': break; + case '': case '': case '': break; /* 18.2.7 ext CT_Extension + */ case '': pass=false; break; diff --git a/bits/75_xlml.js b/bits/75_xlml.js index a6bcdbc..5646d3f 100644 --- a/bits/75_xlml.js +++ b/bits/75_xlml.js @@ -161,7 +161,7 @@ function xlml_clean_comment(comment/*:any*/) { } function xlml_normalize(d)/*:string*/ { - if(has_buf &&/*::typeof Buffer !== "undefined" && d != null &&*/ Buffer.isBuffer(d)) return d.toString('utf8'); + if(has_buf &&/*::typeof Buffer !== "undefined" && d != null && d instanceof Buffer &&*/ Buffer.isBuffer(d)) return d.toString('utf8'); if(typeof d === 'string') return d; throw new Error("Bad input format: expected Buffer or string"); } @@ -175,7 +175,9 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ { make_ssf(SSF); var str = debom(xlml_normalize(d)); if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str)); - if(str.substr(0,1000).indexOf("= 0) return HTML_.to_workbook(str, opts); + var opening = str.slice(0, 1024).toLowerCase(), ishtml = false; + if(opening.indexOf("= 0) ishtml = true; }); + if(ishtml) return HTML_.to_workbook(str, opts); var Rn; var state = [], tmp; if(DENSE != null && opts.dense == null) opts.dense = DENSE; diff --git a/bits/76_xls.js b/bits/76_xls.js index e948151..6dd5b97 100644 --- a/bits/76_xls.js +++ b/bits/76_xls.js @@ -25,7 +25,14 @@ function parse_compobj(obj) { throw new Error("Unsupported Unicode Extension"); } -/* 2.4.58 Continue logic */ +/* + Continue logic for: + - 2.4.58 Continue + - 2.4.59 ContinueBigName + - 2.4.60 ContinueFrt + - 2.4.61 ContinueFrt11 + - 2.4.62 ContinueFrt12 +*/ function slurp(R, blob, length/*:number*/, opts) { var l = length; var bufs = []; @@ -39,9 +46,13 @@ function slurp(R, blob, length/*:number*/, opts) { bufs.push(d); blob.l += l; var next = (XLSRecordEnum[__readUInt16LE(blob,blob.l)]); - while(next != null && next.n === 'Continue') { + var start = 0; + while(next != null && next.n.slice(0,8) === 'Continue') { l = __readUInt16LE(blob,blob.l+2); - bufs.push(blob.slice(blob.l+4,blob.l+4+l)); + start = blob.l + 4; + if(next.n == 'ContinueFrt') start += 4; + else if(next.n.slice(0,11) == 'ContinueFrt') start += 12; + bufs.push(blob.slice(start,blob.l+4+l)); blob.l += 4+l; next = (XLSRecordEnum[__readUInt16LE(blob, blob.l)]); } @@ -278,6 +289,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { if(val.rgce && val.rgce[0] && val.rgce[0][0] && val.rgce[0][0][0] == 'PtgArea3d') FilterDatabases[val.itab - 1] = { ref: encode_range(val.rgce[0][0][1][2]) }; break; + case 'ExternCount': opts.ExternCount = val; break; case 'ExternSheet': if(supbooks.length == 0) { supbooks[0] = []; supbooks[0].XTI = []; } supbooks[supbooks.length - 1].XTI = supbooks[supbooks.length - 1].XTI.concat(val); supbooks.XTI = supbooks.XTI.concat(val); break; @@ -557,6 +569,9 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { case 'SXVI': break; // TODO case 'SXVDEx': break; // TODO case 'SxIvd': break; // TODO + case 'SXString': break; // TODO + case 'Sync': break; + case 'Addin': break; case 'SXDI': break; // TODO case 'SXLI': break; // TODO case 'SXEx': break; // TODO @@ -663,6 +678,9 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { case 'DbOrParamQry': break; case 'DBQueryExt': break; + case 'OleDbConn': break; + case 'ExtString': break; + /* Formatting */ case 'IFmtRecord': break; case 'CondFmt': case 'CF': case 'CF12': case 'CFEx': break; @@ -754,7 +772,6 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { default: switch(R.n) { /* nested */ /* BIFF5 records */ - case 'ExternCount': break; case 'TabIdConf': case 'Radar': case 'RadarArea': case 'DropBar': case 'Intl': case 'CoordList': case 'SerAuxErrBar': break; /* BIFF2-4 records */ @@ -766,6 +783,10 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { case 'SCENARIO': case 'DConBin': case 'PicF': case 'DataLabExt': case 'Lel': case 'BopPop': case 'BopPopCustom': case 'RealTimeData': case 'Name': break; + case 'LHNGraph': case 'FnGroupName': case 'AddMenu': case 'LPr': break; + case 'ListObj': case 'ListField': break; + case 'RRSort': break; + case 'BigName': break; default: if(options.WTF) throw 'Unrecognized Record ' + R.n; }}}} } else blob.l += length; @@ -818,7 +839,7 @@ var CompObjP, SummaryP, WorkbookP/*:Workbook*/; if(CompObj) CompObjP = parse_compobj(CompObj); if(options.bookProps && !options.bookSheets) WorkbookP = ({}/*:any*/); else { - if(Workbook) WorkbookP = parse_workbook(Workbook.content, options, !!Workbook.find); + if(Workbook) WorkbookP = parse_workbook(Workbook.content, options); /* Quattro Pro 7-8 */ else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options); /* Quattro Pro 9 */ diff --git a/bits/79_html.js b/bits/79_html.js index be0914c..9237582 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -4,20 +4,23 @@ var HTML_ = (function() { var opts = _opts || {}; if(DENSE != null && opts.dense == null) opts.dense = DENSE; var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/); - var i = str.indexOf(" /
pair"); - var rows = str.slice(i, j).split(/(:?]*>)/); + var mtch = str.match(/"); + var mtch2 = str.match(/<\/table/i); + var i = mtch.index, j = mtch2 && mtch2.index || str.length; + var rows = str.slice(i, j).split(/(:?]*>)/i); var R = -1, C = 0, RS = 0, CS = 0; var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}}; var merges = [], midx = 0; for(i = 0; i < rows.length; ++i) { var row = rows[i].trim(); - if(row.substr(0,3) == ""); + var hd = row.substr(0,3).toLowerCase(); + if(hd == "/i); for(j = 0; j < cells.length; ++j) { var cell = cells[j].trim(); - if(cell.substr(0,3) != "")) > -1) m = m.slice(cc+1); diff --git a/bits/80_parseods.js b/bits/80_parseods.js index 3d1b808..096d660 100644 --- a/bits/80_parseods.js +++ b/bits/80_parseods.js @@ -289,9 +289,11 @@ var parse_content_xml = (function() { case 'forms': break; // 12.25.2 13.2 case 'table-column': break; // 9.1.6 + case 'table-header-rows': break; // 9.1.7 /* TODO: outline levels */ case 'table-row-group': break; // 9.1.9 case 'table-column-group': break; // 9.1.10 + case 'table-header-columns': break; // 9.1.11 case 'null-date': break; // 9.4.2 TODO: date1904 @@ -299,14 +301,17 @@ var parse_content_xml = (function() { case 'calculation-settings': break; // 9.4.1 case 'named-expressions': break; // 9.4.11 case 'named-range': break; // 9.4.12 + case 'label-range': break; // 9.4.9 + case 'label-ranges': break; // 9.4.10 case 'named-expression': break; // 9.4.13 case 'sort': break; // 9.4.19 case 'sort-by': break; // 9.4.20 case 'sort-groups': break; // 9.4.22 - case 'span': break; // + case 'tab': break; // 6.1.4 case 'line-break': break; // 6.1.5 - case 'p': case '文本串': + case 'span': break; // 6.1.7 + case 'p': case '文本串': // 5.1.3 if(Rn[1]==='/') textp = (textp.length > 0 ? textp + "\n" : "") + parse_text_p(str.slice(textpidx,Rn.index), textptag); else { textptag = parsexmltag(Rn[0], false); textpidx = Rn.index + Rn[0].length; } break; // @@ -326,11 +331,14 @@ var parse_content_xml = (function() { case 'title': case '标题': break; // <*:title> OR case 'desc': break; // <*:desc> + /* 9.2 Advanced Tables */ case 'table-source': break; // 9.2.6 + case 'scenario': break; // 9.2.6 case 'iteration': break; // 9.4.3 case 'content-validations': break; // 9.4.4 case 'filter': break; // 9.5.2 @@ -385,6 +393,12 @@ var parse_content_xml = (function() { case 'page-count': break; // TODO case 'time': break; // TODO + /* 9.3 Advanced Table Cells */ + case 'cell-range-source': break; // 9.3.1 */ { case 'base64': x = Base64.decode(f.substr(0,24)); break; case 'binary': x = f; break; case 'array': return [f[0], f[1], f[2], f[3]]; - default: throw new Error("Unrecognized type " + (o ? o.type : "undefined")); + default: throw new Error("Unrecognized type " + (o && o.type || "undefined")); } return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)]; } @@ -29,13 +29,35 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { return parse_zip(zip, o); } +function read_plaintext(data/*:string*/, o/*:ParseOpts*/)/*:Workbook*/ { + var i = 0; + main: while(i < data.length) switch(data.charCodeAt(i)) { + case 0x0A: case 0x0D: case 0x20: ++i; break; + case 0x3C: return parse_xlml(data.slice(i),o); + default: break main; + } + return PRN.to_workbook(data, o); +} + +function read_plaintext_raw(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ { + var str = "", bytes = firstbyte(data, o); + switch(o.type) { + case 'base64': str = Base64.decode(data); break; + case 'binary': str = data; break; + case 'buffer': str = data.toString('binary'); break; + case 'array': str = cc2str(data); break; + default: throw new Error("Unrecognized type " + o.type); + } + if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str); + return read_plaintext(str, o); +} + function read_utf16(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ { var d = data; if(o.type == 'base64') d = Base64.decode(d); - d = cptable.utils.decode(1200, d.slice(2)); + d = cptable.utils.decode(1200, d.slice(2), 'str'); o.type = "binary"; - if(d.charCodeAt(0) == 0x3C) return parse_xlml(d,o); - return PRN.to_workbook(d, o); + return read_plaintext(d, o); } function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { @@ -56,6 +78,8 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { case 0xFF: if(n[1] == 0xFE){ return read_utf16(d, o); } break; case 0x00: if(n[1] == 0x00 && n[2] >= 0x02 && n[3] == 0x00) return WK_.to_workbook(d, o); break; case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o); + case 0x7B: if(n[1] == 0x5C && n[2] == 0x72 && n[3] == 0x74) throw new Error("Unsupported RTF"); break; + case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o); } if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o); if(0x20>n[0]||n[0]>0x7F) throw new Error("Unsupported file " + n.join("|")); diff --git a/demos/systemjs/systemjs.html b/demos/systemjs/systemjs.html new file mode 100644 index 0000000..2bf6504 --- /dev/null +++ b/demos/systemjs/systemjs.html @@ -0,0 +1,18 @@ + + + diff --git a/docbits/80_parseopts.md b/docbits/80_parseopts.md index 0797a6b..c4c4b1c 100644 --- a/docbits/80_parseopts.md +++ b/docbits/80_parseopts.md @@ -5,6 +5,7 @@ The exported `read` and `readFile` functions accept an options argument: | Option Name | Default | Description | | :---------- | ------: | :--------------------------------------------------- | | type | | Input data encoding (see Input Type below) | +| raw | | If true, plaintext parsing will not parse values ** | | cellFormula | true | Save formulae to the .f field | | cellHTML | true | Parse rich text and save HTML to the `.h` field | | cellNF | false | Save number format string to the `.z` field | @@ -24,6 +25,8 @@ The exported `read` and `readFile` functions accept an options argument: - Even if `cellNF` is false, formatted text will be generated and saved to `.w` - In some cases, sheets may be parsed even if `bookSheets` is false. +- Excel aggressively tries to interpret values from CSV and other plaintext. + This leads to surprising behavior! The `raw` option suppresses value parsing. - `bookSheets` and `bookProps` combine to give both sets of information - `Deps` will be an empty object if `bookDeps` is falsy - `bookFiles` behavior depends on file type: @@ -70,8 +73,13 @@ file but Excel will know how to handle it. This library applies similar logic: | `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plaintext | | `0x49` | Plain Text | SYLK or plaintext | | `0x54` | Plain Text | DIF or plaintext | -| `0xFE` | UTF16 Encoded | SpreadsheetML or Flat ODS or UOS1 or plaintext | +| `0xEF` | UTF8 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | +| `0xFF` | UTF16 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | | `0x00` | Record Stream | Lotus WK\* or Quattro Pro or plaintext | +| `0x0A` | Plaintext | RTF or plaintext | +| `0x0A` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | +| `0x0D` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | +| `0x20` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | DBF files are detected based on the first byte as well as the third and fourth bytes (corresponding to month and day of the file date) @@ -80,13 +88,17 @@ Plaintext format guessing follows the priority order: | Format | Test | |:-------|:--------------------------------------------------------------------| -| HTML | starts with `
diff --git a/misc/docs/README.md b/misc/docs/README.md index 1de47f4..39ed37e 100644 --- a/misc/docs/README.md +++ b/misc/docs/README.md @@ -1270,6 +1270,7 @@ The exported `read` and `readFile` functions accept an options argument: | Option Name | Default | Description | | :---------- | ------: | :--------------------------------------------------- | | type | | Input data encoding (see Input Type below) | +| raw | | If true, plaintext parsing will not parse values ** | | cellFormula | true | Save formulae to the .f field | | cellHTML | true | Parse rich text and save HTML to the `.h` field | | cellNF | false | Save number format string to the `.z` field | @@ -1289,6 +1290,8 @@ The exported `read` and `readFile` functions accept an options argument: - Even if `cellNF` is false, formatted text will be generated and saved to `.w` - In some cases, sheets may be parsed even if `bookSheets` is false. +- Excel aggressively tries to interpret values from CSV and other plaintext. + This leads to surprising behavior! The `raw` option suppresses value parsing. - `bookSheets` and `bookProps` combine to give both sets of information - `Deps` will be an empty object if `bookDeps` is falsy - `bookFiles` behavior depends on file type: @@ -1333,8 +1336,13 @@ file but Excel will know how to handle it. This library applies similar logic: | `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plaintext | | `0x49` | Plain Text | SYLK or plaintext | | `0x54` | Plain Text | DIF or plaintext | -| `0xFE` | UTF16 Encoded | SpreadsheetML or Flat ODS or UOS1 or plaintext | +| `0xEF` | UTF8 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | +| `0xFF` | UTF16 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | | `0x00` | Record Stream | Lotus WK\* or Quattro Pro or plaintext | +| `0x0A` | Plaintext | RTF or plaintext | +| `0x0A` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | +| `0x0D` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | +| `0x20` | Plaintext | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | DBF files are detected based on the first byte as well as the third and fourth bytes (corresponding to month and day of the file date) @@ -1343,13 +1351,17 @@ Plaintext format guessing follows the priority order: | Format | Test | |:-------|:--------------------------------------------------------------------| -| HTML | starts with ` 4 || vers.Major < 2) throw 'unrecognized major version code: ' + vers.Major; + if(vers.Minor != 2) throw new Error('unrecognized minor version code: ' + vers.Minor); + if(vers.Major > 4 || vers.Major < 2) throw new Error('unrecognized major version code: ' + vers.Major); o.Flags = blob.read_shift(4); length -= 4; var sz = blob.read_shift(4); length -= 4; o.EncryptionHeader = parse_EncryptionHeader(blob, sz); length -= sz; @@ -6735,7 +6747,7 @@ function parse_FilePassHeader(blob, length/*:number*/, oo) { return o; } function parse_FilePass(blob, length/*:number*/, opts) { - var o = { Type: blob.read_shift(2) }; /* wEncryptionType */ + var o = { Type: opts.biff >= 8 ? blob.read_shift(2) : 0 }; /* wEncryptionType */ if(o.Type) parse_FilePassHeader(blob, length-2, o); else parse_XORObfuscation(blob, length-2, opts, o); return o; @@ -6870,7 +6882,7 @@ function parse_borders(t, styles, themes, opts) { case '': case '': break; /* 18.8.4 border CT_Border */ - case '': + case '': case '': border = {}; if (y.diagonalUp) { border.diagonalUp = y.diagonalUp; } if (y.diagonalDown) { border.diagonalDown = y.diagonalDown; } @@ -6942,6 +6954,7 @@ function parse_fills(t, styles, themes, opts) { /* 18.8.24 gradientFill CT_GradientFill */ case '': break; + case '': styles.Fills.push(fill); fill = {}; break; /* 18.8.32 patternFill CT_PatternFill */ @@ -7121,7 +7134,7 @@ function parse_numFmts(t, styles, opts) { } } -function write_numFmts(NF/*:{[n:number]:string}*/, opts) { +function write_numFmts(NF/*:{[n:number|string]:string}*/, opts) { var o = [""]; [[5,8],[23,26],[41,44],[/*63*/50,/*66],[164,*/392]].forEach(function(r) { for(var i = r[0]; i <= r[1]; ++i) if(NF[i] != null) o[o.length] = (writextag('numFmt',null,{numFmtId:i,formatCode:escapexml(NF[i])})); @@ -11150,10 +11163,10 @@ function write_ws_xml_data(ws/*:Worksheet*/, opts, idx/*:number*/, wb/*:Workbook } if(rows) for(; R < rows.length; ++R) { if(rows && rows[R]) { - var params = ({r:R+1}/*:any*/); - var row = rows[R]; + params = ({r:R+1}/*:any*/); + row = rows[R]; if(row.hidden) params.hidden = 1; - var height = -1; + height = -1; if (row.hpx) height = px2pt(row.hpx); else if (row.hpt) height = row.hpt; if (height > -1) { params.ht = height; params.customHeight = 1; } @@ -12412,13 +12425,13 @@ function parse_wb_xml(data, opts)/*:WorkbookFile*/ { case '': break; /* 18.2.1 bookViews CT_BookViews ? */ - case '': case '': break; + case '': case '': break; /* 18.2.30 workbookView CT_BookView + */ case '': break; /* 18.2.20 sheets CT_Sheets 1 */ - case '': case '': break; // aggregate sheet + case '': case '': break; // aggregate sheet /* 18.2.19 sheet CT_Sheet + */ case '': case '': case '': break; + case '': case '': case '': break; /* 18.2.7 ext CT_Extension + */ case '': pass=false; break; @@ -13113,7 +13126,7 @@ function xlml_clean_comment(comment/*:any*/) { } function xlml_normalize(d)/*:string*/ { - if(has_buf &&/*::typeof Buffer !== "undefined" && d != null &&*/ Buffer.isBuffer(d)) return d.toString('utf8'); + if(has_buf &&/*::typeof Buffer !== "undefined" && d != null && d instanceof Buffer &&*/ Buffer.isBuffer(d)) return d.toString('utf8'); if(typeof d === 'string') return d; throw new Error("Bad input format: expected Buffer or string"); } @@ -13127,7 +13140,9 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ { make_ssf(SSF); var str = debom(xlml_normalize(d)); if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str)); - if(str.substr(0,1000).indexOf("= 0) return HTML_.to_workbook(str, opts); + var opening = str.slice(0, 1024).toLowerCase(), ishtml = false; + if(opening.indexOf("= 0) ishtml = true; }); + if(ishtml) return HTML_.to_workbook(str, opts); var Rn; var state = [], tmp; if(DENSE != null && opts.dense == null) opts.dense = DENSE; @@ -14061,7 +14076,14 @@ function parse_compobj(obj) { throw new Error("Unsupported Unicode Extension"); } -/* 2.4.58 Continue logic */ +/* + Continue logic for: + - 2.4.58 Continue + - 2.4.59 ContinueBigName + - 2.4.60 ContinueFrt + - 2.4.61 ContinueFrt11 + - 2.4.62 ContinueFrt12 +*/ function slurp(R, blob, length/*:number*/, opts) { var l = length; var bufs = []; @@ -14075,9 +14097,13 @@ function slurp(R, blob, length/*:number*/, opts) { bufs.push(d); blob.l += l; var next = (XLSRecordEnum[__readUInt16LE(blob,blob.l)]); - while(next != null && next.n === 'Continue') { + var start = 0; + while(next != null && next.n.slice(0,8) === 'Continue') { l = __readUInt16LE(blob,blob.l+2); - bufs.push(blob.slice(blob.l+4,blob.l+4+l)); + start = blob.l + 4; + if(next.n == 'ContinueFrt') start += 4; + else if(next.n.slice(0,11) == 'ContinueFrt') start += 12; + bufs.push(blob.slice(start,blob.l+4+l)); blob.l += 4+l; next = (XLSRecordEnum[__readUInt16LE(blob, blob.l)]); } @@ -14314,6 +14340,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { if(val.rgce && val.rgce[0] && val.rgce[0][0] && val.rgce[0][0][0] == 'PtgArea3d') FilterDatabases[val.itab - 1] = { ref: encode_range(val.rgce[0][0][1][2]) }; break; + case 'ExternCount': opts.ExternCount = val; break; case 'ExternSheet': if(supbooks.length == 0) { supbooks[0] = []; supbooks[0].XTI = []; } supbooks[supbooks.length - 1].XTI = supbooks[supbooks.length - 1].XTI.concat(val); supbooks.XTI = supbooks.XTI.concat(val); break; @@ -14593,6 +14620,9 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { case 'SXVI': break; // TODO case 'SXVDEx': break; // TODO case 'SxIvd': break; // TODO + case 'SXString': break; // TODO + case 'Sync': break; + case 'Addin': break; case 'SXDI': break; // TODO case 'SXLI': break; // TODO case 'SXEx': break; // TODO @@ -14699,6 +14729,9 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { case 'DbOrParamQry': break; case 'DBQueryExt': break; + case 'OleDbConn': break; + case 'ExtString': break; + /* Formatting */ case 'IFmtRecord': break; case 'CondFmt': case 'CF': case 'CF12': case 'CFEx': break; @@ -14790,7 +14823,6 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { default: switch(R.n) { /* nested */ /* BIFF5 records */ - case 'ExternCount': break; case 'TabIdConf': case 'Radar': case 'RadarArea': case 'DropBar': case 'Intl': case 'CoordList': case 'SerAuxErrBar': break; /* BIFF2-4 records */ @@ -14802,6 +14834,10 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { case 'SCENARIO': case 'DConBin': case 'PicF': case 'DataLabExt': case 'Lel': case 'BopPop': case 'BopPopCustom': case 'RealTimeData': case 'Name': break; + case 'LHNGraph': case 'FnGroupName': case 'AddMenu': case 'LPr': break; + case 'ListObj': case 'ListField': break; + case 'RRSort': break; + case 'BigName': break; default: if(options.WTF) throw 'Unrecognized Record ' + R.n; }}}} } else blob.l += length; @@ -14854,7 +14890,7 @@ var CompObjP, SummaryP, WorkbookP/*:Workbook*/; if(CompObj) CompObjP = parse_compobj(CompObj); if(options.bookProps && !options.bookSheets) WorkbookP = ({}/*:any*/); else { - if(Workbook) WorkbookP = parse_workbook(Workbook.content, options, !!Workbook.find); + if(Workbook) WorkbookP = parse_workbook(Workbook.content, options); /* Quattro Pro 7-8 */ else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options); /* Quattro Pro 9 */ @@ -16239,20 +16275,23 @@ var HTML_ = (function() { var opts = _opts || {}; if(DENSE != null && opts.dense == null) opts.dense = DENSE; var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/); - var i = str.indexOf(" /
pair"); - var rows = str.slice(i, j).split(/(:?]*>)/); + var mtch = str.match(/"); + var mtch2 = str.match(/<\/table/i); + var i = mtch.index, j = mtch2 && mtch2.index || str.length; + var rows = str.slice(i, j).split(/(:?]*>)/i); var R = -1, C = 0, RS = 0, CS = 0; var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}}; var merges = [], midx = 0; for(i = 0; i < rows.length; ++i) { var row = rows[i].trim(); - if(row.substr(0,3) == ""); + var hd = row.substr(0,3).toLowerCase(); + if(hd == "/i); for(j = 0; j < cells.length; ++j) { var cell = cells[j].trim(); - if(cell.substr(0,3) != "")) > -1) m = m.slice(cc+1); @@ -16678,9 +16717,11 @@ var parse_content_xml = (function() { case 'forms': break; // 12.25.2 13.2 case 'table-column': break; // 9.1.6 + case 'table-header-rows': break; // 9.1.7 /* TODO: outline levels */ case 'table-row-group': break; // 9.1.9 case 'table-column-group': break; // 9.1.10 + case 'table-header-columns': break; // 9.1.11 case 'null-date': break; // 9.4.2 TODO: date1904 @@ -16688,14 +16729,17 @@ var parse_content_xml = (function() { case 'calculation-settings': break; // 9.4.1 case 'named-expressions': break; // 9.4.11 case 'named-range': break; // 9.4.12 + case 'label-range': break; // 9.4.9 + case 'label-ranges': break; // 9.4.10 case 'named-expression': break; // 9.4.13 case 'sort': break; // 9.4.19 case 'sort-by': break; // 9.4.20 case 'sort-groups': break; // 9.4.22 - case 'span': break; // + case 'tab': break; // 6.1.4 case 'line-break': break; // 6.1.5 - case 'p': case '文本串': + case 'span': break; // 6.1.7 + case 'p': case '文本串': // 5.1.3 if(Rn[1]==='/') textp = (textp.length > 0 ? textp + "\n" : "") + parse_text_p(str.slice(textpidx,Rn.index), textptag); else { textptag = parsexmltag(Rn[0], false); textpidx = Rn.index + Rn[0].length; } break; // @@ -16715,11 +16759,14 @@ var parse_content_xml = (function() { case 'title': case '标题': break; // <*:title> OR case 'desc': break; // <*:desc> + /* 9.2 Advanced Tables */ case 'table-source': break; // 9.2.6 + case 'scenario': break; // 9.2.6 case 'iteration': break; // 9.4.3 case 'content-validations': break; // 9.4.4 case 'filter': break; // 9.5.2 @@ -16774,6 +16821,12 @@ var parse_content_xml = (function() { case 'page-count': break; // TODO case 'time': break; // TODO + /* 9.3 Advanced Table Cells */ + case 'cell-range-source': break; // 9.3.1 */ { case 'base64': x = Base64.decode(f.substr(0,24)); break; case 'binary': x = f; break; case 'array': return [f[0], f[1], f[2], f[3]]; - default: throw new Error("Unrecognized type " + (o ? o.type : "undefined")); + default: throw new Error("Unrecognized type " + (o && o.type || "undefined")); } return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)]; } @@ -17470,13 +17524,35 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { return parse_zip(zip, o); } +function read_plaintext(data/*:string*/, o/*:ParseOpts*/)/*:Workbook*/ { + var i = 0; + main: while(i < data.length) switch(data.charCodeAt(i)) { + case 0x0A: case 0x0D: case 0x20: ++i; break; + case 0x3C: return parse_xlml(data.slice(i),o); + default: break main; + } + return PRN.to_workbook(data, o); +} + +function read_plaintext_raw(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ { + var str = "", bytes = firstbyte(data, o); + switch(o.type) { + case 'base64': str = Base64.decode(data); break; + case 'binary': str = data; break; + case 'buffer': str = data.toString('binary'); break; + case 'array': str = cc2str(data); break; + default: throw new Error("Unrecognized type " + o.type); + } + if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str); + return read_plaintext(str, o); +} + function read_utf16(data/*:RawData*/, o/*:ParseOpts*/)/*:Workbook*/ { var d = data; if(o.type == 'base64') d = Base64.decode(d); - d = cptable.utils.decode(1200, d.slice(2)); + d = cptable.utils.decode(1200, d.slice(2), 'str'); o.type = "binary"; - if(d.charCodeAt(0) == 0x3C) return parse_xlml(d,o); - return PRN.to_workbook(d, o); + return read_plaintext(d, o); } function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { @@ -17497,6 +17573,8 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { case 0xFF: if(n[1] == 0xFE){ return read_utf16(d, o); } break; case 0x00: if(n[1] == 0x00 && n[2] >= 0x02 && n[3] == 0x00) return WK_.to_workbook(d, o); break; case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o); + case 0x7B: if(n[1] == 0x5C && n[2] == 0x72 && n[3] == 0x74) throw new Error("Unsupported RTF"); break; + case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o); } if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o); if(0x20>n[0]||n[0]>0x7F) throw new Error("Unsupported file " + n.join("|")); diff --git a/xlsx.js b/xlsx.js index d88df54..a76bb54 100644 --- a/xlsx.js +++ b/xlsx.js @@ -95,6 +95,7 @@ var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' & function new_raw_buf(len) { /* jshint -W056 */ + // $FlowIgnore return new (has_buf ? Buffer : Array)(len); /* jshint +W056 */ } @@ -3366,6 +3367,7 @@ function parse_ext_props(data, p) { break; case "Named Ranges": + case "名前付き一覧": case "Benannte Bereiche": case "Navngivne områder": p.NamedRanges = len; @@ -3452,6 +3454,7 @@ function parse_cust_props(data, opts) { p[name] = unescapexml(text); break; default: + if(type.slice(-1) == '/') break; if(opts.WTF && typeof console !== 'undefined') console.warn('Unexpected', x, type, toks); } } else if(x.substr(0,2) === " 4 || vers.Major < 2) throw 'unrecognized major version code: ' + vers.Major; + if(vers.Minor != 2) throw new Error('unrecognized minor version code: ' + vers.Minor); + if(vers.Major > 4 || vers.Major < 2) throw new Error('unrecognized major version code: ' + vers.Major); o.Flags = blob.read_shift(4); length -= 4; var sz = blob.read_shift(4); length -= 4; o.EncryptionHeader = parse_EncryptionHeader(blob, sz); length -= sz; @@ -6672,7 +6684,7 @@ function parse_FilePassHeader(blob, length, oo) { return o; } function parse_FilePass(blob, length, opts) { - var o = { Type: blob.read_shift(2) }; /* wEncryptionType */ + var o = { Type: opts.biff >= 8 ? blob.read_shift(2) : 0 }; /* wEncryptionType */ if(o.Type) parse_FilePassHeader(blob, length-2, o); else parse_XORObfuscation(blob, length-2, opts, o); return o; @@ -6807,7 +6819,7 @@ function parse_borders(t, styles, themes, opts) { case '': case '': break; /* 18.8.4 border CT_Border */ - case '': + case '': case '': border = {}; if (y.diagonalUp) { border.diagonalUp = y.diagonalUp; } if (y.diagonalDown) { border.diagonalDown = y.diagonalDown; } @@ -6879,6 +6891,7 @@ function parse_fills(t, styles, themes, opts) { /* 18.8.24 gradientFill CT_GradientFill */ case '': break; + case '': styles.Fills.push(fill); fill = {}; break; /* 18.8.32 patternFill CT_PatternFill */ @@ -11084,10 +11097,10 @@ function write_ws_xml_data(ws, opts, idx, wb, rels) { } if(rows) for(; R < rows.length; ++R) { if(rows && rows[R]) { - var params = ({r:R+1}); - var row = rows[R]; + params = ({r:R+1}); + row = rows[R]; if(row.hidden) params.hidden = 1; - var height = -1; + height = -1; if (row.hpx) height = px2pt(row.hpx); else if (row.hpt) height = row.hpt; if (height > -1) { params.ht = height; params.customHeight = 1; } @@ -12345,13 +12358,13 @@ function parse_wb_xml(data, opts) { case '': break; /* 18.2.1 bookViews CT_BookViews ? */ - case '': case '': break; + case '': case '': break; /* 18.2.30 workbookView CT_BookView + */ case '': break; /* 18.2.20 sheets CT_Sheets 1 */ - case '': case '': break; // aggregate sheet + case '': case '': break; // aggregate sheet /* 18.2.19 sheet CT_Sheet + */ case '': case '': case '': break; + case '': case '': case '': break; /* 18.2.7 ext CT_Extension + */ case '': pass=false; break; @@ -13057,7 +13070,9 @@ function parse_xlml_xml(d, _opts) { make_ssf(SSF); var str = debom(xlml_normalize(d)); if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str)); - if(str.substr(0,1000).indexOf("= 0) return HTML_.to_workbook(str, opts); + var opening = str.slice(0, 1024).toLowerCase(), ishtml = false; + if(opening.indexOf("= 0) ishtml = true; }); + if(ishtml) return HTML_.to_workbook(str, opts); var Rn; var state = [], tmp; if(DENSE != null && opts.dense == null) opts.dense = DENSE; @@ -13988,7 +14003,14 @@ function parse_compobj(obj) { throw new Error("Unsupported Unicode Extension"); } -/* 2.4.58 Continue logic */ +/* + Continue logic for: + - 2.4.58 Continue + - 2.4.59 ContinueBigName + - 2.4.60 ContinueFrt + - 2.4.61 ContinueFrt11 + - 2.4.62 ContinueFrt12 +*/ function slurp(R, blob, length, opts) { var l = length; var bufs = []; @@ -14002,9 +14024,13 @@ function slurp(R, blob, length, opts) { bufs.push(d); blob.l += l; var next = (XLSRecordEnum[__readUInt16LE(blob,blob.l)]); - while(next != null && next.n === 'Continue') { + var start = 0; + while(next != null && next.n.slice(0,8) === 'Continue') { l = __readUInt16LE(blob,blob.l+2); - bufs.push(blob.slice(blob.l+4,blob.l+4+l)); + start = blob.l + 4; + if(next.n == 'ContinueFrt') start += 4; + else if(next.n.slice(0,11) == 'ContinueFrt') start += 12; + bufs.push(blob.slice(start,blob.l+4+l)); blob.l += 4+l; next = (XLSRecordEnum[__readUInt16LE(blob, blob.l)]); } @@ -14240,6 +14266,7 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break; if(val.rgce && val.rgce[0] && val.rgce[0][0] && val.rgce[0][0][0] == 'PtgArea3d') FilterDatabases[val.itab - 1] = { ref: encode_range(val.rgce[0][0][1][2]) }; break; + case 'ExternCount': opts.ExternCount = val; break; case 'ExternSheet': if(supbooks.length == 0) { supbooks[0] = []; supbooks[0].XTI = []; } supbooks[supbooks.length - 1].XTI = supbooks[supbooks.length - 1].XTI.concat(val); supbooks.XTI = supbooks.XTI.concat(val); break; @@ -14519,6 +14546,9 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break; case 'SXVI': break; // TODO case 'SXVDEx': break; // TODO case 'SxIvd': break; // TODO + case 'SXString': break; // TODO + case 'Sync': break; + case 'Addin': break; case 'SXDI': break; // TODO case 'SXLI': break; // TODO case 'SXEx': break; // TODO @@ -14625,6 +14655,9 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break; case 'DbOrParamQry': break; case 'DBQueryExt': break; + case 'OleDbConn': break; + case 'ExtString': break; + /* Formatting */ case 'IFmtRecord': break; case 'CondFmt': case 'CF': case 'CF12': case 'CFEx': break; @@ -14716,7 +14749,6 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break; default: switch(R.n) { /* nested */ /* BIFF5 records */ - case 'ExternCount': break; case 'TabIdConf': case 'Radar': case 'RadarArea': case 'DropBar': case 'Intl': case 'CoordList': case 'SerAuxErrBar': break; /* BIFF2-4 records */ @@ -14728,6 +14760,10 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break; case 'SCENARIO': case 'DConBin': case 'PicF': case 'DataLabExt': case 'Lel': case 'BopPop': case 'BopPopCustom': case 'RealTimeData': case 'Name': break; + case 'LHNGraph': case 'FnGroupName': case 'AddMenu': case 'LPr': break; + case 'ListObj': case 'ListField': break; + case 'RRSort': break; + case 'BigName': break; default: if(options.WTF) throw 'Unrecognized Record ' + R.n; }}}} } else blob.l += length; @@ -14780,7 +14816,7 @@ var CompObjP, SummaryP, WorkbookP; if(CompObj) CompObjP = parse_compobj(CompObj); if(options.bookProps && !options.bookSheets) WorkbookP = ({}); else { - if(Workbook) WorkbookP = parse_workbook(Workbook.content, options, !!Workbook.find); + if(Workbook) WorkbookP = parse_workbook(Workbook.content, options); /* Quattro Pro 7-8 */ else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options); /* Quattro Pro 9 */ @@ -16165,20 +16201,23 @@ var HTML_ = (function() { var opts = _opts || {}; if(DENSE != null && opts.dense == null) opts.dense = DENSE; var ws = opts.dense ? ([]) : ({}); - var i = str.indexOf(" /
pair"); - var rows = str.slice(i, j).split(/(:?]*>)/); + var mtch = str.match(/"); + var mtch2 = str.match(/<\/table/i); + var i = mtch.index, j = mtch2 && mtch2.index || str.length; + var rows = str.slice(i, j).split(/(:?]*>)/i); var R = -1, C = 0, RS = 0, CS = 0; var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}}; var merges = [], midx = 0; for(i = 0; i < rows.length; ++i) { var row = rows[i].trim(); - if(row.substr(0,3) == ""); + var hd = row.substr(0,3).toLowerCase(); + if(hd == "/i); for(j = 0; j < cells.length; ++j) { var cell = cells[j].trim(); - if(cell.substr(0,3) != "")) > -1) m = m.slice(cc+1); @@ -16604,9 +16643,11 @@ var parse_content_xml = (function() { case 'forms': break; // 12.25.2 13.2 case 'table-column': break; // 9.1.6 + case 'table-header-rows': break; // 9.1.7 /* TODO: outline levels */ case 'table-row-group': break; // 9.1.9 case 'table-column-group': break; // 9.1.10 + case 'table-header-columns': break; // 9.1.11 case 'null-date': break; // 9.4.2 TODO: date1904 @@ -16614,14 +16655,17 @@ var parse_content_xml = (function() { case 'calculation-settings': break; // 9.4.1 case 'named-expressions': break; // 9.4.11 case 'named-range': break; // 9.4.12 + case 'label-range': break; // 9.4.9 + case 'label-ranges': break; // 9.4.10 case 'named-expression': break; // 9.4.13 case 'sort': break; // 9.4.19 case 'sort-by': break; // 9.4.20 case 'sort-groups': break; // 9.4.22 - case 'span': break; // + case 'tab': break; // 6.1.4 case 'line-break': break; // 6.1.5 - case 'p': case '文本串': + case 'span': break; // 6.1.7 + case 'p': case '文本串': // 5.1.3 if(Rn[1]==='/') textp = (textp.length > 0 ? textp + "\n" : "") + parse_text_p(str.slice(textpidx,Rn.index), textptag); else { textptag = parsexmltag(Rn[0], false); textpidx = Rn.index + Rn[0].length; } break; // @@ -16641,11 +16685,14 @@ var parse_content_xml = (function() { case 'title': case '标题': break; // <*:title> OR case 'desc': break; // <*:desc> + /* 9.2 Advanced Tables */ case 'table-source': break; // 9.2.6 + case 'scenario': break; // 9.2.6 case 'iteration': break; // 9.4.3 case 'content-validations': break; // 9.4.4 case 'filter': break; // 9.5.2 @@ -16700,6 +16747,12 @@ var parse_content_xml = (function() { case 'page-count': break; // TODO case 'time': break; // TODO + /* 9.3 Advanced Table Cells */ + case 'cell-range-source': break; // 9.3.1 = 0x02 && n[3] == 0x00) return WK_.to_workbook(d, o); break; case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o); + case 0x7B: if(n[1] == 0x5C && n[2] == 0x72 && n[3] == 0x74) throw new Error("Unsupported RTF"); break; + case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o); } if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o); if(0x20>n[0]||n[0]>0x7F) throw new Error("Unsupported file " + n.join("|"));