diff --git a/.gitignore b/.gitignore index e4ab83c..899a94e 100644 --- a/.gitignore +++ b/.gitignore @@ -7,7 +7,7 @@ v8.log tmp *.txt *.[cC][sS][vV] -*.[dD][iI][fF] +*.[dD][iIbB][fF] *.[pP][rR][nN] *.[sS][lL][kK] *.socialcalc @@ -17,6 +17,9 @@ tmp *.[fF][oO][dD][sS] *.[xX][mM][lL] *.[uU][oO][sS] +*.[wW][kKqQbB][S1234567890] +*.[qQ][pP][wW] +*.123 *.htm *.html *.sheetjs diff --git a/.npmignore b/.npmignore index 050a796..037ccfa 100644 --- a/.npmignore +++ b/.npmignore @@ -5,10 +5,12 @@ index.html misc/ node_modules *.tgz +_book +book.json tmp *.txt *.[cC][sS][vV] -*.[dD][iI][fF] +*.[dD][iIbB][fF] *.[pP][rR][nN] *.[sS][lL][kK] *.socialcalc @@ -18,6 +20,9 @@ tmp *.[fF][oO][dD][sS] *.[xX][mM][lL] *.[uU][oO][sS] +*.[wW][kKqQbB][S1234567890] +*.[qQ][pP][wW] +*.123 *.htm *.html *.sheetjs @@ -39,5 +44,3 @@ test.js bits/ docbits/ tests/ -_book -book.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 626060a..318457b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ changes may not be included if they are not expected to break existing code. ## Unreleased +* + +## 0.9.9 (2017-04-03) + * default output format changed to XLSB * comment text line endings are now normalized * errors thrown on write when worksheets have invalid names diff --git a/README.md b/README.md index 5d70ec8..8945b08 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ with a unified JS representation, and ES3/ES5 browser compatibility back to IE6. [**Commercial Support**](http://sheetjs.com/reinforcements) +[**Rendered Documentation**](https://sheetjs.gitbooks.io/docs/) + [**File format support for known spreadsheet data formats:**](#file-formats) ![circo graph of format support](formats.png) @@ -68,9 +70,12 @@ with a unified JS representation, and ES3/ES5 browser compatibility back to IE6. 
* [Excel 97-2004 Binary (BIFF8)](#excel-97-2004-binary-biff8) * [Excel 2003-2004 (SpreadsheetML)](#excel-2003-2004-spreadsheetml) * [Excel 2007+ Binary (XLSB, BIFF12)](#excel-2007-binary-xlsb-biff12) - * [OpenDocument Spreadsheet (ODS/FODS)](#opendocument-spreadsheet-odsfods) - + [Uniform Office Spreadsheet (UOS1/2)](#uniform-office-spreadsheet-uos12) * [Delimiter-Separated Values (CSV/TXT)](#delimiter-separated-values-csvtxt) + * [Other Workbook Formats](#other-workbook-formats) + + [Lotus 1-2-3 (WKS/WK1/WK2/WK3/WK4/123)](#lotus-1-2-3-wkswk1wk2wk3wk4123) + + [Quattro Pro (WQ1/WQ2/WB1/WB2/WB3/QPW)](#quattro-pro-wq1wq2wb1wb2wb3qpw) + + [OpenDocument Spreadsheet (ODS/FODS)](#opendocument-spreadsheet-odsfods) + + [Uniform Office Spreadsheet (UOS1/2)](#uniform-office-spreadsheet-uos12) * [Other Single-Worksheet Formats](#other-single-worksheet-formats) + [dBASE and Visual FoxPro (DBF)](#dbase-and-visual-foxpro-dbf) + [Symbolic Link (SYLK)](#symbolic-link-sylk) @@ -872,13 +877,14 @@ file but Excel will know how to handle it. 
This library applies similar logic: | Byte 0 | Raw File Type | Spreadsheet Types | |:-------|:--------------|:----------------------------------------------------| -| `0xD0` | CFB Container | BIFF 5/8 or password-protected XLSX/XLSB | +| `0xD0` | CFB Container | BIFF 5/8 or password-protected XLSX/XLSB or WQ3/QPW | | `0x09` | BIFF Stream | BIFF 2/3/4/5 | | `0x3C` | XML/HTML | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | | `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plaintext | | `0x49` | Plain Text | SYLK or plaintext | | `0x54` | Plain Text | DIF or plaintext | | `0xFE` | UTF16 Encoded | SpreadsheetML or Flat ODS or UOS1 or plaintext | +| `0x00` | Record Stream | Lotus WK\* or Quattro Pro or plaintext | DBF files are detected based on the first byte as well as the third and fourth bytes (corresponding to month and day of the file date) @@ -1181,6 +1187,8 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats: | Flat XML ODF Spreadsheet (FODS) | :o: | :o: | | Uniform Office Format Spreadsheet (标文通 UOS1/UOS2) | :o: | | | dBASE II/III/IV / Visual FoxPro (DBF) | :o: | | +| Lotus 1-2-3 (WKS/WK1/WK2/WK3/WK4/123) | :o: | | +| Quattro Pro Spreadsheet (WQ1/WQ2/WB1/WB2/WB3/QPW) | :o: | | | **Other Common Spreadsheet Output Formats** |:-----:|:-----:| | HTML Tables | :o: | | @@ -1231,7 +1239,34 @@ in an XLSX sub-file can be mapped to XLSB records in a corresponding sub-file. The `MS-XLSB` specification covers the basics of the file format, and other specifications expand on serialization of features like properties. -### OpenDocument Spreadsheet (ODS/FODS) +### Delimiter-Separated Values (CSV/TXT) + +Excel CSV deviates from RFC4180 in a number of important ways. The generated +CSV files should generally work in Excel although they may not work in RFC4180 +compatible readers. The parser should generally understand Excel CSV. + +Excel TXT uses tab as the delimiter and codepage 1200. 
+ +### Other Workbook Formats + +Support for other formats is generally far behind XLS/XLSB/XLSX support, due in large +part to a lack of publicly available documentation. Test files were produced in +the respective apps and compared to their XLS exports to determine structure. +The main focus is data extraction. + +#### Lotus 1-2-3 (WKS/WK1/WK2/WK3/WK4/123) + +The Lotus formats consist of binary records similar to the BIFF structure. Lotus +did release a whitepaper decades ago covering the original WK1 format. Other +features were deduced by producing files and comparing to Excel support. + +#### Quattro Pro (WQ1/WQ2/WB1/WB2/WB3/QPW) + +The Quattro Pro formats use binary records in the same way as BIFF and Lotus. +Some of the newer formats (namely WB3 and QPW) use a CFB enclosure just like +BIFF8 XLS. + +#### OpenDocument Spreadsheet (ODS/FODS) ODS is an XML-in-ZIP format akin to XLSX while FODS is an XML format akin to SpreadsheetML. Both are detailed in the OASIS standard, but tools like LO/OO @@ -1243,14 +1278,6 @@ UOS is a very similar format, and it comes in 2 varieties corresponding to ODS and FODS respectively. For the most part, the difference between the formats lies in the names of tags and attributes. -### Delimiter-Separated Values (CSV/TXT) - -Excel CSV deviates from RFC4180 in a number of important ways. The generated -CSV files should generally work in Excel although they may not work in RFC4180 -compatible readers. The parser should generally understand Excel CSV. - -Excel TXT uses tab as the delimiter and codepage 1200. 
- ### Other Single-Worksheet Formats Many older formats supported only one worksheet: diff --git a/bits/40_harb.js b/bits/40_harb.js index b4a8bde..df5875e 100644 --- a/bits/40_harb.js +++ b/bits/40_harb.js @@ -412,11 +412,12 @@ var PRN = (function() { if(str.substr(0,1024).indexOf("\t") == -1) sep = ","; else sep = "\t"; var R = 0, C = 0, v = 0; var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0; + str = str.replace(/\r\n/g, "\n"); for(;end < str.length;++end) switch((cc=str.charCodeAt(end))) { case 0x22: instr = !instr; break; case sepcc: case 0x0a: if(instr) break; var s = str.slice(start, end); - var cell = ({}/*:any*/) + var cell = ({}/*:any*/); if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); } else if(s == "TRUE") { cell.t = 'b'; cell.v = true; } else if(s == "FALSE") { cell.t = 'b'; cell.v = false; } @@ -426,7 +427,7 @@ var PRN = (function() { start = end+1; if(range.e.c < C) range.e.c = C; if(range.e.r < R) range.e.r = R; - if(cc == sepcc) ++C; else { C = 0; ++R; }; break; + if(cc == sepcc) ++C; else { C = 0; ++R; } break; default: break; } diff --git a/bits/41_lotus.js b/bits/41_lotus.js new file mode 100644 index 0000000..a9349ff --- /dev/null +++ b/bits/41_lotus.js @@ -0,0 +1,320 @@ +var WK_ = (function() { + function lotushopper(data, cb/*:RecordHopperCB*/, opts/*:any*/) { + if(!data) return; + prep_blob(data, data.l || 0); + var Enum = opts.Enum || WK1Enum; + while(data.l < data.length) { + var RT = data.read_shift(2); + var R = Enum[RT] || Enum[0xFF]; + var length = data.read_shift(2); + var tgt = data.l + length; + var d = R.f(data, length, opts); + data.l = tgt; + if(cb(d, R, RT)) return; + } + } + + function lotus_to_workbook(d/*:RawData*/, opts) { + switch(opts.type) { + case 'base64': return lotus_to_workbook_buf(s2a(Base64.decode(d)), opts); + case 'binary': return lotus_to_workbook_buf(s2a(d), opts); + case 'buffer': + case 'array': return lotus_to_workbook_buf(d, opts); + } + throw "Unsupported type " + 
opts.type; + } + + function lotus_to_workbook_buf(d,opts)/*:Workbook*/ { + if(!d) return d; + var o = opts || {}; + + var s = {}, n = "Sheet1", sidx = 0; + var sheets = {}, snames = [n]; + + var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; + + if(d[2] == 0x02) o.Enum = WK1Enum; + else if(d[2] == 0x1a) o.Enum = WK3Enum; + else if(d[2] == 0x0e) { o.Enum = WK3Enum; o.qpro = true; d.l = 0; } + else throw new Error("Unrecognized LOTUS BOF " + d[2]); + lotushopper(d, function(val, R, RT) { + if(d[2] == 0x02) switch(RT) { + case 0x00: + o.vers = val; + if(val >= 0x1000) o.qpro = true; + break; + case 0x06: refguess = val; break; /* RANGE */ + case 0x0F: /* LABEL */ + if(!opts.qpro) val[1].v = val[1].v.substr(1); + /* falls through */ + case 0x0D: /* INTEGER */ + case 0x0E: /* NUMBER */ + case 0x10: /* FORMULA */ + case 0x33: /* STRING */ + s[encode_cell(val[0])] = val[1]; + /* TODO: FORMAT */ + break; + } else switch(RT) { + case 0x16: /* LABEL16 */ + val[1].v = val[1].v.substr(1); + /* falls through */ + case 0x17: /* NUMBER17 */ + case 0x18: /* NUMBER18 */ + case 0x19: /* FORMULA19 */ + case 0x25: /* NUMBER25 */ + case 0x27: /* NUMBER27 */ + case 0x28: /* FORMULA28 */ + if(val[3] > sidx) { + s["!ref"] = encode_range(refguess); + sheets[n] = s; + s = {}; + refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; + sidx = val[3]; n = "Sheet" + (sidx + 1); + snames.push(n); + } + s[encode_cell(val[0])] = val[1]; + if(refguess.e.c < val[0].c) refguess.e.c = val[0].c; + if(refguess.e.r < val[0].r) refguess.e.r = val[0].r; + break; + default: break; + } + }, o); + + s["!ref"] = encode_range(refguess); + sheets[n] = s; + return { SheetNames: snames, Sheets:sheets }; + } + + function parse_RANGE(blob, length) { + var o = {s:{c:0,r:0},e:{c:0,r:0}}; + o.s.c = blob.read_shift(2); + o.s.r = blob.read_shift(2); + o.e.c = blob.read_shift(2); + o.e.r = blob.read_shift(2); + if(o.s.c == 0xFFFF) o.s.c = o.e.c = o.s.r = o.e.r = 0; + return o; + } + + function parse_cell(blob, length, opts) { + var o = 
[{c:0,r:0}, {t:'n',v:0}, 0]; + if(opts.qpro && opts.vers != 0x5120) { + o[0].c = blob.read_shift(1); + blob.l++; + o[0].r = blob.read_shift(2); + blob.l+=2; + } else { + o[2] = blob.read_shift(1); + o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2); + } + return o; + } + + function parse_LABEL(blob, length, opts) { + var tgt = blob.l + length; + var o = parse_cell(blob, length, opts); + o[1].t = 's'; + if(opts.vers == 0x5120) { + blob.l++; + var len = blob.read_shift(1); + o[1].v = blob.read_shift(len, 'utf8'); + return o; + } + if(opts.qpro) blob.l++; + o[1].v = blob.read_shift(tgt - blob.l, 'cstr'); + return o; + } + + function parse_INTEGER(blob, length, opts) { + var o = parse_cell(blob, length, opts); + o[1].v = blob.read_shift(2, 'i'); + return o; + } + + function parse_NUMBER(blob, length, opts) { + var o = parse_cell(blob, length, opts); + o[1].v = blob.read_shift(8, 'f'); + return o; + } + + function parse_FORMULA(blob, length, opts) { + var tgt = blob.l + length; + var o = parse_cell(blob, length, opts); + /* TODO: formula */ + o[1].v = blob.read_shift(8, 'f'); + if(opts.qpro) blob.l = tgt; + else { + var flen = blob.read_shift(2); + blob.l += flen; + } + return o; + } + + function parse_cell_3(blob, length) { + var o = [{c:0,r:0}, {t:'n',v:0}, 0]; + o[0].r = blob.read_shift(2); o[3] = blob[blob.l++]; o[0].c = blob[blob.l++]; + return o; + } + + function parse_LABEL_16(blob, length) { + var o = parse_cell_3(blob, length); + o[1].t = 's'; + o[1].v = blob.read_shift(length - 4, 'cstr'); + return o; + } + + function parse_NUMBER_18(blob, length) { + var o = parse_cell_3(blob, length); + o[1].v = blob.read_shift(2); + var v = o[1].v >> 1; + /* TODO: figure out all of the corner cases */ + if(o[1].v & 0x1) { + switch(v & 0x07) { + case 1: v = (v >> 3) * 500; break; + case 2: v = (v >> 3) / 20; break; + case 4: v = (v >> 3) / 2000; break; + case 6: v = (v >> 3) / 16; break; + case 7: v = (v >> 3) / 64; break; + default: throw "unknown NUMBER_18 encoding 
" + (v & 0x07); + } + } + o[1].v = v; + return o; + } + + function parse_NUMBER_17(blob, length) { + var o = parse_cell_3(blob, length); + var v1 = blob.read_shift(4); + var v2 = blob.read_shift(4); + var e = blob.read_shift(2); + if(e == 0xFFFF) { o[1].v = 0; return o; } + var s = e & 0x8000; e = (e&0x7FFF) - 16446; + o[1].v = ((e > 0 ? (v2 << e) : (v2 >>> -e)) + (e > -32 ? (v1 << (e + 32)) : (v1 >>> -(e + 32)))); + return o; + } + + function parse_FORMULA_19(blob, length) { + var o = parse_NUMBER_17(blob, 14); + blob.l += length - 14; /* TODO: formula */ + return o; + } + + function parse_NUMBER_25(blob, length) { + var o = parse_cell_3(blob, length); + var v1 = blob.read_shift(4); + o[1].v = v1 >> 6; + return o; + } + + function parse_NUMBER_27(blob, length) { + var o = parse_cell_3(blob, length); + var v1 = blob.read_shift(8,'f'); + o[1].v = v1; + return o; + } + + function parse_FORMULA_28(blob, length) { + var o = parse_NUMBER_27(blob, 14); + blob.l += length - 10; /* TODO: formula */ + return o; + } + + var WK1Enum = { + /*::[*/0x0000/*::]*/: { n:"BOF", f:parseuint16 }, + /*::[*/0x0001/*::]*/: { n:"EOF", f:parsenoop }, + /*::[*/0x0002/*::]*/: { n: "CALCMODE", f:parsenoop }, + /*::[*/0x0003/*::]*/: { n:"CALCORDER", f:parsenoop }, + /*::[*/0x0004/*::]*/: { n:"SPLIT", f:parsenoop }, + /*::[*/0x0005/*::]*/: { n:"SYNC", f:parsenoop }, + /*::[*/0x0006/*::]*/: { n:"RANGE", f:parse_RANGE }, + /*::[*/0x0007/*::]*/: { n:"WINDOW1", f:parsenoop }, + /*::[*/0x0008/*::]*/: { n:"COLW1", f:parsenoop }, + /*::[*/0x0009/*::]*/: { n:"WINTWO", f:parsenoop }, + /*::[*/0x000A/*::]*/: { n:"COLW2", f:parsenoop }, + /*::[*/0x000B/*::]*/: { n:"NAME", f:parsenoop }, + /*::[*/0x000C/*::]*/: { n:"BLANK", f:parsenoop }, + /*::[*/0x000D/*::]*/: { n:"INTEGER", f:parse_INTEGER }, + /*::[*/0x000E/*::]*/: { n:"NUMBER", f:parse_NUMBER }, + /*::[*/0x000F/*::]*/: { n:"LABEL", f:parse_LABEL }, + /*::[*/0x0010/*::]*/: { n:"FORMULA", f:parse_FORMULA }, + /*::[*/0x0018/*::]*/: { n:"TABLE", 
f:parsenoop }, + /*::[*/0x0019/*::]*/: { n:"ORANGE", f:parsenoop }, + /*::[*/0x001A/*::]*/: { n:"PRANGE", f:parsenoop }, + /*::[*/0x001B/*::]*/: { n:"SRANGE", f:parsenoop }, + /*::[*/0x001C/*::]*/: { n:"FRANGE", f:parsenoop }, + /*::[*/0x001D/*::]*/: { n:"KRANGE1", f:parsenoop }, + /*::[*/0x0020/*::]*/: { n:"HRANGE", f:parsenoop }, + /*::[*/0x0023/*::]*/: { n:"KRANGE2", f:parsenoop }, + /*::[*/0x0024/*::]*/: { n:"PROTEC", f:parsenoop }, + /*::[*/0x0025/*::]*/: { n:"FOOTER", f:parsenoop }, + /*::[*/0x0026/*::]*/: { n:"HEADER", f:parsenoop }, + /*::[*/0x0027/*::]*/: { n:"SETUP", f:parsenoop }, + /*::[*/0x0028/*::]*/: { n:"MARGINS", f:parsenoop }, + /*::[*/0x0029/*::]*/: { n:"LABELFMT", f:parsenoop }, + /*::[*/0x002A/*::]*/: { n:"TITLES", f:parsenoop }, + /*::[*/0x002B/*::]*/: { n:"SHEETJS", f:parsenoop }, + /*::[*/0x002D/*::]*/: { n:"GRAPH", f:parsenoop }, + /*::[*/0x002E/*::]*/: { n:"NGRAPH", f:parsenoop }, + /*::[*/0x002F/*::]*/: { n:"CALCCOUNT", f:parsenoop }, + /*::[*/0x0030/*::]*/: { n:"UNFORMATTED", f:parsenoop }, + /*::[*/0x0031/*::]*/: { n:"CURSORW12", f:parsenoop }, + /*::[*/0x0032/*::]*/: { n:"WINDOW", f:parsenoop }, + /*::[*/0x0033/*::]*/: { n:"STRING", f:parse_LABEL }, + /*::[*/0x0037/*::]*/: { n:"PASSWORD", f:parsenoop }, + /*::[*/0x0038/*::]*/: { n:"LOCKED", f:parsenoop }, + /*::[*/0x003C/*::]*/: { n:"QUERY", f:parsenoop }, + /*::[*/0x003D/*::]*/: { n:"QUERYNAME", f:parsenoop }, + /*::[*/0x003E/*::]*/: { n:"PRINT", f:parsenoop }, + /*::[*/0x003F/*::]*/: { n:"PRINTNAME", f:parsenoop }, + /*::[*/0x0040/*::]*/: { n:"GRAPH2", f:parsenoop }, + /*::[*/0x0041/*::]*/: { n:"GRAPHNAME", f:parsenoop }, + /*::[*/0x0042/*::]*/: { n:"ZOOM", f:parsenoop }, + /*::[*/0x0043/*::]*/: { n:"SYMSPLIT", f:parsenoop }, + /*::[*/0x0044/*::]*/: { n:"NSROWS", f:parsenoop }, + /*::[*/0x0045/*::]*/: { n:"NSCOLS", f:parsenoop }, + /*::[*/0x0046/*::]*/: { n:"RULER", f:parsenoop }, + /*::[*/0x0047/*::]*/: { n:"NNAME", f:parsenoop }, + /*::[*/0x0048/*::]*/: { n:"ACOMM", f:parsenoop }, 
+ /*::[*/0x0049/*::]*/: { n:"AMACRO", f:parsenoop }, + /*::[*/0x004A/*::]*/: { n:"PARSE", f:parsenoop }, + /*::[*/0x00FF/*::]*/: { n:"", f:parsenoop } + }; + + var WK3Enum = { + /*::[*/0x0000/*::]*/: { n:"BOF", f:parsenoop }, + /*::[*/0x0001/*::]*/: { n:"EOF", f:parsenoop }, + /*::[*/0x0003/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0004/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0005/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0006/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0007/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0009/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000a/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000b/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000c/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000e/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000f/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0010/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0011/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0012/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0013/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0015/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0016/*::]*/: { n:"LABEL16", f:parse_LABEL_16}, + /*::[*/0x0017/*::]*/: { n:"NUMBER17", f:parse_NUMBER_17 }, + /*::[*/0x0018/*::]*/: { n:"NUMBER18", f:parse_NUMBER_18 }, + /*::[*/0x0019/*::]*/: { n:"FORMULA19", f:parse_FORMULA_19}, + /*::[*/0x001a/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001b/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001c/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001d/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001e/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001f/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0021/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0025/*::]*/: { n:"NUMBER25", f:parse_NUMBER_25 }, + /*::[*/0x0027/*::]*/: { n:"NUMBER27", f:parse_NUMBER_27 }, + /*::[*/0x0028/*::]*/: { n:"FORMULA28", f:parse_FORMULA_28 }, + /*::[*/0x00FF/*::]*/: { n:"", f:parsenoop } + }; + return { + to_workbook: lotus_to_workbook + }; +})(); diff --git a/bits/76_xls.js b/bits/76_xls.js index 
f766967..b100462 100644 --- a/bits/76_xls.js +++ b/bits/76_xls.js @@ -734,6 +734,10 @@ if(CompObj) CompObjP = parse_compobj(CompObj); if(options.bookProps && !options.bookSheets) WorkbookP = ({}/*:any*/); else { if(Workbook) WorkbookP = parse_workbook(Workbook.content, options, !!Workbook.find); + /* Quattro Pro 7-8 */ + else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options); + /* Quattro Pro 9 */ + else if(cfb.find('NativeContent_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('NativeContent_MAIN').content, options); else throw new Error("Cannot find Workbook stream"); } diff --git a/bits/77_parsetab.js b/bits/77_parsetab.js index 072dbba..c410a58 100644 --- a/bits/77_parsetab.js +++ b/bits/77_parsetab.js @@ -1236,7 +1236,7 @@ var XLSRecordEnum = { /*::[*/0x08c5/*::]*/: { n:"ListCF", f:parsenoop }, /*::[*/0x08c6/*::]*/: { n:"FMQry", f:parsenoop }, /*::[*/0x08c7/*::]*/: { n:"FMSQry", f:parsenoop }, - /*::[*/0x08c8/*::]*/: { n:"PLV", f:parsenoop }, /* supposedly PLV for Excel 11 */ + /*::[*/0x08c8/*::]*/: { n:"PLV", f:parsenoop }, /*::[*/0x08c9/*::]*/: { n:"LnExt", f:parsenoop }, /*::[*/0x08ca/*::]*/: { n:"MkrExt", f:parsenoop }, /*::[*/0x08cb/*::]*/: { n:"CrtCoopt", f:parsenoop }, diff --git a/bits/79_html.js b/bits/79_html.js index c878d96..4c4a43c 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -55,6 +55,7 @@ function parse_dom_table(table/*:HTMLElement*/, opts/*:?any*/)/*:Worksheet*/ { var o = {t:'s', v:v}; if(v != null && v.length && !isNaN(Number(v))) o = {t:'n', v:Number(v)}; ws[encode_cell({c:C, r:R})] = o; + if(range.e.c < C) range.e.c = C; C += CS; } } diff --git a/bits/87_read.js b/bits/87_read.js index 611cd91..b323921 100644 --- a/bits/87_read.js +++ b/bits/87_read.js @@ -47,6 +47,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break; case 0xEF: return parse_xlml(d, o); case 0xFF: 
if(n[1] == 0xFE){ return read_utf16(d, o); } break; + case 0x00: if(n[1] == 0x00 && n[2] >= 0x02 && n[3] == 0x00) return WK_.to_workbook(d, o); break; case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o); } if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o); diff --git a/docbits/00_intro.md b/docbits/00_intro.md index c5d23f5..db9cb64 100644 --- a/docbits/00_intro.md +++ b/docbits/00_intro.md @@ -11,6 +11,8 @@ with a unified JS representation, and ES3/ES5 browser compatibility back to IE6. [**Commercial Support**](http://sheetjs.com/reinforcements) +[**Rendered Documentation**](https://sheetjs.gitbooks.io/docs/) + [**File format support for known spreadsheet data formats:**](#file-formats) ![circo graph of format support](formats.png) diff --git a/docbits/80_parseopts.md b/docbits/80_parseopts.md index a9918f5..2d7890b 100644 --- a/docbits/80_parseopts.md +++ b/docbits/80_parseopts.md @@ -61,13 +61,14 @@ file but Excel will know how to handle it. This library applies similar logic: | Byte 0 | Raw File Type | Spreadsheet Types | |:-------|:--------------|:----------------------------------------------------| -| `0xD0` | CFB Container | BIFF 5/8 or password-protected XLSX/XLSB | +| `0xD0` | CFB Container | BIFF 5/8 or password-protected XLSX/XLSB or WQ3/QPW | | `0x09` | BIFF Stream | BIFF 2/3/4/5 | | `0x3C` | XML/HTML | SpreadsheetML / Flat ODS / UOS1 / HTML / plaintext | | `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plaintext | | `0x49` | Plain Text | SYLK or plaintext | | `0x54` | Plain Text | DIF or plaintext | | `0xFE` | UTF16 Encoded | SpreadsheetML or Flat ODS or UOS1 or plaintext | +| `0x00` | Record Stream | Lotus WK\* or Quattro Pro or plaintext | DBF files are detected based on the first byte as well as the third and fourth bytes (corresponding to month and day of the file date) diff --git a/docbits/85_filetype.md b/docbits/85_filetype.md index fb6bba3..9cf5bc3 100644 --- a/docbits/85_filetype.md +++ b/docbits/85_filetype.md @@ 
-24,6 +24,8 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats: | Flat XML ODF Spreadsheet (FODS) | :o: | :o: | | Uniform Office Format Spreadsheet (标文通 UOS1/UOS2) | :o: | | | dBASE II/III/IV / Visual FoxPro (DBF) | :o: | | +| Lotus 1-2-3 (WKS/WK1/WK2/WK3/WK4/123) | :o: | | +| Quattro Pro Spreadsheet (WQ1/WQ2/WB1/WB2/WB3/QPW) | :o: | | | **Other Common Spreadsheet Output Formats** |:-----:|:-----:| | HTML Tables | :o: | | @@ -74,7 +76,34 @@ in an XLSX sub-file can be mapped to XLSB records in a corresponding sub-file. The `MS-XLSB` specification covers the basics of the file format, and other specifications expand on serialization of features like properties. -### OpenDocument Spreadsheet (ODS/FODS) +### Delimiter-Separated Values (CSV/TXT) + +Excel CSV deviates from RFC4180 in a number of important ways. The generated +CSV files should generally work in Excel although they may not work in RFC4180 +compatible readers. The parser should generally understand Excel CSV. + +Excel TXT uses tab as the delimiter and codepage 1200. + +### Other Workbook Formats + +Support for other formats is generally far behind XLS/XLSB/XLSX support, due in large +part to a lack of publicly available documentation. Test files were produced in +the respective apps and compared to their XLS exports to determine structure. +The main focus is data extraction. + +#### Lotus 1-2-3 (WKS/WK1/WK2/WK3/WK4/123) + +The Lotus formats consist of binary records similar to the BIFF structure. Lotus +did release a whitepaper decades ago covering the original WK1 format. Other +features were deduced by producing files and comparing to Excel support. + +#### Quattro Pro (WQ1/WQ2/WB1/WB2/WB3/QPW) + +The Quattro Pro formats use binary records in the same way as BIFF and Lotus. +Some of the newer formats (namely WB3 and QPW) use a CFB enclosure just like +BIFF8 XLS. 
+ +#### OpenDocument Spreadsheet (ODS/FODS) ODS is an XML-in-ZIP format akin to XLSX while FODS is an XML format akin to SpreadsheetML. Both are detailed in the OASIS standard, but tools like LO/OO @@ -86,14 +115,6 @@ UOS is a very similar format, and it comes in 2 varieties corresponding to ODS and FODS respectively. For the most part, the difference between the formats lies in the names of tags and attributes. -### Delimiter-Separated Values (CSV/TXT) - -Excel CSV deviates from RFC4180 in a number of important ways. The generated -CSV files should generally work in Excel although they may not work in RFC4180 -compatible readers. The parser should generally understand Excel CSV. - -Excel TXT uses tab as the delimiter and codepage 1200. - ### Other Single-Worksheet Formats Many older formats supported only one worksheet: diff --git a/formats.dot b/formats.dot index b387889..4dc7566 100644 --- a/formats.dot +++ b/formats.dot @@ -25,6 +25,10 @@ digraph G { dif [label="DIF"]; slk [label="SYLK"]; prn [label="PRN"]; + wk1 [label="WK1/2\n123"]; + wk3 [label="WK3/4"]; + wqb [label="WQ*\nWB*"]; + qpw [label="QPW"]; } subgraph WORKBOOK { @@ -42,6 +46,8 @@ digraph G { fods -> csf csf -> fods uos -> csf + wk3 -> csf + qpw -> csf } subgraph WORKSHEET { edge [color=aquamarine4]; @@ -52,6 +58,8 @@ digraph G { csf -> slk slk -> csf csf -> dif + wk1 -> csf + wqb -> csf dif -> csf prn -> csf csf -> prn diff --git a/formats.png b/formats.png index 1ca3215..145c98e 100644 Binary files a/formats.png and b/formats.png differ diff --git a/misc/docs/SUMMARY.md b/misc/docs/SUMMARY.md index 8ff8bef..fd322af 100644 --- a/misc/docs/SUMMARY.md +++ b/misc/docs/SUMMARY.md @@ -47,9 +47,12 @@ * [Excel 97-2004 Binary (BIFF8)](README.md#excel-97-2004-binary-biff8) * [Excel 2003-2004 (SpreadsheetML)](README.md#excel-2003-2004-spreadsheetml) * [Excel 2007+ Binary (XLSB, BIFF12)](README.md#excel-2007-binary-xlsb-biff12) - * [OpenDocument Spreadsheet 
(ODS/FODS)](README.md#opendocument-spreadsheet-odsfods) - + [Uniform Office Spreadsheet (UOS1/2)](README.md#uniform-office-spreadsheet-uos12) * [Delimiter-Separated Values (CSV/TXT)](README.md#delimiter-separated-values-csvtxt) + * [Other Workbook Formats](README.md#other-workbook-formats) + + [Lotus 1-2-3 (WKS/WK1/WK2/WK3/WK4/123)](README.md#lotus-1-2-3-wkswk1wk2wk3wk4123) + + [Quattro Pro (WQ1/WQ2/WB1/WB2/WB3/QPW)](README.md#quattro-pro-wq1wq2wb1wb2wb3qpw) + + [OpenDocument Spreadsheet (ODS/FODS)](README.md#opendocument-spreadsheet-odsfods) + + [Uniform Office Spreadsheet (UOS1/2)](README.md#uniform-office-spreadsheet-uos12) * [Other Single-Worksheet Formats](README.md#other-single-worksheet-formats) + [dBASE and Visual FoxPro (DBF)](README.md#dbase-and-visual-foxpro-dbf) + [Symbolic Link (SYLK)](README.md#symbolic-link-sylk) diff --git a/tests/write.js b/tests/write.js index c40c416..d8ea1d5 100644 --- a/tests/write.js +++ b/tests/write.js @@ -117,6 +117,7 @@ XLSX.writeFile(wb, 'sheetjs.slk'); XLSX.writeFile(wb, 'sheetjs.csv'); XLSX.writeFile(wb, 'sheetjs.txt'); XLSX.writeFile(wb, 'sheetjs.prn'); +XLSX.writeFile(wb, 'sheetjs.dif'); /* test by reading back files */ XLSX.readFile('sheetjs.xlsx'); @@ -127,6 +128,7 @@ XLSX.readFile('sheetjs.xml.xls'); XLSX.readFile('sheetjs.ods'); XLSX.readFile('sheetjs.fods'); XLSX.readFile('sheetjs.slk'); -//XLSX.readFile('sheetjs.csv'); -//XLSX.readFile('sheetjs.txt'); +XLSX.readFile('sheetjs.csv'); +XLSX.readFile('sheetjs.txt'); XLSX.readFile('sheetjs.prn'); +XLSX.readFile('sheetjs.dif'); diff --git a/xlsx.flow.js b/xlsx.flow.js index 8955c96..1c18093 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -5172,11 +5172,12 @@ var PRN = (function() { if(str.substr(0,1024).indexOf("\t") == -1) sep = ","; else sep = "\t"; var R = 0, C = 0, v = 0; var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0; + str = str.replace(/\r\n/g, "\n"); for(;end < str.length;++end) switch((cc=str.charCodeAt(end))) { case 0x22: instr = 
!instr; break; case sepcc: case 0x0a: if(instr) break; var s = str.slice(start, end); - var cell = ({}/*:any*/) + var cell = ({}/*:any*/); if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); } else if(s == "TRUE") { cell.t = 'b'; cell.v = true; } else if(s == "FALSE") { cell.t = 'b'; cell.v = false; } @@ -5186,7 +5187,7 @@ var PRN = (function() { start = end+1; if(range.e.c < C) range.e.c = C; if(range.e.r < R) range.e.r = R; - if(cc == sepcc) ++C; else { C = 0; ++R; }; break; + if(cc == sepcc) ++C; else { C = 0; ++R; } break; default: break; } @@ -5236,6 +5237,326 @@ var PRN = (function() { }; })(); +var WK_ = (function() { + function lotushopper(data, cb/*:RecordHopperCB*/, opts/*:any*/) { + if(!data) return; + prep_blob(data, data.l || 0); + var Enum = opts.Enum || WK1Enum; + while(data.l < data.length) { + var RT = data.read_shift(2); + var R = Enum[RT] || Enum[0xFF]; + var length = data.read_shift(2); + var tgt = data.l + length; + var d = R.f(data, length, opts); + data.l = tgt; + if(cb(d, R, RT)) return; + } + } + + function lotus_to_workbook(d/*:RawData*/, opts) { + switch(opts.type) { + case 'base64': return lotus_to_workbook_buf(s2a(Base64.decode(d)), opts); + case 'binary': return lotus_to_workbook_buf(s2a(d), opts); + case 'buffer': + case 'array': return lotus_to_workbook_buf(d, opts); + } + throw "Unsupported type " + opts.type; + } + + function lotus_to_workbook_buf(d,opts)/*:Workbook*/ { + if(!d) return d; + var o = opts || {}; + + var s = {}, n = "Sheet1", sidx = 0; + var sheets = {}, snames = [n]; + + var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; + + if(d[2] == 0x02) o.Enum = WK1Enum; + else if(d[2] == 0x1a) o.Enum = WK3Enum; + else if(d[2] == 0x0e) { o.Enum = WK3Enum; o.qpro = true; d.l = 0; } + else throw new Error("Unrecognized LOTUS BOF " + d[2]); + lotushopper(d, function(val, R, RT) { + if(d[2] == 0x02) switch(RT) { + case 0x00: + o.vers = val; + if(val >= 0x1000) o.qpro = true; + break; + case 0x06: refguess = val; break; /* 
RANGE */ + case 0x0F: /* LABEL */ + if(!opts.qpro) val[1].v = val[1].v.substr(1); + /* falls through */ + case 0x0D: /* INTEGER */ + case 0x0E: /* NUMBER */ + case 0x10: /* FORMULA */ + case 0x33: /* STRING */ + s[encode_cell(val[0])] = val[1]; + /* TODO: FORMAT */ + break; + } else switch(RT) { + case 0x16: /* LABEL16 */ + val[1].v = val[1].v.substr(1); + /* falls through */ + case 0x17: /* NUMBER17 */ + case 0x18: /* NUMBER18 */ + case 0x19: /* FORMULA19 */ + case 0x25: /* NUMBER25 */ + case 0x27: /* NUMBER27 */ + case 0x28: /* FORMULA28 */ + if(val[3] > sidx) { + s["!ref"] = encode_range(refguess); + sheets[n] = s; + s = {}; + refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; + sidx = val[3]; n = "Sheet" + (sidx + 1); + snames.push(n); + } + s[encode_cell(val[0])] = val[1]; + if(refguess.e.c < val[0].c) refguess.e.c = val[0].c; + if(refguess.e.r < val[0].r) refguess.e.r = val[0].r; + break; + default: break; + } + }, o); + + s["!ref"] = encode_range(refguess); + sheets[n] = s; + return { SheetNames: snames, Sheets:sheets }; + } + + function parse_RANGE(blob, length) { + var o = {s:{c:0,r:0},e:{c:0,r:0}}; + o.s.c = blob.read_shift(2); + o.s.r = blob.read_shift(2); + o.e.c = blob.read_shift(2); + o.e.r = blob.read_shift(2); + if(o.s.c == 0xFFFF) o.s.c = o.e.c = o.s.r = o.e.r = 0; + return o; + } + + function parse_cell(blob, length, opts) { + var o = [{c:0,r:0}, {t:'n',v:0}, 0]; + if(opts.qpro && opts.vers != 0x5120) { + o[0].c = blob.read_shift(1); + blob.l++; + o[0].r = blob.read_shift(2); + blob.l+=2; + } else { + o[2] = blob.read_shift(1); + o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2); + } + return o; + } + + function parse_LABEL(blob, length, opts) { + var tgt = blob.l + length; + var o = parse_cell(blob, length, opts); + o[1].t = 's'; + if(opts.vers == 0x5120) { + blob.l++; + var len = blob.read_shift(1); + o[1].v = blob.read_shift(len, 'utf8'); + return o; + } + if(opts.qpro) blob.l++; + o[1].v = blob.read_shift(tgt - blob.l, 'cstr'); + return o; + } 
+ + function parse_INTEGER(blob, length, opts) { + var o = parse_cell(blob, length, opts); + o[1].v = blob.read_shift(2, 'i'); + return o; + } + + function parse_NUMBER(blob, length, opts) { + var o = parse_cell(blob, length, opts); + o[1].v = blob.read_shift(8, 'f'); + return o; + } + + function parse_FORMULA(blob, length, opts) { + var tgt = blob.l + length; + var o = parse_cell(blob, length, opts); + /* TODO: formula */ + o[1].v = blob.read_shift(8, 'f'); + if(opts.qpro) blob.l = tgt; + else { + var flen = blob.read_shift(2); + blob.l += flen; + } + return o; + } + + function parse_cell_3(blob, length) { + var o = [{c:0,r:0}, {t:'n',v:0}, 0]; + o[0].r = blob.read_shift(2); o[3] = blob[blob.l++]; o[0].c = blob[blob.l++]; + return o; + } + + function parse_LABEL_16(blob, length) { + var o = parse_cell_3(blob, length); + o[1].t = 's'; + o[1].v = blob.read_shift(length - 4, 'cstr'); + return o; + } + + function parse_NUMBER_18(blob, length) { + var o = parse_cell_3(blob, length); + o[1].v = blob.read_shift(2); + var v = o[1].v >> 1; + /* TODO: figure out all of the corner cases */ + if(o[1].v & 0x1) { + switch(v & 0x07) { + case 1: v = (v >> 3) * 500; break; + case 2: v = (v >> 3) / 20; break; + case 4: v = (v >> 3) / 2000; break; + case 6: v = (v >> 3) / 16; break; + case 7: v = (v >> 3) / 64; break; + default: throw "unknown NUMBER_18 encoding " + (v & 0x07); + } + } + o[1].v = v; + return o; + } + + function parse_NUMBER_17(blob, length) { + var o = parse_cell_3(blob, length); + var v1 = blob.read_shift(4); + var v2 = blob.read_shift(4); + var e = blob.read_shift(2); + if(e == 0xFFFF) { o[1].v = 0; return o; } + var s = e & 0x8000; e = (e&0x7FFF) - 16446; + o[1].v = ((e > 0 ? (v2 << e) : (v2 >>> -e)) + (e > -32 ? 
(v1 << (e + 32)) : (v1 >>> -(e + 32)))); + return o; + } + + function parse_FORMULA_19(blob, length) { + var o = parse_NUMBER_17(blob, 14); + blob.l += length - 14; /* TODO: formula */ + return o; + } + + function parse_NUMBER_25(blob, length) { + var o = parse_cell_3(blob, length); + var v1 = blob.read_shift(4); + o[1].v = v1 >> 6; + return o; + } + + function parse_NUMBER_27(blob, length) { + var o = parse_cell_3(blob, length); + var v1 = blob.read_shift(8,'f'); + o[1].v = v1; + return o; + } + + function parse_FORMULA_28(blob, length) { + var o = parse_NUMBER_27(blob, 14); + blob.l += length - 10; /* TODO: formula */ + return o; + } + + var WK1Enum = { + /*::[*/0x0000/*::]*/: { n:"BOF", f:parseuint16 }, + /*::[*/0x0001/*::]*/: { n:"EOF", f:parsenoop }, + /*::[*/0x0002/*::]*/: { n: "CALCMODE", f:parsenoop }, + /*::[*/0x0003/*::]*/: { n:"CALCORDER", f:parsenoop }, + /*::[*/0x0004/*::]*/: { n:"SPLIT", f:parsenoop }, + /*::[*/0x0005/*::]*/: { n:"SYNC", f:parsenoop }, + /*::[*/0x0006/*::]*/: { n:"RANGE", f:parse_RANGE }, + /*::[*/0x0007/*::]*/: { n:"WINDOW1", f:parsenoop }, + /*::[*/0x0008/*::]*/: { n:"COLW1", f:parsenoop }, + /*::[*/0x0009/*::]*/: { n:"WINTWO", f:parsenoop }, + /*::[*/0x000A/*::]*/: { n:"COLW2", f:parsenoop }, + /*::[*/0x000B/*::]*/: { n:"NAME", f:parsenoop }, + /*::[*/0x000C/*::]*/: { n:"BLANK", f:parsenoop }, + /*::[*/0x000D/*::]*/: { n:"INTEGER", f:parse_INTEGER }, + /*::[*/0x000E/*::]*/: { n:"NUMBER", f:parse_NUMBER }, + /*::[*/0x000F/*::]*/: { n:"LABEL", f:parse_LABEL }, + /*::[*/0x0010/*::]*/: { n:"FORMULA", f:parse_FORMULA }, + /*::[*/0x0018/*::]*/: { n:"TABLE", f:parsenoop }, + /*::[*/0x0019/*::]*/: { n:"ORANGE", f:parsenoop }, + /*::[*/0x001A/*::]*/: { n:"PRANGE", f:parsenoop }, + /*::[*/0x001B/*::]*/: { n:"SRANGE", f:parsenoop }, + /*::[*/0x001C/*::]*/: { n:"FRANGE", f:parsenoop }, + /*::[*/0x001D/*::]*/: { n:"KRANGE1", f:parsenoop }, + /*::[*/0x0020/*::]*/: { n:"HRANGE", f:parsenoop }, + /*::[*/0x0023/*::]*/: { n:"KRANGE2", f:parsenoop }, 
+ /*::[*/0x0024/*::]*/: { n:"PROTEC", f:parsenoop }, + /*::[*/0x0025/*::]*/: { n:"FOOTER", f:parsenoop }, + /*::[*/0x0026/*::]*/: { n:"HEADER", f:parsenoop }, + /*::[*/0x0027/*::]*/: { n:"SETUP", f:parsenoop }, + /*::[*/0x0028/*::]*/: { n:"MARGINS", f:parsenoop }, + /*::[*/0x0029/*::]*/: { n:"LABELFMT", f:parsenoop }, + /*::[*/0x002A/*::]*/: { n:"TITLES", f:parsenoop }, + /*::[*/0x002B/*::]*/: { n:"SHEETJS", f:parsenoop }, + /*::[*/0x002D/*::]*/: { n:"GRAPH", f:parsenoop }, + /*::[*/0x002E/*::]*/: { n:"NGRAPH", f:parsenoop }, + /*::[*/0x002F/*::]*/: { n:"CALCCOUNT", f:parsenoop }, + /*::[*/0x0030/*::]*/: { n:"UNFORMATTED", f:parsenoop }, + /*::[*/0x0031/*::]*/: { n:"CURSORW12", f:parsenoop }, + /*::[*/0x0032/*::]*/: { n:"WINDOW", f:parsenoop }, + /*::[*/0x0033/*::]*/: { n:"STRING", f:parse_LABEL }, + /*::[*/0x0037/*::]*/: { n:"PASSWORD", f:parsenoop }, + /*::[*/0x0038/*::]*/: { n:"LOCKED", f:parsenoop }, + /*::[*/0x003C/*::]*/: { n:"QUERY", f:parsenoop }, + /*::[*/0x003D/*::]*/: { n:"QUERYNAME", f:parsenoop }, + /*::[*/0x003E/*::]*/: { n:"PRINT", f:parsenoop }, + /*::[*/0x003F/*::]*/: { n:"PRINTNAME", f:parsenoop }, + /*::[*/0x0040/*::]*/: { n:"GRAPH2", f:parsenoop }, + /*::[*/0x0041/*::]*/: { n:"GRAPHNAME", f:parsenoop }, + /*::[*/0x0042/*::]*/: { n:"ZOOM", f:parsenoop }, + /*::[*/0x0043/*::]*/: { n:"SYMSPLIT", f:parsenoop }, + /*::[*/0x0044/*::]*/: { n:"NSROWS", f:parsenoop }, + /*::[*/0x0045/*::]*/: { n:"NSCOLS", f:parsenoop }, + /*::[*/0x0046/*::]*/: { n:"RULER", f:parsenoop }, + /*::[*/0x0047/*::]*/: { n:"NNAME", f:parsenoop }, + /*::[*/0x0048/*::]*/: { n:"ACOMM", f:parsenoop }, + /*::[*/0x0049/*::]*/: { n:"AMACRO", f:parsenoop }, + /*::[*/0x004A/*::]*/: { n:"PARSE", f:parsenoop }, + /*::[*/0x00FF/*::]*/: { n:"", f:parsenoop } + }; + + var WK3Enum = { + /*::[*/0x0000/*::]*/: { n:"BOF", f:parsenoop }, + /*::[*/0x0001/*::]*/: { n:"EOF", f:parsenoop }, + /*::[*/0x0003/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0004/*::]*/: { n:"??", f:parsenoop }, + 
/*::[*/0x0005/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0006/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0007/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0009/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000a/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000b/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000c/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000e/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x000f/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0010/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0011/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0012/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0013/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0015/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0016/*::]*/: { n:"LABEL16", f:parse_LABEL_16}, + /*::[*/0x0017/*::]*/: { n:"NUMBER17", f:parse_NUMBER_17 }, + /*::[*/0x0018/*::]*/: { n:"NUMBER18", f:parse_NUMBER_18 }, + /*::[*/0x0019/*::]*/: { n:"FORMULA19", f:parse_FORMULA_19}, + /*::[*/0x001a/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001b/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001c/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001d/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001e/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x001f/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0021/*::]*/: { n:"??", f:parsenoop }, + /*::[*/0x0025/*::]*/: { n:"NUMBER25", f:parse_NUMBER_25 }, + /*::[*/0x0027/*::]*/: { n:"NUMBER27", f:parse_NUMBER_27 }, + /*::[*/0x0028/*::]*/: { n:"FORMULA28", f:parse_FORMULA_28 }, + /*::[*/0x00FF/*::]*/: { n:"", f:parsenoop } + }; + return { + to_workbook: lotus_to_workbook + }; +})(); /* 18.4.1 charset to codepage mapping */ var CS2CP = ({ /*::[*/0/*::]*/: 1252, /* ANSI */ @@ -12750,6 +13071,10 @@ if(CompObj) CompObjP = parse_compobj(CompObj); if(options.bookProps && !options.bookSheets) WorkbookP = ({}/*:any*/); else { if(Workbook) WorkbookP = parse_workbook(Workbook.content, options, !!Workbook.find); + /* Quattro Pro 7-8 */ + else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = 
WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options); + /* Quattro Pro 9 */ + else if(cfb.find('NativeContent_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('NativeContent_MAIN').content, options); else throw new Error("Cannot find Workbook stream"); } @@ -14013,7 +14338,7 @@ var XLSRecordEnum = { /*::[*/0x08c5/*::]*/: { n:"ListCF", f:parsenoop }, /*::[*/0x08c6/*::]*/: { n:"FMQry", f:parsenoop }, /*::[*/0x08c7/*::]*/: { n:"FMSQry", f:parsenoop }, - /*::[*/0x08c8/*::]*/: { n:"PLV", f:parsenoop }, /* supposedly PLV for Excel 11 */ + /*::[*/0x08c8/*::]*/: { n:"PLV", f:parsenoop }, /*::[*/0x08c9/*::]*/: { n:"LnExt", f:parsenoop }, /*::[*/0x08ca/*::]*/: { n:"MkrExt", f:parsenoop }, /*::[*/0x08cb/*::]*/: { n:"CrtCoopt", f:parsenoop }, @@ -14187,6 +14512,7 @@ function parse_dom_table(table/*:HTMLElement*/, opts/*:?any*/)/*:Worksheet*/ { var o = {t:'s', v:v}; if(v != null && v.length && !isNaN(Number(v))) o = {t:'n', v:Number(v)}; ws[encode_cell({c:C, r:R})] = o; + if(range.e.c < C) range.e.c = C; C += CS; } } @@ -15124,6 +15450,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break; case 0xEF: return parse_xlml(d, o); case 0xFF: if(n[1] == 0xFE){ return read_utf16(d, o); } break; + case 0x00: if(n[1] == 0x00 && n[2] >= 0x02 && n[3] == 0x00) return WK_.to_workbook(d, o); break; case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o); } if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o); diff --git a/xlsx.js b/xlsx.js index c420af2..e214229 100644 --- a/xlsx.js +++ b/xlsx.js @@ -5116,11 +5116,12 @@ var PRN = (function() { if(str.substr(0,1024).indexOf("\t") == -1) sep = ","; else sep = "\t"; var R = 0, C = 0, v = 0; var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0; + str = str.replace(/\r\n/g, "\n"); for(;end < str.length;++end) switch((cc=str.charCodeAt(end))) { case 0x22: instr = !instr; break; case sepcc: case 0x0a: 
if(instr) break; var s = str.slice(start, end); - var cell = ({}) + var cell = ({}); if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); } else if(s == "TRUE") { cell.t = 'b'; cell.v = true; } else if(s == "FALSE") { cell.t = 'b'; cell.v = false; } @@ -5130,7 +5131,7 @@ var PRN = (function() { start = end+1; if(range.e.c < C) range.e.c = C; if(range.e.r < R) range.e.r = R; - if(cc == sepcc) ++C; else { C = 0; ++R; }; break; + if(cc == sepcc) ++C; else { C = 0; ++R; } break; default: break; } @@ -5180,6 +5181,326 @@ var PRN = (function() { }; })(); +var WK_ = (function() { + function lotushopper(data, cb, opts) { + if(!data) return; + prep_blob(data, data.l || 0); + var Enum = opts.Enum || WK1Enum; + while(data.l < data.length) { + var RT = data.read_shift(2); + var R = Enum[RT] || Enum[0xFF]; + var length = data.read_shift(2); + var tgt = data.l + length; + var d = R.f(data, length, opts); + data.l = tgt; + if(cb(d, R, RT)) return; + } + } + + function lotus_to_workbook(d, opts) { + switch(opts.type) { + case 'base64': return lotus_to_workbook_buf(s2a(Base64.decode(d)), opts); + case 'binary': return lotus_to_workbook_buf(s2a(d), opts); + case 'buffer': + case 'array': return lotus_to_workbook_buf(d, opts); + } + throw "Unsupported type " + opts.type; + } + + function lotus_to_workbook_buf(d,opts) { + if(!d) return d; + var o = opts || {}; + + var s = {}, n = "Sheet1", sidx = 0; + var sheets = {}, snames = [n]; + + var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; + + if(d[2] == 0x02) o.Enum = WK1Enum; + else if(d[2] == 0x1a) o.Enum = WK3Enum; + else if(d[2] == 0x0e) { o.Enum = WK3Enum; o.qpro = true; d.l = 0; } + else throw new Error("Unrecognized LOTUS BOF " + d[2]); + lotushopper(d, function(val, R, RT) { + if(d[2] == 0x02) switch(RT) { + case 0x00: + o.vers = val; + if(val >= 0x1000) o.qpro = true; + break; + case 0x06: refguess = val; break; /* RANGE */ + case 0x0F: /* LABEL */ + if(!opts.qpro) val[1].v = val[1].v.substr(1); + /* falls through */ + 
case 0x0D: /* INTEGER */ + case 0x0E: /* NUMBER */ + case 0x10: /* FORMULA */ + case 0x33: /* STRING */ + s[encode_cell(val[0])] = val[1]; + /* TODO: FORMAT */ + break; + } else switch(RT) { + case 0x16: /* LABEL16 */ + val[1].v = val[1].v.substr(1); + /* falls through */ + case 0x17: /* NUMBER17 */ + case 0x18: /* NUMBER18 */ + case 0x19: /* FORMULA19 */ + case 0x25: /* NUMBER25 */ + case 0x27: /* NUMBER27 */ + case 0x28: /* FORMULA28 */ + if(val[3] > sidx) { + s["!ref"] = encode_range(refguess); + sheets[n] = s; + s = {}; + refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; + sidx = val[3]; n = "Sheet" + (sidx + 1); + snames.push(n); + } + s[encode_cell(val[0])] = val[1]; + if(refguess.e.c < val[0].c) refguess.e.c = val[0].c; + if(refguess.e.r < val[0].r) refguess.e.r = val[0].r; + break; + default: break; + } + }, o); + + s["!ref"] = encode_range(refguess); + sheets[n] = s; + return { SheetNames: snames, Sheets:sheets }; + } + + function parse_RANGE(blob, length) { + var o = {s:{c:0,r:0},e:{c:0,r:0}}; + o.s.c = blob.read_shift(2); + o.s.r = blob.read_shift(2); + o.e.c = blob.read_shift(2); + o.e.r = blob.read_shift(2); + if(o.s.c == 0xFFFF) o.s.c = o.e.c = o.s.r = o.e.r = 0; + return o; + } + + function parse_cell(blob, length, opts) { + var o = [{c:0,r:0}, {t:'n',v:0}, 0]; + if(opts.qpro && opts.vers != 0x5120) { + o[0].c = blob.read_shift(1); + blob.l++; + o[0].r = blob.read_shift(2); + blob.l+=2; + } else { + o[2] = blob.read_shift(1); + o[0].c = blob.read_shift(2); o[0].r = blob.read_shift(2); + } + return o; + } + + function parse_LABEL(blob, length, opts) { + var tgt = blob.l + length; + var o = parse_cell(blob, length, opts); + o[1].t = 's'; + if(opts.vers == 0x5120) { + blob.l++; + var len = blob.read_shift(1); + o[1].v = blob.read_shift(len, 'utf8'); + return o; + } + if(opts.qpro) blob.l++; + o[1].v = blob.read_shift(tgt - blob.l, 'cstr'); + return o; + } + + function parse_INTEGER(blob, length, opts) { + var o = parse_cell(blob, length, opts); + o[1].v = 
blob.read_shift(2, 'i'); + return o; + } + + function parse_NUMBER(blob, length, opts) { + var o = parse_cell(blob, length, opts); + o[1].v = blob.read_shift(8, 'f'); + return o; + } + + function parse_FORMULA(blob, length, opts) { + var tgt = blob.l + length; + var o = parse_cell(blob, length, opts); + /* TODO: formula */ + o[1].v = blob.read_shift(8, 'f'); + if(opts.qpro) blob.l = tgt; + else { + var flen = blob.read_shift(2); + blob.l += flen; + } + return o; + } + + function parse_cell_3(blob, length) { + var o = [{c:0,r:0}, {t:'n',v:0}, 0]; + o[0].r = blob.read_shift(2); o[3] = blob[blob.l++]; o[0].c = blob[blob.l++]; + return o; + } + + function parse_LABEL_16(blob, length) { + var o = parse_cell_3(blob, length); + o[1].t = 's'; + o[1].v = blob.read_shift(length - 4, 'cstr'); + return o; + } + + function parse_NUMBER_18(blob, length) { + var o = parse_cell_3(blob, length); + o[1].v = blob.read_shift(2); + var v = o[1].v >> 1; + /* TODO: figure out all of the corner cases */ + if(o[1].v & 0x1) { + switch(v & 0x07) { + case 1: v = (v >> 3) * 500; break; + case 2: v = (v >> 3) / 20; break; + case 4: v = (v >> 3) / 2000; break; + case 6: v = (v >> 3) / 16; break; + case 7: v = (v >> 3) / 64; break; + default: throw "unknown NUMBER_18 encoding " + (v & 0x07); + } + } + o[1].v = v; + return o; + } + + function parse_NUMBER_17(blob, length) { + var o = parse_cell_3(blob, length); + var v1 = blob.read_shift(4); + var v2 = blob.read_shift(4); + var e = blob.read_shift(2); + if(e == 0xFFFF) { o[1].v = 0; return o; } + var s = e & 0x8000; e = (e&0x7FFF) - 16446; + o[1].v = ((e > 0 ? (v2 << e) : (v2 >>> -e)) + (e > -32 ? 
(v1 << (e + 32)) : (v1 >>> -(e + 32)))); + return o; + } + + function parse_FORMULA_19(blob, length) { + var o = parse_NUMBER_17(blob, 14); + blob.l += length - 14; /* TODO: formula */ + return o; + } + + function parse_NUMBER_25(blob, length) { + var o = parse_cell_3(blob, length); + var v1 = blob.read_shift(4); + o[1].v = v1 >> 6; + return o; + } + + function parse_NUMBER_27(blob, length) { + var o = parse_cell_3(blob, length); + var v1 = blob.read_shift(8,'f'); + o[1].v = v1; + return o; + } + + function parse_FORMULA_28(blob, length) { + var o = parse_NUMBER_27(blob, 14); + blob.l += length - 10; /* TODO: formula */ + return o; + } + + var WK1Enum = { +0x0000: { n:"BOF", f:parseuint16 }, +0x0001: { n:"EOF", f:parsenoop }, +0x0002: { n: "CALCMODE", f:parsenoop }, +0x0003: { n:"CALCORDER", f:parsenoop }, +0x0004: { n:"SPLIT", f:parsenoop }, +0x0005: { n:"SYNC", f:parsenoop }, +0x0006: { n:"RANGE", f:parse_RANGE }, +0x0007: { n:"WINDOW1", f:parsenoop }, +0x0008: { n:"COLW1", f:parsenoop }, +0x0009: { n:"WINTWO", f:parsenoop }, +0x000A: { n:"COLW2", f:parsenoop }, +0x000B: { n:"NAME", f:parsenoop }, +0x000C: { n:"BLANK", f:parsenoop }, +0x000D: { n:"INTEGER", f:parse_INTEGER }, +0x000E: { n:"NUMBER", f:parse_NUMBER }, +0x000F: { n:"LABEL", f:parse_LABEL }, +0x0010: { n:"FORMULA", f:parse_FORMULA }, +0x0018: { n:"TABLE", f:parsenoop }, +0x0019: { n:"ORANGE", f:parsenoop }, +0x001A: { n:"PRANGE", f:parsenoop }, +0x001B: { n:"SRANGE", f:parsenoop }, +0x001C: { n:"FRANGE", f:parsenoop }, +0x001D: { n:"KRANGE1", f:parsenoop }, +0x0020: { n:"HRANGE", f:parsenoop }, +0x0023: { n:"KRANGE2", f:parsenoop }, +0x0024: { n:"PROTEC", f:parsenoop }, +0x0025: { n:"FOOTER", f:parsenoop }, +0x0026: { n:"HEADER", f:parsenoop }, +0x0027: { n:"SETUP", f:parsenoop }, +0x0028: { n:"MARGINS", f:parsenoop }, +0x0029: { n:"LABELFMT", f:parsenoop }, +0x002A: { n:"TITLES", f:parsenoop }, +0x002B: { n:"SHEETJS", f:parsenoop }, +0x002D: { n:"GRAPH", f:parsenoop }, +0x002E: { n:"NGRAPH", 
f:parsenoop }, +0x002F: { n:"CALCCOUNT", f:parsenoop }, +0x0030: { n:"UNFORMATTED", f:parsenoop }, +0x0031: { n:"CURSORW12", f:parsenoop }, +0x0032: { n:"WINDOW", f:parsenoop }, +0x0033: { n:"STRING", f:parse_LABEL }, +0x0037: { n:"PASSWORD", f:parsenoop }, +0x0038: { n:"LOCKED", f:parsenoop }, +0x003C: { n:"QUERY", f:parsenoop }, +0x003D: { n:"QUERYNAME", f:parsenoop }, +0x003E: { n:"PRINT", f:parsenoop }, +0x003F: { n:"PRINTNAME", f:parsenoop }, +0x0040: { n:"GRAPH2", f:parsenoop }, +0x0041: { n:"GRAPHNAME", f:parsenoop }, +0x0042: { n:"ZOOM", f:parsenoop }, +0x0043: { n:"SYMSPLIT", f:parsenoop }, +0x0044: { n:"NSROWS", f:parsenoop }, +0x0045: { n:"NSCOLS", f:parsenoop }, +0x0046: { n:"RULER", f:parsenoop }, +0x0047: { n:"NNAME", f:parsenoop }, +0x0048: { n:"ACOMM", f:parsenoop }, +0x0049: { n:"AMACRO", f:parsenoop }, +0x004A: { n:"PARSE", f:parsenoop }, +0x00FF: { n:"", f:parsenoop } + }; + + var WK3Enum = { +0x0000: { n:"BOF", f:parsenoop }, +0x0001: { n:"EOF", f:parsenoop }, +0x0003: { n:"??", f:parsenoop }, +0x0004: { n:"??", f:parsenoop }, +0x0005: { n:"??", f:parsenoop }, +0x0006: { n:"??", f:parsenoop }, +0x0007: { n:"??", f:parsenoop }, +0x0009: { n:"??", f:parsenoop }, +0x000a: { n:"??", f:parsenoop }, +0x000b: { n:"??", f:parsenoop }, +0x000c: { n:"??", f:parsenoop }, +0x000e: { n:"??", f:parsenoop }, +0x000f: { n:"??", f:parsenoop }, +0x0010: { n:"??", f:parsenoop }, +0x0011: { n:"??", f:parsenoop }, +0x0012: { n:"??", f:parsenoop }, +0x0013: { n:"??", f:parsenoop }, +0x0015: { n:"??", f:parsenoop }, +0x0016: { n:"LABEL16", f:parse_LABEL_16}, +0x0017: { n:"NUMBER17", f:parse_NUMBER_17 }, +0x0018: { n:"NUMBER18", f:parse_NUMBER_18 }, +0x0019: { n:"FORMULA19", f:parse_FORMULA_19}, +0x001a: { n:"??", f:parsenoop }, +0x001b: { n:"??", f:parsenoop }, +0x001c: { n:"??", f:parsenoop }, +0x001d: { n:"??", f:parsenoop }, +0x001e: { n:"??", f:parsenoop }, +0x001f: { n:"??", f:parsenoop }, +0x0021: { n:"??", f:parsenoop }, +0x0025: { n:"NUMBER25", 
f:parse_NUMBER_25 }, +0x0027: { n:"NUMBER27", f:parse_NUMBER_27 }, +0x0028: { n:"FORMULA28", f:parse_FORMULA_28 }, +0x00FF: { n:"", f:parsenoop } + }; + return { + to_workbook: lotus_to_workbook + }; +})(); /* 18.4.1 charset to codepage mapping */ var CS2CP = ({ 0: 1252, /* ANSI */ @@ -12689,6 +13010,10 @@ if(CompObj) CompObjP = parse_compobj(CompObj); if(options.bookProps && !options.bookSheets) WorkbookP = ({}); else { if(Workbook) WorkbookP = parse_workbook(Workbook.content, options, !!Workbook.find); + /* Quattro Pro 7-8 */ + else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options); + /* Quattro Pro 9 */ + else if(cfb.find('NativeContent_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('NativeContent_MAIN').content, options); else throw new Error("Cannot find Workbook stream"); } @@ -13952,7 +14277,7 @@ var XLSRecordEnum = { 0x08c5: { n:"ListCF", f:parsenoop }, 0x08c6: { n:"FMQry", f:parsenoop }, 0x08c7: { n:"FMSQry", f:parsenoop }, -0x08c8: { n:"PLV", f:parsenoop }, /* supposedly PLV for Excel 11 */ +0x08c8: { n:"PLV", f:parsenoop }, 0x08c9: { n:"LnExt", f:parsenoop }, 0x08ca: { n:"MkrExt", f:parsenoop }, 0x08cb: { n:"CrtCoopt", f:parsenoop }, @@ -14126,6 +14451,7 @@ function parse_dom_table(table, opts) { var o = {t:'s', v:v}; if(v != null && v.length && !isNaN(Number(v))) o = {t:'n', v:Number(v)}; ws[encode_cell({c:C, r:R})] = o; + if(range.e.c < C) range.e.c = C; C += CS; } } @@ -15059,6 +15385,7 @@ function readSync(data, opts) { case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break; case 0xEF: return parse_xlml(d, o); case 0xFF: if(n[1] == 0xFE){ return read_utf16(d, o); } break; + case 0x00: if(n[1] == 0x00 && n[2] >= 0x02 && n[3] == 0x00) return WK_.to_workbook(d, o); break; case 0x03: case 0x83: case 0x8B: return DBF.to_workbook(d, o); } if(n[2] <= 12 && n[3] <= 31) return DBF.to_workbook(d, o);