From 81b7614e45dec3c6793392940ea08746d34191cb Mon Sep 17 00:00:00 2001 From: SheetJS Date: Tue, 10 Aug 2021 22:53:38 -0400 Subject: [PATCH] SYLK shared formulae --- .gitattributes | 3 + README.md | 26 +++---- bits/40_harb.js | 155 +++++++++++++++++++++++-------------- bits/87_read.js | 5 +- demos/function/README.md | 4 +- dist/cpexcel.js | 7 +- docbits/00_intro.md | 3 +- docbits/12_optional.md | 2 +- docbits/15_phil.md | 2 +- docbits/20_import.md | 6 +- docbits/25_manip.md | 2 +- docbits/50_csf.md | 2 +- docbits/90_test.md | 7 +- misc/docs/README.md | 26 +++---- package.json | 2 +- xlsx.flow.js | 162 ++++++++++++++++++++++++--------------- xlsx.js | 160 ++++++++++++++++++++++++-------------- xlsx.mini.flow.js | 162 ++++++++++++++++++++++++--------------- xlsx.mini.js | 162 ++++++++++++++++++++++++--------------- 19 files changed, 550 insertions(+), 348 deletions(-) diff --git a/.gitattributes b/.gitattributes index 62dcdc1..f26ed36 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,6 @@ +*.*s linguist-documentation +*.html linguist-documentation + *.md text eol=lf bits/*.js text eol=lf test.js text eol=lf diff --git a/README.md b/README.md index ba39783..e64250b 100644 --- a/README.md +++ b/README.md @@ -1,4 +1,4 @@ -# [SheetJS js-xlsx](http://sheetjs.com) +# [SheetJS](http://sheetjs.com) Parser and writer for various spreadsheet formats. Pure-JS cleanroom implementation from official specifications, related documents, and test files. @@ -36,7 +36,6 @@ enhancements, additional features like styling, and dedicated support. [![Build Status](https://saucelabs.com/browser-matrix/sheetjs.svg)](https://saucelabs.com/u/sheetjs) -[![Build Status](https://travis-ci.org/SheetJS/sheetjs.svg?branch=master)](https://travis-ci.org/SheetJS/sheetjs) [![Build Status](https://semaphoreci.com/api/v1/sheetjs/sheetjs/branches/master/shields_badge.svg)](https://semaphoreci.com/sheetjs/sheetjs) [![Coverage Status](http://img.shields.io/coveralls/SheetJS/sheetjs/master.svg)](https://coveralls.io/r/SheetJS/sheetjs?branch=master) [![Dependencies Status](https://david-dm.org/sheetjs/sheetjs/status.svg)](https://david-dm.org/sheetjs/sheetjs) @@ -258,7 +257,7 @@ be configured to remove support with `resolve.alias`: ### ECMAScript 5 Compatibility Since the library uses functions like `Array#forEach`, older browsers require -[shims to provide missing functions](http://oss.sheetjs.com/js-xlsx/shim.js). +[shims to provide missing functions](http://oss.sheetjs.com/sheetjs/shim.js). To use the shim, add the shim before the script tag that loads `xlsx.js`: @@ -283,7 +282,7 @@ Third-party libraries either supported one format, or they involved a separate set of classes for each supported file type. Even though XLSB was introduced in Excel 2007, nothing outside of SheetJS or Excel supported the format. -To promote a format-agnostic view, js-xlsx starts from a pure-JS representation +To promote a format-agnostic view, SheetJS starts from a pure-JS representation that we call the ["Common Spreadsheet Format"](#common-spreadsheet-format). Emphasizing a uniform object representation enables new features like format conversion (reading an XLSX template and saving as XLS) and circumvents the mess @@ -389,7 +388,7 @@ var workbook = XLSX.read(htmlstr, {type:'string'}); Browser download file (ajax) (click to show) Note: for a more complete example that works in older browsers, check the demo -at . The [`xhr` demo](demos/xhr/) +at . The [`xhr` demo](demos/xhr/) includes more examples with `XMLHttpRequest` and `fetch`. ```js @@ -465,7 +464,7 @@ More specialized cases, including mobile app file processing, are covered in the ### Parsing Examples -- HTML5 File API / Base64 Text / Web Workers +- HTML5 File API / Base64 Text / Web Workers Note that older versions of IE do not support HTML5 File API, so the Base64 mode is used for testing. @@ -489,7 +488,7 @@ On Windows XP and up you can get the Base64 encoding using `certutil`: -- XMLHttpRequest +- XMLHttpRequest ### Streaming Read @@ -627,7 +626,7 @@ error if the workbook is empty. - read + modify + write files -- node +- node The node version installs a command line tool `xlsx` which can read spreadsheet files and output the contents in various formats. The source is available at @@ -882,7 +881,7 @@ Utilities are available in the `XLSX.utils` object and are described in the ## Common Spreadsheet Format -js-xlsx conforms to the Common Spreadsheet Format (CSF): +SheetJS conforms to the Common Spreadsheet Format (CSF): ### General Structures @@ -1605,7 +1604,7 @@ The visibility setting is stored in the `Hidden` property of sheet props array. | 1 | Hidden | | 2 | Very Hidden | -With : +With : ```js > wb.Workbook.Sheets.map(function(x) { return [x.name, x.Hidden] }) @@ -2728,14 +2727,11 @@ $ open -a Chromium.app http://localhost:8000/stress.html - Safari 6+ (iOS and Desktop) - Edge 13+, FF 18+, and Opera 12+ -Tests utilize the mocha testing framework. Travis-CI and Sauce Labs links: +Tests utilize the mocha testing framework. - - for XLSX module in nodejs - - for XLSX module in nodejs - - for XLS\* modules - for XLS\* modules using Sauce Labs -The Travis-CI test suite also includes tests for various time zones. To change +The test suite also includes tests for various time zones. To change the timezone locally, set the TZ environment variable: ```bash diff --git a/bits/40_harb.js b/bits/40_harb.js index 4deb49b..849d785 100644 --- a/bits/40_harb.js +++ b/bits/40_harb.js @@ -35,7 +35,9 @@ var dbf_codepage_map = { /*::[*/0x4D/*::]*/: 936, /*::[*/0x4E/*::]*/: 949, /*::[*/0x4F/*::]*/: 950, /*::[*/0x50/*::]*/: 874, /*::[*/0x57/*::]*/: 1252, /*::[*/0x58/*::]*/: 1252, - /*::[*/0x59/*::]*/: 1252, + /*::[*/0x59/*::]*/: 1252, /*::[*/0x6C/*::]*/: 863, + /*::[*/0x86/*::]*/: 737, /*::[*/0x87/*::]*/: 852, + /*::[*/0x88/*::]*/: 857, /*::[*/0xCC/*::]*/: 1257, /*::[*/0xFF/*::]*/: 16969 }; @@ -59,7 +61,6 @@ var DBF_SUPPORTED_VERSIONS = [0x02, 0x03, 0x30, 0x31, 0x83, 0x8B, 0x8C, 0xF5]; /* TODO: find an actual specification */ function dbf_to_aoa(buf, opts)/*:AOA*/ { var out/*:AOA*/ = []; - /* TODO: browser based */ var d/*:Block*/ = (new_raw_buf(1)/*:any*/); switch(opts.type) { case 'base64': d = s2a(Base64.decode(buf)); break; @@ -68,44 +69,55 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { case 'array': d = buf; break; } prep_blob(d, 0); + /* header */ var ft = d.read_shift(1); - var memo = false; + var memo = !!(ft & 0x88); var vfp = false, l7 = false; switch(ft) { - case 0x02: case 0x03: break; - case 0x30: vfp = true; memo = true; break; - case 0x31: vfp = true; break; - case 0x83: memo = true; break; - case 0x8B: memo = true; break; - case 0x8C: memo = true; l7 = true; break; - case 0xF5: memo = true; break; + case 0x02: break; // dBASE II + case 0x03: break; // dBASE III + case 0x30: vfp = true; memo = true; break; // VFP + case 0x31: vfp = true; memo = true; break; // VFP with autoincrement + // 0x43 dBASE IV SQL table files + // 0x63 dBASE IV SQL system files + case 0x83: break; // dBASE III with memo + case 0x8B: break; // dBASE IV with memo + case 0x8C: l7 = true; break; // dBASE Level 7 with memo + // case 0xCB dBASE IV SQL table files with memo + case 0xF5: break; // FoxPro 2.x with memo + // case 0xFB FoxBASE default: throw new Error("DBF Unsupported Version: " + ft.toString(16)); } - var /*filedate = new Date(),*/ nrow = 0, fpos = 0; + + var nrow = 0, fpos = 0x0209; if(ft == 0x02) nrow = d.read_shift(2); - /*filedate = new Date(d.read_shift(1) + 1900, d.read_shift(1) - 1, d.read_shift(1));*/d.l += 3; - if(ft != 0x02) nrow = d.read_shift(4); if(nrow > 1048576) nrow = 1e6; - if(ft != 0x02) fpos = d.read_shift(2); - var rlen = d.read_shift(2); + d.l += 3; // dBASE II stores DDMMYY date, others use YYMMDD + if(ft != 0x02) nrow = d.read_shift(4); + if(nrow > 1048576) nrow = 1e6; + + if(ft != 0x02) fpos = d.read_shift(2); // header length + var rlen = d.read_shift(2); // record length var /*flags = 0,*/ current_cp = opts.codepage || 1252; - if(ft != 0x02) { - d.l+=16; - /*flags = */d.read_shift(1); - //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); + if(ft != 0x02) { // 20 reserved bytes + d.l+=16; + /*flags = */d.read_shift(1); + //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); - /* codepage present in FoxPro */ - if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; - d.l+=1; + /* codepage present in FoxPro and dBASE Level 7 */ + if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; + d.l+=1; - d.l+=2; + d.l+=2; } - if(l7) d.l += 36; + if(l7) d.l += 36; // Level 7: 32 byte "Language driver name", 4 byte reserved + /*:: type DBFField = { name:string; len:number; type:string; } */ var fields/*:Array*/ = [], field/*:DBFField*/ = ({}/*:any*/); - var hend = fpos - 10 - (vfp ? 264 : 0), ww = l7 ? 32 : 11; - while(ft == 0x02 ? d.l < d.length && d[d.l] != 0x0d: d.l < hend) { + var hend = Math.min(d.length, (ft == 0x02 ? 0x209 : (fpos - 10 - (vfp ? 264 : 0)))); + var ww = l7 ? 32 : 11; + while(d.l < hend && d[d.l] != 0x0d) { field = ({}/*:any*/); field.name = cptable.utils.decode(current_cp, d.slice(d.l, d.l+ww)).replace(/[\u0000\r\n].*$/g,""); d.l += ww; @@ -117,42 +129,45 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { if(field.name.length) fields.push(field); if(ft != 0x02) d.l += l7 ? 13 : 14; switch(field.type) { - case 'B': // VFP Double + case 'B': // Double (VFP) / Binary (dBASE L7) if((!vfp || field.len != 8) && opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'G': // General - case 'P': // Picture + case 'G': // General (FoxPro and dBASE L7) + case 'P': // Picture (FoxPro and dBASE L7) if(opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'C': // character - case 'D': // date - case 'F': // floating point - case 'I': // long - case 'L': // boolean - case 'M': // memo - case 'N': // number - case 'O': // double - case 'T': // datetime - case 'Y': // currency - case '0': // VFP _NullFlags - case '@': // timestamp - case '+': // autoincrement + case '+': // Autoincrement (dBASE L7 only) + case '0': // _NullFlags (VFP only) + case '@': // Timestamp (dBASE L7 only) + case 'C': // Character (dBASE II) + case 'D': // Date (dBASE III) + case 'F': // Float (dBASE IV) + case 'I': // Long (VFP and dBASE L7) + case 'L': // Logical (dBASE II) + case 'M': // Memo (dBASE III) + case 'N': // Number (dBASE II) + case 'O': // Double (dBASE L7 only) + case 'T': // Datetime (VFP only) + case 'Y': // Currency (VFP only) break; default: throw new Error('Unknown Field Type: ' + field.type); } } + if(d[d.l] !== 0x0D) d.l = fpos-1; - else if(ft == 0x02) d.l = 0x209; - if(ft != 0x02) { - if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); - d.l = fpos; - } + if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); + d.l = fpos; + /* data */ var R = 0, C = 0; out[0] = []; for(C = 0; C != fields.length; ++C) out[0][C] = fields[C].name; while(nrow-- > 0) { - if(d[d.l] === 0x2A) { d.l+=rlen; continue; } + if(d[d.l] === 0x2A) { + // TODO: record marked as deleted -- create a hidden row? + d.l+=rlen; + continue; + } ++d.l; out[++R] = []; C = 0; for(C = 0; C != fields.length; ++C) { @@ -161,8 +176,8 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { var s = cptable.utils.decode(current_cp, dd); switch(fields[C].type) { case 'C': - out[R][C] = cptable.utils.decode(current_cp, dd); - out[R][C] = out[R][C].trim(); + // NOTE: it is conventional to write ' / / ' for empty dates + if(s.trim().length) out[R][C] = s.replace(/\s+$/,""); break; case 'D': if(s.length === 8) out[R][C] = new Date(+s.slice(0,4), +s.slice(4,6)-1, +s.slice(6,8)); @@ -170,18 +185,24 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { break; case 'F': out[R][C] = parseFloat(s.trim()); break; case '+': case 'I': out[R][C] = l7 ? dd.read_shift(-4, 'i') ^ 0x80000000 : dd.read_shift(4, 'i'); break; - case 'L': switch(s.toUpperCase()) { + case 'L': switch(s.trim().toUpperCase()) { case 'Y': case 'T': out[R][C] = true; break; case 'N': case 'F': out[R][C] = false; break; - case ' ': case '?': out[R][C] = false; break; /* NOTE: technically uninitialized */ + case '': case '?': break; default: throw new Error("DBF Unrecognized L:|" + s + "|"); } break; case 'M': /* TODO: handle memo files */ if(!memo) throw new Error("DBF Unexpected MEMO for type " + ft.toString(16)); out[R][C] = "##MEMO##" + (l7 ? parseInt(s.trim(), 10): dd.read_shift(4)); break; - case 'N': out[R][C] = +s.replace(/\u0000/g,"").trim(); break; - case '@': out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); break; + case 'N': + s = s.replace(/\u0000/g,"").trim(); + // NOTE: dBASE II interprets " . " as 0 + if(s && s != ".") out[R][C] = +s || 0; break; + case '@': + // NOTE: dBASE specs appear to be incorrect + out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); + break; case 'T': out[R][C] = new Date((dd.read_shift(4) - 0x253D8C) * 0x5265C00 + dd.read_shift(4)); break; case 'Y': out[R][C] = dd.read_shift(4,'i')/1e4; break; case 'O': out[R][C] = -dd.read_shift(-8, 'f'); break; @@ -374,7 +395,7 @@ var SYLK = (function() { formats.push(rstr.slice(3).replace(/;;/g, ";")); break; case 'C': - var C_seen_K = false, C_seen_X = false; + var C_seen_K = false, C_seen_X = false, C_seen_S = false, C_seen_E = false, _R = -1, _C = -1; for(rj=1; rj -1 && arr[_R][_C]; + if(!shrbase || !shrbase[1]) throw new Error("SYLK shared formula cannot find base"); + arr[R][C][1] = shift_formula_str(shrbase[1], {r: R - _R, c: C - _C}); + } break; case 'F': var F_seen = 0; @@ -736,9 +775,9 @@ var ETH = (function() { var PRN = (function() { function set_text_arr(data/*:string*/, arr/*:AOA*/, R/*:number*/, C/*:number*/, o/*:any*/) { if(o.raw) arr[R][C] = data; + else if(data === ""){/* empty */} else if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; - else if(data === ""){/* empty */} else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data); else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data); else arr[R][C] = data; @@ -897,7 +936,7 @@ var PRN = (function() { default: throw new Error("Unrecognized type " + opts.type); } if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3)); - else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str)); + else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(28591,str)); if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts); return prn_to_sheet_str(str, opts); } diff --git a/bits/87_read.js b/bits/87_read.js index d3b2d84..963e9c2 100644 --- a/bits/87_read.js +++ b/bits/87_read.js @@ -85,7 +85,10 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { case 0xD0: if(n[1] === 0xCF && n[2] === 0x11 && n[3] === 0xE0 && n[4] === 0xA1 && n[5] === 0xB1 && n[6] === 0x1A && n[7] === 0xE1) return read_cfb(CFB.read(d, o), o); break; case 0x09: if(n[1] <= 0x04) return parse_xlscfb(d, o); break; case 0x3C: return parse_xlml(d, o); - case 0x49: if(n[1] === 0x44) return read_wb_ID(d, o); break; + case 0x49: + if(n[1] === 0x49 && n[2] === 0x2a && n[3] === 0x00) throw new Error("TIFF Image File is not a spreadsheet"); + if(n[1] === 0x44) return read_wb_ID(d, o); + break; case 0x54: if(n[1] === 0x41 && n[2] === 0x42 && n[3] === 0x4C) return DIF.to_workbook(d, o); break; case 0x50: return (n[1] === 0x4B && n[2] < 0x09 && n[3] < 0x09) ? read_zip(d, o) : read_prn(data, d, o, str); case 0xEF: return n[3] === 0x3C ? parse_xlml(d, o) : read_prn(data, d, o, str); diff --git a/demos/function/README.md b/demos/function/README.md index bee3fa5..09038ed 100644 --- a/demos/function/README.md +++ b/demos/function/README.md @@ -116,8 +116,8 @@ in the ZIP file. When reading form data, be sure to include the necessary binary types on the AWS API Gateway console. To do this, navigate to the "Binary Media Types" section in the settings tab of the console. -For reading a file, you may need to add "multipart/form-data". -For downloading a file, you may need to add "application/vnd.ms-excel". +For reading a file, you may need to add `"multipart/form-data"`. +For downloading a file, you may need to add `"application/vnd.ms-excel"`. #### Azure Functions diff --git a/dist/cpexcel.js b/dist/cpexcel.js index 6030cee..4f195eb 100644 --- a/dist/cpexcel.js +++ b/dist/cpexcel.js @@ -1,6 +1,6 @@ -/* cpexcel.js (C) 2013-present SheetJS -- http://sheetjs.com */ +/*! cpexcel.js (C) 2013-present SheetJS -- http://sheetjs.com */ /*jshint -W100 */ -var cptable = {version:"1.14.0"}; +var cptable = {version:"1.15.0"}; cptable[437] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàåçêëèïîìÄÅÉæÆôöòûùÿÖÜ¢£¥₧ƒáíóúñѪº¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })(); cptable[620] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÇüéâäàąçêëèïîćÄĄĘęłôöĆûùŚÖÜ¢Ł¥śƒŹŻóÓńŃźż¿⌐¬½¼¡«»░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀αßΓπΣσµτΦΘΩδ∞φε∩≡±≥≤⌠⌡÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })(); cptable[737] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρσςτυφχψ░▒▓│┤╡╢╖╕╣║╗╝╜╛┐└┴┬├─┼╞╟╚╔╩╦╠═╬╧╨╤╥╙╘╒╓╫╪┘┌█▄▌▐▀ωάέήϊίόύϋώΆΈΉΊΌΎΏ±≥≤ΪΫ÷≈°∙·√ⁿ²■ ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })(); @@ -973,9 +973,10 @@ return {"enc": e, "dec": d }; })(); cptable[10029] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄĀāÉĄÖÜáąČäčĆć鏟ĎíďĒēĖóėôöõúĚěü†°Ę£§•¶ß®©™ę¨≠ģĮįĪ≤≥īĶ∂∑łĻļĽľĹĺŅņѬ√ńŇ∆«»… ňŐÕőŌ–—“”‘’÷◊ōŔŕŘ‹›řŖŗŠ‚„šŚśÁŤťÍŽžŪÓÔūŮÚůŰűŲųÝýķŻŁżĢˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })(); cptable[10079] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûüÝ°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»… ÀÃÕŒœ–—“”‘’÷◊ÿŸ⁄¤ÐðÞþý·‚„‰ÂÊÁËÈÍÎÏÌÓÔ�ÒÚÛÙıˆ˜¯˘˙˚¸˝˛ˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })(); cptable[10081] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~ÄÅÇÉÑÖÜáàâäãåçéèêëíìîïñóòôöõúùûü†°¢£§•¶ß®©™´¨≠ÆØ∞±≤≥¥µ∂∑∏π∫ªºΩæø¿¡¬√ƒ≈∆«»… ÀÃÕŒœ–—“”‘’÷◊ÿŸĞğİıŞş‡·‚„‰ÂÊÁËÈÍÎÏÌÓÔ�ÒÚÛÙ�ˆ˜¯˘˙˚¸˝˛ˇ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })(); +cptable[28591] = (function(){ var d = "\u0000\u0001\u0002\u0003\u0004\u0005\u0006\u0007\b\t\n\u000b\f\r\u000e\u000f\u0010\u0011\u0012\u0013\u0014\u0015\u0016\u0017\u0018\u0019\u001a\u001b\u001c\u001d\u001e\u001f !\"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_`abcdefghijklmnopqrstuvwxyz{|}~€‚ƒ„…†‡ˆ‰Š‹ŒŽ‘’“”•–—˜™š›œžŸ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ", D = [], e = {}; for(var i=0;i!=d.length;++i) { if(d.charCodeAt(i) !== 0xFFFD) e[d.charAt(i)] = i; D[i] = d.charAt(i); } return {"enc": e, "dec": D }; })(); // eslint-disable-next-line no-undef if (typeof module !== 'undefined' && module.exports && typeof DO_NOT_EXPORT_CODEPAGE === 'undefined') module.exports = cptable; -/* cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */ +/*! cputils.js (C) 2013-present SheetJS -- http://sheetjs.com */ /* vim: set ft=javascript: */ /*jshint newcap: false */ (function(root, factory) { diff --git a/docbits/00_intro.md b/docbits/00_intro.md index bc9dd0c..d4de4f3 100644 --- a/docbits/00_intro.md +++ b/docbits/00_intro.md @@ -1,4 +1,4 @@ -# [SheetJS js-xlsx](http://sheetjs.com) +# [SheetJS](http://sheetjs.com) Parser and writer for various spreadsheet formats. Pure-JS cleanroom implementation from official specifications, related documents, and test files. @@ -36,7 +36,6 @@ enhancements, additional features like styling, and dedicated support. [![Build Status](https://saucelabs.com/browser-matrix/sheetjs.svg)](https://saucelabs.com/u/sheetjs) -[![Build Status](https://travis-ci.org/SheetJS/sheetjs.svg?branch=master)](https://travis-ci.org/SheetJS/sheetjs) [![Build Status](https://semaphoreci.com/api/v1/sheetjs/sheetjs/branches/master/shields_badge.svg)](https://semaphoreci.com/sheetjs/sheetjs) [![Coverage Status](http://img.shields.io/coveralls/SheetJS/sheetjs/master.svg)](https://coveralls.io/r/SheetJS/sheetjs?branch=master) [![Dependencies Status](https://david-dm.org/sheetjs/sheetjs/status.svg)](https://david-dm.org/sheetjs/sheetjs) diff --git a/docbits/12_optional.md b/docbits/12_optional.md index 2b56fbe..c25d021 100644 --- a/docbits/12_optional.md +++ b/docbits/12_optional.md @@ -34,7 +34,7 @@ be configured to remove support with `resolve.alias`: ### ECMAScript 5 Compatibility Since the library uses functions like `Array#forEach`, older browsers require -[shims to provide missing functions](http://oss.sheetjs.com/js-xlsx/shim.js). +[shims to provide missing functions](http://oss.sheetjs.com/sheetjs/shim.js). To use the shim, add the shim before the script tag that loads `xlsx.js`: diff --git a/docbits/15_phil.md b/docbits/15_phil.md index 72558e2..a593cea 100644 --- a/docbits/15_phil.md +++ b/docbits/15_phil.md @@ -8,7 +8,7 @@ Third-party libraries either supported one format, or they involved a separate set of classes for each supported file type. Even though XLSB was introduced in Excel 2007, nothing outside of SheetJS or Excel supported the format. -To promote a format-agnostic view, js-xlsx starts from a pure-JS representation +To promote a format-agnostic view, SheetJS starts from a pure-JS representation that we call the ["Common Spreadsheet Format"](#common-spreadsheet-format). Emphasizing a uniform object representation enables new features like format conversion (reading an XLSX template and saving as XLS) and circumvents the mess diff --git a/docbits/20_import.md b/docbits/20_import.md index 08dc1f8..572b67e 100644 --- a/docbits/20_import.md +++ b/docbits/20_import.md @@ -75,7 +75,7 @@ var workbook = XLSX.read(htmlstr, {type:'string'}); Browser download file (ajax) (click to show) Note: for a more complete example that works in older browsers, check the demo -at . The [`xhr` demo](demos/xhr/) +at . The [`xhr` demo](demos/xhr/) includes more examples with `XMLHttpRequest` and `fetch`. ```js @@ -151,7 +151,7 @@ More specialized cases, including mobile app file processing, are covered in the ### Parsing Examples -- HTML5 File API / Base64 Text / Web Workers +- HTML5 File API / Base64 Text / Web Workers Note that older versions of IE do not support HTML5 File API, so the Base64 mode is used for testing. @@ -175,5 +175,5 @@ On Windows XP and up you can get the Base64 encoding using `certutil`: -- XMLHttpRequest +- XMLHttpRequest diff --git a/docbits/25_manip.md b/docbits/25_manip.md index 8d89a87..c2164d2 100644 --- a/docbits/25_manip.md +++ b/docbits/25_manip.md @@ -67,7 +67,7 @@ error if the workbook is empty. - read + modify + write files -- node +- node The node version installs a command line tool `xlsx` which can read spreadsheet files and output the contents in various formats. The source is available at diff --git a/docbits/50_csf.md b/docbits/50_csf.md index e6b4615..1a20e8b 100644 --- a/docbits/50_csf.md +++ b/docbits/50_csf.md @@ -1,6 +1,6 @@ ## Common Spreadsheet Format -js-xlsx conforms to the Common Spreadsheet Format (CSF): +SheetJS conforms to the Common Spreadsheet Format (CSF): ### General Structures diff --git a/docbits/90_test.md b/docbits/90_test.md index c7ef1ab..0720a82 100644 --- a/docbits/90_test.md +++ b/docbits/90_test.md @@ -71,14 +71,11 @@ $ open -a Chromium.app http://localhost:8000/stress.html - Safari 6+ (iOS and Desktop) - Edge 13+, FF 18+, and Opera 12+ -Tests utilize the mocha testing framework. Travis-CI and Sauce Labs links: +Tests utilize the mocha testing framework. - - for XLSX module in nodejs - - for XLSX module in nodejs - - for XLS\* modules - for XLS\* modules using Sauce Labs -The Travis-CI test suite also includes tests for various time zones. To change +The test suite also includes tests for various time zones. To change the timezone locally, set the TZ environment variable: ```bash diff --git a/misc/docs/README.md b/misc/docs/README.md index 4f33ab9..b7a1799 100644 --- a/misc/docs/README.md +++ b/misc/docs/README.md @@ -1,4 +1,4 @@ -# [SheetJS js-xlsx](http://sheetjs.com) +# [SheetJS](http://sheetjs.com) Parser and writer for various spreadsheet formats. Pure-JS cleanroom implementation from official specifications, related documents, and test files. @@ -33,7 +33,6 @@ enhancements, additional features like styling, and dedicated support. [![Build Status](https://saucelabs.com/browser-matrix/sheetjs.svg)](https://saucelabs.com/u/sheetjs) -[![Build Status](https://travis-ci.org/SheetJS/sheetjs.svg?branch=master)](https://travis-ci.org/SheetJS/sheetjs) [![Build Status](https://semaphoreci.com/api/v1/sheetjs/sheetjs/branches/master/shields_badge.svg)](https://semaphoreci.com/sheetjs/sheetjs) [![Coverage Status](http://img.shields.io/coveralls/SheetJS/sheetjs/master.svg)](https://coveralls.io/r/SheetJS/sheetjs?branch=master) [![Dependencies Status](https://david-dm.org/sheetjs/sheetjs/status.svg)](https://david-dm.org/sheetjs/sheetjs) @@ -246,7 +245,7 @@ be configured to remove support with `resolve.alias`: ### ECMAScript 5 Compatibility Since the library uses functions like `Array#forEach`, older browsers require -[shims to provide missing functions](http://oss.sheetjs.com/js-xlsx/shim.js). +[shims to provide missing functions](http://oss.sheetjs.com/sheetjs/shim.js). To use the shim, add the shim before the script tag that loads `xlsx.js`: @@ -269,7 +268,7 @@ Third-party libraries either supported one format, or they involved a separate set of classes for each supported file type. Even though XLSB was introduced in Excel 2007, nothing outside of SheetJS or Excel supported the format. -To promote a format-agnostic view, js-xlsx starts from a pure-JS representation +To promote a format-agnostic view, SheetJS starts from a pure-JS representation that we call the ["Common Spreadsheet Format"](#common-spreadsheet-format). Emphasizing a uniform object representation enables new features like format conversion (reading an XLSX template and saving as XLS) and circumvents the mess @@ -363,7 +362,7 @@ var workbook = XLSX.read(htmlstr, {type:'string'}); Note: for a more complete example that works in older browsers, check the demo -at . The [`xhr` demo](demos/xhr/) +at . The [`xhr` demo](demos/xhr/) includes more examples with `XMLHttpRequest` and `fetch`. ```js @@ -432,7 +431,7 @@ More specialized cases, including mobile app file processing, are covered in the ### Parsing Examples -- HTML5 File API / Base64 Text / Web Workers +- HTML5 File API / Base64 Text / Web Workers Note that older versions of IE do not support HTML5 File API, so the Base64 mode is used for testing. @@ -453,7 +452,7 @@ On Windows XP and up you can get the Base64 encoding using `certutil`: (note: You have to open the file and remove the header and footer lines) -- XMLHttpRequest +- XMLHttpRequest ### Streaming Read @@ -573,7 +572,7 @@ error if the workbook is empty. - read + modify + write files -- node +- node The node version installs a command line tool `xlsx` which can read spreadsheet files and output the contents in various formats. The source is available at @@ -804,7 +803,7 @@ Utilities are available in the `XLSX.utils` object and are described in the ## Common Spreadsheet Format -js-xlsx conforms to the Common Spreadsheet Format (CSF): +SheetJS conforms to the Common Spreadsheet Format (CSF): ### General Structures @@ -1471,7 +1470,7 @@ The visibility setting is stored in the `Hidden` property of sheet props array. | 1 | Hidden | | 2 | Very Hidden | -With : +With : ```js > wb.Workbook.Sheets.map(function(x) { return [x.name, x.Hidden] }) @@ -2486,14 +2485,11 @@ $ open -a Chromium.app http://localhost:8000/stress.html - Safari 6+ (iOS and Desktop) - Edge 13+, FF 18+, and Opera 12+ -Tests utilize the mocha testing framework. Travis-CI and Sauce Labs links: +Tests utilize the mocha testing framework. - - for XLSX module in nodejs - - for XLSX module in nodejs - - for XLS\* modules - for XLS\* modules using Sauce Labs -The Travis-CI test suite also includes tests for various time zones. To change +The test suite also includes tests for various time zones. To change the timezone locally, set the TZ environment variable: ```bash diff --git a/package.json b/package.json index 316ad1b..d0ef26b 100644 --- a/package.json +++ b/package.json @@ -34,7 +34,7 @@ "dependencies": { "adler-32": "~1.2.0", "cfb": "^1.1.4", - "codepage": "~1.14.0", + "codepage": "~1.15.0", "commander": "~2.17.1", "crc-32": "~1.2.0", "exit-on-epipe": "~1.0.1", diff --git a/xlsx.flow.js b/xlsx.flow.js index 60f4610..c02a0ca 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -7035,7 +7035,9 @@ var dbf_codepage_map = { /*::[*/0x4D/*::]*/: 936, /*::[*/0x4E/*::]*/: 949, /*::[*/0x4F/*::]*/: 950, /*::[*/0x50/*::]*/: 874, /*::[*/0x57/*::]*/: 1252, /*::[*/0x58/*::]*/: 1252, - /*::[*/0x59/*::]*/: 1252, + /*::[*/0x59/*::]*/: 1252, /*::[*/0x6C/*::]*/: 863, + /*::[*/0x86/*::]*/: 737, /*::[*/0x87/*::]*/: 852, + /*::[*/0x88/*::]*/: 857, /*::[*/0xCC/*::]*/: 1257, /*::[*/0xFF/*::]*/: 16969 }; @@ -7059,7 +7061,6 @@ var DBF_SUPPORTED_VERSIONS = [0x02, 0x03, 0x30, 0x31, 0x83, 0x8B, 0x8C, 0xF5]; /* TODO: find an actual specification */ function dbf_to_aoa(buf, opts)/*:AOA*/ { var out/*:AOA*/ = []; - /* TODO: browser based */ var d/*:Block*/ = (new_raw_buf(1)/*:any*/); switch(opts.type) { case 'base64': d = s2a(Base64.decode(buf)); break; @@ -7068,44 +7069,55 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { case 'array': d = buf; break; } prep_blob(d, 0); + /* header */ var ft = d.read_shift(1); - var memo = false; + var memo = !!(ft & 0x88); var vfp = false, l7 = false; switch(ft) { - case 0x02: case 0x03: break; - case 0x30: vfp = true; memo = true; break; - case 0x31: vfp = true; break; - case 0x83: memo = true; break; - case 0x8B: memo = true; break; - case 0x8C: memo = true; l7 = true; break; - case 0xF5: memo = true; break; + case 0x02: break; // dBASE II + case 0x03: break; // dBASE III + case 0x30: vfp = true; memo = true; break; // VFP + case 0x31: vfp = true; memo = true; break; // VFP with autoincrement + // 0x43 dBASE IV SQL table files + // 0x63 dBASE IV SQL system files + case 0x83: break; // dBASE III with memo + case 0x8B: break; // dBASE IV with memo + case 0x8C: l7 = true; break; // dBASE Level 7 with memo + // case 0xCB dBASE IV SQL table files with memo + case 0xF5: break; // FoxPro 2.x with memo + // case 0xFB FoxBASE default: throw new Error("DBF Unsupported Version: " + ft.toString(16)); } - var /*filedate = new Date(),*/ nrow = 0, fpos = 0; + + var nrow = 0, fpos = 0x0209; if(ft == 0x02) nrow = d.read_shift(2); - /*filedate = new Date(d.read_shift(1) + 1900, d.read_shift(1) - 1, d.read_shift(1));*/d.l += 3; - if(ft != 0x02) nrow = d.read_shift(4); if(nrow > 1048576) nrow = 1e6; - if(ft != 0x02) fpos = d.read_shift(2); - var rlen = d.read_shift(2); + d.l += 3; // dBASE II stores DDMMYY date, others use YYMMDD + if(ft != 0x02) nrow = d.read_shift(4); + if(nrow > 1048576) nrow = 1e6; - var /*flags = 0,*/ current_cp = 1252; - if(ft != 0x02) { - d.l+=16; - /*flags = */d.read_shift(1); - //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); + if(ft != 0x02) fpos = d.read_shift(2); // header length + var rlen = d.read_shift(2); // record length - /* codepage present in FoxPro */ - if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; - d.l+=1; + var /*flags = 0,*/ current_cp = opts.codepage || 1252; + if(ft != 0x02) { // 20 reserved bytes + d.l+=16; + /*flags = */d.read_shift(1); + //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); - d.l+=2; + /* codepage present in FoxPro and dBASE Level 7 */ + if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; + d.l+=1; + + d.l+=2; } - if(l7) d.l += 36; + if(l7) d.l += 36; // Level 7: 32 byte "Language driver name", 4 byte reserved + /*:: type DBFField = { name:string; len:number; type:string; } */ var fields/*:Array*/ = [], field/*:DBFField*/ = ({}/*:any*/); - var hend = fpos - 10 - (vfp ? 264 : 0), ww = l7 ? 32 : 11; - while(ft == 0x02 ? d.l < d.length && d[d.l] != 0x0d: d.l < hend) { + var hend = Math.min(d.length, (ft == 0x02 ? 0x209 : (fpos - 10 - (vfp ? 264 : 0)))); + var ww = l7 ? 32 : 11; + while(d.l < hend && d[d.l] != 0x0d) { field = ({}/*:any*/); field.name = cptable.utils.decode(current_cp, d.slice(d.l, d.l+ww)).replace(/[\u0000\r\n].*$/g,""); d.l += ww; @@ -7117,42 +7129,45 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { if(field.name.length) fields.push(field); if(ft != 0x02) d.l += l7 ? 13 : 14; switch(field.type) { - case 'B': // VFP Double + case 'B': // Double (VFP) / Binary (dBASE L7) if((!vfp || field.len != 8) && opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'G': // General - case 'P': // Picture + case 'G': // General (FoxPro and dBASE L7) + case 'P': // Picture (FoxPro and dBASE L7) if(opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'C': // character - case 'D': // date - case 'F': // floating point - case 'I': // long - case 'L': // boolean - case 'M': // memo - case 'N': // number - case 'O': // double - case 'T': // datetime - case 'Y': // currency - case '0': // VFP _NullFlags - case '@': // timestamp - case '+': // autoincrement + case '+': // Autoincrement (dBASE L7 only) + case '0': // _NullFlags (VFP only) + case '@': // Timestamp (dBASE L7 only) + case 'C': // Character (dBASE II) + case 'D': // Date (dBASE III) + case 'F': // Float (dBASE IV) + case 'I': // Long (VFP and dBASE L7) + case 'L': // Logical (dBASE II) + case 'M': // Memo (dBASE III) + case 'N': // Number (dBASE II) + case 'O': // Double (dBASE L7 only) + case 'T': // Datetime (VFP only) + case 'Y': // Currency (VFP only) break; default: throw new Error('Unknown Field Type: ' + field.type); } } + if(d[d.l] !== 0x0D) d.l = fpos-1; - else if(ft == 0x02) d.l = 0x209; - if(ft != 0x02) { - if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); - d.l = fpos; - } + if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); + d.l = fpos; + /* data */ var R = 0, C = 0; out[0] = []; for(C = 0; C != fields.length; ++C) out[0][C] = fields[C].name; while(nrow-- > 0) { - if(d[d.l] === 0x2A) { d.l+=rlen; continue; } + if(d[d.l] === 0x2A) { + // TODO: record marked as deleted -- create a hidden row? + d.l+=rlen; + continue; + } ++d.l; out[++R] = []; C = 0; for(C = 0; C != fields.length; ++C) { @@ -7161,8 +7176,8 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { var s = cptable.utils.decode(current_cp, dd); switch(fields[C].type) { case 'C': - out[R][C] = cptable.utils.decode(current_cp, dd); - out[R][C] = out[R][C].trim(); + // NOTE: it is conventional to write ' / / ' for empty dates + if(s.trim().length) out[R][C] = s.replace(/\s+$/,""); break; case 'D': if(s.length === 8) out[R][C] = new Date(+s.slice(0,4), +s.slice(4,6)-1, +s.slice(6,8)); @@ -7170,18 +7185,24 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { break; case 'F': out[R][C] = parseFloat(s.trim()); break; case '+': case 'I': out[R][C] = l7 ? dd.read_shift(-4, 'i') ^ 0x80000000 : dd.read_shift(4, 'i'); break; - case 'L': switch(s.toUpperCase()) { + case 'L': switch(s.trim().toUpperCase()) { case 'Y': case 'T': out[R][C] = true; break; case 'N': case 'F': out[R][C] = false; break; - case ' ': case '?': out[R][C] = false; break; /* NOTE: technically uninitialized */ + case '': case '?': break; default: throw new Error("DBF Unrecognized L:|" + s + "|"); } break; case 'M': /* TODO: handle memo files */ if(!memo) throw new Error("DBF Unexpected MEMO for type " + ft.toString(16)); out[R][C] = "##MEMO##" + (l7 ? parseInt(s.trim(), 10): dd.read_shift(4)); break; - case 'N': out[R][C] = +s.replace(/\u0000/g,"").trim(); break; - case '@': out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); break; + case 'N': + s = s.replace(/\u0000/g,"").trim(); + // NOTE: dBASE II interprets " . " as 0 + if(s && s != ".") out[R][C] = +s || 0; break; + case '@': + // NOTE: dBASE specs appear to be incorrect + out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); + break; case 'T': out[R][C] = new Date((dd.read_shift(4) - 0x253D8C) * 0x5265C00 + dd.read_shift(4)); break; case 'Y': out[R][C] = dd.read_shift(4,'i')/1e4; break; case 'O': out[R][C] = -dd.read_shift(-8, 'f'); break; @@ -7374,7 +7395,7 @@ var SYLK = (function() { formats.push(rstr.slice(3).replace(/;;/g, ";")); break; case 'C': - var C_seen_K = false, C_seen_X = false; + var C_seen_K = false, C_seen_X = false, C_seen_S = false, C_seen_E = false, _R = -1, _C = -1; for(rj=1; rj -1 && arr[_R][_C]; + if(!shrbase || !shrbase[1]) throw new Error("SYLK shared formula cannot find base"); + arr[R][C][1] = shift_formula_str(shrbase[1], {r: R - _R, c: C - _C}); + } break; case 'F': var F_seen = 0; @@ -7736,9 +7775,9 @@ var ETH = (function() { var PRN = (function() { function set_text_arr(data/*:string*/, arr/*:AOA*/, R/*:number*/, C/*:number*/, o/*:any*/) { if(o.raw) arr[R][C] = data; + else if(data === ""){/* empty */} else if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; - else if(data === ""){/* empty */} else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data); else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data); else arr[R][C] = data; @@ -7897,7 +7936,7 @@ var PRN = (function() { default: throw new Error("Unrecognized type " + opts.type); } if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3)); - else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str)); + else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(28591,str)); if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts); return prn_to_sheet_str(str, opts); } @@ -21151,7 +21190,10 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { case 0xD0: if(n[1] === 0xCF && n[2] === 0x11 && n[3] === 0xE0 && n[4] === 0xA1 && n[5] === 0xB1 && n[6] === 0x1A && n[7] === 0xE1) return read_cfb(CFB.read(d, o), o); break; case 0x09: if(n[1] <= 0x04) return parse_xlscfb(d, o); break; case 0x3C: return parse_xlml(d, o); - case 0x49: if(n[1] === 0x44) return read_wb_ID(d, o); break; + case 0x49: + if(n[1] === 0x49 && n[2] === 0x2a && n[3] === 0x00) throw new Error("TIFF Image File is not a spreadsheet"); + if(n[1] === 0x44) return read_wb_ID(d, o); + break; case 0x54: if(n[1] === 0x41 && n[2] === 0x42 && n[3] === 0x4C) return DIF.to_workbook(d, o); break; case 0x50: return (n[1] === 0x4B && n[2] < 0x09 && n[3] < 0x09) ? read_zip(d, o) : read_prn(data, d, o, str); case 0xEF: return n[3] === 0x3C ? parse_xlml(d, o) : read_prn(data, d, o, str); diff --git a/xlsx.js b/xlsx.js index 9595034..3bd2a27 100644 --- a/xlsx.js +++ b/xlsx.js @@ -6938,7 +6938,9 @@ var dbf_codepage_map = { 0x4D: 936, 0x4E: 949, 0x4F: 950, 0x50: 874, 0x57: 1252, 0x58: 1252, -0x59: 1252, +0x59: 1252, 0x6C: 863, +0x86: 737, 0x87: 852, +0x88: 857, 0xCC: 1257, 0xFF: 16969 }; @@ -6962,7 +6964,6 @@ var DBF_SUPPORTED_VERSIONS = [0x02, 0x03, 0x30, 0x31, 0x83, 0x8B, 0x8C, 0xF5]; /* TODO: find an actual specification */ function dbf_to_aoa(buf, opts) { var out = []; - /* TODO: browser based */ var d = (new_raw_buf(1)); switch(opts.type) { case 'base64': d = s2a(Base64.decode(buf)); break; @@ -6971,43 +6972,54 @@ function dbf_to_aoa(buf, opts) { case 'array': d = buf; break; } prep_blob(d, 0); + /* header */ var ft = d.read_shift(1); - var memo = false; + var memo = !!(ft & 0x88); var vfp = false, l7 = false; switch(ft) { - case 0x02: case 0x03: break; - case 0x30: vfp = true; memo = true; break; - case 0x31: vfp = true; break; - case 0x83: memo = true; break; - case 0x8B: memo = true; break; - case 0x8C: memo = true; l7 = true; break; - case 0xF5: memo = true; break; + case 0x02: break; // dBASE II + case 0x03: break; // dBASE III + case 0x30: vfp = true; memo = true; break; // VFP + case 0x31: vfp = true; memo = true; break; // VFP with autoincrement + // 0x43 dBASE IV SQL table files + // 0x63 dBASE IV SQL system files + case 0x83: break; // dBASE III with memo + case 0x8B: break; // dBASE IV with memo + case 0x8C: l7 = true; break; // dBASE Level 7 with memo + // case 0xCB dBASE IV SQL table files with memo + case 0xF5: break; // FoxPro 2.x with memo + // case 0xFB FoxBASE default: throw new Error("DBF Unsupported Version: " + ft.toString(16)); } - var /*filedate = new Date(),*/ nrow = 0, fpos = 0; + + var nrow = 0, fpos = 0x0209; if(ft == 0x02) nrow = d.read_shift(2); - /*filedate = new Date(d.read_shift(1) + 1900, d.read_shift(1) - 1, d.read_shift(1));*/d.l += 3; - if(ft != 0x02) nrow = d.read_shift(4); if(nrow > 1048576) nrow = 1e6; - if(ft != 0x02) fpos = d.read_shift(2); - var rlen = d.read_shift(2); + d.l += 3; // dBASE II stores DDMMYY date, others use YYMMDD + if(ft != 0x02) nrow = d.read_shift(4); + if(nrow > 1048576) nrow = 1e6; + + if(ft != 0x02) fpos = d.read_shift(2); // header length + var rlen = d.read_shift(2); // record length var /*flags = 0,*/ current_cp = opts.codepage || 1252; - if(ft != 0x02) { - d.l+=16; - /*flags = */d.read_shift(1); - //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); + if(ft != 0x02) { // 20 reserved bytes + d.l+=16; + /*flags = */d.read_shift(1); + //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); - /* codepage present in FoxPro */ - if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; - d.l+=1; + /* codepage present in FoxPro and dBASE Level 7 */ + if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; + d.l+=1; - d.l+=2; + d.l+=2; } - if(l7) d.l += 36; + if(l7) d.l += 36; // Level 7: 32 byte "Language driver name", 4 byte reserved + var fields = [], field = ({}); - var hend = fpos - 10 - (vfp ? 264 : 0), ww = l7 ? 32 : 11; - while(ft == 0x02 ? d.l < d.length && d[d.l] != 0x0d: d.l < hend) { + var hend = Math.min(d.length, (ft == 0x02 ? 0x209 : (fpos - 10 - (vfp ? 264 : 0)))); + var ww = l7 ? 32 : 11; + while(d.l < hend && d[d.l] != 0x0d) { field = ({}); field.name = cptable.utils.decode(current_cp, d.slice(d.l, d.l+ww)).replace(/[\u0000\r\n].*$/g,""); d.l += ww; @@ -7019,42 +7031,45 @@ var fields = [], field = ({}); if(field.name.length) fields.push(field); if(ft != 0x02) d.l += l7 ? 13 : 14; switch(field.type) { - case 'B': // VFP Double + case 'B': // Double (VFP) / Binary (dBASE L7) if((!vfp || field.len != 8) && opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'G': // General - case 'P': // Picture + case 'G': // General (FoxPro and dBASE L7) + case 'P': // Picture (FoxPro and dBASE L7) if(opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'C': // character - case 'D': // date - case 'F': // floating point - case 'I': // long - case 'L': // boolean - case 'M': // memo - case 'N': // number - case 'O': // double - case 'T': // datetime - case 'Y': // currency - case '0': // VFP _NullFlags - case '@': // timestamp - case '+': // autoincrement + case '+': // Autoincrement (dBASE L7 only) + case '0': // _NullFlags (VFP only) + case '@': // Timestamp (dBASE L7 only) + case 'C': // Character (dBASE II) + case 'D': // Date (dBASE III) + case 'F': // Float (dBASE IV) + case 'I': // Long (VFP and dBASE L7) + case 'L': // Logical (dBASE II) + case 'M': // Memo (dBASE III) + case 'N': // Number (dBASE II) + case 'O': // Double (dBASE L7 only) + case 'T': // Datetime (VFP only) + case 'Y': // Currency (VFP only) break; default: throw new Error('Unknown Field Type: ' + field.type); } } + if(d[d.l] !== 0x0D) d.l = fpos-1; - else if(ft == 0x02) d.l = 0x209; - if(ft != 0x02) { - if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); - d.l = fpos; - } + if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); + d.l = fpos; + /* data */ var R = 0, C = 0; out[0] = []; for(C = 0; C != fields.length; ++C) out[0][C] = fields[C].name; while(nrow-- > 0) { - if(d[d.l] === 0x2A) { d.l+=rlen; continue; } + if(d[d.l] === 0x2A) { + // TODO: record marked as deleted -- create a hidden row? + d.l+=rlen; + continue; + } ++d.l; out[++R] = []; C = 0; for(C = 0; C != fields.length; ++C) { @@ -7063,8 +7078,8 @@ var fields = [], field = ({}); var s = cptable.utils.decode(current_cp, dd); switch(fields[C].type) { case 'C': - out[R][C] = cptable.utils.decode(current_cp, dd); - out[R][C] = out[R][C].trim(); + // NOTE: it is conventional to write ' / / ' for empty dates + if(s.trim().length) out[R][C] = s.replace(/\s+$/,""); break; case 'D': if(s.length === 8) out[R][C] = new Date(+s.slice(0,4), +s.slice(4,6)-1, +s.slice(6,8)); @@ -7072,18 +7087,24 @@ var fields = [], field = ({}); break; case 'F': out[R][C] = parseFloat(s.trim()); break; case '+': case 'I': out[R][C] = l7 ? dd.read_shift(-4, 'i') ^ 0x80000000 : dd.read_shift(4, 'i'); break; - case 'L': switch(s.toUpperCase()) { + case 'L': switch(s.trim().toUpperCase()) { case 'Y': case 'T': out[R][C] = true; break; case 'N': case 'F': out[R][C] = false; break; - case ' ': case '?': out[R][C] = false; break; /* NOTE: technically uninitialized */ + case '': case '?': break; default: throw new Error("DBF Unrecognized L:|" + s + "|"); } break; case 'M': /* TODO: handle memo files */ if(!memo) throw new Error("DBF Unexpected MEMO for type " + ft.toString(16)); out[R][C] = "##MEMO##" + (l7 ? parseInt(s.trim(), 10): dd.read_shift(4)); break; - case 'N': out[R][C] = +s.replace(/\u0000/g,"").trim(); break; - case '@': out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); break; + case 'N': + s = s.replace(/\u0000/g,"").trim(); + // NOTE: dBASE II interprets " . " as 0 + if(s && s != ".") out[R][C] = +s || 0; break; + case '@': + // NOTE: dBASE specs appear to be incorrect + out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); + break; case 'T': out[R][C] = new Date((dd.read_shift(4) - 0x253D8C) * 0x5265C00 + dd.read_shift(4)); break; case 'Y': out[R][C] = dd.read_shift(4,'i')/1e4; break; case 'O': out[R][C] = -dd.read_shift(-8, 'f'); break; @@ -7276,7 +7297,7 @@ var SYLK = (function() { formats.push(rstr.slice(3).replace(/;;/g, ";")); break; case 'C': - var C_seen_K = false, C_seen_X = false; + var C_seen_K = false, C_seen_X = false, C_seen_S = false, C_seen_E = false, _R = -1, _C = -1; for(rj=1; rj -1 && arr[_R][_C]; + if(!shrbase || !shrbase[1]) throw new Error("SYLK shared formula cannot find base"); + arr[R][C][1] = shift_formula_str(shrbase[1], {r: R - _R, c: C - _C}); + } break; case 'F': var F_seen = 0; @@ -7638,9 +7677,9 @@ var ETH = (function() { var PRN = (function() { function set_text_arr(data, arr, R, C, o) { if(o.raw) arr[R][C] = data; + else if(data === ""){/* empty */} else if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; - else if(data === ""){/* empty */} else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data); else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data); else arr[R][C] = data; @@ -7799,7 +7838,7 @@ var PRN = (function() { default: throw new Error("Unrecognized type " + opts.type); } if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3)); - else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str)); + else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(28591,str)); if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts); return prn_to_sheet_str(str, opts); } @@ -21027,7 +21066,10 @@ function readSync(data, opts) { case 0xD0: if(n[1] === 0xCF && n[2] === 0x11 && n[3] === 0xE0 && n[4] === 0xA1 && n[5] === 0xB1 && n[6] === 0x1A && n[7] === 0xE1) return read_cfb(CFB.read(d, o), o); break; case 0x09: if(n[1] <= 0x04) return parse_xlscfb(d, o); break; case 0x3C: return parse_xlml(d, o); - case 0x49: if(n[1] === 0x44) return read_wb_ID(d, o); break; + case 0x49: + if(n[1] === 0x49 && n[2] === 0x2a && n[3] === 0x00) throw new Error("TIFF Image File is not a spreadsheet"); + if(n[1] === 0x44) return read_wb_ID(d, o); + break; case 0x54: if(n[1] === 0x41 && n[2] === 0x42 && n[3] === 0x4C) return DIF.to_workbook(d, o); break; case 0x50: return (n[1] === 0x4B && n[2] < 0x09 && n[3] < 0x09) ? read_zip(d, o) : read_prn(data, d, o, str); case 0xEF: return n[3] === 0x3C ? parse_xlml(d, o) : read_prn(data, d, o, str); diff --git a/xlsx.mini.flow.js b/xlsx.mini.flow.js index 571e3c7..680833f 100644 --- a/xlsx.mini.flow.js +++ b/xlsx.mini.flow.js @@ -4809,7 +4809,9 @@ var dbf_codepage_map = { /*::[*/0x4D/*::]*/: 936, /*::[*/0x4E/*::]*/: 949, /*::[*/0x4F/*::]*/: 950, /*::[*/0x50/*::]*/: 874, /*::[*/0x57/*::]*/: 1252, /*::[*/0x58/*::]*/: 1252, - /*::[*/0x59/*::]*/: 1252, + /*::[*/0x59/*::]*/: 1252, /*::[*/0x6C/*::]*/: 863, + /*::[*/0x86/*::]*/: 737, /*::[*/0x87/*::]*/: 852, + /*::[*/0x88/*::]*/: 857, /*::[*/0xCC/*::]*/: 1257, /*::[*/0xFF/*::]*/: 16969 }; @@ -4833,7 +4835,6 @@ var DBF_SUPPORTED_VERSIONS = [0x02, 0x03, 0x30, 0x31, 0x83, 0x8B, 0x8C, 0xF5]; /* TODO: find an actual specification */ function dbf_to_aoa(buf, opts)/*:AOA*/ { var out/*:AOA*/ = []; - /* TODO: browser based */ var d/*:Block*/ = (new_raw_buf(1)/*:any*/); switch(opts.type) { case 'base64': d = s2a(Base64.decode(buf)); break; @@ -4842,44 +4843,55 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { case 'array': d = buf; break; } prep_blob(d, 0); + /* header */ var ft = d.read_shift(1); - var memo = false; + var memo = !!(ft & 0x88); var vfp = false, l7 = false; switch(ft) { - case 0x02: case 0x03: break; - case 0x30: vfp = true; memo = true; break; - case 0x31: vfp = true; break; - case 0x83: memo = true; break; - case 0x8B: memo = true; break; - case 0x8C: memo = true; l7 = true; break; - case 0xF5: memo = true; break; + case 0x02: break; // dBASE II + case 0x03: break; // dBASE III + case 0x30: vfp = true; memo = true; break; // VFP + case 0x31: vfp = true; memo = true; break; // VFP with autoincrement + // 0x43 dBASE IV SQL table files + // 0x63 dBASE IV SQL system files + case 0x83: break; // dBASE III with memo + case 0x8B: break; // dBASE IV with memo + case 0x8C: l7 = true; break; // dBASE Level 7 with memo + // case 0xCB dBASE IV SQL table files with memo + case 0xF5: break; // FoxPro 2.x with memo + // case 0xFB FoxBASE default: throw new Error("DBF Unsupported Version: " + ft.toString(16)); } - var /*filedate = new Date(),*/ nrow = 0, fpos = 0; + + var nrow = 0, fpos = 0x0209; if(ft == 0x02) nrow = d.read_shift(2); - /*filedate = new Date(d.read_shift(1) + 1900, d.read_shift(1) - 1, d.read_shift(1));*/d.l += 3; - if(ft != 0x02) nrow = d.read_shift(4); if(nrow > 1048576) nrow = 1e6; - if(ft != 0x02) fpos = d.read_shift(2); - var rlen = d.read_shift(2); + d.l += 3; // dBASE II stores DDMMYY date, others use YYMMDD + if(ft != 0x02) nrow = d.read_shift(4); + if(nrow > 1048576) nrow = 1e6; - var /*flags = 0,*/ current_cp = 1252; - if(ft != 0x02) { - d.l+=16; - /*flags = */d.read_shift(1); - //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); + if(ft != 0x02) fpos = d.read_shift(2); // header length + var rlen = d.read_shift(2); // record length - /* codepage present in FoxPro */ - if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; - d.l+=1; + var /*flags = 0,*/ current_cp = opts.codepage || 1252; + if(ft != 0x02) { // 20 reserved bytes + d.l+=16; + /*flags = */d.read_shift(1); + //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); - d.l+=2; + /* codepage present in FoxPro and dBASE Level 7 */ + if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; + d.l+=1; + + d.l+=2; } - if(l7) d.l += 36; + if(l7) d.l += 36; // Level 7: 32 byte "Language driver name", 4 byte reserved + /*:: type DBFField = { name:string; len:number; type:string; } */ var fields/*:Array*/ = [], field/*:DBFField*/ = ({}/*:any*/); - var hend = fpos - 10 - (vfp ? 264 : 0), ww = l7 ? 32 : 11; - while(ft == 0x02 ? d.l < d.length && d[d.l] != 0x0d: d.l < hend) { + var hend = Math.min(d.length, (ft == 0x02 ? 0x209 : (fpos - 10 - (vfp ? 264 : 0)))); + var ww = l7 ? 32 : 11; + while(d.l < hend && d[d.l] != 0x0d) { field = ({}/*:any*/); field.name = cptable.utils.decode(current_cp, d.slice(d.l, d.l+ww)).replace(/[\u0000\r\n].*$/g,""); d.l += ww; @@ -4891,42 +4903,45 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { if(field.name.length) fields.push(field); if(ft != 0x02) d.l += l7 ? 13 : 14; switch(field.type) { - case 'B': // VFP Double + case 'B': // Double (VFP) / Binary (dBASE L7) if((!vfp || field.len != 8) && opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'G': // General - case 'P': // Picture + case 'G': // General (FoxPro and dBASE L7) + case 'P': // Picture (FoxPro and dBASE L7) if(opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'C': // character - case 'D': // date - case 'F': // floating point - case 'I': // long - case 'L': // boolean - case 'M': // memo - case 'N': // number - case 'O': // double - case 'T': // datetime - case 'Y': // currency - case '0': // VFP _NullFlags - case '@': // timestamp - case '+': // autoincrement + case '+': // Autoincrement (dBASE L7 only) + case '0': // _NullFlags (VFP only) + case '@': // Timestamp (dBASE L7 only) + case 'C': // Character (dBASE II) + case 'D': // Date (dBASE III) + case 'F': // Float (dBASE IV) + case 'I': // Long (VFP and dBASE L7) + case 'L': // Logical (dBASE II) + case 'M': // Memo (dBASE III) + case 'N': // Number (dBASE II) + case 'O': // Double (dBASE L7 only) + case 'T': // Datetime (VFP only) + case 'Y': // Currency (VFP only) break; default: throw new Error('Unknown Field Type: ' + field.type); } } + if(d[d.l] !== 0x0D) d.l = fpos-1; - else if(ft == 0x02) d.l = 0x209; - if(ft != 0x02) { - if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); - d.l = fpos; - } + if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); + d.l = fpos; + /* data */ var R = 0, C = 0; out[0] = []; for(C = 0; C != fields.length; ++C) out[0][C] = fields[C].name; while(nrow-- > 0) { - if(d[d.l] === 0x2A) { d.l+=rlen; continue; } + if(d[d.l] === 0x2A) { + // TODO: record marked as deleted -- create a hidden row? + d.l+=rlen; + continue; + } ++d.l; out[++R] = []; C = 0; for(C = 0; C != fields.length; ++C) { @@ -4935,8 +4950,8 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { var s = cptable.utils.decode(current_cp, dd); switch(fields[C].type) { case 'C': - out[R][C] = cptable.utils.decode(current_cp, dd); - out[R][C] = out[R][C].trim(); + // NOTE: it is conventional to write ' / / ' for empty dates + if(s.trim().length) out[R][C] = s.replace(/\s+$/,""); break; case 'D': if(s.length === 8) out[R][C] = new Date(+s.slice(0,4), +s.slice(4,6)-1, +s.slice(6,8)); @@ -4944,18 +4959,24 @@ function dbf_to_aoa(buf, opts)/*:AOA*/ { break; case 'F': out[R][C] = parseFloat(s.trim()); break; case '+': case 'I': out[R][C] = l7 ? dd.read_shift(-4, 'i') ^ 0x80000000 : dd.read_shift(4, 'i'); break; - case 'L': switch(s.toUpperCase()) { + case 'L': switch(s.trim().toUpperCase()) { case 'Y': case 'T': out[R][C] = true; break; case 'N': case 'F': out[R][C] = false; break; - case ' ': case '?': out[R][C] = false; break; /* NOTE: technically uninitialized */ + case '': case '?': break; default: throw new Error("DBF Unrecognized L:|" + s + "|"); } break; case 'M': /* TODO: handle memo files */ if(!memo) throw new Error("DBF Unexpected MEMO for type " + ft.toString(16)); out[R][C] = "##MEMO##" + (l7 ? parseInt(s.trim(), 10): dd.read_shift(4)); break; - case 'N': out[R][C] = +s.replace(/\u0000/g,"").trim(); break; - case '@': out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); break; + case 'N': + s = s.replace(/\u0000/g,"").trim(); + // NOTE: dBASE II interprets " . " as 0 + if(s && s != ".") out[R][C] = +s || 0; break; + case '@': + // NOTE: dBASE specs appear to be incorrect + out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); + break; case 'T': out[R][C] = new Date((dd.read_shift(4) - 0x253D8C) * 0x5265C00 + dd.read_shift(4)); break; case 'Y': out[R][C] = dd.read_shift(4,'i')/1e4; break; case 'O': out[R][C] = -dd.read_shift(-8, 'f'); break; @@ -5148,7 +5169,7 @@ var SYLK = (function() { formats.push(rstr.slice(3).replace(/;;/g, ";")); break; case 'C': - var C_seen_K = false, C_seen_X = false; + var C_seen_K = false, C_seen_X = false, C_seen_S = false, C_seen_E = false, _R = -1, _C = -1; for(rj=1; rj -1 && arr[_R][_C]; + if(!shrbase || !shrbase[1]) throw new Error("SYLK shared formula cannot find base"); + arr[R][C][1] = shift_formula_str(shrbase[1], {r: R - _R, c: C - _C}); + } break; case 'F': var F_seen = 0; @@ -5510,9 +5549,9 @@ var ETH = (function() { var PRN = (function() { function set_text_arr(data/*:string*/, arr/*:AOA*/, R/*:number*/, C/*:number*/, o/*:any*/) { if(o.raw) arr[R][C] = data; + else if(data === ""){/* empty */} else if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; - else if(data === ""){/* empty */} else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data); else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data); else arr[R][C] = data; @@ -5671,7 +5710,7 @@ var PRN = (function() { default: throw new Error("Unrecognized type " + opts.type); } if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3)); - else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str)); + else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(28591,str)); if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts); return prn_to_sheet_str(str, opts); } @@ -10040,7 +10079,10 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { case 0xD0: if(n[1] === 0xCF && n[2] === 0x11 && n[3] === 0xE0 && n[4] === 0xA1 && n[5] === 0xB1 && n[6] === 0x1A && n[7] === 0xE1) return read_cfb(CFB.read(d, o), o); break; case 0x09: if(n[1] <= 0x04) return parse_xlscfb(d, o); break; case 0x3C: return parse_xlml(d, o); - case 0x49: if(n[1] === 0x44) return read_wb_ID(d, o); break; + case 0x49: + if(n[1] === 0x49 && n[2] === 0x2a && n[3] === 0x00) throw new Error("TIFF Image File is not a spreadsheet"); + if(n[1] === 0x44) return read_wb_ID(d, o); + break; case 0x54: if(n[1] === 0x41 && n[2] === 0x42 && n[3] === 0x4C) return DIF.to_workbook(d, o); break; case 0x50: return (n[1] === 0x4B && n[2] < 0x09 && n[3] < 0x09) ? read_zip(d, o) : read_prn(data, d, o, str); case 0xEF: return n[3] === 0x3C ? parse_xlml(d, o) : read_prn(data, d, o, str); diff --git a/xlsx.mini.js b/xlsx.mini.js index 149ec0b..d829c77 100644 --- a/xlsx.mini.js +++ b/xlsx.mini.js @@ -4719,7 +4719,9 @@ var dbf_codepage_map = { 0x4D: 936, 0x4E: 949, 0x4F: 950, 0x50: 874, 0x57: 1252, 0x58: 1252, -0x59: 1252, +0x59: 1252, 0x6C: 863, +0x86: 737, 0x87: 852, +0x88: 857, 0xCC: 1257, 0xFF: 16969 }; @@ -4743,7 +4745,6 @@ var DBF_SUPPORTED_VERSIONS = [0x02, 0x03, 0x30, 0x31, 0x83, 0x8B, 0x8C, 0xF5]; /* TODO: find an actual specification */ function dbf_to_aoa(buf, opts) { var out = []; - /* TODO: browser based */ var d = (new_raw_buf(1)); switch(opts.type) { case 'base64': d = s2a(Base64.decode(buf)); break; @@ -4752,43 +4753,54 @@ function dbf_to_aoa(buf, opts) { case 'array': d = buf; break; } prep_blob(d, 0); + /* header */ var ft = d.read_shift(1); - var memo = false; + var memo = !!(ft & 0x88); var vfp = false, l7 = false; switch(ft) { - case 0x02: case 0x03: break; - case 0x30: vfp = true; memo = true; break; - case 0x31: vfp = true; break; - case 0x83: memo = true; break; - case 0x8B: memo = true; break; - case 0x8C: memo = true; l7 = true; break; - case 0xF5: memo = true; break; + case 0x02: break; // dBASE II + case 0x03: break; // dBASE III + case 0x30: vfp = true; memo = true; break; // VFP + case 0x31: vfp = true; memo = true; break; // VFP with autoincrement + // 0x43 dBASE IV SQL table files + // 0x63 dBASE IV SQL system files + case 0x83: break; // dBASE III with memo + case 0x8B: break; // dBASE IV with memo + case 0x8C: l7 = true; break; // dBASE Level 7 with memo + // case 0xCB dBASE IV SQL table files with memo + case 0xF5: break; // FoxPro 2.x with memo + // case 0xFB FoxBASE default: throw new Error("DBF Unsupported Version: " + ft.toString(16)); } - var /*filedate = new Date(),*/ nrow = 0, fpos = 0; + + var nrow = 0, fpos = 0x0209; if(ft == 0x02) nrow = d.read_shift(2); - /*filedate = new Date(d.read_shift(1) + 1900, d.read_shift(1) - 1, d.read_shift(1));*/d.l += 3; - if(ft != 0x02) nrow = d.read_shift(4); if(nrow > 1048576) nrow = 1e6; - if(ft != 0x02) fpos = d.read_shift(2); - var rlen = d.read_shift(2); + d.l += 3; // dBASE II stores DDMMYY date, others use YYMMDD + if(ft != 0x02) nrow = d.read_shift(4); + if(nrow > 1048576) nrow = 1e6; - var /*flags = 0,*/ current_cp = 1252; - if(ft != 0x02) { - d.l+=16; - /*flags = */d.read_shift(1); - //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); + if(ft != 0x02) fpos = d.read_shift(2); // header length + var rlen = d.read_shift(2); // record length - /* codepage present in FoxPro */ - if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; - d.l+=1; + var /*flags = 0,*/ current_cp = opts.codepage || 1252; + if(ft != 0x02) { // 20 reserved bytes + d.l+=16; + /*flags = */d.read_shift(1); + //if(memo && ((flags & 0x02) === 0)) throw new Error("DBF Flags " + flags.toString(16) + " ft " + ft.toString(16)); - d.l+=2; + /* codepage present in FoxPro and dBASE Level 7 */ + if(d[d.l] !== 0) current_cp = dbf_codepage_map[d[d.l]]; + d.l+=1; + + d.l+=2; } - if(l7) d.l += 36; + if(l7) d.l += 36; // Level 7: 32 byte "Language driver name", 4 byte reserved + var fields = [], field = ({}); - var hend = fpos - 10 - (vfp ? 264 : 0), ww = l7 ? 32 : 11; - while(ft == 0x02 ? d.l < d.length && d[d.l] != 0x0d: d.l < hend) { + var hend = Math.min(d.length, (ft == 0x02 ? 0x209 : (fpos - 10 - (vfp ? 264 : 0)))); + var ww = l7 ? 32 : 11; + while(d.l < hend && d[d.l] != 0x0d) { field = ({}); field.name = cptable.utils.decode(current_cp, d.slice(d.l, d.l+ww)).replace(/[\u0000\r\n].*$/g,""); d.l += ww; @@ -4800,42 +4812,45 @@ var fields = [], field = ({}); if(field.name.length) fields.push(field); if(ft != 0x02) d.l += l7 ? 13 : 14; switch(field.type) { - case 'B': // VFP Double + case 'B': // Double (VFP) / Binary (dBASE L7) if((!vfp || field.len != 8) && opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'G': // General - case 'P': // Picture + case 'G': // General (FoxPro and dBASE L7) + case 'P': // Picture (FoxPro and dBASE L7) if(opts.WTF) console.log('Skipping ' + field.name + ':' + field.type); break; - case 'C': // character - case 'D': // date - case 'F': // floating point - case 'I': // long - case 'L': // boolean - case 'M': // memo - case 'N': // number - case 'O': // double - case 'T': // datetime - case 'Y': // currency - case '0': // VFP _NullFlags - case '@': // timestamp - case '+': // autoincrement + case '+': // Autoincrement (dBASE L7 only) + case '0': // _NullFlags (VFP only) + case '@': // Timestamp (dBASE L7 only) + case 'C': // Character (dBASE II) + case 'D': // Date (dBASE III) + case 'F': // Float (dBASE IV) + case 'I': // Long (VFP and dBASE L7) + case 'L': // Logical (dBASE II) + case 'M': // Memo (dBASE III) + case 'N': // Number (dBASE II) + case 'O': // Double (dBASE L7 only) + case 'T': // Datetime (VFP only) + case 'Y': // Currency (VFP only) break; default: throw new Error('Unknown Field Type: ' + field.type); } } + if(d[d.l] !== 0x0D) d.l = fpos-1; - else if(ft == 0x02) d.l = 0x209; - if(ft != 0x02) { - if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); - d.l = fpos; - } + if(d.read_shift(1) !== 0x0D) throw new Error("DBF Terminator not found " + d.l + " " + d[d.l]); + d.l = fpos; + /* data */ var R = 0, C = 0; out[0] = []; for(C = 0; C != fields.length; ++C) out[0][C] = fields[C].name; while(nrow-- > 0) { - if(d[d.l] === 0x2A) { d.l+=rlen; continue; } + if(d[d.l] === 0x2A) { + // TODO: record marked as deleted -- create a hidden row? + d.l+=rlen; + continue; + } ++d.l; out[++R] = []; C = 0; for(C = 0; C != fields.length; ++C) { @@ -4844,8 +4859,8 @@ var fields = [], field = ({}); var s = cptable.utils.decode(current_cp, dd); switch(fields[C].type) { case 'C': - out[R][C] = cptable.utils.decode(current_cp, dd); - out[R][C] = out[R][C].trim(); + // NOTE: it is conventional to write ' / / ' for empty dates + if(s.trim().length) out[R][C] = s.replace(/\s+$/,""); break; case 'D': if(s.length === 8) out[R][C] = new Date(+s.slice(0,4), +s.slice(4,6)-1, +s.slice(6,8)); @@ -4853,18 +4868,24 @@ var fields = [], field = ({}); break; case 'F': out[R][C] = parseFloat(s.trim()); break; case '+': case 'I': out[R][C] = l7 ? dd.read_shift(-4, 'i') ^ 0x80000000 : dd.read_shift(4, 'i'); break; - case 'L': switch(s.toUpperCase()) { + case 'L': switch(s.trim().toUpperCase()) { case 'Y': case 'T': out[R][C] = true; break; case 'N': case 'F': out[R][C] = false; break; - case ' ': case '?': out[R][C] = false; break; /* NOTE: technically uninitialized */ + case '': case '?': break; default: throw new Error("DBF Unrecognized L:|" + s + "|"); } break; case 'M': /* TODO: handle memo files */ if(!memo) throw new Error("DBF Unexpected MEMO for type " + ft.toString(16)); out[R][C] = "##MEMO##" + (l7 ? parseInt(s.trim(), 10): dd.read_shift(4)); break; - case 'N': out[R][C] = +s.replace(/\u0000/g,"").trim(); break; - case '@': out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); break; + case 'N': + s = s.replace(/\u0000/g,"").trim(); + // NOTE: dBASE II interprets " . " as 0 + if(s && s != ".") out[R][C] = +s || 0; break; + case '@': + // NOTE: dBASE specs appear to be incorrect + out[R][C] = new Date(dd.read_shift(-8, 'f') - 0x388317533400); + break; case 'T': out[R][C] = new Date((dd.read_shift(4) - 0x253D8C) * 0x5265C00 + dd.read_shift(4)); break; case 'Y': out[R][C] = dd.read_shift(4,'i')/1e4; break; case 'O': out[R][C] = -dd.read_shift(-8, 'f'); break; @@ -5057,7 +5078,7 @@ var SYLK = (function() { formats.push(rstr.slice(3).replace(/;;/g, ";")); break; case 'C': - var C_seen_K = false, C_seen_X = false; + var C_seen_K = false, C_seen_X = false, C_seen_S = false, C_seen_E = false, _R = -1, _C = -1; for(rj=1; rj -1 && arr[_R][_C]; + if(!shrbase || !shrbase[1]) throw new Error("SYLK shared formula cannot find base"); + arr[R][C][1] = shift_formula_str(shrbase[1], {r: R - _R, c: C - _C}); + } break; case 'F': var F_seen = 0; @@ -5419,9 +5458,9 @@ var ETH = (function() { var PRN = (function() { function set_text_arr(data, arr, R, C, o) { if(o.raw) arr[R][C] = data; + else if(data === ""){/* empty */} else if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; - else if(data === ""){/* empty */} else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data); else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data); else arr[R][C] = data; @@ -5580,7 +5619,7 @@ var PRN = (function() { default: throw new Error("Unrecognized type " + opts.type); } if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3)); - else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str)); + else if((opts.type == 'binary') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(28591,str)); if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts); return prn_to_sheet_str(str, opts); } @@ -9941,7 +9980,10 @@ function readSync(data, opts) { case 0xD0: if(n[1] === 0xCF && n[2] === 0x11 && n[3] === 0xE0 && n[4] === 0xA1 && n[5] === 0xB1 && n[6] === 0x1A && n[7] === 0xE1) return read_cfb(CFB.read(d, o), o); break; case 0x09: if(n[1] <= 0x04) return parse_xlscfb(d, o); break; case 0x3C: return parse_xlml(d, o); - case 0x49: if(n[1] === 0x44) return read_wb_ID(d, o); break; + case 0x49: + if(n[1] === 0x49 && n[2] === 0x2a && n[3] === 0x00) throw new Error("TIFF Image File is not a spreadsheet"); + if(n[1] === 0x44) return read_wb_ID(d, o); + break; case 0x54: if(n[1] === 0x41 && n[2] === 0x42 && n[3] === 0x4C) return DIF.to_workbook(d, o); break; case 0x50: return (n[1] === 0x4B && n[2] < 0x09 && n[3] < 0x09) ? read_zip(d, o) : read_prn(data, d, o, str); case 0xEF: return n[3] === 0x3C ? parse_xlml(d, o) : read_prn(data, d, o, str);