From 9ba09bab5eb56391a3dfaba460e2047766de1d6f Mon Sep 17 00:00:00 2001 From: SheetJS Date: Wed, 9 Aug 2017 18:38:23 -0400 Subject: [PATCH] improved plaintext parsing - fuzzy number check disregards commas (fixes #709 h/t @mz121star) - updated to CFB 0.12.1 - parse_dom_table and html_to_sheet raw option - canvas-datagrid demo - docs and TS definition update --- README.md | 11 +- bits/18_cfb.js | 12 +- bits/20_jsutils.js | 7 ++ bits/40_harb.js | 10 +- bits/76_xls.js | 23 ++-- bits/79_html.js | 9 +- bits/85_parsezip.js | 10 +- bits/87_read.js | 4 +- bits/90_utils.js | 12 +- demos/datagrid/README.md | 67 +++++++++++ demos/datagrid/index.html | 205 ++++++++++++++++++++++++++++++++ demos/datagrid/xlsx.full.min.js | 1 + docbits/00_intro.md | 1 - docbits/10_install.md | 1 + docbits/82_util.md | 9 ++ misc/docs/README.md | 11 +- misc/flowdeps.js | 1 + package.json | 2 +- test.js | 26 ++++ types/doc.ts | 111 +++++++++++++++++ types/index.d.ts | 41 +++---- xlsx.flow.js | 87 ++++++++------ xlsx.js | 81 ++++++++----- 23 files changed, 609 insertions(+), 133 deletions(-) create mode 100644 demos/datagrid/README.md create mode 100644 demos/datagrid/index.html create mode 120000 demos/datagrid/xlsx.full.min.js create mode 100644 types/doc.ts diff --git a/README.md b/README.md index 2cf9eb6..f02111e 100644 --- a/README.md +++ b/README.md @@ -41,7 +41,6 @@ enhancements, additional features by request, and dedicated support. 
[![Build Status](https://travis-ci.org/SheetJS/js-xlsx.svg?branch=master)](https://travis-ci.org/SheetJS/js-xlsx) [![Build Status](https://semaphoreci.com/api/v1/sheetjs/js-xlsx/branches/master/shields_badge.svg)](https://semaphoreci.com/sheetjs/js-xlsx) [![Coverage Status](http://img.shields.io/coveralls/SheetJS/js-xlsx/master.svg)](https://coveralls.io/r/SheetJS/js-xlsx?branch=master) -[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx.svg?type=shield)](https://app.fossa.io/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx?ref=badge_shield) [![Dependencies Status](https://david-dm.org/sheetjs/js-xlsx/status.svg)](https://david-dm.org/sheetjs/js-xlsx) [![NPM Downloads](https://img.shields.io/npm/dt/xlsx.svg)](https://npmjs.org/package/xlsx) [![ghit.me](https://ghit.me/badge.svg?repo=sheetjs/js-xlsx)](https://ghit.me/repo/sheetjs/js-xlsx) @@ -171,6 +170,7 @@ The `demos` directory includes sample projects for: - [`angular 1.x`](demos/angular/) - [`angular 2.x / 4.x`](demos/angular2/) - [`browserify`](demos/browserify/) +- [`canvas-datagrid`](demos/datagrid/) - [`Adobe ExtendScript`](demos/extendscript/) - [`meteor`](demos/meteor/) - [`phantomjs`](demos/phantomjs/) @@ -1639,6 +1639,15 @@ as strings. `XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet. +Both functions accept options arguments: + +| Option Name | Default | Description | +| :---------- | :------: | :-------------------------------------------------- | +| dateNF | fmt 14 | Use specified date format in string output | +| cellDates | false | Store dates as type `d` (default is `n`) | +| raw | | If true, every cell will hold raw strings | + +
Examples (click to show) diff --git a/bits/18_cfb.js b/bits/18_cfb.js index 22ba520..6eb3fe1 100644 --- a/bits/18_cfb.js +++ b/bits/18_cfb.js @@ -35,7 +35,7 @@ type CFBFiles = {[n:string]:CFBEntry}; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/; -exports.version = '0.12.0'; +exports.version = '0.12.1'; function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ { var mver = 3; // major version var ssz = 512; // sector size @@ -206,7 +206,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } @@ -307,7 +307,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/; prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o/*:CFBEntry*/ = ({ @@ -328,6 +327,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ -340,7 +340,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = 
(sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/); prep_blob(o.content, 0); } @@ -369,6 +369,9 @@ function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { return parse(/*::typeof blob == 'string' ? new Buffer(blob, 'utf-8') : */blob, options); } +function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ { + return cfb.find(path); +} /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ //var MSCSZ = 4096; /* Mini Stream Cutoff Size */ @@ -394,6 +397,7 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +exports.find = find; exports.read = readSync; exports.parse = parse; exports.utils = { diff --git a/bits/20_jsutils.js b/bits/20_jsutils.js index aa406d9..ec03ac4 100644 --- a/bits/20_jsutils.js +++ b/bits/20_jsutils.js @@ -114,6 +114,13 @@ function dup(o/*:any*/)/*:any*/ { function fill(c/*:string*/,l/*:number*/)/*:string*/ { var o = ""; while(o.length < l) o+=c; return o; } /* TODO: stress test */ +function fuzzynum(s/*:string*/)/*:number*/ { + var v/*:number*/ = Number(s); + if(!isNaN(v)) return v; + var ss = s.replace(/([\d]),([\d])/g,"$1$2").replace(/[$]/g,""); + if(!isNaN(v = Number(ss))) return v; + return v; +} function fuzzydate(s/*:string*/)/*:Date*/ { var o = new Date(s), n = new Date(NaN); var y = o.getYear(), m = o.getMonth(), d = o.getDate(); diff --git a/bits/40_harb.js b/bits/40_harb.js index 1aeb816..01e83aa 100644 --- a/bits/40_harb.js +++ b/bits/40_harb.js @@ -234,8 +234,8 @@ var SYLK = (function() { if(val.charAt(0) === '"') val = val.substr(1,val.length - 2); else if(val === 'TRUE') val = true; else if(val === 'FALSE') val = false; - else if(+val === +val) { - val = +val; + else if(!isNaN(fuzzynum(val))) { + val = fuzzynum(val); if(next_cell_format !== null && SSF.is_date(next_cell_format)) val = numdate(val); } else if(!isNaN(fuzzydate(val).getDate())) { val = parseDate(val); @@ -396,7 +396,7 @@ var DIF = (function() { case 0: if(data 
=== 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; - else if(+value == +value) arr[R][C] = +value; + else if(!isNaN(fuzzynum(value))) arr[R][C] = fuzzynum(value); else if(!isNaN(fuzzydate(value).getDate())) arr[R][C] = parseDate(value); else arr[R][C] = value; ++C; break; @@ -482,7 +482,7 @@ var PRN = (function() { else if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; else if(data === ""){/* empty */} - else if(+data == +data) arr[R][C] = +data; + else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data); else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data); else arr[R][C] = data; } @@ -533,7 +533,7 @@ var PRN = (function() { else if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); } else if(s == "TRUE") { cell.t = 'b'; cell.v = true; } else if(s == "FALSE") { cell.t = 'b'; cell.v = false; } - else if(!isNaN(v = +s)) { cell.t = 'n'; cell.w = s; cell.v = v; } + else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; cell.w = s; cell.v = v; } else if(!isNaN(fuzzydate(s).getDate()) || _re && s.match(_re)) { cell.z = o.dateNF || SSF._table[14]; var k = 0; diff --git a/bits/76_xls.js b/bits/76_xls.js index c2592e0..60f26ed 100644 --- a/bits/76_xls.js +++ b/bits/76_xls.js @@ -811,11 +811,11 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { /* TODO: WTF */ function parse_props(cfb) { /* [MS-OSHARED] 2.3.3.2.2 Document Summary Information Property Set */ - var DSI = cfb.find('!DocumentSummaryInformation'); + var DSI = CFB.find(cfb, '!DocumentSummaryInformation'); if(DSI) try { cfb.DocSummary = parse_PropertySetStream(DSI, DocSummaryPIDDSI); } catch(e) {/* empty */} /* [MS-OSHARED] 2.3.3.2.1 Summary Information Property Set*/ - var SI = cfb.find('!SummaryInformation'); + var SI = CFB.find(cfb, '!SummaryInformation'); if(SI) try { cfb.Summary = parse_PropertySetStream(SI, SummaryPIDSI); } catch(e) {/* empty */} } @@ -823,27 +823,28 @@ function parse_xlscfb(cfb/*:any*/, 
options/*:?ParseOpts*/)/*:Workbook*/ { if(!options) options = {}; fix_read_opts(options); reset_cp(); -var CompObj, Summary, Workbook/*:?any*/; +var CompObj, Summary, WB/*:?any*/; if(cfb.FullPaths) { - CompObj = cfb.find('!CompObj'); - Summary = cfb.find('!SummaryInformation'); - Workbook = cfb.find('/Workbook'); + CompObj = CFB.find(cfb, '!CompObj'); + Summary = CFB.find(cfb, '!SummaryInformation'); + WB = CFB.find(cfb, '/Workbook'); } else { prep_blob(cfb, 0); - Workbook = ({content: cfb}/*:any*/); + WB = ({content: cfb}/*:any*/); } -if(!Workbook) Workbook = cfb.find('/Book'); +if(!WB) WB = CFB.find(cfb, '/Book'); var CompObjP, SummaryP, WorkbookP/*:Workbook*/; +var _data/*:?any*/; if(CompObj) CompObjP = parse_compobj(CompObj); if(options.bookProps && !options.bookSheets) WorkbookP = ({}/*:any*/); else { - if(Workbook) WorkbookP = parse_workbook(Workbook.content, options); + if(WB && WB.content) WorkbookP = parse_workbook(WB.content, options); /* Quattro Pro 7-8 */ - else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options); + else if((_data=CFB.find(cfb, 'PerfectOffice_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options); /* Quattro Pro 9 */ - else if(cfb.find('NativeContent_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('NativeContent_MAIN').content, options); + else if((_data=CFB.find(cfb, 'NativeContent_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options); else throw new Error("Cannot find Workbook stream"); } diff --git a/bits/79_html.js b/bits/79_html.js index b4ad23f..b1de837 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -37,12 +37,14 @@ var HTML_ = (function() { if(range.e.c < C) range.e.c = C; if(opts.dense) { if(!ws[R]) ws[R] = []; - if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m}; + if(opts.raw) ws[R][C] = {t:'s', v:m}; + else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)}; else ws[R][C] = {t:'s', v:m}; } else { var 
coord/*:string*/ = encode_cell({r:R, c:C}); /* TODO: value parsing */ - if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m}; + if(opts.raw) ws[coord] = {t:'s', v:m}; + else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)}; else ws[coord] = {t:'s', v:m}; } C += CS; @@ -134,7 +136,8 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); var o/*:Cell*/ = {t:'s', v:v}; if(v != null && v.length) { - if(!isNaN(Number(v))) o = {t:'n', v:Number(v)}; + if(opts.raw) o = {t:'s', v:v}; + else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)}; else if(!isNaN(fuzzydate(v).getDate())) { o = ({t:'d', v:parseDate(v)}/*:any*/); if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/); diff --git a/bits/85_parsezip.js b/bits/85_parsezip.js index 935d1d0..c4c711f 100644 --- a/bits/85_parsezip.js +++ b/bits/85_parsezip.js @@ -172,20 +172,20 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ { /* references to [MS-OFFCRYPTO] */ function parse_xlsxcfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ { var f = 'Version'; - var data = cfb.find(f); + var data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var version = parse_DataSpaceVersionInfo(data.content); /* 2.3.4.1 */ f = 'DataSpaceMap'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var dsm = parse_DataSpaceMap(data.content); if(dsm.length != 1 || dsm[0].comps.length != 1 || dsm[0].comps[0].t != 0 || dsm[0].name != "StrongEncryptionDataSpace" || dsm[0].comps[0].v != "EncryptedPackage") throw new Error("ECMA-376 Encrypted file bad " + f); f = 'StrongEncryptionDataSpace'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var seds = 
parse_DataSpaceDefinition(data.content); if(seds.length != 1 || seds[0] != "StrongEncryptionTransform") @@ -193,12 +193,12 @@ function parse_xlsxcfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ { /* 2.3.4.3 */ f = '!Primary'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var hdr = parse_Primary(data.content); f = 'EncryptionInfo'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var einfo = parse_EncryptionInfo(data.content); diff --git a/bits/87_read.js b/bits/87_read.js index 25f9a41..9b31717 100644 --- a/bits/87_read.js +++ b/bits/87_read.js @@ -10,8 +10,8 @@ function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:Array*/ { return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)]; } -function read_cfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ { - if(cfb.find("EncryptedPackage")) return parse_xlsxcfb(cfb, opts); +function read_cfb(cfb/*:CFBContainer*/, opts/*:?ParseOpts*/)/*:Workbook*/ { + if(CFB.find(cfb, "EncryptedPackage")) return parse_xlsxcfb(cfb, opts); return parse_xlscfb(cfb, opts); } diff --git a/bits/90_utils.js b/bits/90_utils.js index bfe6de1..c139401 100644 --- a/bits/90_utils.js +++ b/bits/90_utils.js @@ -1,8 +1,8 @@ -function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ +function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/) { if(sheet == null || sheet["!ref"] == null) return []; var val = {t:'n',v:0}, header = 0, offset = 1, hdr/*:Array*/ = [], isempty = true, v=0, vv=""; var r = {s:{r:0,c:0},e:{r:0,c:0}}; - var o = opts != null ? opts : {}; + var o = opts || {}; var raw = o.raw; var defval = o.defval; var range = o.range != null ? 
o.range : sheet["!ref"]; @@ -16,8 +16,8 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ } if(header > 0) offset = 0; var rr = encode_row(r.s.r); - var cols = new Array(r.e.c-r.s.c+1); - var out = new Array(r.e.r-r.s.r-offset+1); + var cols/*:Array*/ = []; + var out/*:Array*/ = []; var outi = 0, counter = 0; var dense = Array.isArray(sheet); var R = r.s.r, C = 0, CC = 0; @@ -37,7 +37,7 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ hdr[C] = vv; } } - var row = (header === 1) ? [] : {}; + var row/*:any*/ = (header === 1) ? [] : {}; for (R = r.s.r + offset; R <= r.e.r; ++R) { rr = encode_row(R); isempty = true; @@ -81,7 +81,7 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ var qreg = /"/g; function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, o/*:Sheet2CSVOpts*/)/*:?string*/ { var isempty = true; - var row = [], txt = "", rr = encode_row(R); + var row/*:Array*/ = [], txt = "", rr = encode_row(R); for(var C = r.s.c; C <= r.e.c; ++C) { if (!cols[C]) continue; var val = o.dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; diff --git a/demos/datagrid/README.md b/demos/datagrid/README.md new file mode 100644 index 0000000..71271ad --- /dev/null +++ b/demos/datagrid/README.md @@ -0,0 +1,67 @@ +# canvas-datagrid + +The `sheet_to_json` utility function generates output arrays suitable for use +with other JS libraries such as data grids for previewing data. After extensive +testing, [`canvas-datagrid`](https://tonygermaneri.github.io/canvas-datagrid/) +stood out as a very high-performance grid with an incredibly simple API. + +## Obtaining the Library + +The [`canvas-datagrid` npm module](http://npm.im/canvas-datagrid) includes a +minified script `dist/canvas-datagrid.js` that can be directly inserted as a +script tag. 
The unpkg CDN also exposes the latest version: + +```html + +``` + +## Previewing Data + +The HTML document needs a container element: + +```html +
+``` + +Grid initialization is a one-liner: + +```js +var grid = canvasDatagrid({ + parentNode: document.getElementById('gridctr'), + data: [] +}); +``` + +Once the workbook is read and the worksheet is selected, assigning the data +variable automatically updates the view: + +```js +grid.data = XLSX.utils.sheet_to_json(ws, {header:1}); +``` + +This demo previews the first worksheet, but it is easy to add buttons and other +features to support multiple worksheets. + +## Editing + +The library handles the whole edit cycle. No intervention is necessary. + +## Saving Data + +`grid.data` is immediately readable and can be converted back to a worksheet: + +```js +/* build worksheet from the grid data */ +var ws = XLSX.utils.aoa_to_sheet(grid.data); + +/* build up workbook */ +var wb = XLSX.utils.book_new(); +XLSX.utils.book_append_sheet(wb, ws, 'SheetJS'); + +/* .. generate download (see documentation for examples) .. */ +``` + +## Additional Features + +This demo barely scratches the surface. The underlying grid component includes +many additional features including massive data streaming, sorting and styling. diff --git a/demos/datagrid/index.html b/demos/datagrid/index.html new file mode 100644 index 0000000..235c53f --- /dev/null +++ b/demos/datagrid/index.html @@ -0,0 +1,205 @@ + + + + + + +SheetJS + canvas-datagrid Live Demo + + + +
+SheetJS Data Preview Live Demo
+
+canvas-datagrid component library
+
+Source Code Repo
+Issues?  Something look weird?  Click here and report an issue
+
+
Drop a spreadsheet file here to see sheet data
+ ... or click here to select a file + +
+Advanced Demo Options: +Use readAsBinaryString: (when available) +
+

+
+
+ + + + + + + + diff --git a/demos/datagrid/xlsx.full.min.js b/demos/datagrid/xlsx.full.min.js new file mode 120000 index 0000000..dbca48d --- /dev/null +++ b/demos/datagrid/xlsx.full.min.js @@ -0,0 +1 @@ +../../dist/xlsx.full.min.js \ No newline at end of file diff --git a/docbits/00_intro.md b/docbits/00_intro.md index e319870..e23ccb2 100644 --- a/docbits/00_intro.md +++ b/docbits/00_intro.md @@ -41,7 +41,6 @@ enhancements, additional features by request, and dedicated support. [![Build Status](https://travis-ci.org/SheetJS/js-xlsx.svg?branch=master)](https://travis-ci.org/SheetJS/js-xlsx) [![Build Status](https://semaphoreci.com/api/v1/sheetjs/js-xlsx/branches/master/shields_badge.svg)](https://semaphoreci.com/sheetjs/js-xlsx) [![Coverage Status](http://img.shields.io/coveralls/SheetJS/js-xlsx/master.svg)](https://coveralls.io/r/SheetJS/js-xlsx?branch=master) -[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx.svg?type=shield)](https://app.fossa.io/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx?ref=badge_shield) [![Dependencies Status](https://david-dm.org/sheetjs/js-xlsx/status.svg)](https://david-dm.org/sheetjs/js-xlsx) [![NPM Downloads](https://img.shields.io/npm/dt/xlsx.svg)](https://npmjs.org/package/xlsx) [![ghit.me](https://ghit.me/badge.svg?repo=sheetjs/js-xlsx)](https://ghit.me/repo/sheetjs/js-xlsx) diff --git a/docbits/10_install.md b/docbits/10_install.md index ce71229..a0142ee 100644 --- a/docbits/10_install.md +++ b/docbits/10_install.md @@ -28,6 +28,7 @@ The `demos` directory includes sample projects for: - [`angular 1.x`](demos/angular/) - [`angular 2.x / 4.x`](demos/angular2/) - [`browserify`](demos/browserify/) +- [`canvas-datagrid`](demos/datagrid/) - [`Adobe ExtendScript`](demos/extendscript/) - [`meteor`](demos/meteor/) - [`phantomjs`](demos/phantomjs/) diff --git a/docbits/82_util.md b/docbits/82_util.md index 83f9a85..f1f2fc2 100644 --- a/docbits/82_util.md +++ 
b/docbits/82_util.md @@ -77,6 +77,15 @@ as strings. `XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet. +Both functions accept options arguments: + +| Option Name | Default | Description | +| :---------- | :------: | :-------------------------------------------------- | +| dateNF | fmt 14 | Use specified date format in string output | +| cellDates | false | Store dates as type `d` (default is `n`) | +| raw | | If true, every cell will hold raw strings | + +
Examples (click to show) diff --git a/misc/docs/README.md b/misc/docs/README.md index a49c1c5..2ca569c 100644 --- a/misc/docs/README.md +++ b/misc/docs/README.md @@ -38,7 +38,6 @@ enhancements, additional features by request, and dedicated support. [![Build Status](https://travis-ci.org/SheetJS/js-xlsx.svg?branch=master)](https://travis-ci.org/SheetJS/js-xlsx) [![Build Status](https://semaphoreci.com/api/v1/sheetjs/js-xlsx/branches/master/shields_badge.svg)](https://semaphoreci.com/sheetjs/js-xlsx) [![Coverage Status](http://img.shields.io/coveralls/SheetJS/js-xlsx/master.svg)](https://coveralls.io/r/SheetJS/js-xlsx?branch=master) -[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx.svg?type=shield)](https://app.fossa.io/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx?ref=badge_shield) [![Dependencies Status](https://david-dm.org/sheetjs/js-xlsx/status.svg)](https://david-dm.org/sheetjs/js-xlsx) [![NPM Downloads](https://img.shields.io/npm/dt/xlsx.svg)](https://npmjs.org/package/xlsx) [![ghit.me](https://ghit.me/badge.svg?repo=sheetjs/js-xlsx)](https://ghit.me/repo/sheetjs/js-xlsx) @@ -165,6 +164,7 @@ The `demos` directory includes sample projects for: - [`angular 1.x`](demos/angular/) - [`angular 2.x / 4.x`](demos/angular2/) - [`browserify`](demos/browserify/) +- [`canvas-datagrid`](demos/datagrid/) - [`Adobe ExtendScript`](demos/extendscript/) - [`meteor`](demos/meteor/) - [`phantomjs`](demos/phantomjs/) @@ -1516,6 +1516,15 @@ as strings. `XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet. 
+Both functions accept options arguments: + +| Option Name | Default | Description | +| :---------- | :------: | :-------------------------------------------------- | +| dateNF | fmt 14 | Use specified date format in string output | +| cellDates | false | Store dates as type `d` (default is `n`) | +| raw | | If true, every cell will hold raw strings | + + To generate the example sheet, start with the HTML table: diff --git a/misc/flowdeps.js b/misc/flowdeps.js index 2f60f3d..5a8778e 100644 --- a/misc/flowdeps.js +++ b/misc/flowdeps.js @@ -45,6 +45,7 @@ type SSFModule = { type CFBModule = { version:string; + find:(cfb:CFBContainer, path:string)=>?CFBEntry; read:(blob:RawBytes|string, opts:CFBReadOpts)=>CFBContainer; parse:(file:RawBytes, opts:CFBReadOpts)=>CFBContainer; utils:CFBUtils; diff --git a/package.json b/package.json index 56d8523..94d5542 100644 --- a/package.json +++ b/package.json @@ -20,7 +20,7 @@ "exit-on-epipe":"~1.0.1", "ssf":"~0.10.1", "codepage":"~1.11.0", - "cfb":"~0.12.0", + "cfb":"~0.12.1", "crc-32":"~1.1.0", "adler-32":"~1.1.0", "commander":"~2.11.0" diff --git a/test.js b/test.js index c101daa..0ae8f41 100644 --- a/test.js +++ b/test.js @@ -1845,6 +1845,32 @@ describe('csv', function() { }); }); +describe('HTML', function() { + describe('input', function(){ + var b = "
14,001
$41.08
"; + it('should generate numbers by default', function() { + var sheet = X.read(b, {type:"binary"}).Sheets.Sheet1 + var cell = get_cell(sheet, "A1"); + assert.equal(cell.v, 1); + assert.equal(cell.t, 'n'); + cell = get_cell(sheet, "B1"); + assert.equal(cell.v, 4001); + cell = get_cell(sheet, "A2"); + assert.equal(cell.v, 41.08); + }); + it('should generate strings if raw option is passed', function() { + var sheet = X.read(b, {type:"binary", raw:true}).Sheets.Sheet1 + var cell = get_cell(sheet, "A1"); + assert.equal(cell.v, "1"); + assert.equal(cell.t, 's'); + cell = get_cell(sheet, "B1"); + assert.equal(cell.v, "4,001"); + cell = get_cell(sheet, "A2"); + assert.equal(cell.v, "$41.08"); + }); + }); +}); + describe('js -> file -> js', function() { var data, ws, wb, BIN="binary"; var bef = (function() { diff --git a/types/doc.ts b/types/doc.ts new file mode 100644 index 0000000..edc40e9 --- /dev/null +++ b/types/doc.ts @@ -0,0 +1,111 @@ +import * as XLSX from 'xlsx'; +import * as fs from 'fs'; + +const version: string = XLSX.version; + +const SSF = XLSX.SSF; + +let read_opts: XLSX.ParsingOptions = { + type: "buffer", + raw: false, + cellFormula: false, + cellHTML: false, + cellNF: false, + cellStyles: false, + cellText: false, + cellDates: false, + dateNF: "yyyy-mm-dd", + sheetStubs: false, + sheetRows: 3, + bookDeps: false, + bookFiles: false, + bookProps: false, + bookSheets: false, + bookVBA: false, + password: "", + WTF: false +}; + +let write_opts: XLSX.WritingOptions = { + type: "buffer", + cellDates: false, + bookSST: false, + bookType: "xlsx", + sheet: "Sheet1", + compression: false, + Props: { + Author: "Someone", + Company: "SheetJS LLC" + } +}; + +const wb1 = XLSX.readFile("sheetjs.xls", read_opts); +XLSX.writeFile(wb1, "sheetjs.new.xlsx", write_opts); + +read_opts.type = "binary"; +const wb2 = XLSX.read("1,2,3\n4,5,6", read_opts); +write_opts.type = "binary"; +const out2 = XLSX.write(wb2, write_opts); + +read_opts.type = "buffer"; +const wb3 = 
XLSX.read(fs.readFileSync("sheetjs.xlsx"), read_opts); +write_opts.type = "base64"; +const out3 = XLSX.write(wb3, write_opts); + +const ws1 = XLSX.utils.aoa_to_sheet([ + "SheetJS".split(""), + [1,2,3,4,5,6,7], + [2,3,4,5,6,7,8] +], { + dateNF: "yyyy-mm-dd", + cellDates: true, + sheetStubs: false +}); + +const ws2 = XLSX.utils.json_to_sheet([ + {S:1,h:2,e:3,e_1:4,t:5,J:6,S_1:7}, + {S:2,h:3,e:4,e_1:5,t:6,J:7,S_1:8} +], { + header:["S","h","e","e_1","t","J","S_1"], + cellDates: true, + dateNF: "yyyy-mm-dd" +}); + +const tbl = {}; /* document.getElementById('table'); */ +const ws3 = XLSX.utils.table_to_sheet(tbl, { + raw: true, + cellDates: true, + dateNF: "yyyy-mm-dd" +}); + +const obj1 = XLSX.utils.sheet_to_formulae(ws1); + +const str1: string = XLSX.utils.sheet_to_csv(ws2, { + FS: "\t", + RS: "|", + dateNF: "yyyy-mm-dd", + strip: true, + blankrows: true, + skipHidden: true +}); + +const html1: string = XLSX.utils.sheet_to_html(ws3, { + editable: false +}); + +const arr1: object[] = XLSX.utils.sheet_to_json(ws1, { + raw: true, + range: 1, + header: "A", + dateNF: "yyyy-mm-dd", + defval: 0, + blankrows: true +}); + +const arr2: any[][] = XLSX.utils.sheet_to_json(ws2, { + header: 1 +}); + +const arr3: any[] = XLSX.utils.sheet_to_json(ws3, { + header: ["Sheet", "JS", "Rocks"] +}); diff --git a/types/index.d.ts b/types/index.d.ts index db3830c..b5c8c3c 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -83,6 +83,11 @@ export interface CommonOptions { cellDates?: boolean; } +export interface DateNFOption { + /** Use specified date format */ + dateNF?: NumberFormat; +} + /** Options for read and readFile */ export interface ParsingOptions extends CommonOptions { /** Input data encoding */ @@ -168,6 +173,9 @@ export interface ParsingOptions extends CommonOptions { * @default '' */ password?: string; + + /* If true, plaintext parsing will not parse values */ + raw?: boolean; } /** Options for write and writeFile */ @@ -541,16 +549,13 @@ export interface Range { e: 
CellAddress; } -export interface Sheet2CSVOpts { +export interface Sheet2CSVOpts extends DateNFOption { /** Field Separator ("delimiter") */ FS?: string; /** Record Separator ("row separator") */ RS?: string; - /** Use specified date format */ - dateNF?: NumberFormat; - /** Remove trailing field separators in each record */ strip?: boolean; @@ -572,10 +577,7 @@ export interface Sheet2HTMLOpts { footer?: string; } -export interface Sheet2JSONOpts { - /** Use specified format for date cells */ - dateNF?: NumberFormat; - +export interface Sheet2JSONOpts extends DateNFOption { /** Output format */ header?: "A"|number|string[]; @@ -592,16 +594,7 @@ export interface Sheet2JSONOpts { raw?: boolean; } -export interface AOA2SheetOpts { - /** Use specified format for date cells */ - dateNF?: NumberFormat; - - /** - * Store dates as type d (default is n) - * @default false - */ - cellDates?: boolean; - +export interface AOA2SheetOpts extends CommonOptions, DateNFOption { /** * Create cell objects for stub cells * @default false @@ -609,14 +602,14 @@ export interface AOA2SheetOpts { sheetStubs?: boolean; } -export interface JSON2SheetOpts { - /** Use specified date format */ - dateNF?: NumberFormat; +export interface JSON2SheetOpts extends CommonOptions, DateNFOption { + /** Use specified column order */ + header?: string[]; } -export interface Table2SheetOpts { - /** Use specified date format */ - dateNF?: NumberFormat; +export interface Table2SheetOpts extends CommonOptions, DateNFOption { + /* If true, plaintext parsing will not parse values */ + raw?: boolean; } /** General utilities */ diff --git a/xlsx.flow.js b/xlsx.flow.js index 14de743..efa3bfa 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -1088,7 +1088,7 @@ type CFBFiles = {[n:string]:CFBEntry}; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/; -exports.version = '0.12.0'; +exports.version = '0.12.1'; function parse(file/*:RawBytes*/, 
options/*:CFBReadOpts*/)/*:CFBContainer*/ { var mver = 3; // major version var ssz = 512; // sector size @@ -1259,7 +1259,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } @@ -1360,7 +1360,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/; prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o/*:CFBEntry*/ = ({ @@ -1381,6 +1380,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ -1393,7 +1393,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/); prep_blob(o.content, 0); } @@ -1422,6 +1422,9 @@ function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { return parse(/*::typeof blob == 'string' ? 
new Buffer(blob, 'utf-8') : */blob, options); } +function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ { + return cfb.find(path); +} /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ //var MSCSZ = 4096; /* Mini Stream Cutoff Size */ @@ -1447,6 +1450,7 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +exports.find = find; exports.read = readSync; exports.parse = parse; exports.utils = { @@ -1577,6 +1581,13 @@ function dup(o/*:any*/)/*:any*/ { function fill(c/*:string*/,l/*:number*/)/*:string*/ { var o = ""; while(o.length < l) o+=c; return o; } /* TODO: stress test */ +function fuzzynum(s/*:string*/)/*:number*/ { + var v/*:number*/ = Number(s); + if(!isNaN(v)) return v; + var ss = s.replace(/([\d]),([\d])/g,"$1$2").replace(/[$]/g,""); + if(!isNaN(v = Number(ss))) return v; + return v; +} function fuzzydate(s/*:string*/)/*:Date*/ { var o = new Date(s), n = new Date(NaN); var y = o.getYear(), m = o.getMonth(), d = o.getDate(); @@ -5483,8 +5494,8 @@ var SYLK = (function() { if(val.charAt(0) === '"') val = val.substr(1,val.length - 2); else if(val === 'TRUE') val = true; else if(val === 'FALSE') val = false; - else if(+val === +val) { - val = +val; + else if(!isNaN(fuzzynum(val))) { + val = fuzzynum(val); if(next_cell_format !== null && SSF.is_date(next_cell_format)) val = numdate(val); } else if(!isNaN(fuzzydate(val).getDate())) { val = parseDate(val); @@ -5645,7 +5656,7 @@ var DIF = (function() { case 0: if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; - else if(+value == +value) arr[R][C] = +value; + else if(!isNaN(fuzzynum(value))) arr[R][C] = fuzzynum(value); else if(!isNaN(fuzzydate(value).getDate())) arr[R][C] = parseDate(value); else arr[R][C] = value; ++C; break; @@ -5731,7 +5742,7 @@ var PRN = (function() { else if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; else if(data === ""){/* empty */} - else if(+data == +data) 
arr[R][C] = +data; + else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data); else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data); else arr[R][C] = data; } @@ -5782,7 +5793,7 @@ var PRN = (function() { else if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); } else if(s == "TRUE") { cell.t = 'b'; cell.v = true; } else if(s == "FALSE") { cell.t = 'b'; cell.v = false; } - else if(!isNaN(v = +s)) { cell.t = 'n'; cell.w = s; cell.v = v; } + else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; cell.w = s; cell.v = v; } else if(!isNaN(fuzzydate(s).getDate()) || _re && s.match(_re)) { cell.z = o.dateNF || SSF._table[14]; var k = 0; @@ -14940,11 +14951,11 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ { /* TODO: WTF */ function parse_props(cfb) { /* [MS-OSHARED] 2.3.3.2.2 Document Summary Information Property Set */ - var DSI = cfb.find('!DocumentSummaryInformation'); + var DSI = CFB.find(cfb, '!DocumentSummaryInformation'); if(DSI) try { cfb.DocSummary = parse_PropertySetStream(DSI, DocSummaryPIDDSI); } catch(e) {/* empty */} /* [MS-OSHARED] 2.3.3.2.1 Summary Information Property Set*/ - var SI = cfb.find('!SummaryInformation'); + var SI = CFB.find(cfb, '!SummaryInformation'); if(SI) try { cfb.Summary = parse_PropertySetStream(SI, SummaryPIDSI); } catch(e) {/* empty */} } @@ -14952,27 +14963,28 @@ function parse_xlscfb(cfb/*:any*/, options/*:?ParseOpts*/)/*:Workbook*/ { if(!options) options = {}; fix_read_opts(options); reset_cp(); -var CompObj, Summary, Workbook/*:?any*/; +var CompObj, Summary, WB/*:?any*/; if(cfb.FullPaths) { - CompObj = cfb.find('!CompObj'); - Summary = cfb.find('!SummaryInformation'); - Workbook = cfb.find('/Workbook'); + CompObj = CFB.find(cfb, '!CompObj'); + Summary = CFB.find(cfb, '!SummaryInformation'); + WB = CFB.find(cfb, '/Workbook'); } else { prep_blob(cfb, 0); - Workbook = ({content: cfb}/*:any*/); + WB = ({content: cfb}/*:any*/); } -if(!Workbook) Workbook = cfb.find('/Book'); +if(!WB) WB 
= CFB.find(cfb, '/Book'); var CompObjP, SummaryP, WorkbookP/*:Workbook*/; +var _data/*:?any*/; if(CompObj) CompObjP = parse_compobj(CompObj); if(options.bookProps && !options.bookSheets) WorkbookP = ({}/*:any*/); else { - if(Workbook) WorkbookP = parse_workbook(Workbook.content, options); + if(WB && WB.content) WorkbookP = parse_workbook(WB.content, options); /* Quattro Pro 7-8 */ - else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options); + else if((_data=CFB.find(cfb, 'PerfectOffice_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options); /* Quattro Pro 9 */ - else if(cfb.find('NativeContent_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('NativeContent_MAIN').content, options); + else if((_data=CFB.find(cfb, 'NativeContent_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options); else throw new Error("Cannot find Workbook stream"); } @@ -16386,12 +16398,14 @@ var HTML_ = (function() { if(range.e.c < C) range.e.c = C; if(opts.dense) { if(!ws[R]) ws[R] = []; - if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m}; + if(opts.raw) ws[R][C] = {t:'s', v:m}; + else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)}; else ws[R][C] = {t:'s', v:m}; } else { var coord/*:string*/ = encode_cell({r:R, c:C}); /* TODO: value parsing */ - if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m}; + if(opts.raw) ws[coord] = {t:'s', v:m}; + else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)}; else ws[coord] = {t:'s', v:m}; } C += CS; @@ -16483,7 +16497,8 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); var o/*:Cell*/ = {t:'s', v:v}; if(v != null && v.length) { - if(!isNaN(Number(v))) o = {t:'n', v:Number(v)}; + if(opts.raw) o = {t:'s', v:v}; + else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)}; else 
if(!isNaN(fuzzydate(v).getDate())) { o = ({t:'d', v:parseDate(v)}/*:any*/); if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/); @@ -17407,20 +17422,20 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ { /* references to [MS-OFFCRYPTO] */ function parse_xlsxcfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ { var f = 'Version'; - var data = cfb.find(f); + var data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var version = parse_DataSpaceVersionInfo(data.content); /* 2.3.4.1 */ f = 'DataSpaceMap'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var dsm = parse_DataSpaceMap(data.content); if(dsm.length != 1 || dsm[0].comps.length != 1 || dsm[0].comps[0].t != 0 || dsm[0].name != "StrongEncryptionDataSpace" || dsm[0].comps[0].v != "EncryptedPackage") throw new Error("ECMA-376 Encrypted file bad " + f); f = 'StrongEncryptionDataSpace'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var seds = parse_DataSpaceDefinition(data.content); if(seds.length != 1 || seds[0] != "StrongEncryptionTransform") @@ -17428,12 +17443,12 @@ function parse_xlsxcfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ { /* 2.3.4.3 */ f = '!Primary'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var hdr = parse_Primary(data.content); f = 'EncryptionInfo'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var einfo = parse_EncryptionInfo(data.content); @@ -17579,8 +17594,8 @@ function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:Array*/ { return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)]; } -function read_cfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ { - 
if(cfb.find("EncryptedPackage")) return parse_xlsxcfb(cfb, opts); +function read_cfb(cfb/*:CFBContainer*/, opts/*:?ParseOpts*/)/*:Workbook*/ { + if(CFB.find(cfb, "EncryptedPackage")) return parse_xlsxcfb(cfb, opts); return parse_xlscfb(cfb, opts); } @@ -17778,11 +17793,11 @@ function writeFileAsync(filename/*:string*/, wb/*:Workbook*/, opts/*:?WriteFileO var _cb = cb; if(!(_cb instanceof Function)) _cb = (opts/*:any*/); return _fs.writeFile(filename, writeSync(wb, o), _cb); } -function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ +function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/) { if(sheet == null || sheet["!ref"] == null) return []; var val = {t:'n',v:0}, header = 0, offset = 1, hdr/*:Array*/ = [], isempty = true, v=0, vv=""; var r = {s:{r:0,c:0},e:{r:0,c:0}}; - var o = opts != null ? opts : {}; + var o = opts || {}; var raw = o.raw; var defval = o.defval; var range = o.range != null ? o.range : sheet["!ref"]; @@ -17796,8 +17811,8 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ } if(header > 0) offset = 0; var rr = encode_row(r.s.r); - var cols = new Array(r.e.c-r.s.c+1); - var out = new Array(r.e.r-r.s.r-offset+1); + var cols/*:Array*/ = []; + var out/*:Array*/ = []; var outi = 0, counter = 0; var dense = Array.isArray(sheet); var R = r.s.r, C = 0, CC = 0; @@ -17817,7 +17832,7 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ hdr[C] = vv; } } - var row = (header === 1) ? [] : {}; + var row/*:any*/ = (header === 1) ? 
[] : {}; for (R = r.s.r + offset; R <= r.e.r; ++R) { rr = encode_row(R); isempty = true; @@ -17861,7 +17876,7 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ var qreg = /"/g; function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, o/*:Sheet2CSVOpts*/)/*:?string*/ { var isempty = true; - var row = [], txt = "", rr = encode_row(R); + var row/*:Array*/ = [], txt = "", rr = encode_row(R); for(var C = r.s.c; C <= r.e.c; ++C) { if (!cols[C]) continue; var val = o.dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; diff --git a/xlsx.js b/xlsx.js index 0504375..4b3fad5 100644 --- a/xlsx.js +++ b/xlsx.js @@ -1026,7 +1026,7 @@ var DO_NOT_EXPORT_CFB = true; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports = {}; -exports.version = '0.12.0'; +exports.version = '0.12.1'; function parse(file, options) { var mver = 3; // major version var ssz = 512; // sector size @@ -1197,7 +1197,7 @@ function build_full_paths(FI, FPD, FP, Paths) { if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } @@ -1298,7 +1298,6 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil var blob = sector.slice(i, i+128); prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o = ({ @@ -1319,6 +1318,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ 
-1331,7 +1331,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)); prep_blob(o.content, 0); } @@ -1360,6 +1360,9 @@ function readSync(blob, options) { return parse(blob, options); } +function find(cfb, path) { + return cfb.find(path); +} /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ //var MSCSZ = 4096; /* Mini Stream Cutoff Size */ @@ -1385,6 +1388,7 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +exports.find = find; exports.read = readSync; exports.parse = parse; exports.utils = { @@ -1514,6 +1518,13 @@ function dup(o) { function fill(c,l) { var o = ""; while(o.length < l) o+=c; return o; } /* TODO: stress test */ +function fuzzynum(s) { + var v = Number(s); + if(!isNaN(v)) return v; + var ss = s.replace(/([\d]),([\d])/g,"$1$2").replace(/[$]/g,""); + if(!isNaN(v = Number(ss))) return v; + return v; +} function fuzzydate(s) { var o = new Date(s), n = new Date(NaN); var y = o.getYear(), m = o.getMonth(), d = o.getDate(); @@ -5409,8 +5420,8 @@ var SYLK = (function() { if(val.charAt(0) === '"') val = val.substr(1,val.length - 2); else if(val === 'TRUE') val = true; else if(val === 'FALSE') val = false; - else if(+val === +val) { - val = +val; + else if(!isNaN(fuzzynum(val))) { + val = fuzzynum(val); if(next_cell_format !== null && SSF.is_date(next_cell_format)) val = numdate(val); } else if(!isNaN(fuzzydate(val).getDate())) { val = parseDate(val); @@ -5571,7 +5582,7 @@ var DIF = (function() { case 0: if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; - else if(+value == +value) arr[R][C] = +value; + else if(!isNaN(fuzzynum(value))) arr[R][C] = 
fuzzynum(value); else if(!isNaN(fuzzydate(value).getDate())) arr[R][C] = parseDate(value); else arr[R][C] = value; ++C; break; @@ -5657,7 +5668,7 @@ var PRN = (function() { else if(data === 'TRUE') arr[R][C] = true; else if(data === 'FALSE') arr[R][C] = false; else if(data === ""){/* empty */} - else if(+data == +data) arr[R][C] = +data; + else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data); else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data); else arr[R][C] = data; } @@ -5708,7 +5719,7 @@ var PRN = (function() { else if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); } else if(s == "TRUE") { cell.t = 'b'; cell.v = true; } else if(s == "FALSE") { cell.t = 'b'; cell.v = false; } - else if(!isNaN(v = +s)) { cell.t = 'n'; cell.w = s; cell.v = v; } + else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; cell.w = s; cell.v = v; } else if(!isNaN(fuzzydate(s).getDate()) || _re && s.match(_re)) { cell.z = o.dateNF || SSF._table[14]; var k = 0; @@ -14854,11 +14865,11 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break; /* TODO: WTF */ function parse_props(cfb) { /* [MS-OSHARED] 2.3.3.2.2 Document Summary Information Property Set */ - var DSI = cfb.find('!DocumentSummaryInformation'); + var DSI = CFB.find(cfb, '!DocumentSummaryInformation'); if(DSI) try { cfb.DocSummary = parse_PropertySetStream(DSI, DocSummaryPIDDSI); } catch(e) {/* empty */} /* [MS-OSHARED] 2.3.3.2.1 Summary Information Property Set*/ - var SI = cfb.find('!SummaryInformation'); + var SI = CFB.find(cfb, '!SummaryInformation'); if(SI) try { cfb.Summary = parse_PropertySetStream(SI, SummaryPIDSI); } catch(e) {/* empty */} } @@ -14866,27 +14877,28 @@ function parse_xlscfb(cfb, options) { if(!options) options = {}; fix_read_opts(options); reset_cp(); -var CompObj, Summary, Workbook; +var CompObj, Summary, WB; if(cfb.FullPaths) { - CompObj = cfb.find('!CompObj'); - Summary = cfb.find('!SummaryInformation'); - Workbook = cfb.find('/Workbook'); + CompObj = CFB.find(cfb, 
'!CompObj'); + Summary = CFB.find(cfb, '!SummaryInformation'); + WB = CFB.find(cfb, '/Workbook'); } else { prep_blob(cfb, 0); - Workbook = ({content: cfb}); + WB = ({content: cfb}); } -if(!Workbook) Workbook = cfb.find('/Book'); +if(!WB) WB = CFB.find(cfb, '/Book'); var CompObjP, SummaryP, WorkbookP; +var _data; if(CompObj) CompObjP = parse_compobj(CompObj); if(options.bookProps && !options.bookSheets) WorkbookP = ({}); else { - if(Workbook) WorkbookP = parse_workbook(Workbook.content, options); + if(WB && WB.content) WorkbookP = parse_workbook(WB.content, options); /* Quattro Pro 7-8 */ - else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options); + else if((_data=CFB.find(cfb, 'PerfectOffice_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options); /* Quattro Pro 9 */ - else if(cfb.find('NativeContent_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('NativeContent_MAIN').content, options); + else if((_data=CFB.find(cfb, 'NativeContent_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options); else throw new Error("Cannot find Workbook stream"); } @@ -16300,12 +16312,14 @@ var HTML_ = (function() { if(range.e.c < C) range.e.c = C; if(opts.dense) { if(!ws[R]) ws[R] = []; - if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m}; + if(opts.raw) ws[R][C] = {t:'s', v:m}; + else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)}; else ws[R][C] = {t:'s', v:m}; } else { var coord = encode_cell({r:R, c:C}); /* TODO: value parsing */ - if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m}; + if(opts.raw) ws[coord] = {t:'s', v:m}; + else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)}; else ws[coord] = {t:'s', v:m}; } C += CS; @@ -16397,7 +16411,8 @@ function parse_dom_table(table, _opts) { if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); var o = {t:'s', v:v}; if(v != null && v.length) { - 
if(!isNaN(Number(v))) o = {t:'n', v:Number(v)}; + if(opts.raw) o = {t:'s', v:v}; + else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)}; else if(!isNaN(fuzzydate(v).getDate())) { o = ({t:'d', v:parseDate(v)}); if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}); @@ -17320,20 +17335,20 @@ function parse_zip(zip, opts) { /* references to [MS-OFFCRYPTO] */ function parse_xlsxcfb(cfb, opts) { var f = 'Version'; - var data = cfb.find(f); + var data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var version = parse_DataSpaceVersionInfo(data.content); /* 2.3.4.1 */ f = 'DataSpaceMap'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var dsm = parse_DataSpaceMap(data.content); if(dsm.length != 1 || dsm[0].comps.length != 1 || dsm[0].comps[0].t != 0 || dsm[0].name != "StrongEncryptionDataSpace" || dsm[0].comps[0].v != "EncryptedPackage") throw new Error("ECMA-376 Encrypted file bad " + f); f = 'StrongEncryptionDataSpace'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var seds = parse_DataSpaceDefinition(data.content); if(seds.length != 1 || seds[0] != "StrongEncryptionTransform") @@ -17341,12 +17356,12 @@ function parse_xlsxcfb(cfb, opts) { /* 2.3.4.3 */ f = '!Primary'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var hdr = parse_Primary(data.content); f = 'EncryptionInfo'; - data = cfb.find(f); + data = CFB.find(cfb, f); if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f); var einfo = parse_EncryptionInfo(data.content); @@ -17491,7 +17506,7 @@ function firstbyte(f,o) { } function read_cfb(cfb, opts) { - if(cfb.find("EncryptedPackage")) return parse_xlsxcfb(cfb, opts); + if(CFB.find(cfb, "EncryptedPackage")) return 
parse_xlsxcfb(cfb, opts); return parse_xlscfb(cfb, opts); } @@ -17688,11 +17703,11 @@ function writeFileAsync(filename, wb, opts, cb) { var _cb = cb; if(!(_cb instanceof Function)) _cb = (opts); return _fs.writeFile(filename, writeSync(wb, o), _cb); } -function sheet_to_json(sheet, opts){ +function sheet_to_json(sheet, opts) { if(sheet == null || sheet["!ref"] == null) return []; var val = {t:'n',v:0}, header = 0, offset = 1, hdr = [], isempty = true, v=0, vv=""; var r = {s:{r:0,c:0},e:{r:0,c:0}}; - var o = opts != null ? opts : {}; + var o = opts || {}; var raw = o.raw; var defval = o.defval; var range = o.range != null ? o.range : sheet["!ref"]; @@ -17706,8 +17721,8 @@ function sheet_to_json(sheet, opts){ } if(header > 0) offset = 0; var rr = encode_row(r.s.r); - var cols = new Array(r.e.c-r.s.c+1); - var out = new Array(r.e.r-r.s.r-offset+1); + var cols = []; + var out = []; var outi = 0, counter = 0; var dense = Array.isArray(sheet); var R = r.s.r, C = 0, CC = 0;