diff --git a/README.md b/README.md index 857cb1e..f229ec5 100644 --- a/README.md +++ b/README.md @@ -677,7 +677,7 @@ The exported `read` and `readFile` functions accept an options argument: | cellHTML | true | Parse rich text and save HTML to the .h field | | cellNF | false | Save number format string to the .z field | | cellStyles | false | Save style/theme info to the .s field | -| cellDates | false | Store dates as type `d` (default is `n`) ** | +| cellDates | false | Store dates as type `d` (default is `n`) | | sheetStubs | false | Create cell objects of type `z` for stub cells | | sheetRows | 0 | If >0, read the first `sheetRows` rows ** | | bookDeps | false | If true, parse calculation chains | @@ -701,7 +701,6 @@ The exported `read` and `readFile` functions accept an options argument: - `sheetRows-1` rows will be generated when looking at the JSON object output (since the header row is counted as a row when parsing the data) - `bookVBA` merely exposes the raw vba object. It does not parse the data. -- `cellDates` currently does not convert numerical dates to JS dates. - Currently only XOR encryption is supported. Unsupported error will be thrown for files employing other encryption methods. - WTF is mainly for development. By default, the parser will suppress read @@ -826,6 +825,10 @@ produces CSV output. The function takes an options argument: | :---------- | :------: | :-------------------------------------------------- | | FS | `","` | "Field Separator" delimiter between fields | | RS | `"\n"` | "Record Separator" delimiter between rows | +| dateNF | fmt 14 | Use specified date format in string output | +| strip | false | Remove trailing field separators in each record ** | + +- `strip` will remove trailing commas from each line under default `FS/RS` For the example sheet: @@ -852,6 +855,7 @@ generate different types of JS objects. The function takes an options argument: | raw | `false` | Use raw values (true) or formatted strings (false) | | range | from WS | Override Range (see table below) | | header | | Control output format (see table below) | +| dateNF | fmt 14 | Use specified date format in string output | - `raw` only affects cells which have a format code (`.z`) field or a formatted text (`.w`) field. diff --git a/bits/22_xmlutils.js b/bits/22_xmlutils.js index 02d2d01..de09b13 100644 --- a/bits/22_xmlutils.js +++ b/bits/22_xmlutils.js @@ -2,7 +2,7 @@ var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g; var tagregex=/<[^>]*>/g; var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/; function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ { - var z = ([]/*:any*/); + var z = ({}/*:any*/); var eq = 0, c = 0; for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break; if(!skip_root) z[0] = tag.substr(0, eq); diff --git a/bits/87_read.js b/bits/87_read.js index f72f70f..bf57537 100644 --- a/bits/87_read.js +++ b/bits/87_read.js @@ -1,11 +1,13 @@ -function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:number*/ { +function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:Array*/ { + var x = ""; switch((o||{}).type || "base64") { - case 'buffer': return f[0]; - case 'base64': return Base64.decode(f.substr(0,12)).charCodeAt(0); - case 'binary': return f.charCodeAt(0); - case 'array': return f[0]; + case 'buffer': return [f[0], f[1], f[2], f[3]]; + case 'base64': x = Base64.decode(f.substr(0,24)); break; + case 'binary': x = f; break; + case 'array': return [f[0], f[1], f[2], f[3]]; default: throw new Error("Unrecognized type " + (o ? o.type : "undefined")); } + return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)]; } function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { @@ -23,18 +25,19 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { } function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { - var zip, d = data, n=0; + var zip, d = data, n=[0]; var o = opts||{}; if(!o.type) o.type = (has_buf && Buffer.isBuffer(data)) ? "buffer" : "base64"; if(o.type == "file") { o.type = "buffer"; d = _fs.readFileSync(data); } - switch((n = firstbyte(d, o))) { + switch((n = firstbyte(d, o))[0]) { case 0xD0: return parse_xlscfb(CFB.read(d, o), o); case 0x09: return parse_xlscfb(s2a(o.type === 'base64' ? Base64.decode(d) : d), o); case 0x3C: return parse_xlml(d, o); - case 0x50: return read_zip(d, o); + case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break; case 0xEF: return parse_xlml(d, o); - default: throw new Error("Unsupported file " + n); + default: throw new Error("Unsupported file " + n.join("|")); } + throw new Error("Unsupported file format " + n.join("|")); } function readFileSync(filename/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ { diff --git a/bits/90_utils.js b/bits/90_utils.js index 1c12129..8665387 100644 --- a/bits/90_utils.js +++ b/bits/90_utils.js @@ -60,15 +60,16 @@ function safe_decode_range(range/*:string*/)/*:Range*/ { } function safe_format_cell(cell/*:Cell*/, v/*:any*/) { - if(cell.z !== undefined) try { return (cell.w = SSF.format(cell.z, v)); } catch(e) { } - if(!cell.XF) return v; - try { return (cell.w = SSF.format(cell.XF.ifmt||0, v)); } catch(e) { return ''+v; } + var q = (cell.t == 'd' && v instanceof Date); + if(cell.z != null) try { return (cell.w = SSF.format(cell.z, q ? datenum(v) : v)); } catch(e) { } + try { return (cell.w = SSF.format((cell.XF||{}).ifmt||(q ? 14 : 0), q ? datenum(v) : v)); } catch(e) { return ''+v; } } -function format_cell(cell/*:Cell*/, v/*:any*/) { +function format_cell(cell/*:Cell*/, v/*:any*/, o/*:any*/) { if(cell == null || cell.t == null || cell.t == 'z') return ""; if(cell.w !== undefined) return cell.w; - if(v === undefined) return safe_format_cell(cell, cell.v); + if(cell.t == 'd' && !cell.z && o && o.dateNF) cell.z = o.dateNF; + if(v == undefined) return safe_format_cell(cell, cell.v); return safe_format_cell(cell, v); } @@ -146,6 +147,7 @@ function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) { var r = safe_decode_range(sheet["!ref"]); var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0); var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0); + var endregex = new RegExp(FS+"+$"); var row = "", rr = "", cols = []; var i = 0, cc = 0, val; var R = 0, C = 0; @@ -166,6 +168,7 @@ function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) { /* NOTE: Excel CSV does not support array formulae */ row += (C === r.s.c ? "" : FS) + txt; } + if(o.strip) row = row.replace(endregex,""); out += row + RS; } return out; diff --git a/docbits/80_parseopts.md b/docbits/80_parseopts.md index a4ef4f9..439cb9d 100644 --- a/docbits/80_parseopts.md +++ b/docbits/80_parseopts.md @@ -9,7 +9,7 @@ The exported `read` and `readFile` functions accept an options argument: | cellHTML | true | Parse rich text and save HTML to the .h field | | cellNF | false | Save number format string to the .z field | | cellStyles | false | Save style/theme info to the .s field | -| cellDates | false | Store dates as type `d` (default is `n`) ** | +| cellDates | false | Store dates as type `d` (default is `n`) | | sheetStubs | false | Create cell objects of type `z` for stub cells | | sheetRows | 0 | If >0, read the first `sheetRows` rows ** | | bookDeps | false | If true, parse calculation chains | @@ -33,7 +33,6 @@ The exported `read` and `readFile` functions accept an options argument: - `sheetRows-1` rows will be generated when looking at the JSON object output (since the header row is counted as a row when parsing the data) - `bookVBA` merely exposes the raw vba object. It does not parse the data. -- `cellDates` currently does not convert numerical dates to JS dates. - Currently only XOR encryption is supported. Unsupported error will be thrown for files employing other encryption methods. - WTF is mainly for development. By default, the parser will suppress read diff --git a/docbits/82_util.md b/docbits/82_util.md index 2a26720..474046d 100644 --- a/docbits/82_util.md +++ b/docbits/82_util.md @@ -34,6 +34,10 @@ produces CSV output. The function takes an options argument: | :---------- | :------: | :-------------------------------------------------- | | FS | `","` | "Field Separator" delimiter between fields | | RS | `"\n"` | "Record Separator" delimiter between rows | +| dateNF | fmt 14 | Use specified date format in string output | +| strip | false | Remove trailing field separators in each record ** | + +- `strip` will remove trailing commas from each line under default `FS/RS` For the example sheet: @@ -60,6 +64,7 @@ generate different types of JS objects. The function takes an options argument: | raw | `false` | Use raw values (true) or formatted strings (false) | | range | from WS | Override Range (see table below) | | header | | Control output format (see table below) | +| dateNF | fmt 14 | Use specified date format in string output | - `raw` only affects cells which have a format code (`.z`) field or a formatted text (`.w`) field. diff --git a/docbits/99_badges.md b/docbits/99_badges.md index 615917c..f8d2fb1 100644 --- a/docbits/99_badges.md +++ b/docbits/99_badges.md @@ -1,6 +1,6 @@ ## Badges -[![Build Status](https://saucelabs.com/browser-matrix/xlsx.svg)](https://saucelabs.com/u/xlsx) +[![Build Status](https://saucelabs.com/browser-matrix/sheetjs.svg)](https://saucelabs.com/u/sheetjs) [![Build Status](https://travis-ci.org/SheetJS/js-xlsx.svg?branch=master)](https://travis-ci.org/SheetJS/js-xlsx) diff --git a/test.js b/test.js index 5bde88a..cac1f91 100644 --- a/test.js +++ b/test.js @@ -1056,8 +1056,9 @@ function sheet_from_array_of_arrays(data, opts) { if(typeof cell.v === 'number') cell.t = 'n'; else if(typeof cell.v === 'boolean') cell.t = 'b'; else if(cell.v instanceof Date) { - cell.t = 'n'; cell.z = X.SSF._table[14]; - cell.v = datenum(cell.v); + cell.z = X.SSF._table[14]; + if(opts && opts.cellDates) cell.t = 'd'; + else { cell.t = 'n'; cell.v = datenum(cell.v); } } else cell.t = 's'; ws[cell_ref] = cell; @@ -1090,7 +1091,7 @@ describe('json output', function() { it('should use first-row headers and full sheet by default', function() { var json = X.utils.sheet_to_json(ws); assert.equal(json.length, data.length - 1); - assert.equal(json[0][1], true); + assert.equal(json[0][1], "TRUE"); assert.equal(json[1][2], "bar"); assert.equal(json[2][3], "qux"); assert.doesNotThrow(function() { seeker(json, [1,2,3], "sheetjs"); }); @@ -1099,7 +1100,7 @@ describe('json output', function() { it('should create array of arrays if header == 1', function() { var json = X.utils.sheet_to_json(ws, {header:1}); assert.equal(json.length, data.length); - assert.equal(json[1][0], true); + assert.equal(json[1][0], "TRUE"); assert.equal(json[2][1], "bar"); assert.equal(json[3][2], "qux"); assert.doesNotThrow(function() { seeker(json, [0,1,2], "sheetjs"); }); @@ -1109,7 +1110,7 @@ describe('json output', function() { it('should use column names if header == "A"', function() { var json = X.utils.sheet_to_json(ws, {header:'A'}); assert.equal(json.length, data.length); - assert.equal(json[1].A, true); + assert.equal(json[1].A, "TRUE"); assert.equal(json[2].B, "bar"); assert.equal(json[3].C, "qux"); assert.doesNotThrow(function() { seeker(json, "ABC", "sheetjs"); }); @@ -1119,7 +1120,7 @@ describe('json output', function() { it('should use column labels if specified', function() { var json = X.utils.sheet_to_json(ws, {header:["O","D","I","N"]}); assert.equal(json.length, data.length); - assert.equal(json[1].O, true); + assert.equal(json[1].O, "TRUE"); assert.equal(json[2].D, "bar"); assert.equal(json[3].I, "qux"); assert.doesNotThrow(function() { seeker(json, "ODI", "sheetjs"); }); @@ -1130,7 +1131,7 @@ describe('json output', function() { it('should accept custom ' + w[0] + ' range', function() { var json = X.utils.sheet_to_json(ws, {header:1, range:w[1]}); assert.equal(json.length, 3); - assert.equal(json[0][0], true); + assert.equal(json[0][0], "TRUE"); assert.equal(json[1][1], "bar"); assert.equal(json[2][2], "qux"); assert.doesNotThrow(function() { seeker(json, [0,1,2], "sheetjs"); }); @@ -1152,6 +1153,16 @@ describe('json output', function() { assert.equal(json[i].S_1, 7 + i); } }); + it('should handle raw data if requested', function() { + var _ws = sheet_from_array_of_arrays(data, {cellDates:true}); + var json = X.utils.sheet_to_json(_ws, {header:1, raw:true}); + console.log(json, typeof json[2][2]); + assert.equal(json.length, data.length); + assert.equal(json[1][0], true); + assert.equal(json[2][1], "bar"); + assert.equal(json[2][2].getTime(), new Date("2014-02-19T14:30Z").getTime()); + assert.equal(json[3][2], "qux"); + }); }); describe('js -> file -> js', function() { diff --git a/xlsx.flow.js b/xlsx.flow.js index f7d1455..5f783c4 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -1516,7 +1516,7 @@ var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g; var tagregex=/<[^>]*>/g; var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/; function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ { - var z = ([]/*:any*/); + var z = ({}/*:any*/); var eq = 0, c = 0; for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break; if(!skip_root) z[0] = tag.substr(0, eq); @@ -13860,14 +13860,16 @@ function write_zip(wb/*:Workbook*/, opts/*:WriteOpts*/)/*:ZIP*/ { zip.file('xl/_rels/workbook.' + wbext + '.rels', write_rels(opts.wbrels)); return zip; } -function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:number*/ { +function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:Array*/ { + var x = ""; switch((o||{}).type || "base64") { - case 'buffer': return f[0]; - case 'base64': return Base64.decode(f.substr(0,12)).charCodeAt(0); - case 'binary': return f.charCodeAt(0); - case 'array': return f[0]; + case 'buffer': return [f[0], f[1], f[2], f[3]]; + case 'base64': x = Base64.decode(f.substr(0,24)); break; + case 'binary': x = f; break; + case 'array': return [f[0], f[1], f[2], f[3]]; default: throw new Error("Unrecognized type " + (o ? o.type : "undefined")); } + return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)]; } function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { @@ -13885,18 +13887,19 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { } function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { - var zip, d = data, n=0; + var zip, d = data, n=[0]; var o = opts||{}; if(!o.type) o.type = (has_buf && Buffer.isBuffer(data)) ? "buffer" : "base64"; if(o.type == "file") { o.type = "buffer"; d = _fs.readFileSync(data); } - switch((n = firstbyte(d, o))) { + switch((n = firstbyte(d, o))[0]) { case 0xD0: return parse_xlscfb(CFB.read(d, o), o); case 0x09: return parse_xlscfb(s2a(o.type === 'base64' ? Base64.decode(d) : d), o); case 0x3C: return parse_xlml(d, o); - case 0x50: return read_zip(d, o); + case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break; case 0xEF: return parse_xlml(d, o); - default: throw new Error("Unsupported file " + n); + default: throw new Error("Unsupported file " + n.join("|")); } + throw new Error("Unsupported file format " + n.join("|")); } function readFileSync(filename/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ { @@ -14044,15 +14047,16 @@ function safe_decode_range(range/*:string*/)/*:Range*/ { } function safe_format_cell(cell/*:Cell*/, v/*:any*/) { - if(cell.z !== undefined) try { return (cell.w = SSF.format(cell.z, v)); } catch(e) { } - if(!cell.XF) return v; - try { return (cell.w = SSF.format(cell.XF.ifmt||0, v)); } catch(e) { return ''+v; } + var q = (cell.t == 'd' && v instanceof Date); + if(cell.z != null) try { return (cell.w = SSF.format(cell.z, q ? datenum(v) : v)); } catch(e) { } + try { return (cell.w = SSF.format((cell.XF||{}).ifmt||(q ? 14 : 0), q ? datenum(v) : v)); } catch(e) { return ''+v; } } -function format_cell(cell/*:Cell*/, v/*:any*/) { +function format_cell(cell/*:Cell*/, v/*:any*/, o/*:any*/) { if(cell == null || cell.t == null || cell.t == 'z') return ""; if(cell.w !== undefined) return cell.w; - if(v === undefined) return safe_format_cell(cell, cell.v); + if(cell.t == 'd' && !cell.z && o && o.dateNF) cell.z = o.dateNF; + if(v == undefined) return safe_format_cell(cell, cell.v); return safe_format_cell(cell, v); } @@ -14130,6 +14134,7 @@ function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) { var r = safe_decode_range(sheet["!ref"]); var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0); var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0); + var endregex = new RegExp(FS+"+$"); var row = "", rr = "", cols = []; var i = 0, cc = 0, val; var R = 0, C = 0; @@ -14150,6 +14155,7 @@ function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) { /* NOTE: Excel CSV does not support array formulae */ row += (C === r.s.c ? "" : FS) + txt; } + if(o.strip) row = row.replace(endregex,""); out += row + RS; } return out; diff --git a/xlsx.js b/xlsx.js index 668c870..08011e9 100644 --- a/xlsx.js +++ b/xlsx.js @@ -1467,7 +1467,7 @@ var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g; var tagregex=/<[^>]*>/g; var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/; function parsexmltag(tag, skip_root) { - var z = ([]); + var z = ({}); var eq = 0, c = 0; for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break; if(!skip_root) z[0] = tag.substr(0, eq); @@ -13799,13 +13799,15 @@ f = "docProps/app.xml"; return zip; } function firstbyte(f,o) { + var x = ""; switch((o||{}).type || "base64") { - case 'buffer': return f[0]; - case 'base64': return Base64.decode(f.substr(0,12)).charCodeAt(0); - case 'binary': return f.charCodeAt(0); - case 'array': return f[0]; + case 'buffer': return [f[0], f[1], f[2], f[3]]; + case 'base64': x = Base64.decode(f.substr(0,24)); break; + case 'binary': x = f; break; + case 'array': return [f[0], f[1], f[2], f[3]]; default: throw new Error("Unrecognized type " + (o ? o.type : "undefined")); } + return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)]; } function read_zip(data, opts) { @@ -13822,18 +13824,19 @@ var zip, d = data; } function readSync(data, opts) { - var zip, d = data, n=0; + var zip, d = data, n=[0]; var o = opts||{}; if(!o.type) o.type = (has_buf && Buffer.isBuffer(data)) ? "buffer" : "base64"; if(o.type == "file") { o.type = "buffer"; d = _fs.readFileSync(data); } - switch((n = firstbyte(d, o))) { + switch((n = firstbyte(d, o))[0]) { case 0xD0: return parse_xlscfb(CFB.read(d, o), o); case 0x09: return parse_xlscfb(s2a(o.type === 'base64' ? Base64.decode(d) : d), o); case 0x3C: return parse_xlml(d, o); - case 0x50: return read_zip(d, o); + case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break; case 0xEF: return parse_xlml(d, o); - default: throw new Error("Unsupported file " + n); + default: throw new Error("Unsupported file " + n.join("|")); } + throw new Error("Unsupported file format " + n.join("|")); } function readFileSync(filename, opts) { @@ -13977,15 +13980,16 @@ function safe_decode_range(range) { } function safe_format_cell(cell, v) { - if(cell.z !== undefined) try { return (cell.w = SSF.format(cell.z, v)); } catch(e) { } - if(!cell.XF) return v; - try { return (cell.w = SSF.format(cell.XF.ifmt||0, v)); } catch(e) { return ''+v; } + var q = (cell.t == 'd' && v instanceof Date); + if(cell.z != null) try { return (cell.w = SSF.format(cell.z, q ? datenum(v) : v)); } catch(e) { } + try { return (cell.w = SSF.format((cell.XF||{}).ifmt||(q ? 14 : 0), q ? datenum(v) : v)); } catch(e) { return ''+v; } } -function format_cell(cell, v) { +function format_cell(cell, v, o) { if(cell == null || cell.t == null || cell.t == 'z') return ""; if(cell.w !== undefined) return cell.w; - if(v === undefined) return safe_format_cell(cell, cell.v); + if(cell.t == 'd' && !cell.z && o && o.dateNF) cell.z = o.dateNF; + if(v == undefined) return safe_format_cell(cell, cell.v); return safe_format_cell(cell, v); } @@ -14063,6 +14067,7 @@ function sheet_to_csv(sheet, opts) { var r = safe_decode_range(sheet["!ref"]); var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0); var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0); + var endregex = new RegExp(FS+"+$"); var row = "", rr = "", cols = []; var i = 0, cc = 0, val; var R = 0, C = 0; @@ -14083,6 +14088,7 @@ function sheet_to_csv(sheet, opts) { /* NOTE: Excel CSV does not support array formulae */ row += (C === r.s.c ? "" : FS) + txt; } + if(o.strip) row = row.replace(endregex,""); out += row + RS; } return out;