utility improvements

- sheet_to_csv strip option (fixes  h/t @davidworkman9)
- sheet_to_json dateNF option (fixes  h/t @rotemtam)
- file type detection expanded to 4 byte magic number
This commit is contained in:
SheetJS 2017-03-22 03:50:11 -04:00
parent c7010eec35
commit a846f7184d
10 changed files with 93 additions and 56 deletions

@ -677,7 +677,7 @@ The exported `read` and `readFile` functions accept an options argument:
| cellHTML | true | Parse rich text and save HTML to the .h field |
| cellNF | false | Save number format string to the .z field |
| cellStyles | false | Save style/theme info to the .s field |
| cellDates | false | Store dates as type `d` (default is `n`) ** |
| cellDates | false | Store dates as type `d` (default is `n`) |
| sheetStubs | false | Create cell objects of type `z` for stub cells |
| sheetRows | 0 | If >0, read the first `sheetRows` rows ** |
| bookDeps | false | If true, parse calculation chains |
@ -701,7 +701,6 @@ The exported `read` and `readFile` functions accept an options argument:
- `sheetRows-1` rows will be generated when looking at the JSON object output
(since the header row is counted as a row when parsing the data)
- `bookVBA` merely exposes the raw vba object. It does not parse the data.
- `cellDates` currently does not convert numerical dates to JS dates.
- Currently only XOR encryption is supported. An unsupported-encryption error
will be thrown for files that use any other encryption method.
- WTF is mainly for development. By default, the parser will suppress read
@ -826,6 +825,10 @@ produces CSV output. The function takes an options argument:
| :---------- | :------: | :-------------------------------------------------- |
| FS | `","` | "Field Separator" delimiter between fields |
| RS | `"\n"` | "Record Separator" delimiter between rows |
| dateNF | fmt 14 | Use specified date format in string output |
| strip | false | Remove trailing field separators in each record ** |
- `strip` removes trailing commas from each line when the default `FS` and `RS` values are used
For the example sheet:
@ -852,6 +855,7 @@ generate different types of JS objects. The function takes an options argument:
| raw | `false` | Use raw values (true) or formatted strings (false) |
| range | from WS | Override Range (see table below) |
| header | | Control output format (see table below) |
| dateNF | fmt 14 | Use specified date format in string output |
- `raw` only affects cells which have a format code (`.z`) field or a formatted
text (`.w`) field.

@ -2,7 +2,7 @@ var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g;
var tagregex=/<[^>]*>/g;
var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/;
function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ {
var z = ([]/*:any*/);
var z = ({}/*:any*/);
var eq = 0, c = 0;
for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break;
if(!skip_root) z[0] = tag.substr(0, eq);

@ -1,11 +1,13 @@
/* firstbyte: read the leading byte(s) of the raw input `f` for file type detection.
 * NOTE(review): this span is a diff hunk, so lines from the previous and the
 * updated version appear together (two headers, two sets of `case` labels).
 *   f - raw data; how it is read depends on o.type
 *   o - optional options; o.type picks the encoding, "base64" when missing
 * The earlier lines return only byte 0 as a number; the updated lines return a
 * four-element array [b0, b1, b2, b3].
 * Throws Error("Unrecognized type ...") for any other o.type value. */
function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:number*/ {
function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:Array<number>*/ {
var x = "";
switch((o||{}).type || "base64") {
case 'buffer': return f[0];
case 'base64': return Base64.decode(f.substr(0,12)).charCodeAt(0);
case 'binary': return f.charCodeAt(0);
case 'array': return f[0];
/* indexable inputs: take the first four elements directly */
case 'buffer': return [f[0], f[1], f[2], f[3]];
/* string inputs: stage the text in x and extract char codes after the switch;
 * only a 24-character prefix of the base64 text is decoded, not the whole payload */
case 'base64': x = Base64.decode(f.substr(0,24)); break;
case 'binary': x = f; break;
case 'array': return [f[0], f[1], f[2], f[3]];
default: throw new Error("Unrecognized type " + (o ? o.type : "undefined"));
}
/* charCodeAt yields NaN for positions past the end of x */
return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)];
}
function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
@ -23,18 +25,19 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
}
/* readSync: pick a parser for `data` from its leading bytes and return that parser's result.
 * NOTE(review): this span is a diff hunk, so lines from the previous and the
 * updated version appear together (two `var` lines, two `switch` headers, ...).
 *   data - raw input, or a filename when opts.type is "file"
 *   opts - optional options; when given, its `type` field is filled in or rewritten here
 * Throws Error when the leading bytes match none of the handled cases. */
function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
var zip, d = data, n=0;
var zip, d = data, n=[0];
var o = opts||{};
/* no explicit type: "buffer" when Buffer support is present and data is a Buffer, else "base64" */
if(!o.type) o.type = (has_buf && Buffer.isBuffer(data)) ? "buffer" : "base64";
/* "file": load the contents synchronously and continue as a buffer */
if(o.type == "file") { o.type = "buffer"; d = _fs.readFileSync(data); }
switch((n = firstbyte(d, o))) {
/* updated form: firstbyte returns an array; dispatch on its first element */
switch((n = firstbyte(d, o))[0]) {
case 0xD0: return parse_xlscfb(CFB.read(d, o), o);
case 0x09: return parse_xlscfb(s2a(o.type === 'base64' ? Base64.decode(d) : d), o);
/* 0x3C is ASCII '<' */
case 0x3C: return parse_xlml(d, o);
case 0x50: return read_zip(d, o);
/* updated form: 0x50 0x4B is ASCII "PK" (presumably the zip signature - confirm);
 * bytes 2 and 3 must also be below 0x20, otherwise control falls to the throw after the switch */
case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break;
/* NOTE(review): 0xEF is presumably the first byte of a UTF-8 byte order mark - confirm */
case 0xEF: return parse_xlml(d, o);
default: throw new Error("Unsupported file " + n);
default: throw new Error("Unsupported file " + n.join("|"));
}
/* reached only when a case breaks out of the switch without returning */
throw new Error("Unsupported file format " + n.join("|"));
}
function readFileSync(filename/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ {

@ -60,15 +60,16 @@ function safe_decode_range(range/*:string*/)/*:Range*/ {
}
function safe_format_cell(cell/*:Cell*/, v/*:any*/) {
if(cell.z !== undefined) try { return (cell.w = SSF.format(cell.z, v)); } catch(e) { }
if(!cell.XF) return v;
try { return (cell.w = SSF.format(cell.XF.ifmt||0, v)); } catch(e) { return ''+v; }
var q = (cell.t == 'd' && v instanceof Date);
if(cell.z != null) try { return (cell.w = SSF.format(cell.z, q ? datenum(v) : v)); } catch(e) { }
try { return (cell.w = SSF.format((cell.XF||{}).ifmt||(q ? 14 : 0), q ? datenum(v) : v)); } catch(e) { return ''+v; }
}
/* format_cell: return the display text for `cell`.
 * NOTE(review): this span is a diff hunk, so lines from the previous and the
 * updated version appear together (two headers, two `v` checks).
 *   cell - cell object; a missing cell, a missing type or type 'z' yields ""
 *   v    - optional value to format in place of cell.v
 *   o    - (updated header only) optional options; o.dateNF is a date format
 * A cached cell.w is returned unchanged when present. */
function format_cell(cell/*:Cell*/, v/*:any*/) {
function format_cell(cell/*:Cell*/, v/*:any*/, o/*:any*/) {
if(cell == null || cell.t == null || cell.t == 'z') return "";
if(cell.w !== undefined) return cell.w;
if(v === undefined) return safe_format_cell(cell, cell.v);
/* a date cell with no format of its own takes o.dateNF; this writes to cell.z */
if(cell.t == 'd' && !cell.z && o && o.dateNF) cell.z = o.dateNF;
/* loose == also treats a null v as absent */
if(v == undefined) return safe_format_cell(cell, cell.v);
return safe_format_cell(cell, v);
}
@ -146,6 +147,7 @@ function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) {
var r = safe_decode_range(sheet["!ref"]);
var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0);
var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0);
var endregex = new RegExp(FS+"+$");
var row = "", rr = "", cols = [];
var i = 0, cc = 0, val;
var R = 0, C = 0;
@ -166,6 +168,7 @@ function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) {
/* NOTE: Excel CSV does not support array formulae */
row += (C === r.s.c ? "" : FS) + txt;
}
if(o.strip) row = row.replace(endregex,"");
out += row + RS;
}
return out;

@ -9,7 +9,7 @@ The exported `read` and `readFile` functions accept an options argument:
| cellHTML | true | Parse rich text and save HTML to the .h field |
| cellNF | false | Save number format string to the .z field |
| cellStyles | false | Save style/theme info to the .s field |
| cellDates | false | Store dates as type `d` (default is `n`) ** |
| cellDates | false | Store dates as type `d` (default is `n`) |
| sheetStubs | false | Create cell objects of type `z` for stub cells |
| sheetRows | 0 | If >0, read the first `sheetRows` rows ** |
| bookDeps | false | If true, parse calculation chains |
@ -33,7 +33,6 @@ The exported `read` and `readFile` functions accept an options argument:
- `sheetRows-1` rows will be generated when looking at the JSON object output
(since the header row is counted as a row when parsing the data)
- `bookVBA` merely exposes the raw vba object. It does not parse the data.
- `cellDates` currently does not convert numerical dates to JS dates.
- Currently only XOR encryption is supported. An unsupported-encryption error
will be thrown for files that use any other encryption method.
- WTF is mainly for development. By default, the parser will suppress read

@ -34,6 +34,10 @@ produces CSV output. The function takes an options argument:
| :---------- | :------: | :-------------------------------------------------- |
| FS | `","` | "Field Separator" delimiter between fields |
| RS | `"\n"` | "Record Separator" delimiter between rows |
| dateNF | fmt 14 | Use specified date format in string output |
| strip | false | Remove trailing field separators in each record ** |
- `strip` removes trailing commas from each line when the default `FS` and `RS` values are used
For the example sheet:
@ -60,6 +64,7 @@ generate different types of JS objects. The function takes an options argument:
| raw | `false` | Use raw values (true) or formatted strings (false) |
| range | from WS | Override Range (see table below) |
| header | | Control output format (see table below) |
| dateNF | fmt 14 | Use specified date format in string output |
- `raw` only affects cells which have a format code (`.z`) field or a formatted
text (`.w`) field.

@ -1,6 +1,6 @@
## Badges
[![Build Status](https://saucelabs.com/browser-matrix/xlsx.svg)](https://saucelabs.com/u/xlsx)
[![Build Status](https://saucelabs.com/browser-matrix/sheetjs.svg)](https://saucelabs.com/u/sheetjs)
[![Build Status](https://travis-ci.org/SheetJS/js-xlsx.svg?branch=master)](https://travis-ci.org/SheetJS/js-xlsx)

25
test.js

@ -1056,8 +1056,9 @@ function sheet_from_array_of_arrays(data, opts) {
if(typeof cell.v === 'number') cell.t = 'n';
else if(typeof cell.v === 'boolean') cell.t = 'b';
else if(cell.v instanceof Date) {
cell.t = 'n'; cell.z = X.SSF._table[14];
cell.v = datenum(cell.v);
cell.z = X.SSF._table[14];
if(opts && opts.cellDates) cell.t = 'd';
else { cell.t = 'n'; cell.v = datenum(cell.v); }
}
else cell.t = 's';
ws[cell_ref] = cell;
@ -1090,7 +1091,7 @@ describe('json output', function() {
it('should use first-row headers and full sheet by default', function() {
var json = X.utils.sheet_to_json(ws);
assert.equal(json.length, data.length - 1);
assert.equal(json[0][1], true);
assert.equal(json[0][1], "TRUE");
assert.equal(json[1][2], "bar");
assert.equal(json[2][3], "qux");
assert.doesNotThrow(function() { seeker(json, [1,2,3], "sheetjs"); });
@ -1099,7 +1100,7 @@ describe('json output', function() {
it('should create array of arrays if header == 1', function() {
var json = X.utils.sheet_to_json(ws, {header:1});
assert.equal(json.length, data.length);
assert.equal(json[1][0], true);
assert.equal(json[1][0], "TRUE");
assert.equal(json[2][1], "bar");
assert.equal(json[3][2], "qux");
assert.doesNotThrow(function() { seeker(json, [0,1,2], "sheetjs"); });
@ -1109,7 +1110,7 @@ describe('json output', function() {
it('should use column names if header == "A"', function() {
var json = X.utils.sheet_to_json(ws, {header:'A'});
assert.equal(json.length, data.length);
assert.equal(json[1].A, true);
assert.equal(json[1].A, "TRUE");
assert.equal(json[2].B, "bar");
assert.equal(json[3].C, "qux");
assert.doesNotThrow(function() { seeker(json, "ABC", "sheetjs"); });
@ -1119,7 +1120,7 @@ describe('json output', function() {
it('should use column labels if specified', function() {
var json = X.utils.sheet_to_json(ws, {header:["O","D","I","N"]});
assert.equal(json.length, data.length);
assert.equal(json[1].O, true);
assert.equal(json[1].O, "TRUE");
assert.equal(json[2].D, "bar");
assert.equal(json[3].I, "qux");
assert.doesNotThrow(function() { seeker(json, "ODI", "sheetjs"); });
@ -1130,7 +1131,7 @@ describe('json output', function() {
it('should accept custom ' + w[0] + ' range', function() {
var json = X.utils.sheet_to_json(ws, {header:1, range:w[1]});
assert.equal(json.length, 3);
assert.equal(json[0][0], true);
assert.equal(json[0][0], "TRUE");
assert.equal(json[1][1], "bar");
assert.equal(json[2][2], "qux");
assert.doesNotThrow(function() { seeker(json, [0,1,2], "sheetjs"); });
@ -1152,6 +1153,16 @@ describe('json output', function() {
assert.equal(json[i].S_1, 7 + i);
}
});
/* raw:true output is checked against the stored cell values: a boolean, strings and a Date */
it('should handle raw data if requested', function() {
/* cellDates:true makes sheet_from_array_of_arrays keep Date inputs as type 'd' cells */
var _ws = sheet_from_array_of_arrays(data, {cellDates:true});
var json = X.utils.sheet_to_json(_ws, {header:1, raw:true});
assert.equal(json.length, data.length);
assert.equal(json[1][0], true);
assert.equal(json[2][1], "bar");
/* compare timestamps, since assert.equal on two Date objects would compare identity */
assert.equal(json[2][2].getTime(), new Date("2014-02-19T14:30Z").getTime());
assert.equal(json[3][2], "qux");
});
});
describe('js -> file -> js', function() {

@ -1516,7 +1516,7 @@ var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g;
var tagregex=/<[^>]*>/g;
var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/;
function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ {
var z = ([]/*:any*/);
var z = ({}/*:any*/);
var eq = 0, c = 0;
for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break;
if(!skip_root) z[0] = tag.substr(0, eq);
@ -13860,14 +13860,16 @@ function write_zip(wb/*:Workbook*/, opts/*:WriteOpts*/)/*:ZIP*/ {
zip.file('xl/_rels/workbook.' + wbext + '.rels', write_rels(opts.wbrels));
return zip;
}
/* firstbyte: read the leading byte(s) of the raw input `f` for file type detection.
 * NOTE(review): this span is a diff hunk, so lines from the previous and the
 * updated version appear together (two headers, two sets of `case` labels).
 *   f - raw data; how it is read depends on o.type
 *   o - optional options; o.type picks the encoding, "base64" when missing
 * The earlier lines return only byte 0 as a number; the updated lines return a
 * four-element array [b0, b1, b2, b3].
 * Throws Error("Unrecognized type ...") for any other o.type value. */
function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:number*/ {
function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:Array<number>*/ {
var x = "";
switch((o||{}).type || "base64") {
case 'buffer': return f[0];
case 'base64': return Base64.decode(f.substr(0,12)).charCodeAt(0);
case 'binary': return f.charCodeAt(0);
case 'array': return f[0];
/* indexable inputs: take the first four elements directly */
case 'buffer': return [f[0], f[1], f[2], f[3]];
/* string inputs: stage the text in x and extract char codes after the switch;
 * only a 24-character prefix of the base64 text is decoded, not the whole payload */
case 'base64': x = Base64.decode(f.substr(0,24)); break;
case 'binary': x = f; break;
case 'array': return [f[0], f[1], f[2], f[3]];
default: throw new Error("Unrecognized type " + (o ? o.type : "undefined"));
}
/* charCodeAt yields NaN for positions past the end of x */
return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)];
}
function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
@ -13885,18 +13887,19 @@ function read_zip(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
}
/* readSync: pick a parser for `data` from its leading bytes and return that parser's result.
 * NOTE(review): this span is a diff hunk, so lines from the previous and the
 * updated version appear together (two `var` lines, two `switch` headers, ...).
 *   data - raw input, or a filename when opts.type is "file"
 *   opts - optional options; when given, its `type` field is filled in or rewritten here
 * Throws Error when the leading bytes match none of the handled cases. */
function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
var zip, d = data, n=0;
var zip, d = data, n=[0];
var o = opts||{};
/* no explicit type: "buffer" when Buffer support is present and data is a Buffer, else "base64" */
if(!o.type) o.type = (has_buf && Buffer.isBuffer(data)) ? "buffer" : "base64";
/* "file": load the contents synchronously and continue as a buffer */
if(o.type == "file") { o.type = "buffer"; d = _fs.readFileSync(data); }
switch((n = firstbyte(d, o))) {
/* updated form: firstbyte returns an array; dispatch on its first element */
switch((n = firstbyte(d, o))[0]) {
case 0xD0: return parse_xlscfb(CFB.read(d, o), o);
case 0x09: return parse_xlscfb(s2a(o.type === 'base64' ? Base64.decode(d) : d), o);
/* 0x3C is ASCII '<' */
case 0x3C: return parse_xlml(d, o);
case 0x50: return read_zip(d, o);
/* updated form: 0x50 0x4B is ASCII "PK" (presumably the zip signature - confirm);
 * bytes 2 and 3 must also be below 0x20, otherwise control falls to the throw after the switch */
case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break;
/* NOTE(review): 0xEF is presumably the first byte of a UTF-8 byte order mark - confirm */
case 0xEF: return parse_xlml(d, o);
default: throw new Error("Unsupported file " + n);
default: throw new Error("Unsupported file " + n.join("|"));
}
/* reached only when a case breaks out of the switch without returning */
throw new Error("Unsupported file format " + n.join("|"));
}
function readFileSync(filename/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
@ -14044,15 +14047,16 @@ function safe_decode_range(range/*:string*/)/*:Range*/ {
}
function safe_format_cell(cell/*:Cell*/, v/*:any*/) {
if(cell.z !== undefined) try { return (cell.w = SSF.format(cell.z, v)); } catch(e) { }
if(!cell.XF) return v;
try { return (cell.w = SSF.format(cell.XF.ifmt||0, v)); } catch(e) { return ''+v; }
var q = (cell.t == 'd' && v instanceof Date);
if(cell.z != null) try { return (cell.w = SSF.format(cell.z, q ? datenum(v) : v)); } catch(e) { }
try { return (cell.w = SSF.format((cell.XF||{}).ifmt||(q ? 14 : 0), q ? datenum(v) : v)); } catch(e) { return ''+v; }
}
/* format_cell: return the display text for `cell`.
 * NOTE(review): this span is a diff hunk, so lines from the previous and the
 * updated version appear together (two headers, two `v` checks).
 *   cell - cell object; a missing cell, a missing type or type 'z' yields ""
 *   v    - optional value to format in place of cell.v
 *   o    - (updated header only) optional options; o.dateNF is a date format
 * A cached cell.w is returned unchanged when present. */
function format_cell(cell/*:Cell*/, v/*:any*/) {
function format_cell(cell/*:Cell*/, v/*:any*/, o/*:any*/) {
if(cell == null || cell.t == null || cell.t == 'z') return "";
if(cell.w !== undefined) return cell.w;
if(v === undefined) return safe_format_cell(cell, cell.v);
/* a date cell with no format of its own takes o.dateNF; this writes to cell.z */
if(cell.t == 'd' && !cell.z && o && o.dateNF) cell.z = o.dateNF;
/* loose == also treats a null v as absent */
if(v == undefined) return safe_format_cell(cell, cell.v);
return safe_format_cell(cell, v);
}
@ -14130,6 +14134,7 @@ function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) {
var r = safe_decode_range(sheet["!ref"]);
var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0);
var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0);
var endregex = new RegExp(FS+"+$");
var row = "", rr = "", cols = [];
var i = 0, cc = 0, val;
var R = 0, C = 0;
@ -14150,6 +14155,7 @@ function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) {
/* NOTE: Excel CSV does not support array formulae */
row += (C === r.s.c ? "" : FS) + txt;
}
if(o.strip) row = row.replace(endregex,"");
out += row + RS;
}
return out;

34
xlsx.js

@ -1467,7 +1467,7 @@ var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g;
var tagregex=/<[^>]*>/g;
var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/;
function parsexmltag(tag, skip_root) {
var z = ([]);
var z = ({});
var eq = 0, c = 0;
for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break;
if(!skip_root) z[0] = tag.substr(0, eq);
@ -13799,13 +13799,15 @@ f = "docProps/app.xml";
return zip;
}
/* firstbyte: read the leading byte(s) of the raw input `f` for file type detection.
 * NOTE(review): this span is a diff hunk, so lines from the previous and the
 * updated version appear together (two sets of `case` labels).
 *   f - raw data; how it is read depends on o.type
 *   o - optional options; o.type picks the encoding, "base64" when missing
 * The earlier lines return only byte 0 as a number; the updated lines return a
 * four-element array [b0, b1, b2, b3].
 * Throws Error("Unrecognized type ...") for any other o.type value. */
function firstbyte(f,o) {
var x = "";
switch((o||{}).type || "base64") {
case 'buffer': return f[0];
case 'base64': return Base64.decode(f.substr(0,12)).charCodeAt(0);
case 'binary': return f.charCodeAt(0);
case 'array': return f[0];
/* indexable inputs: take the first four elements directly */
case 'buffer': return [f[0], f[1], f[2], f[3]];
/* string inputs: stage the text in x and extract char codes after the switch;
 * only a 24-character prefix of the base64 text is decoded, not the whole payload */
case 'base64': x = Base64.decode(f.substr(0,24)); break;
case 'binary': x = f; break;
case 'array': return [f[0], f[1], f[2], f[3]];
default: throw new Error("Unrecognized type " + (o ? o.type : "undefined"));
}
/* charCodeAt yields NaN for positions past the end of x */
return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)];
}
function read_zip(data, opts) {
@ -13822,18 +13824,19 @@ var zip, d = data;
}
/* readSync: pick a parser for `data` from its leading bytes and return that parser's result.
 * NOTE(review): this span is a diff hunk, so lines from the previous and the
 * updated version appear together (two `var` lines, two `switch` headers, ...).
 *   data - raw input, or a filename when opts.type is "file"
 *   opts - optional options; when given, its `type` field is filled in or rewritten here
 * Throws Error when the leading bytes match none of the handled cases. */
function readSync(data, opts) {
var zip, d = data, n=0;
var zip, d = data, n=[0];
var o = opts||{};
/* no explicit type: "buffer" when Buffer support is present and data is a Buffer, else "base64" */
if(!o.type) o.type = (has_buf && Buffer.isBuffer(data)) ? "buffer" : "base64";
/* "file": load the contents synchronously and continue as a buffer */
if(o.type == "file") { o.type = "buffer"; d = _fs.readFileSync(data); }
switch((n = firstbyte(d, o))) {
/* updated form: firstbyte returns an array; dispatch on its first element */
switch((n = firstbyte(d, o))[0]) {
case 0xD0: return parse_xlscfb(CFB.read(d, o), o);
case 0x09: return parse_xlscfb(s2a(o.type === 'base64' ? Base64.decode(d) : d), o);
/* 0x3C is ASCII '<' */
case 0x3C: return parse_xlml(d, o);
case 0x50: return read_zip(d, o);
/* updated form: 0x50 0x4B is ASCII "PK" (presumably the zip signature - confirm);
 * bytes 2 and 3 must also be below 0x20, otherwise control falls to the throw after the switch */
case 0x50: if(n[1] == 0x4B && n[2] < 0x20 && n[3] < 0x20) return read_zip(d, o); break;
/* NOTE(review): 0xEF is presumably the first byte of a UTF-8 byte order mark - confirm */
case 0xEF: return parse_xlml(d, o);
default: throw new Error("Unsupported file " + n);
default: throw new Error("Unsupported file " + n.join("|"));
}
/* reached only when a case breaks out of the switch without returning */
throw new Error("Unsupported file format " + n.join("|"));
}
function readFileSync(filename, opts) {
@ -13977,15 +13980,16 @@ function safe_decode_range(range) {
}
function safe_format_cell(cell, v) {
if(cell.z !== undefined) try { return (cell.w = SSF.format(cell.z, v)); } catch(e) { }
if(!cell.XF) return v;
try { return (cell.w = SSF.format(cell.XF.ifmt||0, v)); } catch(e) { return ''+v; }
var q = (cell.t == 'd' && v instanceof Date);
if(cell.z != null) try { return (cell.w = SSF.format(cell.z, q ? datenum(v) : v)); } catch(e) { }
try { return (cell.w = SSF.format((cell.XF||{}).ifmt||(q ? 14 : 0), q ? datenum(v) : v)); } catch(e) { return ''+v; }
}
/* format_cell: return the display text for `cell`.
 * NOTE(review): this span is a diff hunk, so lines from the previous and the
 * updated version appear together (two headers, two `v` checks).
 *   cell - cell object; a missing cell, a missing type or type 'z' yields ""
 *   v    - optional value to format in place of cell.v
 *   o    - (updated header only) optional options; o.dateNF is a date format
 * A cached cell.w is returned unchanged when present. */
function format_cell(cell, v) {
function format_cell(cell, v, o) {
if(cell == null || cell.t == null || cell.t == 'z') return "";
if(cell.w !== undefined) return cell.w;
if(v === undefined) return safe_format_cell(cell, cell.v);
/* a date cell with no format of its own takes o.dateNF; this writes to cell.z */
if(cell.t == 'd' && !cell.z && o && o.dateNF) cell.z = o.dateNF;
/* loose == also treats a null v as absent */
if(v == undefined) return safe_format_cell(cell, cell.v);
return safe_format_cell(cell, v);
}
@ -14063,6 +14067,7 @@ function sheet_to_csv(sheet, opts) {
var r = safe_decode_range(sheet["!ref"]);
var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0);
var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0);
var endregex = new RegExp(FS+"+$");
var row = "", rr = "", cols = [];
var i = 0, cc = 0, val;
var R = 0, C = 0;
@ -14083,6 +14088,7 @@ function sheet_to_csv(sheet, opts) {
/* NOTE: Excel CSV does not support array formulae */
row += (C === r.s.c ? "" : FS) + txt;
}
if(o.strip) row = row.replace(endregex,"");
out += row + RS;
}
return out;