version bump 0.5.5: sheet names optimization

- bookSheets option skips worksheet parsing (used by xlsx2csv)
- filtered tests (by file extension)
- XLSB formulae stubs
This commit is contained in:
SheetJS 2014-02-13 01:22:42 -05:00
parent 7e9f218f0f
commit 1dceeffbe9
14 changed files with 184 additions and 62 deletions

@ -1,3 +1,4 @@
test_files/
tests/files/
index.html
misc/coverage.html

@ -1,5 +1,6 @@
DEPS=$(wildcard bits/*.js)
TARGET=xlsx.js
FMT=xlsx xlsm xlsb
$(TARGET): $(DEPS)
cat $^ > $@
@ -23,6 +24,11 @@ init:
test mocha:
mocha -R spec
# Per-format test targets: one `test_<fmt>` target for every entry in FMT.
# Each re-runs the `test` target with FMTS set to the matched format stem.
# $(MAKE) is used instead of a literal `make` so the same make program and
# its command-line flags are passed down to the recursive invocation.
TESTFMT=$(patsubst %,test_%,$(FMT))
.PHONY: $(TESTFMT)
$(TESTFMT): test_%:
	FMTS=$* $(MAKE) test
.PHONY: jasmine
jasmine:
npm run-script test-jasmine

@ -79,10 +79,12 @@ The exported `read` and `readFile` functions accept an options argument:
| cellHTML | true | Parse rich text and save HTML to the .h field |
| cellNF | false | Save number format string to the .z field |
| sheetStubs | false | Create cell objects for stub cells |
| bookSheets | false | If true, only parse enough to get the sheet names |
- `cellFormula` only applies to constructing XLSB formulae. XLSX/XLSM formulae
are stored in plaintext, but XLSB formulae are stored in a binary format.
- Even if `cellNF` is false, formatted text (.w) will be generated
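- In some cases, sheets may be parsed even if `bookSheets` is true (when the sheet names cannot be obtained from the document properties or the workbook, parsing continues as usual).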
The defaults are enumerated in bits/84_defaults.js
@ -138,6 +140,7 @@ OSP-covered specifications:
- [MS-XLSX]: Excel (.xlsx) Extensions to the Office Open XML SpreadsheetML File Format
- [MS-XLSB]: Excel (.xlsb) Binary File Format
- [MS-OE376]: Office Implementation Information for ECMA-376 Standards Support
## Badges

@ -46,10 +46,13 @@ if(!fs.existsSync(filename)) {
if(program.dev) X.verbose = 2;
var opts = {};
if(program.listSheets) opts.bookSheets = true;
var wb;
if(program.dev) wb = X.readFile(filename);
if(program.dev) wb = X.readFile(filename, opts);
else try {
wb = X.readFile(filename);
wb = X.readFile(filename, opts);
} catch(e) {
var msg = (program.quiet) ? "" : n + "2csv: error parsing ";
msg += filename + ": " + e;

@ -1 +1 @@
XLSX.version = '0.5.4';
XLSX.version = '0.5.5';

@ -1,5 +1,5 @@
/* [MS-XLSB] 2.1.4 Record */
var recordhopper = function(data, cb) {
var recordhopper = function(data, cb, opts) {
var tmpbyte, cntbyte, length;
prep_blob(data, data.l || 0);
while(data.l < data.length) {
@ -9,7 +9,7 @@ var recordhopper = function(data, cb) {
tmpbyte = data.read_shift(1);
length = tmpbyte & 0x7F;
for(cntbyte = 1; cntbyte <4 && (tmpbyte & 0x80); ++cntbyte) length += ((tmpbyte = data.read_shift(1)) & 0x7F)<<(7*cntbyte);
var d = R.f(data, length);
var d = R.f(data, length, opts);
if(cb(d, R, RT)) return;
}
};

5
bits/70_fbin.js Normal file

@ -0,0 +1,5 @@
/* [MS-XLSB] 2.5.97.4 CellParsedFormula TODO: use similar logic to js-xls */
/* Stub: reads the leading 4-byte field and skips the remaining bytes undecoded. */
/* The return value is whatever parsenoop returns; no formula text is produced. */
var parse_CellParsedFormula = function(data, length) {
	var headerBytes = 4;
	/* leading 4-byte field (the stub called it `cce`); the value is not used yet */
	data.read_shift(headerBytes);
	/* hand the rest of the structure to parsenoop without interpreting it */
	var rest = length - headerBytes;
	return parsenoop(data, rest);
};

@ -1,3 +1,4 @@
/* [MS-XLSB] 2.4.718 BrtRowHdr */
var parse_BrtRowHdr = function(data, length) {
var z = {};
@ -18,6 +19,9 @@ var parse_BrtWsProp = function(data, length) {
return z;
};
/* [MS-XLSB] 2.4.303 BrtCellBlank */
var parse_BrtCellBlank = parsenoop;
/* [MS-XLSB] 2.4.304 BrtCellBool */
var parse_BrtCellBool = function(data, length) {
var cell = parse_Cell(data);
@ -53,7 +57,7 @@ var parse_BrtCellRk = function(data, length) {
return [cell, value, 'n'];
};
/* [MS-XLSB] 2.4.311 BrtCellRk */
/* [MS-XLSB] 2.4.314 BrtCellSt */
var parse_BrtCellSt = function(data, length) {
var cell = parse_Cell(data);
var value = parse_XLWideString(data);
@ -61,40 +65,58 @@ var parse_BrtCellSt = function(data, length) {
};
/* [MS-XLSB] 2.4.647 BrtFmlaBool */
var parse_BrtFmlaBool = function(data, length) {
var parse_BrtFmlaBool = function(data, length, opts) {
var cell = parse_Cell(data);
var value = data.read_shift(1);
data.l += length-9;
return [cell, value, 'b' /*, formula */];
var o = [cell, value, 'b'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length-9);
o[3] = ""; /* TODO */
}
else data.l += length-9;
return o;
};
/* [MS-XLSB] 2.4.648 BrtFmlaError */
var parse_BrtFmlaError = function(data, length) {
var parse_BrtFmlaError = function(data, length, opts) {
var cell = parse_Cell(data);
var fBool = data.read_shift(1);
data.l += length-9;
return [cell, fBool, 'e'];
var value = data.read_shift(1);
var o = [cell, value, 'e'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length-9);
o[3] = ""; /* TODO */
}
else data.l += length-9;
return o;
};
/* [MS-XLSB] 2.4.649 BrtFmlaNum */
var parse_BrtFmlaNum = function(data, length) {
var parse_BrtFmlaNum = function(data, length, opts) {
var cell = parse_Cell(data);
var value = parse_Xnum(data);
data.l += length-16;
return [cell, value, 'n' /*, formula */];
var o = [cell, value, 'n'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length - 16);
o[3] = ""; /* TODO */
}
else data.l += length-16;
return o;
};
/* [MS-XLSB] 2.4.650 BrtFmlaString */
var parse_BrtFmlaString = function(data, length) {
var parse_BrtFmlaString = function(data, length, opts) {
var start = data.l;
var cell = parse_Cell(data);
var value = parse_XLWideString(data);
data.l = start + length;
return [cell, value, 'str' /*, formula */];
var o = [cell, value, 'str'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, start + length - data.l);
o[3] = ""; /* TODO */
}
else data.l = start + length;
return o;
};
var parse_BrtCellBlank = parsenoop;
/* [MS-XLSB] 2.1.7.61 Worksheet */
var parse_ws_bin = function(data, opts) {
if(!data) return data;
@ -126,7 +148,7 @@ var parse_ws_bin = function(data, opts) {
case 'e': p.raw = val[1]; p.v = BErr[p.raw]; break;
case 'str': p.v = utf8read(val[1]); break;
}
if(val[3] && opts.cellFormula) p.f = val[3];
if(opts.cellFormula && val.length > 3) p.f = val[3];
if((cf = styles.CellXf[val[0].iStyleRef])) try {
p.w = SSF.format(cf.ifmt,p.v,_ssfopts);
if(opts.cellNF) p.z = SSF._table[cf.ifmt];
@ -136,7 +158,6 @@ var parse_ws_bin = function(data, opts) {
case 'BrtCellBlank': break; // (blank cell)
case 'BrtFmt': break; // TODO
case 'BrtArrFmla': break; // TODO
case 'BrtShrFmla': break; // TODO
case 'BrtBeginSheet': break;
@ -163,9 +184,13 @@ var parse_ws_bin = function(data, opts) {
case 'BrtFRTBegin': pass = true; break;
case 'BrtFRTEnd': pass = false; break;
case 'BrtEndSheet': break; // TODO
case 'BrtBeginMergeCells': break; // TODO
case 'BrtMergeCell': break; // TODO
case 'BrtEndMergeCells': break; // TODO
case 'BrtLegacyDrawing': break; // TODO
//default: if(!pass) throw new Error("Unexpected record " + R.n);
}
});
}, opts);
s["!ref"] = encode_range(ref);
return s;
};

@ -6,6 +6,8 @@ function fixopts(opts) {
['sheetStubs', false], /* emit empty cells */
['bookSheets', false], /* only try to get sheet names (no Sheets) */
['WTF', false] /* WTF mode (do not use) */
];
defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; });

@ -12,19 +12,29 @@ function parseZip(zip, opts) {
dir.workbooks.push(binname);
xlsb = true;
}
if(!opts.bookSheets) {
strs = {};
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts);
styles = {};
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
}
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0], opts);
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
var props = {}, propdata = "";
try {
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
props = propdata !== "" ? parseProps(propdata) : {};
} catch(e) { }
if(opts.bookSheets) {
if(props.Worksheets && props.SheetNames.length > 0) return { SheetNames:props.SheetNames };
else if(wb.Sheets) return { SheetNames:wb.Sheets.map(function(x) { return x.name; }) };
}
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.5.4",
"version": "0.5.5",
"author": "sheetjs",
"description": "XLSB / XLSX / XLSM parser",
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],

25
test.js

@ -4,6 +4,8 @@ var fs = require('fs'), assert = require('assert');
describe('source',function(){ it('should load', function(){ XLSX = require('./'); });});
var ex = [".xlsb", ".xlsm", ".xlsx"];
if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
console.log(ex, process.env.FMTS);
var exp = ex.map(function(x){ return x + ".pending"; });
function test_file(x){return ex.indexOf(x.substr(-5))>=0||exp.indexOf(x.substr(-13))>=0;}
@ -106,4 +108,27 @@ describe('options', function() {
assert(typeof ws[addr].h === 'undefined');
});
});
/* With default options, at least one cell of the XLSB fixture must carry a formula field (.f). */
it('should generate formulae by default', function() {
	var wb = XLSX.readFile('./test_files/formula_stress_test.xlsb');
	/* some() stops at the first cell that has a formula instead of walking every cell */
	var found = wb.SheetNames.some(function(s) {
		var ws = wb.Sheets[s];
		return Object.keys(ws).some(function(addr) {
			/* keys starting with "!" (e.g. "!ref") are sheet-level entries, not cells */
			if(addr[0] === "!") return false;
			return typeof ws[addr].f !== 'undefined';
		});
	});
	assert(found);
});
/* With cellFormula disabled, no cell of the XLSB fixture may carry a formula field (.f). */
it('should not generate formulae when requested', function() {
	var wb = XLSX.readFile('./test_files/formula_stress_test.xlsb', {cellFormula: false});
	for(var i = 0; i < wb.SheetNames.length; ++i) {
		var ws = wb.Sheets[wb.SheetNames[i]];
		var addrs = Object.keys(ws);
		for(var j = 0; j < addrs.length; ++j) {
			var addr = addrs[j];
			/* skip "!"-prefixed sheet-level keys and anything that is not an own property */
			if(addr[0] === "!" || !ws.hasOwnProperty(addr)) continue;
			assert(typeof ws[addr].f === 'undefined');
		}
	}
});
});

@ -1 +1 @@
Subproject commit 709d865dc7f9d7173bc4b5f5c6b0c5aea945589c
Subproject commit 5a2df78bfe58c087fc604450185ee3bc4fb2c077

88
xlsx.js

@ -424,7 +424,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa
make_ssf(SSF);
var XLSX = {};
(function(XLSX){
XLSX.version = '0.5.4';
XLSX.version = '0.5.5';
var current_codepage, current_cptable, cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
@ -621,7 +621,7 @@ function prep_blob(blob, pos) {
/* Skip `length` bytes: moves the read cursor `blob.l` forward without reading anything. */
function parsenoop(blob, length) {
	blob.l = blob.l + length;
}
/* [MS-XLSB] 2.1.4 Record */
var recordhopper = function(data, cb) {
var recordhopper = function(data, cb, opts) {
var tmpbyte, cntbyte, length;
prep_blob(data, data.l || 0);
while(data.l < data.length) {
@ -631,7 +631,7 @@ var recordhopper = function(data, cb) {
tmpbyte = data.read_shift(1);
length = tmpbyte & 0x7F;
for(cntbyte = 1; cntbyte <4 && (tmpbyte & 0x80); ++cntbyte) length += ((tmpbyte = data.read_shift(1)) & 0x7F)<<(7*cntbyte);
var d = R.f(data, length);
var d = R.f(data, length, opts);
if(cb(d, R, RT)) return;
}
};
@ -1235,6 +1235,11 @@ function insertCommentsIntoSheet(sheetName, sheet, comments) {
});
}
/* [MS-XLSB] 2.5.97.4 CellParsedFormula TODO: use similar logic to js-xls */
/* Stub: reads the leading 4-byte field and skips the remaining bytes undecoded. */
/* The return value is whatever parsenoop returns; no formula text is produced. */
var parse_CellParsedFormula = function(data, length) {
	var headerBytes = 4;
	/* leading 4-byte field (the stub called it `cce`); the value is not used yet */
	data.read_shift(headerBytes);
	/* hand the rest of the structure to parsenoop without interpreting it */
	var rest = length - headerBytes;
	return parsenoop(data, rest);
};
var strs = {}; // shared strings
var _ssfopts = {}; // spreadsheet formatting options
@ -1325,6 +1330,7 @@ function parse_ws_xml(data, opts) {
return s;
}
/* [MS-XLSB] 2.4.718 BrtRowHdr */
var parse_BrtRowHdr = function(data, length) {
var z = {};
@ -1345,6 +1351,9 @@ var parse_BrtWsProp = function(data, length) {
return z;
};
/* [MS-XLSB] 2.4.303 BrtCellBlank */
var parse_BrtCellBlank = parsenoop;
/* [MS-XLSB] 2.4.304 BrtCellBool */
var parse_BrtCellBool = function(data, length) {
var cell = parse_Cell(data);
@ -1380,7 +1389,7 @@ var parse_BrtCellRk = function(data, length) {
return [cell, value, 'n'];
};
/* [MS-XLSB] 2.4.311 BrtCellRk */
/* [MS-XLSB] 2.4.314 BrtCellSt */
var parse_BrtCellSt = function(data, length) {
var cell = parse_Cell(data);
var value = parse_XLWideString(data);
@ -1388,40 +1397,58 @@ var parse_BrtCellSt = function(data, length) {
};
/* [MS-XLSB] 2.4.647 BrtFmlaBool */
var parse_BrtFmlaBool = function(data, length) {
var parse_BrtFmlaBool = function(data, length, opts) {
var cell = parse_Cell(data);
var value = data.read_shift(1);
data.l += length-9;
return [cell, value, 'b' /*, formula */];
var o = [cell, value, 'b'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length-9);
o[3] = ""; /* TODO */
}
else data.l += length-9;
return o;
};
/* [MS-XLSB] 2.4.648 BrtFmlaError */
var parse_BrtFmlaError = function(data, length) {
var parse_BrtFmlaError = function(data, length, opts) {
var cell = parse_Cell(data);
var fBool = data.read_shift(1);
data.l += length-9;
return [cell, fBool, 'e'];
var value = data.read_shift(1);
var o = [cell, value, 'e'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length-9);
o[3] = ""; /* TODO */
}
else data.l += length-9;
return o;
};
/* [MS-XLSB] 2.4.649 BrtFmlaNum */
var parse_BrtFmlaNum = function(data, length) {
var parse_BrtFmlaNum = function(data, length, opts) {
var cell = parse_Cell(data);
var value = parse_Xnum(data);
data.l += length-16;
return [cell, value, 'n' /*, formula */];
var o = [cell, value, 'n'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length - 16);
o[3] = ""; /* TODO */
}
else data.l += length-16;
return o;
};
/* [MS-XLSB] 2.4.650 BrtFmlaString */
var parse_BrtFmlaString = function(data, length) {
var parse_BrtFmlaString = function(data, length, opts) {
var start = data.l;
var cell = parse_Cell(data);
var value = parse_XLWideString(data);
data.l = start + length;
return [cell, value, 'str' /*, formula */];
var o = [cell, value, 'str'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, start + length - data.l);
o[3] = ""; /* TODO */
}
else data.l = start + length;
return o;
};
var parse_BrtCellBlank = parsenoop;
/* [MS-XLSB] 2.1.7.61 Worksheet */
var parse_ws_bin = function(data, opts) {
if(!data) return data;
@ -1453,7 +1480,7 @@ var parse_ws_bin = function(data, opts) {
case 'e': p.raw = val[1]; p.v = BErr[p.raw]; break;
case 'str': p.v = utf8read(val[1]); break;
}
if(val[3] && opts.cellFormula) p.f = val[3];
if(opts.cellFormula && val.length > 3) p.f = val[3];
if((cf = styles.CellXf[val[0].iStyleRef])) try {
p.w = SSF.format(cf.ifmt,p.v,_ssfopts);
if(opts.cellNF) p.z = SSF._table[cf.ifmt];
@ -1463,7 +1490,6 @@ var parse_ws_bin = function(data, opts) {
case 'BrtCellBlank': break; // (blank cell)
case 'BrtFmt': break; // TODO
case 'BrtArrFmla': break; // TODO
case 'BrtShrFmla': break; // TODO
case 'BrtBeginSheet': break;
@ -1490,9 +1516,13 @@ var parse_ws_bin = function(data, opts) {
case 'BrtFRTBegin': pass = true; break;
case 'BrtFRTEnd': pass = false; break;
case 'BrtEndSheet': break; // TODO
case 'BrtBeginMergeCells': break; // TODO
case 'BrtMergeCell': break; // TODO
case 'BrtEndMergeCells': break; // TODO
case 'BrtLegacyDrawing': break; // TODO
//default: if(!pass) throw new Error("Unexpected record " + R.n);
}
});
}, opts);
s["!ref"] = encode_range(ref);
return s;
};
@ -2593,6 +2623,8 @@ function fixopts(opts) {
['sheetStubs', false], /* emit empty cells */
['bookSheets', false], /* only try to get sheet names (no Sheets) */
['WTF', false] /* WTF mode (do not use) */
];
defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; });
@ -2611,19 +2643,29 @@ function parseZip(zip, opts) {
dir.workbooks.push(binname);
xlsb = true;
}
if(!opts.bookSheets) {
strs = {};
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts);
styles = {};
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
}
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0], opts);
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
var props = {}, propdata = "";
try {
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
props = propdata !== "" ? parseProps(propdata) : {};
} catch(e) { }
if(opts.bookSheets) {
if(props.Worksheets && props.SheetNames.length > 0) return { SheetNames:props.SheetNames };
else if(wb.Sheets) return { SheetNames:wb.Sheets.map(function(x) { return x.name; }) };
}
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;