version bump 0.5.5: sheet names optimization

- bookSheets option skips worksheet parsing (used by xlsx2csv)
- filtered tests (by file extension)
- XLSB formulae stubs
This commit is contained in:
SheetJS 2014-02-13 01:22:42 -05:00
parent 7e9f218f0f
commit 1dceeffbe9
14 changed files with 184 additions and 62 deletions

@ -1,3 +1,4 @@
test_files/
tests/files/
index.html
misc/coverage.html

@ -1,5 +1,6 @@
DEPS=$(wildcard bits/*.js)
TARGET=xlsx.js
FMT=xlsx xlsm xlsb
$(TARGET): $(DEPS)
cat $^ > $@
@ -23,6 +24,11 @@ init:
test mocha:
mocha -R spec
# Per-format test targets: one test_<fmt> target for every entry in FMT.
# Each target re-runs the `test` target with FMTS set to that single format.
TESTFMT=$(patsubst %,test_%,$(FMT))
.PHONY: $(TESTFMT)
$(TESTFMT): test_%:
	# $(MAKE) rather than a literal `make`, so the sub-make is the same program and inherits flags such as -n / -j
	FMTS=$* $(MAKE) test
.PHONY: jasmine
jasmine:
npm run-script test-jasmine

@ -49,7 +49,7 @@ For more details:
- `index.html` is the live demo
- `bits/90_utils.js` contains the logic for generating CSV and JSON from sheets
## Cell Object Description
## Cell Object Description
`.SheetNames` is an ordered list of the sheets in the workbook
@ -65,7 +65,7 @@ that does not start with `!` corresponds to a cell (using `A-1` notation).
- `.r` : the rich text encoding of a cell text (if applicable)
- `.h` : an HTML rendering of the rich text (if applicable)
- `.c` : comments associated with the cell
- `.z` : the number format string associated with the cell (if requested)
- `.z` : the number format string associated with the cell (if requested)
For dates, `.v` holds the raw date code from the sheet and `.w` holds the text
@ -79,12 +79,14 @@ The exported `read` and `readFile` functions accept an options argument:
| cellHTML | true | Parse rich text and save HTML to the .h field |
| cellNF | false | Save number format string to the .z field |
| sheetStubs | false | Create cell objects for stub cells |
| bookSheets | false | If true, only parse enough to get the sheet names |
- `cellFormula` only applies to constructing XLSB formulae. XLSX/XLSM formulae
are stored in plaintext, but XLSB formulae are stored in a binary format.
- Even if `cellNF` is false, formatted text (.w) will be generated
- In some cases, sheets may be parsed even if `bookSheets` is true.
The defaults are enumerated in bits/84_defaults.js
The defaults are enumerated in bits/84_defaults.js
## Tested Environments
@ -118,7 +120,7 @@ $ open -a Chromium.app http://localhost:8000/stress.html
## Contributing
Due to the precarious nature of the Open Specifications Promise, it is very important to ensure code is cleanroom. Consult CONTRIBUTING.md
Due to the precarious nature of the Open Specifications Promise, it is very important to ensure code is cleanroom. Consult CONTRIBUTING.md
## XLS Support
@ -138,6 +140,7 @@ OSP-covered specifications:
- [MS-XLSX]: Excel (.xlsx) Extensions to the Office Open XML SpreadsheetML File Format
- [MS-XLSB]: Excel (.xlsb) Binary File Format
- [MS-OE376]: Office Implementation Information for ECMA-376 Standards Support
## Badges

@ -46,10 +46,13 @@ if(!fs.existsSync(filename)) {
if(program.dev) X.verbose = 2;
var opts = {};
if(program.listSheets) opts.bookSheets = true;
var wb;
if(program.dev) wb = X.readFile(filename);
if(program.dev) wb = X.readFile(filename, opts);
else try {
wb = X.readFile(filename);
wb = X.readFile(filename, opts);
} catch(e) {
var msg = (program.quiet) ? "" : n + "2csv: error parsing ";
msg += filename + ": " + e;

@ -1 +1 @@
XLSX.version = '0.5.4';
XLSX.version = '0.5.5';

@ -1,5 +1,5 @@
/* [MS-XLSB] 2.1.4 Record */
var recordhopper = function(data, cb) {
var recordhopper = function(data, cb, opts) {
var tmpbyte, cntbyte, length;
prep_blob(data, data.l || 0);
while(data.l < data.length) {
@ -9,7 +9,7 @@ var recordhopper = function(data, cb) {
tmpbyte = data.read_shift(1);
length = tmpbyte & 0x7F;
for(cntbyte = 1; cntbyte <4 && (tmpbyte & 0x80); ++cntbyte) length += ((tmpbyte = data.read_shift(1)) & 0x7F)<<(7*cntbyte);
var d = R.f(data, length);
var d = R.f(data, length, opts);
if(cb(d, R, RT)) return;
}
};

5
bits/70_fbin.js Normal file

@ -0,0 +1,5 @@
/* [MS-XLSB] 2.5.97.4 CellParsedFormula TODO: use similar logic to js-xls */
/* Stub: reads one 4-byte field, then has parsenoop skip the remaining length-4 bytes; nothing is decoded */
var parse_CellParsedFormula = function(data, length) {
	/* consume the leading 4-byte field (the earlier stub stored it as `cce`); the value is not used yet */
	data.read_shift(4);
	/* hand the rest of the structure to parsenoop and pass its result through unchanged */
	var skipped = parsenoop(data, length - 4);
	return skipped;
};

@ -1,3 +1,4 @@
/* [MS-XLSB] 2.4.718 BrtRowHdr */
var parse_BrtRowHdr = function(data, length) {
var z = {};
@ -18,6 +19,9 @@ var parse_BrtWsProp = function(data, length) {
return z;
};
/* [MS-XLSB] 2.4.303 BrtCellBlank */
var parse_BrtCellBlank = parsenoop;
/* [MS-XLSB] 2.4.304 BrtCellBool */
var parse_BrtCellBool = function(data, length) {
var cell = parse_Cell(data);
@ -53,7 +57,7 @@ var parse_BrtCellRk = function(data, length) {
return [cell, value, 'n'];
};
/* [MS-XLSB] 2.4.311 BrtCellRk */
/* [MS-XLSB] 2.4.314 BrtCellSt */
var parse_BrtCellSt = function(data, length) {
var cell = parse_Cell(data);
var value = parse_XLWideString(data);
@ -61,40 +65,58 @@ var parse_BrtCellSt = function(data, length) {
};
/* [MS-XLSB] 2.4.647 BrtFmlaBool */
var parse_BrtFmlaBool = function(data, length) {
var parse_BrtFmlaBool = function(data, length, opts) {
var cell = parse_Cell(data);
var value = data.read_shift(1);
data.l += length-9;
return [cell, value, 'b' /*, formula */];
var o = [cell, value, 'b'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length-9);
o[3] = ""; /* TODO */
}
else data.l += length-9;
return o;
};
/* [MS-XLSB] 2.4.648 BrtFmlaError */
var parse_BrtFmlaError = function(data, length) {
var parse_BrtFmlaError = function(data, length, opts) {
var cell = parse_Cell(data);
var fBool = data.read_shift(1);
data.l += length-9;
return [cell, fBool, 'e'];
var value = data.read_shift(1);
var o = [cell, value, 'e'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length-9);
o[3] = ""; /* TODO */
}
else data.l += length-9;
return o;
};
/* [MS-XLSB] 2.4.649 BrtFmlaNum */
var parse_BrtFmlaNum = function(data, length) {
var parse_BrtFmlaNum = function(data, length, opts) {
var cell = parse_Cell(data);
var value = parse_Xnum(data);
data.l += length-16;
return [cell, value, 'n' /*, formula */];
var o = [cell, value, 'n'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length - 16);
o[3] = ""; /* TODO */
}
else data.l += length-16;
return o;
};
/* [MS-XLSB] 2.4.650 BrtFmlaString */
var parse_BrtFmlaString = function(data, length) {
var parse_BrtFmlaString = function(data, length, opts) {
var start = data.l;
var cell = parse_Cell(data);
var value = parse_XLWideString(data);
data.l = start + length;
return [cell, value, 'str' /*, formula */];
var o = [cell, value, 'str'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, start + length - data.l);
o[3] = ""; /* TODO */
}
else data.l = start + length;
return o;
};
var parse_BrtCellBlank = parsenoop;
/* [MS-XLSB] 2.1.7.61 Worksheet */
var parse_ws_bin = function(data, opts) {
if(!data) return data;
@ -126,7 +148,7 @@ var parse_ws_bin = function(data, opts) {
case 'e': p.raw = val[1]; p.v = BErr[p.raw]; break;
case 'str': p.v = utf8read(val[1]); break;
}
if(val[3] && opts.cellFormula) p.f = val[3];
if(opts.cellFormula && val.length > 3) p.f = val[3];
if((cf = styles.CellXf[val[0].iStyleRef])) try {
p.w = SSF.format(cf.ifmt,p.v,_ssfopts);
if(opts.cellNF) p.z = SSF._table[cf.ifmt];
@ -136,7 +158,6 @@ var parse_ws_bin = function(data, opts) {
case 'BrtCellBlank': break; // (blank cell)
case 'BrtFmt': break; // TODO
case 'BrtArrFmla': break; // TODO
case 'BrtShrFmla': break; // TODO
case 'BrtBeginSheet': break;
@ -163,9 +184,13 @@ var parse_ws_bin = function(data, opts) {
case 'BrtFRTBegin': pass = true; break;
case 'BrtFRTEnd': pass = false; break;
case 'BrtEndSheet': break; // TODO
case 'BrtBeginMergeCells': break; // TODO
case 'BrtMergeCell': break; // TODO
case 'BrtEndMergeCells': break; // TODO
case 'BrtLegacyDrawing': break; // TODO
//default: if(!pass) throw new Error("Unexpected record " + R.n);
}
});
}, opts);
s["!ref"] = encode_range(ref);
return s;
};

@ -6,6 +6,8 @@ function fixopts(opts) {
['sheetStubs', false], /* emit empty cells */
['bookSheets', false], /* only try to get sheet names (no Sheets) */
['WTF', false] /* WTF mode (do not use) */
];
defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; });

@ -12,19 +12,29 @@ function parseZip(zip, opts) {
dir.workbooks.push(binname);
xlsb = true;
}
strs = {};
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts);
styles = {};
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
if(!opts.bookSheets) {
strs = {};
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts);
styles = {};
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
}
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0], opts);
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
var props = {}, propdata = "";
try {
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
props = propdata !== "" ? parseProps(propdata) : {};
} catch(e) { }
if(opts.bookSheets) {
if(props.Worksheets && props.SheetNames.length > 0) return { SheetNames:props.SheetNames };
else if(wb.Sheets) return { SheetNames:wb.Sheets.map(function(x) { return x.name; }) };
}
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.5.4",
"version": "0.5.5",
"author": "sheetjs",
"description": "XLSB / XLSX / XLSM parser",
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],

25
test.js

@ -4,6 +4,8 @@ var fs = require('fs'), assert = require('assert');
describe('source',function(){ it('should load', function(){ XLSX = require('./'); });});
/* Extensions selected for testing; FMTS (colon-separated, leading dot optional) overrides the default list */
var ex = [".xlsb", ".xlsm", ".xlsx"];
if(process.env.FMTS) ex = process.env.FMTS.split(":")
	.filter(function(x) { return x.length > 0; }) /* ignore empty entries such as a trailing ":" */
	.map(function(x) { return x[0] === "." ? x : "." + x; });
/* The same extensions with a ".pending" suffix appended */
var exp = ex.map(function(x) { return x + ".pending"; });
/* Returns true when filename x ends with one of the entries of ex or exp.
 * The comparison uses each entry's own length, so extensions of any length work. */
function test_file(x) {
	return ex.concat(exp).some(function(e) {
		return x.length >= e.length && x.slice(x.length - e.length) === e;
	});
}
@ -106,4 +108,27 @@ describe('options', function() {
assert(typeof ws[addr].h === 'undefined');
});
});
it('should generate formulae by default', function() {
	var wb = XLSX.readFile('./test_files/formula_stress_test.xlsb');
	/* passes as soon as any cell in any sheet carries an .f field */
	var found = wb.SheetNames.some(function(s) {
		var ws = wb.Sheets[s];
		return Object.keys(ws).some(function(addr) {
			/* keys starting with "!" are skipped; they are not cell addresses */
			if(addr[0] === "!" || !ws.hasOwnProperty(addr)) return false;
			return typeof ws[addr].f !== 'undefined';
		});
	});
	assert(found);
});
it('should not generate formulae when requested', function() {
	var wb = XLSX.readFile('./test_files/formula_stress_test.xlsb', {cellFormula: false});
	/* with cellFormula disabled, no cell in any sheet may carry an .f field */
	for(var i = 0; i < wb.SheetNames.length; ++i) {
		var ws = wb.Sheets[wb.SheetNames[i]];
		var addrs = Object.keys(ws);
		for(var j = 0; j < addrs.length; ++j) {
			var addr = addrs[j];
			/* keys starting with "!" are skipped; they are not cell addresses */
			if(addr[0] === "!" || !ws.hasOwnProperty(addr)) continue;
			assert(typeof ws[addr].f === 'undefined');
		}
	}
});
});

@ -1 +1 @@
Subproject commit 709d865dc7f9d7173bc4b5f5c6b0c5aea945589c
Subproject commit 5a2df78bfe58c087fc604450185ee3bc4fb2c077

96
xlsx.js

@ -424,7 +424,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa
make_ssf(SSF);
var XLSX = {};
(function(XLSX){
XLSX.version = '0.5.4';
XLSX.version = '0.5.5';
var current_codepage, current_cptable, cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
@ -621,7 +621,7 @@ function prep_blob(blob, pos) {
function parsenoop(blob, length) { blob.l += length; }
/* [MS-XLSB] 2.1.4 Record */
var recordhopper = function(data, cb) {
var recordhopper = function(data, cb, opts) {
var tmpbyte, cntbyte, length;
prep_blob(data, data.l || 0);
while(data.l < data.length) {
@ -631,7 +631,7 @@ var recordhopper = function(data, cb) {
tmpbyte = data.read_shift(1);
length = tmpbyte & 0x7F;
for(cntbyte = 1; cntbyte <4 && (tmpbyte & 0x80); ++cntbyte) length += ((tmpbyte = data.read_shift(1)) & 0x7F)<<(7*cntbyte);
var d = R.f(data, length);
var d = R.f(data, length, opts);
if(cb(d, R, RT)) return;
}
};
@ -1235,6 +1235,11 @@ function insertCommentsIntoSheet(sheetName, sheet, comments) {
});
}
/* [MS-XLSB] 2.5.97.4 CellParsedFormula TODO: use similar logic to js-xls */
/* Stub: reads one 4-byte field, then has parsenoop skip the remaining length-4 bytes; nothing is decoded */
var parse_CellParsedFormula = function(data, length) {
	/* consume the leading 4-byte field (the earlier stub stored it as `cce`); the value is not used yet */
	data.read_shift(4);
	/* hand the rest of the structure to parsenoop and pass its result through unchanged */
	var skipped = parsenoop(data, length - 4);
	return skipped;
};
var strs = {}; // shared strings
var _ssfopts = {}; // spreadsheet formatting options
@ -1325,6 +1330,7 @@ function parse_ws_xml(data, opts) {
return s;
}
/* [MS-XLSB] 2.4.718 BrtRowHdr */
var parse_BrtRowHdr = function(data, length) {
var z = {};
@ -1345,6 +1351,9 @@ var parse_BrtWsProp = function(data, length) {
return z;
};
/* [MS-XLSB] 2.4.303 BrtCellBlank */
var parse_BrtCellBlank = parsenoop;
/* [MS-XLSB] 2.4.304 BrtCellBool */
var parse_BrtCellBool = function(data, length) {
var cell = parse_Cell(data);
@ -1380,7 +1389,7 @@ var parse_BrtCellRk = function(data, length) {
return [cell, value, 'n'];
};
/* [MS-XLSB] 2.4.311 BrtCellRk */
/* [MS-XLSB] 2.4.314 BrtCellSt */
var parse_BrtCellSt = function(data, length) {
var cell = parse_Cell(data);
var value = parse_XLWideString(data);
@ -1388,40 +1397,58 @@ var parse_BrtCellSt = function(data, length) {
};
/* [MS-XLSB] 2.4.647 BrtFmlaBool */
var parse_BrtFmlaBool = function(data, length) {
var parse_BrtFmlaBool = function(data, length, opts) {
var cell = parse_Cell(data);
var value = data.read_shift(1);
data.l += length-9;
return [cell, value, 'b' /*, formula */];
var o = [cell, value, 'b'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length-9);
o[3] = ""; /* TODO */
}
else data.l += length-9;
return o;
};
/* [MS-XLSB] 2.4.648 BrtFmlaError */
var parse_BrtFmlaError = function(data, length) {
var parse_BrtFmlaError = function(data, length, opts) {
var cell = parse_Cell(data);
var fBool = data.read_shift(1);
data.l += length-9;
return [cell, fBool, 'e'];
var value = data.read_shift(1);
var o = [cell, value, 'e'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length-9);
o[3] = ""; /* TODO */
}
else data.l += length-9;
return o;
};
/* [MS-XLSB] 2.4.649 BrtFmlaNum */
var parse_BrtFmlaNum = function(data, length) {
var parse_BrtFmlaNum = function(data, length, opts) {
var cell = parse_Cell(data);
var value = parse_Xnum(data);
data.l += length-16;
return [cell, value, 'n' /*, formula */];
var o = [cell, value, 'n'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, length - 16);
o[3] = ""; /* TODO */
}
else data.l += length-16;
return o;
};
/* [MS-XLSB] 2.4.650 BrtFmlaString */
var parse_BrtFmlaString = function(data, length) {
var parse_BrtFmlaString = function(data, length, opts) {
var start = data.l;
var cell = parse_Cell(data);
var value = parse_XLWideString(data);
data.l = start + length;
return [cell, value, 'str' /*, formula */];
var o = [cell, value, 'str'];
if(opts.cellFormula) {
var formula = parse_CellParsedFormula(data, start + length - data.l);
o[3] = ""; /* TODO */
}
else data.l = start + length;
return o;
};
var parse_BrtCellBlank = parsenoop;
/* [MS-XLSB] 2.1.7.61 Worksheet */
var parse_ws_bin = function(data, opts) {
if(!data) return data;
@ -1453,7 +1480,7 @@ var parse_ws_bin = function(data, opts) {
case 'e': p.raw = val[1]; p.v = BErr[p.raw]; break;
case 'str': p.v = utf8read(val[1]); break;
}
if(val[3] && opts.cellFormula) p.f = val[3];
if(opts.cellFormula && val.length > 3) p.f = val[3];
if((cf = styles.CellXf[val[0].iStyleRef])) try {
p.w = SSF.format(cf.ifmt,p.v,_ssfopts);
if(opts.cellNF) p.z = SSF._table[cf.ifmt];
@ -1463,7 +1490,6 @@ var parse_ws_bin = function(data, opts) {
case 'BrtCellBlank': break; // (blank cell)
case 'BrtFmt': break; // TODO
case 'BrtArrFmla': break; // TODO
case 'BrtShrFmla': break; // TODO
case 'BrtBeginSheet': break;
@ -1490,9 +1516,13 @@ var parse_ws_bin = function(data, opts) {
case 'BrtFRTBegin': pass = true; break;
case 'BrtFRTEnd': pass = false; break;
case 'BrtEndSheet': break; // TODO
case 'BrtBeginMergeCells': break; // TODO
case 'BrtMergeCell': break; // TODO
case 'BrtEndMergeCells': break; // TODO
case 'BrtLegacyDrawing': break; // TODO
//default: if(!pass) throw new Error("Unexpected record " + R.n);
}
});
}, opts);
s["!ref"] = encode_range(ref);
return s;
};
@ -2593,6 +2623,8 @@ function fixopts(opts) {
['sheetStubs', false], /* emit empty cells */
['bookSheets', false], /* only try to get sheet names (no Sheets) */
['WTF', false] /* WTF mode (do not use) */
];
defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; });
@ -2611,19 +2643,29 @@ function parseZip(zip, opts) {
dir.workbooks.push(binname);
xlsb = true;
}
strs = {};
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts);
styles = {};
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
if(!opts.bookSheets) {
strs = {};
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts);
styles = {};
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
}
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0], opts);
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
var props = {}, propdata = "";
try {
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
props = propdata !== "" ? parseProps(propdata) : {};
} catch(e) { }
if(opts.bookSheets) {
if(props.Worksheets && props.SheetNames.length > 0) return { SheetNames:props.SheetNames };
else if(wb.Sheets) return { SheetNames:wb.Sheets.map(function(x) { return x.name; }) };
}
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;