diff --git a/.npmignore b/.npmignore index f861188..4b34d86 100644 --- a/.npmignore +++ b/.npmignore @@ -1,3 +1,4 @@ test_files/ tests/files/ index.html +misc/coverage.html diff --git a/Makefile b/Makefile index 2aced76..5cb612f 100644 --- a/Makefile +++ b/Makefile @@ -1,5 +1,6 @@ DEPS=$(wildcard bits/*.js) TARGET=xlsx.js +FMT=xlsx xlsm xlsb $(TARGET): $(DEPS) cat $^ > $@ @@ -23,6 +24,11 @@ init: test mocha: mocha -R spec +TESTFMT=$(patsubst %,test_%,$(FMT)) +.PHONY: $(TESTFMT) +$(TESTFMT): test_%: + FMTS=$* make test + .PHONY: jasmine jasmine: npm run-script test-jasmine diff --git a/README.md b/README.md index 049129e..2344268 100644 --- a/README.md +++ b/README.md @@ -49,7 +49,7 @@ For more details: - `index.html` is the live demo - `bits/90_utils.js` contains the logic for generating CSV and JSON from sheets -## Cell Object Description +## Cell Object Description `.SheetNames` is an ordered list of the sheets in the workbook @@ -65,7 +65,7 @@ that does not start with `!` corresponds to a cell (using `A-1` notation). - `.r` : the rich text encoding of a cell text (if applicable) - `.h` : an HTML rendering of the rich text (if applicable) - `.c` : comments associated with the cell -- `.z` : the number format string associated with the cell (if requested) +- `.z` : the number format string associated with the cell (if requested) For dates, `.v` holds the raw date code from the sheet and `.w` holds the text @@ -79,12 +79,14 @@ The exported `read` and `readFile` functions accept an options argument: | cellHTML | true | Parse rich text and save HTML to the .h field | | cellNF | false | Save number format string to the .z field | | sheetStubs | false | Create cell objects for stub cells | +| bookSheets | false | If true, only parse enough to get the sheet names | - `cellFormula` only applies to constructing XLSB formulae. XLSX/XLSM formulae are stored in plaintext, but XLSB formulae are stored in a binary format. 
- Even if `cellNF` is false, formatted text (.w) will be generated +- In some cases, sheets may be parsed even if `bookSheets` is true. -The defaults are enumerated in bits/84_defaults.js +The defaults are enumerated in bits/84_defaults.js ## Tested Environments @@ -118,7 +120,7 @@ $ open -a Chromium.app http://localhost:8000/stress.html ## Contributing -Due to the precarious nature of the Open Specifications Promise, it is very important to ensure code is cleanroom. Consult CONTRIBUTING.md +Due to the precarious nature of the Open Specifications Promise, it is very important to ensure code is cleanroom. Consult CONTRIBUTING.md ## XLS Support @@ -138,6 +140,7 @@ OSP-covered specifications: - [MS-XLSX]: Excel (.xlsx) Extensions to the Office Open XML SpreadsheetML File Format - [MS-XLSB]: Excel (.xlsb) Binary File Format + - [MS-OE376]: Office Implementation Information for ECMA-376 Standards Support ## Badges diff --git a/bin/xlsx2csv.njs b/bin/xlsx2csv.njs index f601189..c1949d7 100755 --- a/bin/xlsx2csv.njs +++ b/bin/xlsx2csv.njs @@ -46,10 +46,13 @@ if(!fs.existsSync(filename)) { if(program.dev) X.verbose = 2; +var opts = {}; +if(program.listSheets) opts.bookSheets = true; + var wb; -if(program.dev) wb = X.readFile(filename); +if(program.dev) wb = X.readFile(filename, opts); else try { - wb = X.readFile(filename); + wb = X.readFile(filename, opts); } catch(e) { var msg = (program.quiet) ? 
"" : n + "2csv: error parsing "; msg += filename + ": " + e; diff --git a/bits/31_version.js b/bits/31_version.js index 050d268..d5ceec5 100644 --- a/bits/31_version.js +++ b/bits/31_version.js @@ -1 +1 @@ -XLSX.version = '0.5.4'; +XLSX.version = '0.5.5'; diff --git a/bits/38_recordhopper.js b/bits/38_recordhopper.js index ae96e30..7ff65f7 100644 --- a/bits/38_recordhopper.js +++ b/bits/38_recordhopper.js @@ -1,5 +1,5 @@ /* [MS-XLSB] 2.1.4 Record */ -var recordhopper = function(data, cb) { +var recordhopper = function(data, cb, opts) { var tmpbyte, cntbyte, length; prep_blob(data, data.l || 0); while(data.l < data.length) { @@ -9,7 +9,7 @@ var recordhopper = function(data, cb) { tmpbyte = data.read_shift(1); length = tmpbyte & 0x7F; for(cntbyte = 1; cntbyte <4 && (tmpbyte & 0x80); ++cntbyte) length += ((tmpbyte = data.read_shift(1)) & 0x7F)<<(7*cntbyte); - var d = R.f(data, length); + var d = R.f(data, length, opts); if(cb(d, R, RT)) return; } }; diff --git a/bits/70_fbin.js b/bits/70_fbin.js new file mode 100644 index 0000000..42cbaca --- /dev/null +++ b/bits/70_fbin.js @@ -0,0 +1,5 @@ +/* [MS-XLSB] 2.5.97.4 CellParsedFormula TODO: use similar logic to js-xls */ +var parse_CellParsedFormula = function(data, length) { + var cce = data.read_shift(4); + return parsenoop(data, length-4); +}; diff --git a/bits/73_wsbin.js b/bits/73_wsbin.js index 658878d..b3ee0f9 100644 --- a/bits/73_wsbin.js +++ b/bits/73_wsbin.js @@ -1,3 +1,4 @@ + /* [MS-XLSB] 2.4.718 BrtRowHdr */ var parse_BrtRowHdr = function(data, length) { var z = {}; @@ -18,6 +19,9 @@ var parse_BrtWsProp = function(data, length) { return z; }; +/* [MS-XLSB] 2.4.303 BrtCellBlank */ +var parse_BrtCellBlank = parsenoop; + /* [MS-XLSB] 2.4.304 BrtCellBool */ var parse_BrtCellBool = function(data, length) { var cell = parse_Cell(data); @@ -53,7 +57,7 @@ var parse_BrtCellRk = function(data, length) { return [cell, value, 'n']; }; -/* [MS-XLSB] 2.4.311 BrtCellRk */ +/* [MS-XLSB] 2.4.314 BrtCellSt */ var parse_BrtCellSt 
= function(data, length) { var cell = parse_Cell(data); var value = parse_XLWideString(data); @@ -61,40 +65,58 @@ var parse_BrtCellSt = function(data, length) { }; /* [MS-XLSB] 2.4.647 BrtFmlaBool */ -var parse_BrtFmlaBool = function(data, length) { +var parse_BrtFmlaBool = function(data, length, opts) { var cell = parse_Cell(data); var value = data.read_shift(1); - data.l += length-9; - return [cell, value, 'b' /*, formula */]; + var o = [cell, value, 'b']; + if(opts.cellFormula) { + var formula = parse_CellParsedFormula(data, length-9); + o[3] = ""; /* TODO */ + } + else data.l += length-9; + return o; }; /* [MS-XLSB] 2.4.648 BrtFmlaError */ -var parse_BrtFmlaError = function(data, length) { +var parse_BrtFmlaError = function(data, length, opts) { var cell = parse_Cell(data); - var fBool = data.read_shift(1); - data.l += length-9; - return [cell, fBool, 'e']; + var value = data.read_shift(1); + var o = [cell, value, 'e']; + if(opts.cellFormula) { + var formula = parse_CellParsedFormula(data, length-9); + o[3] = ""; /* TODO */ + } + else data.l += length-9; + return o; }; /* [MS-XLSB] 2.4.649 BrtFmlaNum */ -var parse_BrtFmlaNum = function(data, length) { +var parse_BrtFmlaNum = function(data, length, opts) { var cell = parse_Cell(data); var value = parse_Xnum(data); - data.l += length-16; - return [cell, value, 'n' /*, formula */]; + var o = [cell, value, 'n']; + if(opts.cellFormula) { + var formula = parse_CellParsedFormula(data, length - 16); + o[3] = ""; /* TODO */ + } + else data.l += length-16; + return o; }; /* [MS-XLSB] 2.4.650 BrtFmlaString */ -var parse_BrtFmlaString = function(data, length) { +var parse_BrtFmlaString = function(data, length, opts) { var start = data.l; var cell = parse_Cell(data); var value = parse_XLWideString(data); - data.l = start + length; - return [cell, value, 'str' /*, formula */]; + var o = [cell, value, 'str']; + if(opts.cellFormula) { + var formula = parse_CellParsedFormula(data, start + length - data.l); + o[3] = ""; /* TODO 
*/ + } + else data.l = start + length; + return o; }; -var parse_BrtCellBlank = parsenoop; - /* [MS-XLSB] 2.1.7.61 Worksheet */ var parse_ws_bin = function(data, opts) { if(!data) return data; @@ -126,7 +148,7 @@ var parse_ws_bin = function(data, opts) { case 'e': p.raw = val[1]; p.v = BErr[p.raw]; break; case 'str': p.v = utf8read(val[1]); break; } - if(val[3] && opts.cellFormula) p.f = val[3]; + if(opts.cellFormula && val.length > 3) p.f = val[3]; if((cf = styles.CellXf[val[0].iStyleRef])) try { p.w = SSF.format(cf.ifmt,p.v,_ssfopts); if(opts.cellNF) p.z = SSF._table[cf.ifmt]; @@ -136,7 +158,6 @@ var parse_ws_bin = function(data, opts) { case 'BrtCellBlank': break; // (blank cell) - case 'BrtFmt': break; // TODO case 'BrtArrFmla': break; // TODO case 'BrtShrFmla': break; // TODO case 'BrtBeginSheet': break; @@ -163,9 +184,13 @@ var parse_ws_bin = function(data, opts) { case 'BrtFRTBegin': pass = true; break; case 'BrtFRTEnd': pass = false; break; case 'BrtEndSheet': break; // TODO + case 'BrtBeginMergeCells': break; // TODO + case 'BrtMergeCell': break; // TODO + case 'BrtEndMergeCells': break; // TODO + case 'BrtLegacyDrawing': break; // TODO //default: if(!pass) throw new Error("Unexpected record " + R.n); } - }); + }, opts); s["!ref"] = encode_range(ref); return s; }; diff --git a/bits/84_defaults.js b/bits/84_defaults.js index aa15f81..094938c 100644 --- a/bits/84_defaults.js +++ b/bits/84_defaults.js @@ -6,6 +6,8 @@ function fixopts(opts) { ['sheetStubs', false], /* emit empty cells */ + ['bookSheets', false], /* only try to get sheet names (no Sheets) */ + ['WTF', false] /* WTF mode (do not use) */ ]; defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; }); diff --git a/bits/85_parsezip.js b/bits/85_parsezip.js index 7dee0fe..6fdd753 100644 --- a/bits/85_parsezip.js +++ b/bits/85_parsezip.js @@ -12,19 +12,29 @@ function parseZip(zip, opts) { dir.workbooks.push(binname); xlsb = true; } - strs = {}; - if(dir.sst) 
strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts); - styles = {}; - if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style); + if(!opts.bookSheets) { + strs = {}; + if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts); + + styles = {}; + if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style); + } + + var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0], opts); - var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]); var props = {}, propdata = ""; try { propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : ""; propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : ""; props = propdata !== "" ? parseProps(propdata) : {}; } catch(e) { } + + if(opts.bookSheets) { + if(props.Worksheets && props.SheetNames.length > 0) return { SheetNames:props.SheetNames }; + else if(wb.Sheets) return { SheetNames:wb.Sheets.map(function(x) { return x.name; }) }; + } + var deps = {}; if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,'')))); var sheets = {}, i=0; diff --git a/package.json b/package.json index 2dac6c4..1361ab8 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "xlsx", - "version": "0.5.4", + "version": "0.5.5", "author": "sheetjs", "description": "XLSB / XLSX / XLSM parser", "keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ], diff --git a/test.js b/test.js index 07bed7b..10540b8 100644 --- a/test.js +++ b/test.js @@ -4,6 +4,8 @@ var fs = require('fs'), assert = require('assert'); describe('source',function(){ it('should load', function(){ XLSX = require('./'); });}); var ex = [".xlsb", ".xlsm", ".xlsx"]; +if(process.env.FMTS) 
ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;}); +console.log(ex, process.env.FMTS); var exp = ex.map(function(x){ return x + ".pending"; }); function test_file(x){return ex.indexOf(x.substr(-5))>=0||exp.indexOf(x.substr(-13))>=0;} @@ -106,4 +108,27 @@ describe('options', function() { assert(typeof ws[addr].h === 'undefined'); }); }); + it('should generate formulae by default', function() { + var wb = XLSX.readFile('./test_files/formula_stress_test.xlsb'); + var found = false; + wb.SheetNames.forEach(function(s) { + var ws = wb.Sheets[s]; + console.log(ws); + Object.keys(ws).forEach(function(addr) { + if(addr[0] === "!" || !ws.hasOwnProperty(addr)) return; + if(typeof ws[addr].f !== 'undefined') return found = true; + }); + }); + assert(found); + }); + it('should not generate formulae when requested', function() { + var wb = XLSX.readFile('./test_files/formula_stress_test.xlsb', {cellFormula: false}); + wb.SheetNames.forEach(function(s) { + var ws = wb.Sheets[s]; + Object.keys(ws).forEach(function(addr) { + if(addr[0] === "!" 
|| !ws.hasOwnProperty(addr)) return; + assert(typeof ws[addr].f === 'undefined'); + }); + }); + }); }); diff --git a/tests/files b/tests/files index 709d865..5a2df78 160000 --- a/tests/files +++ b/tests/files @@ -1 +1 @@ -Subproject commit 709d865dc7f9d7173bc4b5f5c6b0c5aea945589c +Subproject commit 5a2df78bfe58c087fc604450185ee3bc4fb2c077 diff --git a/xlsx.js b/xlsx.js index 30ebfb2..6d2a6f6 100644 --- a/xlsx.js +++ b/xlsx.js @@ -424,7 +424,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa make_ssf(SSF); var XLSX = {}; (function(XLSX){ -XLSX.version = '0.5.4'; +XLSX.version = '0.5.5'; var current_codepage, current_cptable, cptable; if(typeof module !== "undefined" && typeof require !== 'undefined') { if(typeof cptable === 'undefined') cptable = require('codepage'); @@ -621,7 +621,7 @@ function prep_blob(blob, pos) { function parsenoop(blob, length) { blob.l += length; } /* [MS-XLSB] 2.1.4 Record */ -var recordhopper = function(data, cb) { +var recordhopper = function(data, cb, opts) { var tmpbyte, cntbyte, length; prep_blob(data, data.l || 0); while(data.l < data.length) { @@ -631,7 +631,7 @@ var recordhopper = function(data, cb) { tmpbyte = data.read_shift(1); length = tmpbyte & 0x7F; for(cntbyte = 1; cntbyte <4 && (tmpbyte & 0x80); ++cntbyte) length += ((tmpbyte = data.read_shift(1)) & 0x7F)<<(7*cntbyte); - var d = R.f(data, length); + var d = R.f(data, length, opts); if(cb(d, R, RT)) return; } }; @@ -1235,6 +1235,11 @@ function insertCommentsIntoSheet(sheetName, sheet, comments) { }); } +/* [MS-XLSB] 2.5.97.4 CellParsedFormula TODO: use similar logic to js-xls */ +var parse_CellParsedFormula = function(data, length) { + var cce = data.read_shift(4); + return parsenoop(data, length-4); +}; var strs = {}; // shared strings var _ssfopts = {}; // spreadsheet formatting options @@ -1325,6 +1330,7 @@ function parse_ws_xml(data, opts) { return s; } + /* [MS-XLSB] 2.4.718 BrtRowHdr */ var parse_BrtRowHdr = function(data, length) { var 
z = {}; @@ -1345,6 +1351,9 @@ var parse_BrtWsProp = function(data, length) { return z; }; +/* [MS-XLSB] 2.4.303 BrtCellBlank */ +var parse_BrtCellBlank = parsenoop; + /* [MS-XLSB] 2.4.304 BrtCellBool */ var parse_BrtCellBool = function(data, length) { var cell = parse_Cell(data); @@ -1380,7 +1389,7 @@ var parse_BrtCellRk = function(data, length) { return [cell, value, 'n']; }; -/* [MS-XLSB] 2.4.311 BrtCellRk */ +/* [MS-XLSB] 2.4.314 BrtCellSt */ var parse_BrtCellSt = function(data, length) { var cell = parse_Cell(data); var value = parse_XLWideString(data); @@ -1388,40 +1397,58 @@ var parse_BrtCellSt = function(data, length) { }; /* [MS-XLSB] 2.4.647 BrtFmlaBool */ -var parse_BrtFmlaBool = function(data, length) { +var parse_BrtFmlaBool = function(data, length, opts) { var cell = parse_Cell(data); var value = data.read_shift(1); - data.l += length-9; - return [cell, value, 'b' /*, formula */]; + var o = [cell, value, 'b']; + if(opts.cellFormula) { + var formula = parse_CellParsedFormula(data, length-9); + o[3] = ""; /* TODO */ + } + else data.l += length-9; + return o; }; /* [MS-XLSB] 2.4.648 BrtFmlaError */ -var parse_BrtFmlaError = function(data, length) { +var parse_BrtFmlaError = function(data, length, opts) { var cell = parse_Cell(data); - var fBool = data.read_shift(1); - data.l += length-9; - return [cell, fBool, 'e']; + var value = data.read_shift(1); + var o = [cell, value, 'e']; + if(opts.cellFormula) { + var formula = parse_CellParsedFormula(data, length-9); + o[3] = ""; /* TODO */ + } + else data.l += length-9; + return o; }; /* [MS-XLSB] 2.4.649 BrtFmlaNum */ -var parse_BrtFmlaNum = function(data, length) { +var parse_BrtFmlaNum = function(data, length, opts) { var cell = parse_Cell(data); var value = parse_Xnum(data); - data.l += length-16; - return [cell, value, 'n' /*, formula */]; + var o = [cell, value, 'n']; + if(opts.cellFormula) { + var formula = parse_CellParsedFormula(data, length - 16); + o[3] = ""; /* TODO */ + } + else data.l += length-16; 
+ return o; }; /* [MS-XLSB] 2.4.650 BrtFmlaString */ -var parse_BrtFmlaString = function(data, length) { +var parse_BrtFmlaString = function(data, length, opts) { var start = data.l; var cell = parse_Cell(data); var value = parse_XLWideString(data); - data.l = start + length; - return [cell, value, 'str' /*, formula */]; + var o = [cell, value, 'str']; + if(opts.cellFormula) { + var formula = parse_CellParsedFormula(data, start + length - data.l); + o[3] = ""; /* TODO */ + } + else data.l = start + length; + return o; }; -var parse_BrtCellBlank = parsenoop; - /* [MS-XLSB] 2.1.7.61 Worksheet */ var parse_ws_bin = function(data, opts) { if(!data) return data; @@ -1453,7 +1480,7 @@ var parse_ws_bin = function(data, opts) { case 'e': p.raw = val[1]; p.v = BErr[p.raw]; break; case 'str': p.v = utf8read(val[1]); break; } - if(val[3] && opts.cellFormula) p.f = val[3]; + if(opts.cellFormula && val.length > 3) p.f = val[3]; if((cf = styles.CellXf[val[0].iStyleRef])) try { p.w = SSF.format(cf.ifmt,p.v,_ssfopts); if(opts.cellNF) p.z = SSF._table[cf.ifmt]; @@ -1463,7 +1490,6 @@ var parse_ws_bin = function(data, opts) { case 'BrtCellBlank': break; // (blank cell) - case 'BrtFmt': break; // TODO case 'BrtArrFmla': break; // TODO case 'BrtShrFmla': break; // TODO case 'BrtBeginSheet': break; @@ -1490,9 +1516,13 @@ var parse_ws_bin = function(data, opts) { case 'BrtFRTBegin': pass = true; break; case 'BrtFRTEnd': pass = false; break; case 'BrtEndSheet': break; // TODO + case 'BrtBeginMergeCells': break; // TODO + case 'BrtMergeCell': break; // TODO + case 'BrtEndMergeCells': break; // TODO + case 'BrtLegacyDrawing': break; // TODO //default: if(!pass) throw new Error("Unexpected record " + R.n); } - }); + }, opts); s["!ref"] = encode_range(ref); return s; }; @@ -2593,6 +2623,8 @@ function fixopts(opts) { ['sheetStubs', false], /* emit empty cells */ + ['bookSheets', false], /* only try to get sheet names (no Sheets) */ + ['WTF', false] /* WTF mode (do not use) */ ]; 
defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; }); @@ -2611,19 +2643,29 @@ function parseZip(zip, opts) { dir.workbooks.push(binname); xlsb = true; } - strs = {}; - if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts); - styles = {}; - if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style); + if(!opts.bookSheets) { + strs = {}; + if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))), dir.sst, opts); + + styles = {}; + if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style); + } + + var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0], opts); - var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]); var props = {}, propdata = ""; try { propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : ""; propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : ""; props = propdata !== "" ? parseProps(propdata) : {}; } catch(e) { } + + if(opts.bookSheets) { + if(props.Worksheets && props.SheetNames.length > 0) return { SheetNames:props.SheetNames }; + else if(wb.Sheets) return { SheetNames:wb.Sheets.map(function(x) { return x.name; }) }; + } + var deps = {}; if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,'')))); var sheets = {}, i=0;