diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..7dfcbd1 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,53 @@ +# Contributing + +The SheetJS Libraries should be free and clear to use in your projects. In +order to maintain that, every contributor must be vigilant. + +There have been many projects in the past that have been very lax regarding +licensing, and we are of the opinion that those are ticking timebombs and that +no corporate product should depend on them. + + +# Required Reading + +These are pretty short reads and emphasize the importance of proper licensing: + +- https://github.com/kennethreitz/tablib/issues/114 (discussion of other tools) + +- http://www.codinghorror.com/blog/2007/04/pick-a-license-any-license.html + + +# Pre-Contribution Checklist + +Before thinking about contributing, make sure that: + +- You are not, nor have ever been, an employee of Microsoft Corporation. + +- You have not signed any NDAs or Shared Source Agreements with Microsoft + Corporation or a subsidiary. + +- You have not consulted any existing relevant codebase (if you have, please +take note of which codebases were consulted). + +If you cannot attest to each of these items, the best approach is to raise an +issue. If it is a particularly high-priority issue, please drop an email to + the maintainers and it will be prioritized. + + +# Intra-Contribution + +Keep these in mind as you work: + +- Your contributions are your original work. Take note of any resources you + consult in the process (and be extra careful not to use unlicensed code on + the internet). + +- You are working on your own time. Unless they explicitly grant permission, + your employer may be the ultimate owner of your IP. + + +# Post-Contribution + +Before contributions are merged, you will receive an email (at the address +associated with the git commit) and will be asked to confirm the aforementioned +items. 
diff --git a/README.md b/README.md index bcfbf0f..3c74a54 100644 --- a/README.md +++ b/README.md @@ -43,7 +43,13 @@ Some helper functions in `XLSX.utils` generate different views of the sheets: - `XLSX.utils.sheet_to_row_object_array` interprets sheets as tables with a header column and generates an array of objects - `XLSX.utils.get_formulae` generates a list of formulae -## Notes +For more details: + +- `bin/xlsx2csv.njs` is a tool for node +- `index.html` is the live demo +- `bits/90_utils.js` contains the logic for generating CSV and JSON from sheets + +## Cell Object Description `.SheetNames` is an ordered list of the sheets in the workbook @@ -52,17 +58,27 @@ that does not start with `!` corresponds to a cell (using `A-1` notation). `.Sheets[sheetname][address]` returns the specified cell: -- `.v` returns the raw value of the cell -- `.w` returns the formatted text of the cell -- `.t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) ) +- `.v` : the raw value of the cell +- `.w` : the formatted text of the cell (if applicable) +- `.t` : the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) ) +- `.f` : the formula of the cell (if applicable) +- `.r` : the rich text encoding of a cell text (if applicable) +- `.h` : an HTML rendering of the rich text (if applicable) +- `.c` : comments associated with the cell +- `.z` : the number format string associated with the cell (if requested) For dates, `.v` holds the raw date code from the sheet and `.w` holds the text -For more details: +## Options -- `bin/xlsx2csv.njs` is a tool for node -- `index.html` is the live demo -- `bits/90_utils.js` contains the logic for generating CSV and JSON from sheets +The exported `read` and `readFile` functions accept an options argument: + +| Option Name | Default | Description | +| :---------- | ------: | :---------- | +| cellNF | false | 
Save number format string to the .z field | +| sheetStubs | true | Create cell objects for stub cells | + +The defaults are enumerated in bits/84_defaults.js ## Tested Environments @@ -94,6 +110,10 @@ $ simplehttpserver # or "python -mSimpleHTTPServer" or "serve" $ open -a Chromium.app http://localhost:8000/stress.html ``` +## Contributing + +Due to the precarious nature of the Open Specifications Promise, it is very important to ensure code is cleanroom. Consult CONTRIBUTING.md + ## XLS Support XLS is available in [js-xls](https://github.com/SheetJS/js-xls). diff --git a/bits/31_version.js b/bits/31_version.js index ede97ea..e573b37 100644 --- a/bits/31_version.js +++ b/bits/31_version.js @@ -1 +1 @@ -XLSX.version = '0.5.2'; +XLSX.version = '0.5.3'; diff --git a/bits/57_styxml.js b/bits/57_styxml.js index 8f057e4..2217411 100644 --- a/bits/57_styxml.js +++ b/bits/57_styxml.js @@ -43,7 +43,7 @@ function parseCXfs(t) { } /* 18.8 Styles CT_Stylesheet*/ -function parse_styles(data) { +function parse_sty_xml(data) { /* 18.8.39 styleSheet CT_Stylesheet */ var t; diff --git a/bits/58_stybin.js b/bits/58_stybin.js index c4c192f..c4b7116 100644 --- a/bits/58_stybin.js +++ b/bits/58_stybin.js @@ -14,7 +14,7 @@ function parse_BrtXF(data, length) { function parse_sty_bin(data) { styles.NumberFmt = []; for(var y in SSF._table) styles.NumberFmt[y] = SSF._table[y]; - + styles.CellXf = []; var state = ""; var pass = false; diff --git a/bits/72_wsxml.js b/bits/72_wsxml.js index 080df0e..5295de5 100644 --- a/bits/72_wsxml.js +++ b/bits/72_wsxml.js @@ -1,5 +1,5 @@ /* 18.3 Worksheets */ -function parse_worksheet(data) { +function parse_ws_xml(data, opts) { if(!data) return data; /* 18.3.1.99 worksheet CT_Worksheet */ var s = {}; @@ -30,16 +30,19 @@ function parse_worksheet(data) { var cref_cell = decode_cell(cref[1]); idx = cref_cell.c; } - if(refguess.s.c > idx) refguess.s.c = idx; - if(refguess.e.c < idx) refguess.e.c = idx; var cell = parsexmltag((c.match(/]*>/)||[c])[0]); delete 
cell[0]; var d = c.substr(c.indexOf('>')+1); var p = {}; q.forEach(function(f){var x=d.match(matchtag(f));if(x)p[f]=unescapexml(x[1]);}); /* SCHEMA IS ACTUALLY INCORRECT HERE. IF A CELL HAS NO T, EMIT "" */ - if(cell.t === undefined && p.v === undefined) { p.t = "str"; p.v = undefined; } + if(cell.t === undefined && p.v === undefined) { + if(!opts.sheetStubs) return; /* skip stub cells (no type, no value) unless the sheetStubs option is set */ + p.t = "str"; p.v = undefined; + } else p.t = (cell.t ? cell.t : "n"); // default is "n" in schema + if(refguess.s.c > idx) refguess.s.c = idx; + if(refguess.e.c < idx) refguess.e.c = idx; switch(p.t) { case 'n': p.v = parseFloat(p.v); break; case 's': { @@ -71,7 +74,10 @@ function parse_worksheet(data) { var cf = styles.CellXf[cell.s]; if(cf && cf.numFmtId) fmtid = cf.numFmtId; } - try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { } + try { + p.w = SSF.format(fmtid,p.v,_ssfopts); + if(opts.cellNF) p.z = SSF._table[fmtid]; + } catch(e) { } s[cell.r] = p; }); }); diff --git a/bits/73_wsbin.js b/bits/73_wsbin.js index f0e7194..5a8411f 100644 --- a/bits/73_wsbin.js +++ b/bits/73_wsbin.js @@ -81,14 +81,14 @@ var parse_BrtFmlaBool = parsenoop; var parse_BrtFmlaString = parsenoop; /* [MS-XLSB] 2.1.7.61 Worksheet */ -var parse_ws_bin = function(data) { +var parse_ws_bin = function(data, opts) { if(!data) return data; var s = {}; var ref; var pass = false; - var row, p; + var row, p, cf; recordhopper(data, function(val, R) { switch(R.n) { case 'BrtWsDim': ref = val; break; @@ -115,8 +115,9 @@ var parse_ws_bin = function(data) { case 'str': if(p.v) p.v = utf8read(p.v); break; } if(val[3]) p.f = val[3]; - if(styles.CellXf[val[0].iStyleRef]) try { - p.w = SSF.format(styles.CellXf[val[0].iStyleRef].ifmt,p.v,_ssfopts); + if((cf = styles.CellXf[val[0].iStyleRef])) try { + p.w = SSF.format(cf.ifmt,p.v,_ssfopts); + if(opts.cellNF) p.z = SSF._table[cf.ifmt]; } catch(e) { } s[encode_cell({c:val[0].c,r:row.r})] = p; break; // TODO diff --git a/bits/77_wbxml.js b/bits/77_wbxml.js index 56bc457..817e2fc 100644 
--- a/bits/77_wbxml.js +++ b/bits/77_wbxml.js @@ -5,7 +5,7 @@ var XMLNS_WB = [ ]; /* 18.2 Workbook */ -function parse_workbook(data) { +function parse_wb_xml(data) { var wb = { AppVersion:{}, WBProps:{}, WBView:[], Sheets:[], CalcPr:{}, xmlns: "" }; var pass = false; data.match(/<[^>]*>/g).forEach(function(x) { diff --git a/bits/79_xmlbin.js b/bits/79_xmlbin.js index cb9a1e6..3927460 100644 --- a/bits/79_xmlbin.js +++ b/bits/79_xmlbin.js @@ -1,11 +1,11 @@ -function parse_wb(data, name) { - return name.substr(-4)===".bin" ? parse_wb_bin(data) : parse_workbook(data); +function parse_wb(data, name, opts) { + return name.substr(-4)===".bin" ? parse_wb_bin(data, opts) : parse_wb_xml(data, opts); } -function parse_ws(data, name) { - return name.substr(-4)===".bin" ? parse_ws_bin(data) : parse_worksheet(data); +function parse_ws(data, name, opts) { + return name.substr(-4)===".bin" ? parse_ws_bin(data, opts) : parse_ws_xml(data, opts); } -function parse_sty(data, name) { - return name.substr(-4)===".bin" ? parse_sty_bin(data) : parse_styles(data); +function parse_sty(data, name, opts) { + return name.substr(-4)===".bin" ? 
parse_sty_bin(data, opts) : parse_sty_xml(data, opts); } diff --git a/bits/84_defaults.js b/bits/84_defaults.js new file mode 100644 index 0000000..17ea03f --- /dev/null +++ b/bits/84_defaults.js @@ -0,0 +1,10 @@ +function fixopts(opts) { + var defaults = [ + ['cellNF', false], /* emit cell number format string as .z */ + + ['sheetStubs', true], /* emit empty cells */ + + ['WTF', false] /* WTF mode (do not use) */ + ]; + defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; }); +} diff --git a/bits/85_parsezip.js b/bits/85_parsezip.js index 7082035..f95cda8 100644 --- a/bits/85_parsezip.js +++ b/bits/85_parsezip.js @@ -1,4 +1,6 @@ -function parseZip(zip) { +function parseZip(zip, opts) { + opts = opts || {}; + fixopts(opts); reset_cp(); var entries = Object.keys(zip.files); var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort(); @@ -40,7 +42,7 @@ function parseZip(zip) { try { /* TODO: remove these guards */ path = 'xl/worksheets/sheet' + (i+1) + (xlsb?'.bin':'.xml'); relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); - sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path); + sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path,opts); sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); } catch(e) {} } @@ -50,7 +52,7 @@ function parseZip(zip) { //var path = dir.sheets[i].replace(/^\//,''); path = 'xl/worksheets/sheet' + (i+1) + (xlsb?'.bin':'.xml'); relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); - sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path); + sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path,opts); sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); } catch(e) {/*console.error(e);*/} } diff --git a/bits/88_read.js b/bits/88_read.js index ce93f59..333a34a 100644 --- a/bits/88_read.js +++ b/bits/88_read.js @@ -9,7 +9,7 @@ 
function readSync(data, options) { case "base64": zip = new jszip(d, { base64:true }); break; case "binary": zip = new jszip(d, { base64:false }); break; } - return parseZip(zip); + return parseZip(zip, o); } function readFileSync(data, options) { diff --git a/misc/xl.d.ts b/misc/xl.d.ts index 25812bb..c699cc1 100644 --- a/misc/xl.d.ts +++ b/misc/xl.d.ts @@ -1,7 +1,12 @@ interface Cell { v; - t: string; - ixfe: number; + w?: string; + t?: string; + f?: string; + r?: string; + h?: string; + c?: any; + z?: string; } interface Worksheet { diff --git a/package.json b/package.json index abd1b3d..f6bb8a7 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "xlsx", - "version": "0.5.2", + "version": "0.5.3", "author": "sheetjs", "description": "XLSB / XLSX / XLSM parser", "keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ], diff --git a/xlsx.js b/xlsx.js index 202844f..95da242 100644 --- a/xlsx.js +++ b/xlsx.js @@ -420,7 +420,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa make_ssf(SSF); var XLSX = {}; (function(XLSX){ -XLSX.version = '0.5.2'; +XLSX.version = '0.5.3'; var current_codepage, current_cptable, cptable; if(typeof module !== "undefined" && typeof require !== 'undefined') { if(typeof cptable === 'undefined') cptable = require('codepage'); @@ -994,7 +994,7 @@ function parseCXfs(t) { } /* 18.8 Styles CT_Stylesheet*/ -function parse_styles(data) { +function parse_sty_xml(data) { /* 18.8.39 styleSheet CT_Stylesheet */ var t; @@ -1032,7 +1032,7 @@ function parse_BrtXF(data, length) { function parse_sty_bin(data) { styles.NumberFmt = []; for(var y in SSF._table) styles.NumberFmt[y] = SSF._table[y]; - + styles.CellXf = []; var state = ""; var pass = false; @@ -1301,7 +1301,7 @@ var strs = {}; // shared strings var _ssfopts = {}; // spreadsheet formatting options /* 18.3 Worksheets */ -function parse_worksheet(data) { +function parse_ws_xml(data, opts) { if(!data) return data; /* 18.3.1.99 
worksheet CT_Worksheet */ var s = {}; @@ -1332,16 +1332,19 @@ function parse_worksheet(data) { var cref_cell = decode_cell(cref[1]); idx = cref_cell.c; } - if(refguess.s.c > idx) refguess.s.c = idx; - if(refguess.e.c < idx) refguess.e.c = idx; var cell = parsexmltag((c.match(/]*>/)||[c])[0]); delete cell[0]; var d = c.substr(c.indexOf('>')+1); var p = {}; q.forEach(function(f){var x=d.match(matchtag(f));if(x)p[f]=unescapexml(x[1]);}); /* SCHEMA IS ACTUALLY INCORRECT HERE. IF A CELL HAS NO T, EMIT "" */ - if(cell.t === undefined && p.v === undefined) { p.t = "str"; p.v = undefined; } + if(cell.t === undefined && p.v === undefined) { + if(!opts.sheetStubs) return; /* skip stub cells (no type, no value) unless the sheetStubs option is set */ + p.t = "str"; p.v = undefined; + } else p.t = (cell.t ? cell.t : "n"); // default is "n" in schema + if(refguess.s.c > idx) refguess.s.c = idx; + if(refguess.e.c < idx) refguess.e.c = idx; switch(p.t) { case 'n': p.v = parseFloat(p.v); break; case 's': { @@ -1373,7 +1376,10 @@ function parse_worksheet(data) { var cf = styles.CellXf[cell.s]; if(cf && cf.numFmtId) fmtid = cf.numFmtId; } - try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { } + try { + p.w = SSF.format(fmtid,p.v,_ssfopts); + if(opts.cellNF) p.z = SSF._table[fmtid]; + } catch(e) { } s[cell.r] = p; }); }); @@ -1464,14 +1470,14 @@ var parse_BrtFmlaBool = parsenoop; var parse_BrtFmlaString = parsenoop; /* [MS-XLSB] 2.1.7.61 Worksheet */ -var parse_ws_bin = function(data) { +var parse_ws_bin = function(data, opts) { if(!data) return data; var s = {}; var ref; var pass = false; - var row, p; + var row, p, cf; recordhopper(data, function(val, R) { switch(R.n) { case 'BrtWsDim': ref = val; break; @@ -1498,8 +1504,9 @@ var parse_ws_bin = function(data) { case 'str': if(p.v) p.v = utf8read(p.v); break; } if(val[3]) p.f = val[3]; - if(styles.CellXf[val[0].iStyleRef]) try { - p.w = SSF.format(styles.CellXf[val[0].iStyleRef].ifmt,p.v,_ssfopts); + if((cf = styles.CellXf[val[0].iStyleRef])) try { + p.w = SSF.format(cf.ifmt,p.v,_ssfopts); + 
if(opts.cellNF) p.z = SSF._table[cf.ifmt]; } catch(e) { } s[encode_cell({c:val[0].c,r:row.r})] = p; break; // TODO @@ -1624,7 +1631,7 @@ var XMLNS_WB = [ ]; /* 18.2 Workbook */ -function parse_workbook(data) { +function parse_wb_xml(data) { var wb = { AppVersion:{}, WBProps:{}, WBView:[], Sheets:[], CalcPr:{}, xmlns: "" }; var pass = false; data.match(/<[^>]*>/g).forEach(function(x) { @@ -1793,16 +1800,16 @@ var parse_wb_bin = function(data) { return wb; }; -function parse_wb(data, name) { - return name.substr(-4)===".bin" ? parse_wb_bin(data) : parse_workbook(data); +function parse_wb(data, name, opts) { + return name.substr(-4)===".bin" ? parse_wb_bin(data, opts) : parse_wb_xml(data, opts); } -function parse_ws(data, name) { - return name.substr(-4)===".bin" ? parse_ws_bin(data) : parse_worksheet(data); +function parse_ws(data, name, opts) { + return name.substr(-4)===".bin" ? parse_ws_bin(data, opts) : parse_ws_xml(data, opts); } -function parse_sty(data, name) { - return name.substr(-4)===".bin" ? parse_sty_bin(data) : parse_styles(data); +function parse_sty(data, name, opts) { + return name.substr(-4)===".bin" ? 
parse_sty_bin(data, opts) : parse_sty_xml(data, opts); } /* [MS-XLSB] 2.3 Record Enumeration */ var RecordEnum = { @@ -2624,7 +2631,19 @@ var RecordEnum = { 0xFFFF: { n:"", f:parsenoop } }; -function parseZip(zip) { +function fixopts(opts) { + var defaults = [ + ['cellNF', false], /* emit cell number format string as .z */ + + ['sheetStubs', true], /* emit empty cells */ + + ['WTF', false] /* WTF mode (do not use) */ + ]; + defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; }); +} +function parseZip(zip, opts) { + opts = opts || {}; + fixopts(opts); reset_cp(); var entries = Object.keys(zip.files); var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort(); @@ -2666,7 +2685,7 @@ function parseZip(zip) { try { /* TODO: remove these guards */ path = 'xl/worksheets/sheet' + (i+1) + (xlsb?'.bin':'.xml'); relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); - sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path); + sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path,opts); sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); } catch(e) {} } @@ -2676,7 +2695,7 @@ function parseZip(zip) { //var path = dir.sheets[i].replace(/^\//,''); path = 'xl/worksheets/sheet' + (i+1) + (xlsb?'.bin':'.xml'); relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); - sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path); + sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path,opts); sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); } catch(e) {/*console.error(e);*/} } @@ -2708,7 +2727,7 @@ function readSync(data, options) { case "base64": zip = new jszip(d, { base64:true }); break; case "binary": zip = new jszip(d, { base64:false }); break; } - return parseZip(zip); + return parseZip(zip, o); } function readFileSync(data, options) {