version bump 0.5.3: options!

- read and readFile accept opts argument:
o cellNF (default false) true -> cell.z holds number format string
o sheetStubs (default true) false -> stub cells not emitted

- name consistency (parse_workbook -> parse_wb_xml)
- README updates
- CONTRIBUTING notes added
This commit is contained in:
SheetJS 2014-02-07 05:53:40 -05:00
parent 99d879ee30
commit 27af8a6d6a
15 changed files with 173 additions and 57 deletions

53
CONTRIBUTING.md Normal file

@ -0,0 +1,53 @@
# Contributing
The SheetJS Libraries should be free and clear to use in your projects. In
order to maintain that, every contributor must be vigilant.
There have been many projects in the past that have been very lax regarding
licensing, and we are of the opinion that those are ticking timebombs and that
no corporate product should depend on them.
# Required Reading
These are pretty short reads and emphasize the importance of proper licensing:
- https://github.com/kennethreitz/tablib/issues/114 (discussion of other tools)
- http://www.codinghorror.com/blog/2007/04/pick-a-license-any-license.html
# Pre-Contribution Checklist
Before thinking about contributing, make sure that:
- You are not, nor have ever been, an employee of Microsoft Corporation.
- You have not signed any NDAs or Shared Source Agreements with Microsoft
Corporation or a subsidiary.
- You have not consulted any existing relevant codebase (if you have, please
take note of which codebases were consulted).
If you cannot attest to each of these items, the best approach is to raise an
issue. If it is a particularly high-priority issue, please drop an email to
<sheetjs@gmail.com> and it will be prioritized.
# Intra-Contribution
Keep these in mind as you work:
- Your contributions are your original work. Take note of any resources you
consult in the process (and be extra careful not to use unlicensed code on
the internet).
- You are working on your own time. Unless they explicitly grant permission,
your employer may be the ultimate owner of your IP.
# Post-Contribution
Before contributions are merged, you will receive an email (at the address
associated with the git commit) and will be asked to confirm the aforementioned
items.

@ -43,7 +43,13 @@ Some helper functions in `XLSX.utils` generate different views of the sheets:
- `XLSX.utils.sheet_to_row_object_array` interprets sheets as tables with a header column and generates an array of objects
- `XLSX.utils.get_formulae` generates a list of formulae
## Notes
For more details:
- `bin/xlsx2csv.njs` is a tool for node
- `index.html` is the live demo
- `bits/90_utils.js` contains the logic for generating CSV and JSON from sheets
## Cell Object Description
`.SheetNames` is an ordered list of the sheets in the workbook
@ -52,17 +58,27 @@ that does not start with `!` corresponds to a cell (using `A-1` notation).
`.Sheets[sheetname][address]` returns the specified cell:
- `.v` returns the raw value of the cell
- `.w` returns the formatted text of the cell
- `.t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) )
- `.v` : the raw value of the cell
- `.w` : the formatted text of the cell (if applicable)
- `.t` : the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) )
- `.f` : the formula of the cell (if applicable)
- `.r` : the rich text encoding of a cell text (if applicable)
- `.h` : an HTML rendering of the rich text (if applicable)
- `.c` : comments associated with the cell
- `.z` : the number format string associated with the cell (if requested)
For dates, `.v` holds the raw date code from the sheet and `.w` holds the text
For more details:
## Options
- `bin/xlsx2csv.njs` is a tool for node
- `index.html` is the live demo
- `bits/90_utils.js` contains the logic for generating CSV and JSON from sheets
The exported `read` and `readFile` functions accept an options argument:
| Option Name | Default | Description |
| :---------- | ------: | :---------- |
| cellNF | false | Save number format string to the .z field |
| sheetStubs | true | Create cell objects for stub cells |
The defaults are enumerated in `bits/84_defaults.js`.
## Tested Environments
@ -94,6 +110,10 @@ $ simplehttpserver # or "python -mSimpleHTTPServer" or "serve"
$ open -a Chromium.app http://localhost:8000/stress.html
```
## Contributing
Due to the precarious nature of the Open Specifications Promise, it is very important to ensure that code is cleanroom. Consult CONTRIBUTING.md for details.
## XLS Support
XLS is available in [js-xls](https://github.com/SheetJS/js-xls).

@ -1 +1 @@
XLSX.version = '0.5.2';
XLSX.version = '0.5.3';

@ -43,7 +43,7 @@ function parseCXfs(t) {
}
/* 18.8 Styles CT_Stylesheet*/
function parse_styles(data) {
function parse_sty_xml(data) {
/* 18.8.39 styleSheet CT_Stylesheet */
var t;

@ -14,7 +14,7 @@ function parse_BrtXF(data, length) {
function parse_sty_bin(data) {
styles.NumberFmt = [];
for(var y in SSF._table) styles.NumberFmt[y] = SSF._table[y];
styles.CellXf = [];
var state = "";
var pass = false;

@ -1,5 +1,5 @@
/* 18.3 Worksheets */
function parse_worksheet(data) {
function parse_ws_xml(data, opts) {
if(!data) return data;
/* 18.3.1.99 worksheet CT_Worksheet */
var s = {};
@ -30,16 +30,19 @@ function parse_worksheet(data) {
var cref_cell = decode_cell(cref[1]);
idx = cref_cell.c;
}
if(refguess.s.c > idx) refguess.s.c = idx;
if(refguess.e.c < idx) refguess.e.c = idx;
var cell = parsexmltag((c.match(/<c[^>]*>/)||[c])[0]); delete cell[0];
var d = c.substr(c.indexOf('>')+1);
var p = {};
q.forEach(function(f){var x=d.match(matchtag(f));if(x)p[f]=unescapexml(x[1]);});
/* SCHEMA IS ACTUALLY INCORRECT HERE. IF A CELL HAS NO T, EMIT "" */
if(cell.t === undefined && p.v === undefined) { p.t = "str"; p.v = undefined; }
if(cell.t === undefined && p.v === undefined) {
if(!opts.sheetEmptyCells) return;
p.t = "str"; p.v = undefined;
}
else p.t = (cell.t ? cell.t : "n"); // default is "n" in schema
if(refguess.s.c > idx) refguess.s.c = idx;
if(refguess.e.c < idx) refguess.e.c = idx;
switch(p.t) {
case 'n': p.v = parseFloat(p.v); break;
case 's': {
@ -71,7 +74,10 @@ function parse_worksheet(data) {
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
}
try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }
try {
p.w = SSF.format(fmtid,p.v,_ssfopts);
if(opts.cellNF) p.z = SSF._table[fmtid];
} catch(e) { }
s[cell.r] = p;
});
});

@ -81,14 +81,14 @@ var parse_BrtFmlaBool = parsenoop;
var parse_BrtFmlaString = parsenoop;
/* [MS-XLSB] 2.1.7.61 Worksheet */
var parse_ws_bin = function(data) {
var parse_ws_bin = function(data, opts) {
if(!data) return data;
var s = {};
var ref;
var pass = false;
var row, p;
var row, p, cf;
recordhopper(data, function(val, R) {
switch(R.n) {
case 'BrtWsDim': ref = val; break;
@ -115,8 +115,9 @@ var parse_ws_bin = function(data) {
case 'str': if(p.v) p.v = utf8read(p.v); break;
}
if(val[3]) p.f = val[3];
if(styles.CellXf[val[0].iStyleRef]) try {
p.w = SSF.format(styles.CellXf[val[0].iStyleRef].ifmt,p.v,_ssfopts);
if((cf = styles.CellXf[val[0].iStyleRef])) try {
p.w = SSF.format(cf.ifmt,p.v,_ssfopts);
if(opts.cellNF) p.z = SSF._table[cf.ifmt];
} catch(e) { }
s[encode_cell({c:val[0].c,r:row.r})] = p;
break; // TODO

@ -5,7 +5,7 @@ var XMLNS_WB = [
];
/* 18.2 Workbook */
function parse_workbook(data) {
function parse_wb_xml(data) {
var wb = { AppVersion:{}, WBProps:{}, WBView:[], Sheets:[], CalcPr:{}, xmlns: "" };
var pass = false;
data.match(/<[^>]*>/g).forEach(function(x) {

@ -1,11 +1,11 @@
function parse_wb(data, name) {
return name.substr(-4)===".bin" ? parse_wb_bin(data) : parse_workbook(data);
function parse_wb(data, name, opts) {
return name.substr(-4)===".bin" ? parse_wb_bin(data, opts) : parse_wb_xml(data, opts);
}
function parse_ws(data, name) {
return name.substr(-4)===".bin" ? parse_ws_bin(data) : parse_worksheet(data);
function parse_ws(data, name, opts) {
return name.substr(-4)===".bin" ? parse_ws_bin(data, opts) : parse_ws_xml(data, opts);
}
function parse_sty(data, name) {
return name.substr(-4)===".bin" ? parse_sty_bin(data) : parse_styles(data);
function parse_sty(data, name, opts) {
return name.substr(-4)===".bin" ? parse_sty_bin(data, opts) : parse_sty_xml(data, opts);
}

10
bits/84_defaults.js Normal file

@ -0,0 +1,10 @@
/*
 * Fill in parser option defaults.
 *
 * `opts` is modified in place: every option listed below whose current value
 * is undefined is set to its default. Options already set by the caller
 * (including falsy values such as false, 0 or null) are left alone.
 * Nothing is returned.
 */
function fixopts(opts) {
	var defaults = [
		['cellNF', false], /* emit cell number format string as .z */
		['sheetStubs', true], /* emit empty cells */
		['WTF', false] /* WTF mode (do not use) */
	];
	for(var i = 0; i < defaults.length; ++i) {
		var key = defaults[i][0];
		if(typeof opts[key] !== 'undefined') continue; /* caller supplied a value */
		opts[key] = defaults[i][1];
	}
}

@ -1,4 +1,6 @@
function parseZip(zip) {
function parseZip(zip, opts) {
opts = opts || {};
fixopts(opts);
reset_cp();
var entries = Object.keys(zip.files);
var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
@ -40,7 +42,7 @@ function parseZip(zip) {
try { /* TODO: remove these guards */
path = 'xl/worksheets/sheet' + (i+1) + (xlsb?'.bin':'.xml');
relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels");
sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path);
sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path,opts);
sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path);
} catch(e) {}
}
@ -50,7 +52,7 @@ function parseZip(zip) {
//var path = dir.sheets[i].replace(/^\//,'');
path = 'xl/worksheets/sheet' + (i+1) + (xlsb?'.bin':'.xml');
relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels");
sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path);
sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path,opts);
sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path);
} catch(e) {/*console.error(e);*/}
}

@ -9,7 +9,7 @@ function readSync(data, options) {
case "base64": zip = new jszip(d, { base64:true }); break;
case "binary": zip = new jszip(d, { base64:false }); break;
}
return parseZip(zip);
return parseZip(zip, o);
}
function readFileSync(data, options) {

9
misc/xl.d.ts vendored

@ -1,7 +1,12 @@
interface Cell {
v;
t: string;
ixfe: number;
w?: string;
t?: string;
f?: string;
r?: string;
h?: string;
c?: any;
z?: string;
}
interface Worksheet {

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.5.2",
"version": "0.5.3",
"author": "sheetjs",
"description": "XLSB / XLSX / XLSM parser",
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],

65
xlsx.js

@ -420,7 +420,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa
make_ssf(SSF);
var XLSX = {};
(function(XLSX){
XLSX.version = '0.5.2';
XLSX.version = '0.5.3';
var current_codepage, current_cptable, cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
@ -994,7 +994,7 @@ function parseCXfs(t) {
}
/* 18.8 Styles CT_Stylesheet*/
function parse_styles(data) {
function parse_sty_xml(data) {
/* 18.8.39 styleSheet CT_Stylesheet */
var t;
@ -1032,7 +1032,7 @@ function parse_BrtXF(data, length) {
function parse_sty_bin(data) {
styles.NumberFmt = [];
for(var y in SSF._table) styles.NumberFmt[y] = SSF._table[y];
styles.CellXf = [];
var state = "";
var pass = false;
@ -1301,7 +1301,7 @@ var strs = {}; // shared strings
var _ssfopts = {}; // spreadsheet formatting options
/* 18.3 Worksheets */
function parse_worksheet(data) {
function parse_ws_xml(data, opts) {
if(!data) return data;
/* 18.3.1.99 worksheet CT_Worksheet */
var s = {};
@ -1332,16 +1332,19 @@ function parse_worksheet(data) {
var cref_cell = decode_cell(cref[1]);
idx = cref_cell.c;
}
if(refguess.s.c > idx) refguess.s.c = idx;
if(refguess.e.c < idx) refguess.e.c = idx;
var cell = parsexmltag((c.match(/<c[^>]*>/)||[c])[0]); delete cell[0];
var d = c.substr(c.indexOf('>')+1);
var p = {};
q.forEach(function(f){var x=d.match(matchtag(f));if(x)p[f]=unescapexml(x[1]);});
/* SCHEMA IS ACTUALLY INCORRECT HERE. IF A CELL HAS NO T, EMIT "" */
if(cell.t === undefined && p.v === undefined) { p.t = "str"; p.v = undefined; }
if(cell.t === undefined && p.v === undefined) {
if(!opts.sheetEmptyCells) return;
p.t = "str"; p.v = undefined;
}
else p.t = (cell.t ? cell.t : "n"); // default is "n" in schema
if(refguess.s.c > idx) refguess.s.c = idx;
if(refguess.e.c < idx) refguess.e.c = idx;
switch(p.t) {
case 'n': p.v = parseFloat(p.v); break;
case 's': {
@ -1373,7 +1376,10 @@ function parse_worksheet(data) {
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
}
try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }
try {
p.w = SSF.format(fmtid,p.v,_ssfopts);
if(opts.cellNF) p.z = SSF._table[fmtid];
} catch(e) { }
s[cell.r] = p;
});
});
@ -1464,14 +1470,14 @@ var parse_BrtFmlaBool = parsenoop;
var parse_BrtFmlaString = parsenoop;
/* [MS-XLSB] 2.1.7.61 Worksheet */
var parse_ws_bin = function(data) {
var parse_ws_bin = function(data, opts) {
if(!data) return data;
var s = {};
var ref;
var pass = false;
var row, p;
var row, p, cf;
recordhopper(data, function(val, R) {
switch(R.n) {
case 'BrtWsDim': ref = val; break;
@ -1498,8 +1504,9 @@ var parse_ws_bin = function(data) {
case 'str': if(p.v) p.v = utf8read(p.v); break;
}
if(val[3]) p.f = val[3];
if(styles.CellXf[val[0].iStyleRef]) try {
p.w = SSF.format(styles.CellXf[val[0].iStyleRef].ifmt,p.v,_ssfopts);
if((cf = styles.CellXf[val[0].iStyleRef])) try {
p.w = SSF.format(cf.ifmt,p.v,_ssfopts);
if(opts.cellNF) p.z = SSF._table[cf.ifmt];
} catch(e) { }
s[encode_cell({c:val[0].c,r:row.r})] = p;
break; // TODO
@ -1624,7 +1631,7 @@ var XMLNS_WB = [
];
/* 18.2 Workbook */
function parse_workbook(data) {
function parse_wb_xml(data) {
var wb = { AppVersion:{}, WBProps:{}, WBView:[], Sheets:[], CalcPr:{}, xmlns: "" };
var pass = false;
data.match(/<[^>]*>/g).forEach(function(x) {
@ -1793,16 +1800,16 @@ var parse_wb_bin = function(data) {
return wb;
};
function parse_wb(data, name) {
return name.substr(-4)===".bin" ? parse_wb_bin(data) : parse_workbook(data);
function parse_wb(data, name, opts) {
return name.substr(-4)===".bin" ? parse_wb_bin(data, opts) : parse_wb_xml(data, opts);
}
function parse_ws(data, name) {
return name.substr(-4)===".bin" ? parse_ws_bin(data) : parse_worksheet(data);
function parse_ws(data, name, opts) {
return name.substr(-4)===".bin" ? parse_ws_bin(data, opts) : parse_ws_xml(data, opts);
}
function parse_sty(data, name) {
return name.substr(-4)===".bin" ? parse_sty_bin(data) : parse_styles(data);
function parse_sty(data, name, opts) {
return name.substr(-4)===".bin" ? parse_sty_bin(data, opts) : parse_sty_xml(data, opts);
}
/* [MS-XLSB] 2.3 Record Enumeration */
var RecordEnum = {
@ -2624,7 +2631,19 @@ var RecordEnum = {
0xFFFF: { n:"", f:parsenoop }
};
function parseZip(zip) {
/*
 * Fill in parser option defaults.
 *
 * `opts` is modified in place: every option listed below whose current value
 * is undefined is set to its default. Options already set by the caller
 * (including falsy values such as false, 0 or null) are left alone.
 * Nothing is returned.
 */
function fixopts(opts) {
	var defaults = [
		['cellNF', false], /* emit cell number format string as .z */
		['sheetStubs', true], /* emit empty cells */
		['WTF', false] /* WTF mode (do not use) */
	];
	for(var i = 0; i < defaults.length; ++i) {
		var key = defaults[i][0];
		if(typeof opts[key] !== 'undefined') continue; /* caller supplied a value */
		opts[key] = defaults[i][1];
	}
}
function parseZip(zip, opts) {
opts = opts || {};
fixopts(opts);
reset_cp();
var entries = Object.keys(zip.files);
var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
@ -2666,7 +2685,7 @@ function parseZip(zip) {
try { /* TODO: remove these guards */
path = 'xl/worksheets/sheet' + (i+1) + (xlsb?'.bin':'.xml');
relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels");
sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path);
sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path,opts);
sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path);
} catch(e) {}
}
@ -2676,7 +2695,7 @@ function parseZip(zip) {
//var path = dir.sheets[i].replace(/^\//,'');
path = 'xl/worksheets/sheet' + (i+1) + (xlsb?'.bin':'.xml');
relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels");
sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path);
sheets[props.SheetNames[i]]=parse_ws(getdata(getzipfile(zip, path)),path,opts);
sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path);
} catch(e) {/*console.error(e);*/}
}
@ -2708,7 +2727,7 @@ function readSync(data, options) {
case "base64": zip = new jszip(d, { base64:true }); break;
case "binary": zip = new jszip(d, { base64:false }); break;
}
return parseZip(zip);
return parseZip(zip, o);
}
function readFileSync(data, options) {