version bump 0.5.0: changes to cell interface

Cell object stores the formatted text in the `.w` field
- Raw format and types are preserved in the `.t`, `.v` fields
- Accessors have been updated to use the `.w` field when it is present

updates for older OOXML:
- ECMA-376 makes reference to old sstItem shared string item
- [MS-XLSX] xmlns for workbook
This commit is contained in:
SheetJS 2014-02-03 19:00:44 -05:00
parent 5c4bf62b83
commit a96d8964e2
11 changed files with 72 additions and 39 deletions

View File

@ -1,6 +1,6 @@
# xlsx
Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the
Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the
ISO 29500 Office Open XML specifications, [MS-XLSB], and related documents.
## Installation
@ -19,7 +19,7 @@ In the browser:
The node version installs a binary `xlsx2csv` which can read XLSX/XLSM/XLSB files and output the contents in various formats. The source is available at `xlsx2csv.njs` in the bin directory.
See <http://oss.sheetjs.com/js-xlsx/> for a browser example.
See <http://oss.sheetjs.com/js-xlsx/> for a browser example.
Note that older versions of IE do not support the HTML5 File API, so the base64 mode is provided for testing. On OSX you can get the base64 encoding by running:
@ -39,18 +39,24 @@ Simple usage (walks through every cell of every sheet and dumps the values):
Some helper functions in `XLSX.utils` generate different views of the sheets:
- `XLSX.utils.sheet_to_csv` generates CSV
- `XLSX.utils.sheet_to_csv` generates CSV
- `XLSX.utils.sheet_to_row_object_array` interprets sheets as tables with a header row and generates an array of objects
- `XLSX.utils.get_formulae` generates a list of formulae
## Notes
## Notes
`.SheetNames` is an ordered list of the sheets in the workbook
`.Sheets[sheetname]` returns a data structure representing the sheet. Each key
that does not start with `!` corresponds to a cell (using `A-1` notation).
that does not start with `!` corresponds to a cell (using `A-1` notation).
`.Sheets[sheetname][address].v` returns the value of the specified cell and `.Sheets[sheetname][address].t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) )
`.Sheets[sheetname][address]` returns the specified cell:
- `.v` returns the raw value of the cell
- `.w` returns the formatted text of the cell
- `.t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented on page 4215 of ISO/IEC 29500-1:2012(E))
For dates, `.v` holds the raw date code from the sheet and `.w` holds the formatted text
For more details:
@ -69,12 +75,25 @@ Tests utilize the mocha testing framework. Travis-CI and Sauce Labs links:
- <https://travis-ci.org/SheetJS/js-xlsx> for XLSX module in node
- <https://travis-ci.org/SheetJS/SheetJS.github.io> for XLS* modules
- <https://saucelabs.com/u/sheetjs> for XLS* modules using Sauce Labs
- <https://saucelabs.com/u/sheetjs> for XLS* modules using Sauce Labs
## Test Files
Test files are housed in [another repo](https://github.com/SheetJS/test_files).
## Testing
`make test` will run the node-based tests. To run the in-browser tests, clone
[the oss.sheetjs.com repo](https://github.com/SheetJS/SheetJS.github.io) and
replace the xlsx.js file (then fire up the browser and go to `stress.html`):
```
$ cp xlsx.js ../SheetJS.github.io
$ cd ../SheetJS.github.io
$ simplehttpserver # or "python -mSimpleHTTPServer" or "serve"
$ open -a Chromium.app http://localhost:8000/stress.html
```
## XLS Support
XLS is available in [js-xls](https://github.com/SheetJS/js-xls).

View File

@ -1 +1 @@
XLSX.version = '0.4.3';
XLSX.version = '0.5.0';

View File

@ -133,7 +133,7 @@ var parse_sst_xml = function(data) {
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(isval(sst)) {
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; });
s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; });
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;

View File

@ -70,12 +70,7 @@ function parse_worksheet(data) {
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
}
p.raw = p.v;
p.rawt = p.t;
try {
p.v = SSF.format(fmtid,p.v,_ssfopts);
p.t = 'str';
} catch(e) { p.v = p.raw; p.t = p.rawt; }
try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }
s[cell.r] = p;
});

View File

@ -1,4 +1,8 @@
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
var XMLNS_WB = [
'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
'http://schemas.microsoft.com/office/excel/2006/main',
'http://schemas.microsoft.com/office/excel/2006/2'
];
/* 18.2 Workbook */
function parse_workbook(data) {
@ -102,7 +106,7 @@ function parse_workbook(data) {
case '</mc:AlternateContent>': pass=false; break;
}
});
if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns);
if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns);
var z;
/* defaults */

View File

@ -16,9 +16,12 @@ function parseZip(zip) {
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
var props = {}, propdata = "";
try {
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
var props = propdata !== "" ? parseProps(propdata) : {};
props = propdata !== "" ? parseProps(propdata) : {};
} catch(e) { }
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;

View File

@ -20,7 +20,8 @@ function sheet_to_row_object_array(sheet, opts){
for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c:C,r:R})];
if(!val) continue;
switch(val.t) {
if(val.w) hdr[C] = val.w;
else switch(val.t) {
case 's': case 'str': hdr[C] = val.v; break;
case 'n': hdr[C] = val.v; break;
}
@ -33,7 +34,7 @@ function sheet_to_row_object_array(sheet, opts){
for (C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c: C,r: R})];
if(!val || !val.t) continue;
if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; }
if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; }
else switch(val.t){
case 's': case 'str': case 'b': case 'n':
if(val.v !== undefined) {

View File

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.4.3",
"version": "0.5.0",
"author": "sheetjs",
"description": "XLSB / XLSX / XLSM parser",
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],

View File

@ -51,6 +51,7 @@ apachepoi_52348.xlsx
apachepoi_52716.xlsx
apachepoi_53101.xlsx
apachepoi_53282.xlsx
apachepoi_53282b.xlsx
apachepoi_53568.xlsx
apachepoi_53734.xlsx
apachepoi_53798.xlsx
@ -66,6 +67,12 @@ apachepoi_54607.xlsx
apachepoi_55640.xlsx
apachepoi_55745.xlsx
apachepoi_55850.xlsx
apachepoi_55923.xlsx
apachepoi_55924.xlsx
apachepoi_55926.xlsx
apachepoi_55927.xlsx
apachepoi_56011.xlsx
apachepoi_56017.xlsx
apachepoi_AverageTaxRates.xlsx
apachepoi_Booleans.xlsx
apachepoi_BrNotClosed.xlsx
@ -111,9 +118,10 @@ apachepoi_WithVariousData.xlsx
apachepoi_atp.xlsx
apachepoi_chart_sheet.xlsx.pending
apachepoi_comments.xlsx
apachepoi_headerFooterTest.xlsx
apachepoi_picture.xlsx
apachepoi_reordered_sheets.xlsx
apachepoi_sample-beta.xlsx.pending
apachepoi_sample-beta.xlsx
apachepoi_sample.xlsx
apachepoi_shared_formulas.xlsx
apachepoi_sheetProtection_allLocked.xlsx
@ -154,7 +162,7 @@ openpyxl_g_empty-with-styles.xlsx
openpyxl_g_empty.xlsx
openpyxl_g_empty_libre.xlsx
openpyxl_g_empty_no_dimensions.xlsx
openpyxl_g_empty_with_no_properties.xlsx.pending
openpyxl_g_empty_with_no_properties.xlsx
openpyxl_g_guess_types.xlsx
openpyxl_g_libreoffice_nrt.xlsx
openpyxl_g_merge_range.xlsx

@ -1 +1 @@
Subproject commit a9fc7e00949b0f7621f0b51e0ed5b139bac8e109
Subproject commit dc341311b12ebd490dd876086e822eb992f45e49

31
xlsx.js
View File

@ -420,7 +420,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa
make_ssf(SSF);
var XLSX = {};
(function(XLSX){
XLSX.version = '0.4.3';
XLSX.version = '0.5.0';
var current_codepage, current_cptable, cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
@ -932,7 +932,7 @@ var parse_sst_xml = function(data) {
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(isval(sst)) {
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; });
s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; });
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
@ -1385,12 +1385,7 @@ function parse_worksheet(data) {
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
}
p.raw = p.v;
p.rawt = p.t;
try {
p.v = SSF.format(fmtid,p.v,_ssfopts);
p.t = 'str';
} catch(e) { p.v = p.raw; p.t = p.rawt; }
try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }
s[cell.r] = p;
});
@ -1635,7 +1630,11 @@ var CustomWBViewDef = {
xWindow: '0',
yWindow: '0'
};
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
var XMLNS_WB = [
'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
'http://schemas.microsoft.com/office/excel/2006/main',
'http://schemas.microsoft.com/office/excel/2006/2'
];
/* 18.2 Workbook */
function parse_workbook(data) {
@ -1739,7 +1738,7 @@ function parse_workbook(data) {
case '</mc:AlternateContent>': pass=false; break;
}
});
if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns);
if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns);
var z;
/* defaults */
@ -2656,9 +2655,12 @@ function parseZip(zip) {
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
var props = {}, propdata = "";
try {
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
var props = propdata !== "" ? parseProps(propdata) : {};
props = propdata !== "" ? parseProps(propdata) : {};
} catch(e) { }
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;
@ -2755,7 +2757,8 @@ function sheet_to_row_object_array(sheet, opts){
for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c:C,r:R})];
if(!val) continue;
switch(val.t) {
if(val.w) hdr[C] = val.w;
else switch(val.t) {
case 's': case 'str': hdr[C] = val.v; break;
case 'n': hdr[C] = val.v; break;
}
@ -2768,7 +2771,7 @@ function sheet_to_row_object_array(sheet, opts){
for (C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c: C,r: R})];
if(!val || !val.t) continue;
if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; }
if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; }
else switch(val.t){
case 's': case 'str': case 'b': case 'n':
if(val.v !== undefined) {