version bump 0.5.0: changes to cell interface
The cell object now stores the formatted text in the `.w` field; the raw value and type are preserved in the `.v` and `.t` fields, and the accessors have been updated to use the new field. Updates for older OOXML files: ECMA-376 makes reference to the old `sstItem` shared string item, and [MS-XLSX] defines additional xmlns values for the workbook.
This commit is contained in:
parent
5c4bf62b83
commit
a96d8964e2
33
README.md
33
README.md
@ -1,6 +1,6 @@
|
||||
# xlsx
|
||||
|
||||
Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the
|
||||
Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the
|
||||
ISO 29500 Office Open XML specifications, [MS-XLSB], and related documents.
|
||||
|
||||
## Installation
|
||||
@ -19,7 +19,7 @@ In the browser:
|
||||
|
||||
The node version installs a binary `xlsx2csv` which can read XLSX/XLSM/XLSB files and output the contents in various formats. The source is available at `xlsx2csv.njs` in the bin directory.
|
||||
|
||||
See <http://oss.sheetjs.com/js-xlsx/> for a browser example.
|
||||
See <http://oss.sheetjs.com/js-xlsx/> for a browser example.
|
||||
|
||||
Note that older versions of IE do not support the HTML5 File API, so the base64 mode is provided for testing. On OSX you can get the base64 encoding by running:
|
||||
|
||||
@ -39,18 +39,24 @@ Simple usage (walks through every cell of every sheet and dumps the values):
|
||||
|
||||
Some helper functions in `XLSX.utils` generate different views of the sheets:
|
||||
|
||||
- `XLSX.utils.sheet_to_csv` generates CSV
|
||||
- `XLSX.utils.sheet_to_csv` generates CSV
|
||||
- `XLSX.utils.sheet_to_row_object_array` interprets sheets as tables with a header row and generates an array of objects
|
||||
- `XLSX.utils.get_formulae` generates a list of formulae
|
||||
|
||||
## Notes
|
||||
## Notes
|
||||
|
||||
`.SheetNames` is an ordered list of the sheets in the workbook
|
||||
|
||||
`.Sheets[sheetname]` returns a data structure representing the sheet. Each key
|
||||
that does not start with `!` corresponds to a cell (using `A-1` notation).
|
||||
that does not start with `!` corresponds to a cell (using `A-1` notation).
|
||||
|
||||
`.Sheets[sheetname][address].v` returns the value of the specified cell and `.Sheets[sheetname][address].t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) )
|
||||
`.Sheets[sheetname][address]` returns the specified cell:
|
||||
|
||||
- `.v` returns the raw value of the cell
|
||||
- `.w` returns the formatted text of the cell
|
||||
- `.t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented on page 4215 of ISO/IEC 29500-1:2012(E))
|
||||
|
||||
For dates, `.v` holds the raw date code from the sheet and `.w` holds the text
|
||||
|
||||
For more details:
|
||||
|
||||
@ -69,12 +75,25 @@ Tests utilize the mocha testing framework. Travis-CI and Sauce Labs links:
|
||||
|
||||
- <https://travis-ci.org/SheetJS/js-xlsx> for XLSX module in node
|
||||
- <https://travis-ci.org/SheetJS/SheetJS.github.io> for XLS* modules
|
||||
- <https://saucelabs.com/u/sheetjs> for XLS* modules using Sauce Labs
|
||||
- <https://saucelabs.com/u/sheetjs> for XLS* modules using Sauce Labs
|
||||
|
||||
## Test Files
|
||||
|
||||
Test files are housed in [another repo](https://github.com/SheetJS/test_files).
|
||||
|
||||
## Testing
|
||||
|
||||
`make test` will run the node-based tests. To run the in-browser tests, clone
|
||||
[the oss.sheetjs.com repo](https://github.com/SheetJS/SheetJS.github.io) and
|
||||
replace the xlsx.js file (then fire up the browser and go to `stress.html`):
|
||||
|
||||
```
|
||||
$ cp xlsx.js ../SheetJS.github.io
|
||||
$ cd ../SheetJS.github.io
|
||||
$ simplehttpserver # or "python -mSimpleHTTPServer" or "serve"
|
||||
$ open -a Chromium.app http://localhost:8000/stress.html
|
||||
```
|
||||
|
||||
## XLS Support
|
||||
|
||||
XLS is available in [js-xls](https://github.com/SheetJS/js-xls).
|
||||
|
@ -1 +1 @@
|
||||
XLSX.version = '0.4.3';
|
||||
XLSX.version = '0.5.0';
|
||||
|
@ -133,7 +133,7 @@ var parse_sst_xml = function(data) {
|
||||
/* 18.4.9 sst CT_Sst */
|
||||
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
|
||||
if(isval(sst)) {
|
||||
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; });
|
||||
s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; });
|
||||
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
|
||||
}
|
||||
return s;
|
||||
|
@ -70,12 +70,7 @@ function parse_worksheet(data) {
|
||||
var cf = styles.CellXf[cell.s];
|
||||
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
|
||||
}
|
||||
p.raw = p.v;
|
||||
p.rawt = p.t;
|
||||
try {
|
||||
p.v = SSF.format(fmtid,p.v,_ssfopts);
|
||||
p.t = 'str';
|
||||
} catch(e) { p.v = p.raw; p.t = p.rawt; }
|
||||
try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }
|
||||
|
||||
s[cell.r] = p;
|
||||
});
|
||||
|
@ -1,4 +1,8 @@
|
||||
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
|
||||
var XMLNS_WB = [
|
||||
'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
|
||||
'http://schemas.microsoft.com/office/excel/2006/main',
|
||||
'http://schemas.microsoft.com/office/excel/2006/2'
|
||||
];
|
||||
|
||||
/* 18.2 Workbook */
|
||||
function parse_workbook(data) {
|
||||
@ -102,7 +106,7 @@ function parse_workbook(data) {
|
||||
case '</mc:AlternateContent>': pass=false; break;
|
||||
}
|
||||
});
|
||||
if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns);
|
||||
if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns);
|
||||
|
||||
var z;
|
||||
/* defaults */
|
||||
|
@ -16,9 +16,12 @@ function parseZip(zip) {
|
||||
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
|
||||
|
||||
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
|
||||
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
|
||||
var props = {}, propdata = "";
|
||||
try {
|
||||
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
|
||||
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
|
||||
var props = propdata !== "" ? parseProps(propdata) : {};
|
||||
props = propdata !== "" ? parseProps(propdata) : {};
|
||||
} catch(e) { }
|
||||
var deps = {};
|
||||
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
|
||||
var sheets = {}, i=0;
|
||||
|
@ -20,7 +20,8 @@ function sheet_to_row_object_array(sheet, opts){
|
||||
for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) {
|
||||
val = sheet[encode_cell({c:C,r:R})];
|
||||
if(!val) continue;
|
||||
switch(val.t) {
|
||||
if(val.w) hdr[C] = val.w;
|
||||
else switch(val.t) {
|
||||
case 's': case 'str': hdr[C] = val.v; break;
|
||||
case 'n': hdr[C] = val.v; break;
|
||||
}
|
||||
@ -33,7 +34,7 @@ function sheet_to_row_object_array(sheet, opts){
|
||||
for (C = r.s.c; C <= r.e.c; ++C) {
|
||||
val = sheet[encode_cell({c: C,r: R})];
|
||||
if(!val || !val.t) continue;
|
||||
if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; }
|
||||
if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; }
|
||||
else switch(val.t){
|
||||
case 's': case 'str': case 'b': case 'n':
|
||||
if(val.v !== undefined) {
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "xlsx",
|
||||
"version": "0.4.3",
|
||||
"version": "0.5.0",
|
||||
"author": "sheetjs",
|
||||
"description": "XLSB / XLSX / XLSM parser",
|
||||
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],
|
||||
|
12
tests.lst
12
tests.lst
@ -51,6 +51,7 @@ apachepoi_52348.xlsx
|
||||
apachepoi_52716.xlsx
|
||||
apachepoi_53101.xlsx
|
||||
apachepoi_53282.xlsx
|
||||
apachepoi_53282b.xlsx
|
||||
apachepoi_53568.xlsx
|
||||
apachepoi_53734.xlsx
|
||||
apachepoi_53798.xlsx
|
||||
@ -66,6 +67,12 @@ apachepoi_54607.xlsx
|
||||
apachepoi_55640.xlsx
|
||||
apachepoi_55745.xlsx
|
||||
apachepoi_55850.xlsx
|
||||
apachepoi_55923.xlsx
|
||||
apachepoi_55924.xlsx
|
||||
apachepoi_55926.xlsx
|
||||
apachepoi_55927.xlsx
|
||||
apachepoi_56011.xlsx
|
||||
apachepoi_56017.xlsx
|
||||
apachepoi_AverageTaxRates.xlsx
|
||||
apachepoi_Booleans.xlsx
|
||||
apachepoi_BrNotClosed.xlsx
|
||||
@ -111,9 +118,10 @@ apachepoi_WithVariousData.xlsx
|
||||
apachepoi_atp.xlsx
|
||||
apachepoi_chart_sheet.xlsx.pending
|
||||
apachepoi_comments.xlsx
|
||||
apachepoi_headerFooterTest.xlsx
|
||||
apachepoi_picture.xlsx
|
||||
apachepoi_reordered_sheets.xlsx
|
||||
apachepoi_sample-beta.xlsx.pending
|
||||
apachepoi_sample-beta.xlsx
|
||||
apachepoi_sample.xlsx
|
||||
apachepoi_shared_formulas.xlsx
|
||||
apachepoi_sheetProtection_allLocked.xlsx
|
||||
@ -154,7 +162,7 @@ openpyxl_g_empty-with-styles.xlsx
|
||||
openpyxl_g_empty.xlsx
|
||||
openpyxl_g_empty_libre.xlsx
|
||||
openpyxl_g_empty_no_dimensions.xlsx
|
||||
openpyxl_g_empty_with_no_properties.xlsx.pending
|
||||
openpyxl_g_empty_with_no_properties.xlsx
|
||||
openpyxl_g_guess_types.xlsx
|
||||
openpyxl_g_libreoffice_nrt.xlsx
|
||||
openpyxl_g_merge_range.xlsx
|
||||
|
@ -1 +1 @@
|
||||
Subproject commit a9fc7e00949b0f7621f0b51e0ed5b139bac8e109
|
||||
Subproject commit dc341311b12ebd490dd876086e822eb992f45e49
|
31
xlsx.js
31
xlsx.js
@ -420,7 +420,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa
|
||||
make_ssf(SSF);
|
||||
var XLSX = {};
|
||||
(function(XLSX){
|
||||
XLSX.version = '0.4.3';
|
||||
XLSX.version = '0.5.0';
|
||||
var current_codepage, current_cptable, cptable;
|
||||
if(typeof module !== "undefined" && typeof require !== 'undefined') {
|
||||
if(typeof cptable === 'undefined') cptable = require('codepage');
|
||||
@ -932,7 +932,7 @@ var parse_sst_xml = function(data) {
|
||||
/* 18.4.9 sst CT_Sst */
|
||||
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
|
||||
if(isval(sst)) {
|
||||
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; });
|
||||
s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; });
|
||||
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
|
||||
}
|
||||
return s;
|
||||
@ -1385,12 +1385,7 @@ function parse_worksheet(data) {
|
||||
var cf = styles.CellXf[cell.s];
|
||||
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
|
||||
}
|
||||
p.raw = p.v;
|
||||
p.rawt = p.t;
|
||||
try {
|
||||
p.v = SSF.format(fmtid,p.v,_ssfopts);
|
||||
p.t = 'str';
|
||||
} catch(e) { p.v = p.raw; p.t = p.rawt; }
|
||||
try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }
|
||||
|
||||
s[cell.r] = p;
|
||||
});
|
||||
@ -1635,7 +1630,11 @@ var CustomWBViewDef = {
|
||||
xWindow: '0',
|
||||
yWindow: '0'
|
||||
};
|
||||
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
|
||||
var XMLNS_WB = [
|
||||
'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
|
||||
'http://schemas.microsoft.com/office/excel/2006/main',
|
||||
'http://schemas.microsoft.com/office/excel/2006/2'
|
||||
];
|
||||
|
||||
/* 18.2 Workbook */
|
||||
function parse_workbook(data) {
|
||||
@ -1739,7 +1738,7 @@ function parse_workbook(data) {
|
||||
case '</mc:AlternateContent>': pass=false; break;
|
||||
}
|
||||
});
|
||||
if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns);
|
||||
if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns);
|
||||
|
||||
var z;
|
||||
/* defaults */
|
||||
@ -2656,9 +2655,12 @@ function parseZip(zip) {
|
||||
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
|
||||
|
||||
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
|
||||
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
|
||||
var props = {}, propdata = "";
|
||||
try {
|
||||
propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
|
||||
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
|
||||
var props = propdata !== "" ? parseProps(propdata) : {};
|
||||
props = propdata !== "" ? parseProps(propdata) : {};
|
||||
} catch(e) { }
|
||||
var deps = {};
|
||||
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
|
||||
var sheets = {}, i=0;
|
||||
@ -2755,7 +2757,8 @@ function sheet_to_row_object_array(sheet, opts){
|
||||
for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) {
|
||||
val = sheet[encode_cell({c:C,r:R})];
|
||||
if(!val) continue;
|
||||
switch(val.t) {
|
||||
if(val.w) hdr[C] = val.w;
|
||||
else switch(val.t) {
|
||||
case 's': case 'str': hdr[C] = val.v; break;
|
||||
case 'n': hdr[C] = val.v; break;
|
||||
}
|
||||
@ -2768,7 +2771,7 @@ function sheet_to_row_object_array(sheet, opts){
|
||||
for (C = r.s.c; C <= r.e.c; ++C) {
|
||||
val = sheet[encode_cell({c: C,r: R})];
|
||||
if(!val || !val.t) continue;
|
||||
if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; }
|
||||
if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; }
|
||||
else switch(val.t){
|
||||
case 's': case 'str': case 'b': case 'n':
|
||||
if(val.v !== undefined) {
|
||||
|
Loading…
Reference in New Issue
Block a user