diff --git a/README.md b/README.md
index 1140de4..bcfbf0f 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
# xlsx
-Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the
+Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the
ISO 29500 Office Open XML specifications, [MS-XLSB], and related documents.
## Installation
@@ -19,7 +19,7 @@ In the browser:
The node version installs a binary `xlsx2csv` which can read XLSX/XLSM/XLSB files and output the contents in various formats. The source is available at `xlsx2csv.njs` in the bin directory.
-See for a browser example.
+See for a browser example.
Note that older versions of IE does not support HTML5 File API, so the base64 mode is provided for testing. On OSX you can get the base64 encoding by running:
@@ -39,18 +39,24 @@ Simple usage (walks through every cell of every sheet and dumps the values):
Some helper functions in `XLSX.utils` generate different views of the sheets:
-- `XLSX.utils.sheet_to_csv` generates CSV
+- `XLSX.utils.sheet_to_csv` generates CSV
- `XLSX.utils.sheet_to_row_object_array` interprets sheets as tables with a header column and generates an array of objects
- `XLSX.utils.get_formulae` generates a list of formulae
-## Notes
+## Notes
`.SheetNames` is an ordered list of the sheets in the workbook
`.Sheets[sheetname]` returns a data structure representing the sheet. Each key
-that does not start with `!` corresponds to a cell (using `A-1` notation).
+that does not start with `!` corresponds to a cell (using `A-1` notation).
-`.Sheets[sheetname][address].v` returns the value of the specified cell and `.Sheets[sheetname][address].t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) )
+`.Sheets[sheetname][address]` returns the specified cell:
+
+- `.v` returns the raw value of the cell
+- `.w` returns the formatted text of the cell
+- `.t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented on page 4215 of ISO/IEC 29500-1:2012(E))
+
+For dates, `.v` holds the raw date code from the sheet and `.w` holds the formatted text.
For more details:
@@ -69,12 +75,25 @@ Tests utilize the mocha testing framework. Travis-CI and Sauce Labs links:
- for XLSX module in node
- for XLS* modules
- - for XLS* modules using Sauce Labs
+ - for XLS* modules using Sauce Labs
## Test Files
Test files are housed in [another repo](https://github.com/SheetJS/test_files).
+## Testing
+
+`make test` will run the node-based tests. To run the in-browser tests, clone
+[the oss.sheetjs.com repo](https://github.com/SheetJS/SheetJS.github.io) and
+replace its xlsx.js with your local build (then fire up the browser and go to `stress.html`):
+
+```
+$ cp xlsx.js ../SheetJS.github.io
+$ cd ../SheetJS.github.io
+$ simplehttpserver # or "python -mSimpleHTTPServer" or "serve"
+$ open -a Chromium.app http://localhost:8000/stress.html
+```
+
## XLS Support
XLS is available in [js-xls](https://github.com/SheetJS/js-xls).
diff --git a/bits/31_version.js b/bits/31_version.js
index 78e331d..8c49085 100644
--- a/bits/31_version.js
+++ b/bits/31_version.js
@@ -1 +1 @@
-XLSX.version = '0.4.3';
+XLSX.version = '0.5.0';
diff --git a/bits/52_sstxml.js b/bits/52_sstxml.js
index 4f68ec4..078dd5a 100644
--- a/bits/52_sstxml.js
+++ b/bits/52_sstxml.js
@@ -133,7 +133,7 @@ var parse_sst_xml = function(data) {
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m"));
if(isval(sst)) {
- s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; });
+ s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; });
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
diff --git a/bits/72_wsxml.js b/bits/72_wsxml.js
index d24c851..6eb44b6 100644
--- a/bits/72_wsxml.js
+++ b/bits/72_wsxml.js
@@ -70,12 +70,7 @@ function parse_worksheet(data) {
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
}
- p.raw = p.v;
- p.rawt = p.t;
- try {
- p.v = SSF.format(fmtid,p.v,_ssfopts);
- p.t = 'str';
- } catch(e) { p.v = p.raw; p.t = p.rawt; }
+ try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }
s[cell.r] = p;
});
diff --git a/bits/77_wbxml.js b/bits/77_wbxml.js
index b2ee513..56bc457 100644
--- a/bits/77_wbxml.js
+++ b/bits/77_wbxml.js
@@ -1,4 +1,8 @@
-var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
+var XMLNS_WB = [
+ 'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
+ 'http://schemas.microsoft.com/office/excel/2006/main',
+ 'http://schemas.microsoft.com/office/excel/2006/2'
+];
/* 18.2 Workbook */
function parse_workbook(data) {
@@ -102,7 +106,7 @@ function parse_workbook(data) {
case '': pass=false; break;
}
});
- if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns);
+ if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns);
var z;
/* defaults */
diff --git a/bits/85_parsezip.js b/bits/85_parsezip.js
index 64bdfde..3dcd589 100644
--- a/bits/85_parsezip.js
+++ b/bits/85_parsezip.js
@@ -16,9 +16,12 @@ function parseZip(zip) {
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
- var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
+ var props = {}, propdata = "";
+ try {
+ propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
- var props = propdata !== "" ? parseProps(propdata) : {};
+ props = propdata !== "" ? parseProps(propdata) : {};
+ } catch(e) { }
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;
diff --git a/bits/90_utils.js b/bits/90_utils.js
index 60f3454..fd31f2c 100644
--- a/bits/90_utils.js
+++ b/bits/90_utils.js
@@ -20,7 +20,8 @@ function sheet_to_row_object_array(sheet, opts){
for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c:C,r:R})];
if(!val) continue;
- switch(val.t) {
+ if(val.w) hdr[C] = val.w;
+ else switch(val.t) {
case 's': case 'str': hdr[C] = val.v; break;
case 'n': hdr[C] = val.v; break;
}
@@ -33,7 +34,7 @@ function sheet_to_row_object_array(sheet, opts){
for (C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c: C,r: R})];
if(!val || !val.t) continue;
- if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; }
+ if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; }
else switch(val.t){
case 's': case 'str': case 'b': case 'n':
if(val.v !== undefined) {
diff --git a/package.json b/package.json
index ce5c6f3..bfd3f31 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "xlsx",
- "version": "0.4.3",
+ "version": "0.5.0",
"author": "sheetjs",
"description": "XLSB / XLSX / XLSM parser",
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],
diff --git a/tests.lst b/tests.lst
index 090af1f..354b3ff 100644
--- a/tests.lst
+++ b/tests.lst
@@ -51,6 +51,7 @@ apachepoi_52348.xlsx
apachepoi_52716.xlsx
apachepoi_53101.xlsx
apachepoi_53282.xlsx
+apachepoi_53282b.xlsx
apachepoi_53568.xlsx
apachepoi_53734.xlsx
apachepoi_53798.xlsx
@@ -66,6 +67,12 @@ apachepoi_54607.xlsx
apachepoi_55640.xlsx
apachepoi_55745.xlsx
apachepoi_55850.xlsx
+apachepoi_55923.xlsx
+apachepoi_55924.xlsx
+apachepoi_55926.xlsx
+apachepoi_55927.xlsx
+apachepoi_56011.xlsx
+apachepoi_56017.xlsx
apachepoi_AverageTaxRates.xlsx
apachepoi_Booleans.xlsx
apachepoi_BrNotClosed.xlsx
@@ -111,9 +118,10 @@ apachepoi_WithVariousData.xlsx
apachepoi_atp.xlsx
apachepoi_chart_sheet.xlsx.pending
apachepoi_comments.xlsx
+apachepoi_headerFooterTest.xlsx
apachepoi_picture.xlsx
apachepoi_reordered_sheets.xlsx
-apachepoi_sample-beta.xlsx.pending
+apachepoi_sample-beta.xlsx
apachepoi_sample.xlsx
apachepoi_shared_formulas.xlsx
apachepoi_sheetProtection_allLocked.xlsx
@@ -154,7 +162,7 @@ openpyxl_g_empty-with-styles.xlsx
openpyxl_g_empty.xlsx
openpyxl_g_empty_libre.xlsx
openpyxl_g_empty_no_dimensions.xlsx
-openpyxl_g_empty_with_no_properties.xlsx.pending
+openpyxl_g_empty_with_no_properties.xlsx
openpyxl_g_guess_types.xlsx
openpyxl_g_libreoffice_nrt.xlsx
openpyxl_g_merge_range.xlsx
diff --git a/tests/files b/tests/files
index a9fc7e0..dc34131 160000
--- a/tests/files
+++ b/tests/files
@@ -1 +1 @@
-Subproject commit a9fc7e00949b0f7621f0b51e0ed5b139bac8e109
+Subproject commit dc341311b12ebd490dd876086e822eb992f45e49
diff --git a/xlsx.js b/xlsx.js
index fd7ca39..20c54a8 100644
--- a/xlsx.js
+++ b/xlsx.js
@@ -420,7 +420,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa
make_ssf(SSF);
var XLSX = {};
(function(XLSX){
-XLSX.version = '0.4.3';
+XLSX.version = '0.5.0';
var current_codepage, current_cptable, cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
@@ -932,7 +932,7 @@ var parse_sst_xml = function(data) {
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m"));
if(isval(sst)) {
- s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; });
+ s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; });
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
@@ -1385,12 +1385,7 @@ function parse_worksheet(data) {
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId) fmtid = cf.numFmtId;
}
- p.raw = p.v;
- p.rawt = p.t;
- try {
- p.v = SSF.format(fmtid,p.v,_ssfopts);
- p.t = 'str';
- } catch(e) { p.v = p.raw; p.t = p.rawt; }
+ try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { }
s[cell.r] = p;
});
@@ -1635,7 +1630,11 @@ var CustomWBViewDef = {
xWindow: '0',
yWindow: '0'
};
-var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
+var XMLNS_WB = [
+ 'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
+ 'http://schemas.microsoft.com/office/excel/2006/main',
+ 'http://schemas.microsoft.com/office/excel/2006/2'
+];
/* 18.2 Workbook */
function parse_workbook(data) {
@@ -1739,7 +1738,7 @@ function parse_workbook(data) {
case '': pass=false; break;
}
});
- if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns);
+ if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns);
var z;
/* defaults */
@@ -2656,9 +2655,12 @@ function parseZip(zip) {
if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style);
var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]);
- var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
+ var props = {}, propdata = "";
+ try {
+ propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
- var props = propdata !== "" ? parseProps(propdata) : {};
+ props = propdata !== "" ? parseProps(propdata) : {};
+ } catch(e) { }
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;
@@ -2755,7 +2757,8 @@ function sheet_to_row_object_array(sheet, opts){
for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c:C,r:R})];
if(!val) continue;
- switch(val.t) {
+ if(val.w) hdr[C] = val.w;
+ else switch(val.t) {
case 's': case 'str': hdr[C] = val.v; break;
case 'n': hdr[C] = val.v; break;
}
@@ -2768,7 +2771,7 @@ function sheet_to_row_object_array(sheet, opts){
for (C = r.s.c; C <= r.e.c; ++C) {
val = sheet[encode_cell({c: C,r: R})];
if(!val || !val.t) continue;
- if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; }
+ if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; }
else switch(val.t){
case 's': case 'str': case 'b': case 'n':
if(val.v !== undefined) {