From a96d8964e20ee20c20fe2cd6cd3ff29b258388d9 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Mon, 3 Feb 2014 19:00:44 -0500 Subject: [PATCH] version bump 0.5.0: changes to cell interface Cell object stores the formatted text in the `.w` field - Raw format and types are preserved in the `.t`, `.v` fields - Accessors have been updated to use the field updates for older OOXML: - ECMA-376 makes reference to old sstItem shared string item - [MS-XLSX] xmlns for workbook --- README.md | 33 ++++++++++++++++++++++++++------- bits/31_version.js | 2 +- bits/52_sstxml.js | 2 +- bits/72_wsxml.js | 7 +------ bits/77_wbxml.js | 8 ++++++-- bits/85_parsezip.js | 7 +++++-- bits/90_utils.js | 5 +++-- package.json | 2 +- tests.lst | 12 ++++++++++-- tests/files | 2 +- xlsx.js | 31 +++++++++++++++++-------------- 11 files changed, 72 insertions(+), 39 deletions(-) diff --git a/README.md b/README.md index 1140de4..bcfbf0f 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # xlsx -Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the +Currently a parser for XLSX/XLSM/XLSB files. Cleanroom implementation from the ISO 29500 Office Open XML specifications, [MS-XLSB], and related documents. ## Installation @@ -19,7 +19,7 @@ In the browser: The node version installs a binary `xlsx2csv` which can read XLSX/XLSM/XLSB files and output the contents in various formats. The source is available at `xlsx2csv.njs` in the bin directory. -See for a browser example. +See for a browser example. Note that older versions of IE does not support HTML5 File API, so the base64 mode is provided for testing. On OSX you can get the base64 encoding by running: @@ -39,18 +39,24 @@ Simple usage (walks through every cell of every sheet and dumps the values): Some helper functions in `XLSX.utils` generate different views of the sheets: -- `XLSX.utils.sheet_to_csv` generates CSV +- `XLSX.utils.sheet_to_csv` generates CSV - `XLSX.utils.sheet_to_row_object_array` interprets sheets as tables with a header column and generates an array of objects - `XLSX.utils.get_formulae` generates a list of formulae -## Notes +## Notes `.SheetNames` is an ordered list of the sheets in the workbook `.Sheets[sheetname]` returns a data structure representing the sheet. Each key -that does not start with `!` corresponds to a cell (using `A-1` notation). +that does not start with `!` corresponds to a cell (using `A-1` notation). -`.Sheets[sheetname][address].v` returns the value of the specified cell and `.Sheets[sheetname][address].t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) ) +`.Sheets[sheetname][address]` returns the specified cell: + +- `.v` returns the raw value of the cell +- `.w` returns the formatted text of the cell +- `.t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) ) + +For dates, `.v` holds the raw date code from the sheet and `.w` holds the text For more details: @@ -69,12 +75,25 @@ Tests utilize the mocha testing framework. Travis-CI and Sauce Labs links: - for XLSX module in node - for XLS* modules - - for XLS* modules using Sauce Labs + - for XLS* modules using Sauce Labs ## Test Files Test files are housed in [another repo](https://github.com/SheetJS/test_files). +## Testing + +`make test` will run the node-based tests. To run the in-browser tests, clone +[the oss.sheetjs.com repo](https://github.com/SheetJS/SheetJS.github.io) and +replace the xlsx.js file (then fire up the browser and go to `stress.html`): + +``` +$ cp xlsx.js ../SheetJS.github.io +$ cd ../SheetJS.github.io +$ simplehttpserver # or "python -mSimpleHTTPServer" or "serve" +$ open -a Chromium.app http://localhost:8000/stress.html +``` + ## XLS Support XLS is available in [js-xls](https://github.com/SheetJS/js-xls). diff --git a/bits/31_version.js b/bits/31_version.js index 78e331d..8c49085 100644 --- a/bits/31_version.js +++ b/bits/31_version.js @@ -1 +1 @@ -XLSX.version = '0.4.3'; +XLSX.version = '0.5.0'; diff --git a/bits/52_sstxml.js b/bits/52_sstxml.js index 4f68ec4..078dd5a 100644 --- a/bits/52_sstxml.js +++ b/bits/52_sstxml.js @@ -133,7 +133,7 @@ var parse_sst_xml = function(data) { /* 18.4.9 sst CT_Sst */ var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m")); if(isval(sst)) { - s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; }); + s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; }); sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; } return s; diff --git a/bits/72_wsxml.js b/bits/72_wsxml.js index d24c851..6eb44b6 100644 --- a/bits/72_wsxml.js +++ b/bits/72_wsxml.js @@ -70,12 +70,7 @@ function parse_worksheet(data) { var cf = styles.CellXf[cell.s]; if(cf && cf.numFmtId) fmtid = cf.numFmtId; } - p.raw = p.v; - p.rawt = p.t; - try { - p.v = SSF.format(fmtid,p.v,_ssfopts); - p.t = 'str'; - } catch(e) { p.v = p.raw; p.t = p.rawt; } + try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { } s[cell.r] = p; }); diff --git a/bits/77_wbxml.js b/bits/77_wbxml.js index b2ee513..56bc457 100644 --- a/bits/77_wbxml.js +++ b/bits/77_wbxml.js @@ -1,4 +1,8 @@ -var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; +var XMLNS_WB = [ + 'http://schemas.openxmlformats.org/spreadsheetml/2006/main', + 'http://schemas.microsoft.com/office/excel/2006/main', + 'http://schemas.microsoft.com/office/excel/2006/2' +]; /* 18.2 Workbook */ function parse_workbook(data) { @@ -102,7 +106,7 @@ function parse_workbook(data) { case '': pass=false; break; } }); - if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns); + if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns); var z; /* defaults */ diff --git a/bits/85_parsezip.js b/bits/85_parsezip.js index 64bdfde..3dcd589 100644 --- a/bits/85_parsezip.js +++ b/bits/85_parsezip.js @@ -16,9 +16,12 @@ function parseZip(zip) { if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style); var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]); - var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : ""; + var props = {}, propdata = ""; + try { + propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : ""; propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : ""; - var props = propdata !== "" ? parseProps(propdata) : {}; + props = propdata !== "" ? parseProps(propdata) : {}; + } catch(e) { } var deps = {}; if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,'')))); var sheets = {}, i=0; diff --git a/bits/90_utils.js b/bits/90_utils.js index 60f3454..fd31f2c 100644 --- a/bits/90_utils.js +++ b/bits/90_utils.js @@ -20,7 +20,8 @@ function sheet_to_row_object_array(sheet, opts){ for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) { val = sheet[encode_cell({c:C,r:R})]; if(!val) continue; - switch(val.t) { + if(val.w) hdr[C] = val.w; + else switch(val.t) { case 's': case 'str': hdr[C] = val.v; break; case 'n': hdr[C] = val.v; break; } @@ -33,7 +34,7 @@ function sheet_to_row_object_array(sheet, opts){ for (C = r.s.c; C <= r.e.c; ++C) { val = sheet[encode_cell({c: C,r: R})]; if(!val || !val.t) continue; - if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; } + if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; } else switch(val.t){ case 's': case 'str': case 'b': case 'n': if(val.v !== undefined) { diff --git a/package.json b/package.json index ce5c6f3..bfd3f31 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "xlsx", - "version": "0.4.3", + "version": "0.5.0", "author": "sheetjs", "description": "XLSB / XLSX / XLSM parser", "keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ], diff --git a/tests.lst b/tests.lst index 090af1f..354b3ff 100644 --- a/tests.lst +++ b/tests.lst @@ -51,6 +51,7 @@ apachepoi_52348.xlsx apachepoi_52716.xlsx apachepoi_53101.xlsx apachepoi_53282.xlsx +apachepoi_53282b.xlsx apachepoi_53568.xlsx apachepoi_53734.xlsx apachepoi_53798.xlsx @@ -66,6 +67,12 @@ apachepoi_54607.xlsx apachepoi_55640.xlsx apachepoi_55745.xlsx apachepoi_55850.xlsx +apachepoi_55923.xlsx +apachepoi_55924.xlsx +apachepoi_55926.xlsx +apachepoi_55927.xlsx +apachepoi_56011.xlsx +apachepoi_56017.xlsx apachepoi_AverageTaxRates.xlsx apachepoi_Booleans.xlsx apachepoi_BrNotClosed.xlsx @@ -111,9 +118,10 @@ apachepoi_WithVariousData.xlsx apachepoi_atp.xlsx apachepoi_chart_sheet.xlsx.pending apachepoi_comments.xlsx +apachepoi_headerFooterTest.xlsx apachepoi_picture.xlsx apachepoi_reordered_sheets.xlsx -apachepoi_sample-beta.xlsx.pending +apachepoi_sample-beta.xlsx apachepoi_sample.xlsx apachepoi_shared_formulas.xlsx apachepoi_sheetProtection_allLocked.xlsx @@ -154,7 +162,7 @@ openpyxl_g_empty-with-styles.xlsx openpyxl_g_empty.xlsx openpyxl_g_empty_libre.xlsx openpyxl_g_empty_no_dimensions.xlsx -openpyxl_g_empty_with_no_properties.xlsx.pending +openpyxl_g_empty_with_no_properties.xlsx openpyxl_g_guess_types.xlsx openpyxl_g_libreoffice_nrt.xlsx openpyxl_g_merge_range.xlsx diff --git a/tests/files b/tests/files index a9fc7e0..dc34131 160000 --- a/tests/files +++ b/tests/files @@ -1 +1 @@ -Subproject commit a9fc7e00949b0f7621f0b51e0ed5b139bac8e109 +Subproject commit dc341311b12ebd490dd876086e822eb992f45e49 diff --git a/xlsx.js b/xlsx.js index fd7ca39..20c54a8 100644 --- a/xlsx.js +++ b/xlsx.js @@ -420,7 +420,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa make_ssf(SSF); var XLSX = {}; (function(XLSX){ -XLSX.version = '0.4.3'; +XLSX.version = '0.5.0'; var current_codepage, current_cptable, cptable; if(typeof module !== "undefined" && typeof require !== 'undefined') { if(typeof cptable === 'undefined') cptable = require('codepage'); @@ -932,7 +932,7 @@ var parse_sst_xml = function(data) { /* 18.4.9 sst CT_Sst */ var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m")); if(isval(sst)) { - s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si).filter(function(x) { return x; }); + s = sst[2].replace(/<(?:si|sstItem)>/g,"").split(/<\/(?:si|sstItem)>/).map(parse_si).filter(function(x) { return x; }); sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; } return s; @@ -1385,12 +1385,7 @@ function parse_worksheet(data) { var cf = styles.CellXf[cell.s]; if(cf && cf.numFmtId) fmtid = cf.numFmtId; } - p.raw = p.v; - p.rawt = p.t; - try { - p.v = SSF.format(fmtid,p.v,_ssfopts); - p.t = 'str'; - } catch(e) { p.v = p.raw; p.t = p.rawt; } + try { p.w = SSF.format(fmtid,p.v,_ssfopts); } catch(e) { } s[cell.r] = p; }); @@ -1635,7 +1630,11 @@ var CustomWBViewDef = { xWindow: '0', yWindow: '0' }; -var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; +var XMLNS_WB = [ + 'http://schemas.openxmlformats.org/spreadsheetml/2006/main', + 'http://schemas.microsoft.com/office/excel/2006/main', + 'http://schemas.microsoft.com/office/excel/2006/2' +]; /* 18.2 Workbook */ function parse_workbook(data) { @@ -1739,7 +1738,7 @@ function parse_workbook(data) { case '': pass=false; break; } }); - if(wb.xmlns !== XMLNS_WB) throw new Error("Unknown Namespace: " + wb.xmlns); + if(XMLNS_WB.indexOf(wb.xmlns) === -1) throw new Error("Unknown Namespace: " + wb.xmlns); var z; /* defaults */ @@ -2656,9 +2655,12 @@ function parseZip(zip) { if(dir.style) styles = parse_sty(getdata(getzipfile(zip, dir.style.replace(/^\//,''))),dir.style); var wb = parse_wb(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))), dir.workbooks[0]); - var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : ""; + var props = {}, propdata = ""; + try { + propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : ""; propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : ""; - var props = propdata !== "" ? parseProps(propdata) : {}; + props = propdata !== "" ? parseProps(propdata) : {}; + } catch(e) { } var deps = {}; if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,'')))); var sheets = {}, i=0; @@ -2755,7 +2757,8 @@ function sheet_to_row_object_array(sheet, opts){ for(R=r.s.r, C = r.s.c; C <= r.e.c; ++C) { val = sheet[encode_cell({c:C,r:R})]; if(!val) continue; - switch(val.t) { + if(val.w) hdr[C] = val.w; + else switch(val.t) { case 's': case 'str': hdr[C] = val.v; break; case 'n': hdr[C] = val.v; break; } @@ -2768,7 +2771,7 @@ function sheet_to_row_object_array(sheet, opts){ for (C = r.s.c; C <= r.e.c; ++C) { val = sheet[encode_cell({c: C,r: R})]; if(!val || !val.t) continue; - if(typeof val.w !== 'undefined') { row[hdr[C]] = val.w; isempty = false; } + if(typeof val.w !== 'undefined' && !opts.raw) { row[hdr[C]] = val.w; isempty = false; } else switch(val.t){ case 's': case 'str': case 'b': case 'n': if(val.v !== undefined) {