From b4f703063414948e6af5bc028948a517483cf586 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Tue, 12 Nov 2013 08:29:53 -0800 Subject: [PATCH] version bump 0.3.3: malformed xlsx parsing Files that cannot be processed by Excel 2011 were removed from the suite --- bits/70_xlsx.js | 42 +++++++++++++++++++++++++++--------------- bits/90_utils.js | 6 +++--- bits/99_footer.js | 2 +- index.html | 8 ++++++++ package.json | 11 +++-------- tests.lst | 6 +++--- xlsx.js | 40 ++++++++++++++++++++++++++-------------- 7 files changed, 71 insertions(+), 44 deletions(-) diff --git a/bits/70_xlsx.js b/bits/70_xlsx.js index 498bcb5..5be23ed 100644 --- a/bits/70_xlsx.js +++ b/bits/70_xlsx.js @@ -98,6 +98,7 @@ var _ssfopts = {}; // spreadsheet formatting options /* 18.3 Worksheets */ function parseSheet(data) { + if(!data) return data; /* 18.3.1.99 worksheet CT_Worksheet */ var s = {}; @@ -146,7 +147,7 @@ function parseSheet(data) { } break; case 'str': if(p.v) p.v = utf8read(p.v); break; // normal string case 'inlineStr': - p.t = 'str'; p.v = unescapexml(d.match(matchtag('t'))[1]); + p.t = 'str'; p.v = unescapexml((d.match(matchtag('t'))||["",""])[1]); break; // inline string case 'b': switch(p.v) { @@ -160,7 +161,7 @@ function parseSheet(data) { } /* formatting */ - if(cell.s) { + if(cell.s && styles.CellXf) { /* TODO: second check is a hacked guard */ var cf = styles.CellXf[cell.s]; if(cf && cf.numFmtId && cf.numFmtId !== 0) { p.raw = p.v; @@ -238,10 +239,10 @@ function parseDeps(data) { var ctext = {}; function parseCT(data) { - if(!data) return data; + if(!data || !data.match) return data; var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [], coreprops: [], extprops: [], strs:[], xmlns: "" }; - data.match(/<[^>]*>/g).forEach(function(x) { + (data.match(/<[^>]*>/g)||[]).forEach(function(x) { var y = parsexmltag(x); switch(y[0]) { case ' + + + +JS-XLSX Live Demo + + JS-XLSX Live Demo
CSV
JSON
@@ -127,3 +133,5 @@ if(drop.addEventListener) { drop.addEventListener('drop', handleDrop, false); } + + diff --git a/package.json b/package.json index ca0a087..9bf63cf 100644 --- a/package.json +++ b/package.json @@ -1,11 +1,9 @@ { "name": "xlsx", - "version": "0.3.2", + "version": "0.3.3", "author": "sheetjs", "description": "(one day) a full-featured XLSX parser and writer. For now, primitive parser", - "keywords": [ - "xlsx", "office", "excel", "spreadsheet" - ], + "keywords": [ "xlsx", "office", "excel", "spreadsheet" ], "bin": { "xlsx2csv": "./bin/xlsx2csv.njs" }, @@ -18,10 +16,7 @@ "mocha":"", "jasmine-node": "x" }, - "repository": { - "type":"git", - "url": "git://github.com/SheetJS/js-xlsx.git" - }, + "repository": { "type":"git", "url":"git://github.com/SheetJS/js-xlsx.git" }, "scripts": { "pretest": "git submodule init && git submodule update", "test": "make mocha", diff --git a/tests.lst b/tests.lst index 065d938..8e37f60 100644 --- a/tests.lst +++ b/tests.lst @@ -11,7 +11,7 @@ 47668.xlsx 47737.xlsx 47804.xlsx -47813.xlsx +47813.xlsx.pending 47862.xlsx 47889.xlsx 48495.xlsx @@ -96,7 +96,7 @@ WithThreeCharts.xlsx WithTwoCharts.xlsx WithVariousData.xlsx atp.xlsx -chart_sheet.xlsx +chart_sheet.xlsx.pending comments.xlsx formula_stress_test.xlsx.pending interview.xlsx @@ -105,7 +105,7 @@ mixed_sheets.xlsx named_ranges_2011.xlsx picture.xlsx reordered_sheets.xlsx -sample-beta.xlsx +sample-beta.xlsx.pending sample.xlsx shared_formulas.xlsx sheetProtection_allLocked.xlsx diff --git a/xlsx.js b/xlsx.js index 1c030f8..052f607 100644 --- a/xlsx.js +++ b/xlsx.js @@ -529,6 +529,7 @@ var _ssfopts = {}; // spreadsheet formatting options /* 18.3 Worksheets */ function parseSheet(data) { + if(!data) return data; /* 18.3.1.99 worksheet CT_Worksheet */ var s = {}; @@ -577,7 +578,7 @@ function parseSheet(data) { } break; case 'str': if(p.v) p.v = utf8read(p.v); break; // normal string case 'inlineStr': - p.t = 'str'; p.v = unescapexml(d.match(matchtag('t'))[1]); + p.t = 'str'; p.v = unescapexml((d.match(matchtag('t'))||["",""])[1]); break; // inline string case 'b': switch(p.v) { @@ -591,7 +592,7 @@ function parseSheet(data) { } /* formatting */ - if(cell.s) { + if(cell.s && styles.CellXf) { /* TODO: second check is a hacked guard */ var cf = styles.CellXf[cell.s]; if(cf && cf.numFmtId && cf.numFmtId !== 0) { p.raw = p.v; @@ -669,7 +670,7 @@ function parseDeps(data) { var ctext = {}; function parseCT(data) { - if(!data) return data; + if(!data || !data.match) return data; var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [], coreprops: [], extprops: [], strs:[], xmlns: "" }; (data.match(/<[^>]*>/g)||[]).forEach(function(x) { @@ -877,29 +878,36 @@ function parseStyles(data) { } function getdata(data) { - if(!data) return {}; + if(!data) return null; if(data.data) return data.data; if(data._data && data._data.getContent) return Array.prototype.slice.call(data._data.getContent(),0).map(function(x) { return String.fromCharCode(x); }).join(""); - return {}; + return null; +} + +function getzipfile(zip, file) { + var f = file; if(zip.files[f]) return zip.files[f]; + f = file.toLowerCase(); if(zip.files[f]) return zip.files[f]; + f = f.replace(/\//g,'\\'); if(zip.files[f]) return zip.files[f]; + throw new Error("Cannot find file " + file + " in zip") } function parseZip(zip) { var entries = Object.keys(zip.files); var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort(); - var dir = parseCT(getdata(zip.files['[Content_Types].xml'])); + var dir = parseCT(getdata(getzipfile(zip, '[Content_Types].xml'))); strs = {}; - if(dir.sst) strs=parse_sst(getdata(zip.files[dir.sst.replace(/^\//,'')])); + if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,'')))); styles = {}; - if(dir.style) styles = parseStyles(getdata(zip.files[dir.style.replace(/^\//,'')])); + if(dir.style) styles = parseStyles(getdata(getzipfile(zip, dir.style.replace(/^\//,'')))); - var wb = parseWB(getdata(zip.files[dir.workbooks[0].replace(/^\//,'')])); - var propdata = dir.coreprops.length !== 0 ? getdata(zip.files[dir.coreprops[0].replace(/^\//,'')]) : ""; - propdata += dir.extprops.length !== 0 ? getdata(zip.files[dir.extprops[0].replace(/^\//,'')]) : ""; + var wb = parseWB(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,'')))); + var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : ""; + propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : ""; var props = propdata !== "" ? parseProps(propdata) : {}; var deps = {}; - if(dir.calcchain) deps=parseDeps(getdata(zip.files[dir.calcchain.replace(/^\//,'')])); + if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,'')))); var sheets = {}, i=0; if(!props.Worksheets) { /* Google Docs doesn't generate the appropriate metadata, so we impute: */ @@ -910,12 +918,16 @@ function parseZip(zip) { props.SheetNames[j] = wbsheets[j].name; } for(i = 0; i != props.Worksheets; ++i) { - sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files['xl/worksheets/sheet' + (i+1) + '.xml'])); + try { /* TODO: remove these guards */ + sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml'))); + } catch(e) {} } } else { for(i = 0; i != props.Worksheets; ++i) { - sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files[dir.sheets[i].replace(/^\//,'')])); + try { + sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,'')))); + } catch(e) {} } } return {