version bump 0.3.3: malformed xlsx parsing

Files that cannot be processed by Excel 2011 were removed from the suite
This commit is contained in:
SheetJS 2013-11-12 08:29:53 -08:00
parent 2cdd09f801
commit b4f7030634
7 changed files with 71 additions and 44 deletions

View File

@ -98,6 +98,7 @@ var _ssfopts = {}; // spreadsheet formatting options
/* 18.3 Worksheets */
function parseSheet(data) {
if(!data) return data;
/* 18.3.1.99 worksheet CT_Worksheet */
var s = {};
@ -146,7 +147,7 @@ function parseSheet(data) {
} break;
case 'str': if(p.v) p.v = utf8read(p.v); break; // normal string
case 'inlineStr':
p.t = 'str'; p.v = unescapexml(d.match(matchtag('t'))[1]);
p.t = 'str'; p.v = unescapexml((d.match(matchtag('t'))||["",""])[1]);
break; // inline string
case 'b':
switch(p.v) {
@ -160,7 +161,7 @@ function parseSheet(data) {
}
/* formatting */
if(cell.s) {
if(cell.s && styles.CellXf) { /* TODO: second check is a hacked guard */
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId && cf.numFmtId !== 0) {
p.raw = p.v;
@ -238,10 +239,10 @@ function parseDeps(data) {
var ctext = {};
function parseCT(data) {
if(!data) return data;
if(!data || !data.match) return data;
var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [],
coreprops: [], extprops: [], strs:[], xmlns: "" };
data.match(/<[^>]*>/g).forEach(function(x) {
(data.match(/<[^>]*>/g)||[]).forEach(function(x) {
var y = parsexmltag(x);
switch(y[0]) {
case '<?xml': break;
@ -446,29 +447,36 @@ function parseStyles(data) {
}
/* Pull the raw contents out of a zip entry object.
 * - a falsy entry short-circuits to the fallback value;
 * - a truthy `data.data` is returned unchanged;
 * - otherwise, when `data._data.getContent` exists, its result is copied into
 *   an array, each element is mapped through String.fromCharCode, and the
 *   pieces are joined into one string.
 * NOTE(review): this listing appears to interleave the removed and the added
 * lines of a diff -- the `return {}` statements look like the old fallbacks and
 * the `return null` statements their replacements. Confirm against the real
 * file which pair is live before relying on the fallback value. */
function getdata(data) {
if(!data) return {};
if(!data) return null;
if(data.data) return data.data;
/* presumably getContent() yields an array-like of byte values -- TODO confirm */
if(data._data && data._data.getContent) return Array.prototype.slice.call(data._data.getContent(),0).map(function(x) { return String.fromCharCode(x); }).join("");
return {};
return null;
}
/**
 * Find an entry in a zip archive, tolerating differences in path case and in
 * the path separator ('/' versus '\').
 *
 * Lookup order (first hit wins):
 *   1. the name exactly as requested
 *   2. the lowercased name
 *   3. the lowercased name with '/' replaced by '\'
 *   4. the requested name with '/' replaced by '\'
 *   5. any archive entry whose name equals the requested one after both are
 *      lowercased and have '\' normalized to '/'
 *
 * Only own, truthy members of `zip.files` are considered, so a request such as
 * "constructor" cannot resolve to something inherited from Object.prototype.
 *
 * @param {{files: Object}} zip  archive object; `files` maps entry names to entries
 * @param {string} file          requested entry path
 * @returns {*} the matching value stored in `zip.files`
 * @throws {Error} "Cannot find file <file> in zip" when no entry matches
 */
function getzipfile(zip, file) {
  var files = zip.files;
  var hasOwn = Object.prototype.hasOwnProperty;
  var lower = file.toLowerCase();
  var candidates = [
    file,
    lower,
    lower.replace(/\//g,'\\'),
    file.replace(/\//g,'\\')
  ];
  var i, name;
  for(i = 0; i < candidates.length; ++i) {
    name = candidates[i];
    if(hasOwn.call(files, name) && files[name]) return files[name];
  }
  /* Last resort: compare against every entry name, ignoring case and separator style. */
  var wanted = lower.replace(/\\/g,'/');
  var names = Object.keys(files);
  for(i = 0; i < names.length; ++i) {
    name = names[i];
    if(files[name] && name.toLowerCase().replace(/\\/g,'/') === wanted) return files[name];
  }
  throw new Error("Cannot find file " + file + " in zip");
}
function parseZip(zip) {
var entries = Object.keys(zip.files);
var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
var dir = parseCT(getdata(zip.files['[Content_Types].xml']));
var dir = parseCT(getdata(getzipfile(zip, '[Content_Types].xml')));
strs = {};
if(dir.sst) strs=parse_sst(getdata(zip.files[dir.sst.replace(/^\//,'')]));
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))));
styles = {};
if(dir.style) styles = parseStyles(getdata(zip.files[dir.style.replace(/^\//,'')]));
if(dir.style) styles = parseStyles(getdata(getzipfile(zip, dir.style.replace(/^\//,''))));
var wb = parseWB(getdata(zip.files[dir.workbooks[0].replace(/^\//,'')]));
var propdata = dir.coreprops.length !== 0 ? getdata(zip.files[dir.coreprops[0].replace(/^\//,'')]) : "";
propdata += dir.extprops.length !== 0 ? getdata(zip.files[dir.extprops[0].replace(/^\//,'')]) : "";
var wb = parseWB(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))));
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
var props = propdata !== "" ? parseProps(propdata) : {};
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(zip.files[dir.calcchain.replace(/^\//,'')]));
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;
if(!props.Worksheets) {
/* Google Docs doesn't generate the appropriate metadata, so we impute: */
@ -479,12 +487,16 @@ function parseZip(zip) {
props.SheetNames[j] = wbsheets[j].name;
}
for(i = 0; i != props.Worksheets; ++i) {
sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files['xl/worksheets/sheet' + (i+1) + '.xml']));
try { /* TODO: remove these guards */
sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml')));
} catch(e) {}
}
}
else {
for(i = 0; i != props.Worksheets; ++i) {
sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files[dir.sheets[i].replace(/^\//,'')]));
try {
sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,''))));
} catch(e) {}
}
}
return {

View File

@ -76,11 +76,11 @@ function sheet_to_csv(sheet) {
};
var out = "";
if(sheet["!ref"]) {
var r = utils.decode_range(sheet["!ref"]);
var r = XLSX.utils.decode_range(sheet["!ref"]);
for(var R = r.s.r; R <= r.e.r; ++R) {
var row = [];
for(var C = r.s.c; C <= r.e.c; ++C) {
var val = sheet[utils.encode_cell({c:C,r:R})];
var val = sheet[XLSX.utils.encode_cell({c:C,r:R})];
row.push(val ? stringify(val).replace(/\\r\\n/g,"\n").replace(/\\t/g,"\t").replace(/\\\\/g,"\\").replace("\\\"","\"\"") : "");
}
out += row.join(",") + "\n";
@ -103,7 +103,7 @@ function get_formulae(ws) {
return cmds;
}
var utils = {
XLSX.utils = {
encode_col: encode_col,
encode_row: encode_row,
encode_cell: encode_cell,

View File

@ -2,7 +2,7 @@
if(typeof require !== 'undefined' && typeof exports !== 'undefined') {
exports.read = XLSX.read;
exports.readFile = XLSX.readFile;
exports.utils = utils;
exports.utils = XLSX.utils;
exports.main = function(args) {
var zip = XLSX.read(args[0], {type:'file'});
console.log(zip.Sheets);

View File

@ -1,4 +1,8 @@
<!DOCTYPE html>
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>JS-XLSX Live Demo</title>
<style>
#drop{
border:2px dashed #bbb;
@ -13,6 +17,8 @@
width:100%;
}
</style>
</head>
<body>
<b>JS-XLSX Live Demo</b><br />
<input type="radio" name="format" value="csv" checked> CSV<br>
<input type="radio" name="format" value="json"> JSON<br>
@ -127,3 +133,5 @@ if(drop.addEventListener) {
drop.addEventListener('drop', handleDrop, false);
}
</script>
</body>
</html>

View File

@ -1,11 +1,9 @@
{
"name": "xlsx",
"version": "0.3.2",
"version": "0.3.3",
"author": "sheetjs",
"description": "(one day) a full-featured XLSX parser and writer. For now, primitive parser",
"keywords": [
"xlsx", "office", "excel", "spreadsheet"
],
"keywords": [ "xlsx", "office", "excel", "spreadsheet" ],
"bin": {
"xlsx2csv": "./bin/xlsx2csv.njs"
},
@ -18,10 +16,7 @@
"mocha":"",
"jasmine-node": "x"
},
"repository": {
"type":"git",
"url": "git://github.com/SheetJS/js-xlsx.git"
},
"repository": { "type":"git", "url":"git://github.com/SheetJS/js-xlsx.git" },
"scripts": {
"pretest": "git submodule init && git submodule update",
"test": "make mocha",

View File

@ -11,7 +11,7 @@
47668.xlsx
47737.xlsx
47804.xlsx
47813.xlsx
47813.xlsx.pending
47862.xlsx
47889.xlsx
48495.xlsx
@ -96,7 +96,7 @@ WithThreeCharts.xlsx
WithTwoCharts.xlsx
WithVariousData.xlsx
atp.xlsx
chart_sheet.xlsx
chart_sheet.xlsx.pending
comments.xlsx
formula_stress_test.xlsx.pending
interview.xlsx
@ -105,7 +105,7 @@ mixed_sheets.xlsx
named_ranges_2011.xlsx
picture.xlsx
reordered_sheets.xlsx
sample-beta.xlsx
sample-beta.xlsx.pending
sample.xlsx
shared_formulas.xlsx
sheetProtection_allLocked.xlsx

40
xlsx.js
View File

@ -529,6 +529,7 @@ var _ssfopts = {}; // spreadsheet formatting options
/* 18.3 Worksheets */
function parseSheet(data) {
if(!data) return data;
/* 18.3.1.99 worksheet CT_Worksheet */
var s = {};
@ -577,7 +578,7 @@ function parseSheet(data) {
} break;
case 'str': if(p.v) p.v = utf8read(p.v); break; // normal string
case 'inlineStr':
p.t = 'str'; p.v = unescapexml(d.match(matchtag('t'))[1]);
p.t = 'str'; p.v = unescapexml((d.match(matchtag('t'))||["",""])[1]);
break; // inline string
case 'b':
switch(p.v) {
@ -591,7 +592,7 @@ function parseSheet(data) {
}
/* formatting */
if(cell.s) {
if(cell.s && styles.CellXf) { /* TODO: second check is a hacked guard */
var cf = styles.CellXf[cell.s];
if(cf && cf.numFmtId && cf.numFmtId !== 0) {
p.raw = p.v;
@ -669,7 +670,7 @@ function parseDeps(data) {
var ctext = {};
function parseCT(data) {
if(!data) return data;
if(!data || !data.match) return data;
var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [],
coreprops: [], extprops: [], strs:[], xmlns: "" };
(data.match(/<[^>]*>/g)||[]).forEach(function(x) {
@ -877,29 +878,36 @@ function parseStyles(data) {
}
/* Pull the raw contents out of a zip entry object.
 * - a falsy entry short-circuits to the fallback value;
 * - a truthy `data.data` is returned unchanged;
 * - otherwise, when `data._data.getContent` exists, its result is copied into
 *   an array, each element is mapped through String.fromCharCode, and the
 *   pieces are joined into one string.
 * NOTE(review): this listing appears to interleave the removed and the added
 * lines of a diff -- the `return {}` statements look like the old fallbacks and
 * the `return null` statements their replacements. Confirm against the real
 * file which pair is live before relying on the fallback value. */
function getdata(data) {
if(!data) return {};
if(!data) return null;
if(data.data) return data.data;
/* presumably getContent() yields an array-like of byte values -- TODO confirm */
if(data._data && data._data.getContent) return Array.prototype.slice.call(data._data.getContent(),0).map(function(x) { return String.fromCharCode(x); }).join("");
return {};
return null;
}
/**
 * Find an entry in a zip archive, tolerating differences in path case and in
 * the path separator ('/' versus '\').
 *
 * Lookup order (first hit wins):
 *   1. the name exactly as requested
 *   2. the lowercased name
 *   3. the lowercased name with '/' replaced by '\'
 *   4. the requested name with '/' replaced by '\'
 *   5. any archive entry whose name equals the requested one after both are
 *      lowercased and have '\' normalized to '/'
 *
 * Only own, truthy members of `zip.files` are considered, so a request such as
 * "constructor" cannot resolve to something inherited from Object.prototype.
 *
 * @param {{files: Object}} zip  archive object; `files` maps entry names to entries
 * @param {string} file          requested entry path
 * @returns {*} the matching value stored in `zip.files`
 * @throws {Error} "Cannot find file <file> in zip" when no entry matches
 */
function getzipfile(zip, file) {
  var files = zip.files;
  var hasOwn = Object.prototype.hasOwnProperty;
  var lower = file.toLowerCase();
  var candidates = [
    file,
    lower,
    lower.replace(/\//g,'\\'),
    file.replace(/\//g,'\\')
  ];
  var i, name;
  for(i = 0; i < candidates.length; ++i) {
    name = candidates[i];
    if(hasOwn.call(files, name) && files[name]) return files[name];
  }
  /* Last resort: compare against every entry name, ignoring case and separator style. */
  var wanted = lower.replace(/\\/g,'/');
  var names = Object.keys(files);
  for(i = 0; i < names.length; ++i) {
    name = names[i];
    if(files[name] && name.toLowerCase().replace(/\\/g,'/') === wanted) return files[name];
  }
  throw new Error("Cannot find file " + file + " in zip");
}
function parseZip(zip) {
var entries = Object.keys(zip.files);
var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
var dir = parseCT(getdata(zip.files['[Content_Types].xml']));
var dir = parseCT(getdata(getzipfile(zip, '[Content_Types].xml')));
strs = {};
if(dir.sst) strs=parse_sst(getdata(zip.files[dir.sst.replace(/^\//,'')]));
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))));
styles = {};
if(dir.style) styles = parseStyles(getdata(zip.files[dir.style.replace(/^\//,'')]));
if(dir.style) styles = parseStyles(getdata(getzipfile(zip, dir.style.replace(/^\//,''))));
var wb = parseWB(getdata(zip.files[dir.workbooks[0].replace(/^\//,'')]));
var propdata = dir.coreprops.length !== 0 ? getdata(zip.files[dir.coreprops[0].replace(/^\//,'')]) : "";
propdata += dir.extprops.length !== 0 ? getdata(zip.files[dir.extprops[0].replace(/^\//,'')]) : "";
var wb = parseWB(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))));
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
var props = propdata !== "" ? parseProps(propdata) : {};
var deps = {};
if(dir.calcchain) deps=parseDeps(getdata(zip.files[dir.calcchain.replace(/^\//,'')]));
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
var sheets = {}, i=0;
if(!props.Worksheets) {
/* Google Docs doesn't generate the appropriate metadata, so we impute: */
@ -910,12 +918,16 @@ function parseZip(zip) {
props.SheetNames[j] = wbsheets[j].name;
}
for(i = 0; i != props.Worksheets; ++i) {
sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files['xl/worksheets/sheet' + (i+1) + '.xml']));
try { /* TODO: remove these guards */
sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml')));
} catch(e) {}
}
}
else {
for(i = 0; i != props.Worksheets; ++i) {
sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files[dir.sheets[i].replace(/^\//,'')]));
try {
sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,''))));
} catch(e) {}
}
}
return {