forked from sheetjs/sheetjs
version bump 0.3.3: malformed xlsx parsing
Files that cannot be processed by Excel 2011 were removed from the suite
This commit is contained in:
parent
2cdd09f801
commit
b4f7030634
@ -98,6 +98,7 @@ var _ssfopts = {}; // spreadsheet formatting options
|
||||
|
||||
/* 18.3 Worksheets */
|
||||
function parseSheet(data) {
|
||||
if(!data) return data;
|
||||
/* 18.3.1.99 worksheet CT_Worksheet */
|
||||
var s = {};
|
||||
|
||||
@ -146,7 +147,7 @@ function parseSheet(data) {
|
||||
} break;
|
||||
case 'str': if(p.v) p.v = utf8read(p.v); break; // normal string
|
||||
case 'inlineStr':
|
||||
p.t = 'str'; p.v = unescapexml(d.match(matchtag('t'))[1]);
|
||||
p.t = 'str'; p.v = unescapexml((d.match(matchtag('t'))||["",""])[1]);
|
||||
break; // inline string
|
||||
case 'b':
|
||||
switch(p.v) {
|
||||
@ -160,7 +161,7 @@ function parseSheet(data) {
|
||||
}
|
||||
|
||||
/* formatting */
|
||||
if(cell.s) {
|
||||
if(cell.s && styles.CellXf) { /* TODO: second check is a hacked guard */
|
||||
var cf = styles.CellXf[cell.s];
|
||||
if(cf && cf.numFmtId && cf.numFmtId !== 0) {
|
||||
p.raw = p.v;
|
||||
@ -238,10 +239,10 @@ function parseDeps(data) {
|
||||
var ctext = {};
|
||||
|
||||
function parseCT(data) {
|
||||
if(!data) return data;
|
||||
if(!data || !data.match) return data;
|
||||
var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [],
|
||||
coreprops: [], extprops: [], strs:[], xmlns: "" };
|
||||
data.match(/<[^>]*>/g).forEach(function(x) {
|
||||
(data.match(/<[^>]*>/g)||[]).forEach(function(x) {
|
||||
var y = parsexmltag(x);
|
||||
switch(y[0]) {
|
||||
case '<?xml': break;
|
||||
@ -446,29 +447,36 @@ function parseStyles(data) {
|
||||
}
|
||||
|
||||
/**
 * Extract the text payload of a zip entry object.
 *
 * The rendered block carried both the old `return {}` lines and their
 * `return null` replacements; as code the first return wins, so the null
 * returns were unreachable and callers received `{}`.  This version keeps
 * only the replacement behaviour: a missing or unreadable entry yields null.
 *
 * @param {Object} data entry object; may be null/undefined
 * @returns {?string} `data.data` when it is truthy; otherwise the values
 *   from `data._data.getContent()` converted one element per character with
 *   String.fromCharCode; otherwise null
 */
function getdata(data) {
	if(!data) return null;
	if(data.data) return data.data;
	if(data._data && data._data.getContent) {
		/* getContent() may hand back an array-like, so copy it into a real array first */
		var bytes = Array.prototype.slice.call(data._data.getContent(), 0);
		return bytes.map(function(x) { return String.fromCharCode(x); }).join("");
	}
	return null;
}
|
||||
|
||||
/**
 * Find an entry in `zip.files`, tolerating path spellings that differ from
 * the requested name in letter case or in the slash direction.
 *
 * Lookup order (the first three steps are the historical behaviour):
 *   1. the name exactly as given
 *   2. the lower-cased name
 *   3. the lower-cased name with '/' replaced by '\'
 *   4. any entry whose name equals the request once both are lower-cased
 *      and have '\' normalised to '/'
 *
 * Only own properties of `zip.files` are considered, so a request such as
 * "constructor" cannot resolve to an inherited Object.prototype member.
 *
 * @param {Object} zip object whose `files` property maps entry names to entries
 * @param {string} file requested entry name
 * @returns {Object} the matching entry
 * @throws {Error} "Cannot find file <file> in zip" when nothing matches
 */
function getzipfile(zip, file) {
	var files = zip.files;
	var owns = Object.prototype.hasOwnProperty;
	function pick(name) { return owns.call(files, name) && files[name] ? files[name] : null; }

	var lower = file.toLowerCase();
	var hit = pick(file) || pick(lower) || pick(lower.replace(/\//g,'\\'));
	if(hit) return hit;

	/* fallback: compare every entry name after normalising case and separators */
	var want = lower.replace(/\\/g,'/');
	var names = Object.keys(files);
	for(var i = 0; i != names.length; ++i) {
		if(files[names[i]] && names[i].toLowerCase().replace(/\\/g,'/') === want) return files[names[i]];
	}
	throw new Error("Cannot find file " + file + " in zip");
}
|
||||
|
||||
function parseZip(zip) {
|
||||
var entries = Object.keys(zip.files);
|
||||
var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
|
||||
var dir = parseCT(getdata(zip.files['[Content_Types].xml']));
|
||||
var dir = parseCT(getdata(getzipfile(zip, '[Content_Types].xml')));
|
||||
|
||||
strs = {};
|
||||
if(dir.sst) strs=parse_sst(getdata(zip.files[dir.sst.replace(/^\//,'')]));
|
||||
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))));
|
||||
|
||||
styles = {};
|
||||
if(dir.style) styles = parseStyles(getdata(zip.files[dir.style.replace(/^\//,'')]));
|
||||
if(dir.style) styles = parseStyles(getdata(getzipfile(zip, dir.style.replace(/^\//,''))));
|
||||
|
||||
var wb = parseWB(getdata(zip.files[dir.workbooks[0].replace(/^\//,'')]));
|
||||
var propdata = dir.coreprops.length !== 0 ? getdata(zip.files[dir.coreprops[0].replace(/^\//,'')]) : "";
|
||||
propdata += dir.extprops.length !== 0 ? getdata(zip.files[dir.extprops[0].replace(/^\//,'')]) : "";
|
||||
var wb = parseWB(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))));
|
||||
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
|
||||
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
|
||||
var props = propdata !== "" ? parseProps(propdata) : {};
|
||||
var deps = {};
|
||||
if(dir.calcchain) deps=parseDeps(getdata(zip.files[dir.calcchain.replace(/^\//,'')]));
|
||||
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
|
||||
var sheets = {}, i=0;
|
||||
if(!props.Worksheets) {
|
||||
/* Google Docs doesn't generate the appropriate metadata, so we impute: */
|
||||
@ -479,12 +487,16 @@ function parseZip(zip) {
|
||||
props.SheetNames[j] = wbsheets[j].name;
|
||||
}
|
||||
for(i = 0; i != props.Worksheets; ++i) {
|
||||
sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files['xl/worksheets/sheet' + (i+1) + '.xml']));
|
||||
try { /* TODO: remove these guards */
|
||||
sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml')));
|
||||
} catch(e) {}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for(i = 0; i != props.Worksheets; ++i) {
|
||||
sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files[dir.sheets[i].replace(/^\//,'')]));
|
||||
try {
|
||||
sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,''))));
|
||||
} catch(e) {}
|
||||
}
|
||||
}
|
||||
return {
|
||||
|
@ -76,11 +76,11 @@ function sheet_to_csv(sheet) {
|
||||
};
|
||||
var out = "";
|
||||
if(sheet["!ref"]) {
|
||||
var r = utils.decode_range(sheet["!ref"]);
|
||||
var r = XLSX.utils.decode_range(sheet["!ref"]);
|
||||
for(var R = r.s.r; R <= r.e.r; ++R) {
|
||||
var row = [];
|
||||
for(var C = r.s.c; C <= r.e.c; ++C) {
|
||||
var val = sheet[utils.encode_cell({c:C,r:R})];
|
||||
var val = sheet[XLSX.utils.encode_cell({c:C,r:R})];
|
||||
row.push(val ? stringify(val).replace(/\\r\\n/g,"\n").replace(/\\t/g,"\t").replace(/\\\\/g,"\\").replace("\\\"","\"\"") : "");
|
||||
}
|
||||
out += row.join(",") + "\n";
|
||||
@ -103,7 +103,7 @@ function get_formulae(ws) {
|
||||
return cmds;
|
||||
}
|
||||
|
||||
var utils = {
|
||||
XLSX.utils = {
|
||||
encode_col: encode_col,
|
||||
encode_row: encode_row,
|
||||
encode_cell: encode_cell,
|
||||
|
@ -2,7 +2,7 @@
|
||||
if(typeof require !== 'undefined' && typeof exports !== 'undefined') {
|
||||
exports.read = XLSX.read;
|
||||
exports.readFile = XLSX.readFile;
|
||||
exports.utils = utils;
|
||||
exports.utils = XLSX.utils;
|
||||
exports.main = function(args) {
|
||||
var zip = XLSX.read(args[0], {type:'file'});
|
||||
console.log(zip.Sheets);
|
||||
|
@ -1,4 +1,8 @@
|
||||
<!DOCTYPE html>
|
||||
<html>
|
||||
<head>
|
||||
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
|
||||
<title>JS-XLSX Live Demo</title>
|
||||
<style>
|
||||
#drop{
|
||||
border:2px dashed #bbb;
|
||||
@ -13,6 +17,8 @@
|
||||
width:100%;
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<b>JS-XLSX Live Demo</b><br />
|
||||
<input type="radio" name="format" value="csv" checked> CSV<br>
|
||||
<input type="radio" name="format" value="json"> JSON<br>
|
||||
@ -127,3 +133,5 @@ if(drop.addEventListener) {
|
||||
drop.addEventListener('drop', handleDrop, false);
|
||||
}
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
11
package.json
11
package.json
@ -1,11 +1,9 @@
|
||||
{
|
||||
"name": "xlsx",
|
||||
"version": "0.3.2",
|
||||
"version": "0.3.3",
|
||||
"author": "sheetjs",
|
||||
"description": "(one day) a full-featured XLSX parser and writer. For now, primitive parser",
|
||||
"keywords": [
|
||||
"xlsx", "office", "excel", "spreadsheet"
|
||||
],
|
||||
"keywords": [ "xlsx", "office", "excel", "spreadsheet" ],
|
||||
"bin": {
|
||||
"xlsx2csv": "./bin/xlsx2csv.njs"
|
||||
},
|
||||
@ -18,10 +16,7 @@
|
||||
"mocha":"",
|
||||
"jasmine-node": "x"
|
||||
},
|
||||
"repository": {
|
||||
"type":"git",
|
||||
"url": "git://github.com/SheetJS/js-xlsx.git"
|
||||
},
|
||||
"repository": { "type":"git", "url":"git://github.com/SheetJS/js-xlsx.git" },
|
||||
"scripts": {
|
||||
"pretest": "git submodule init && git submodule update",
|
||||
"test": "make mocha",
|
||||
|
@ -11,7 +11,7 @@
|
||||
47668.xlsx
|
||||
47737.xlsx
|
||||
47804.xlsx
|
||||
47813.xlsx
|
||||
47813.xlsx.pending
|
||||
47862.xlsx
|
||||
47889.xlsx
|
||||
48495.xlsx
|
||||
@ -96,7 +96,7 @@ WithThreeCharts.xlsx
|
||||
WithTwoCharts.xlsx
|
||||
WithVariousData.xlsx
|
||||
atp.xlsx
|
||||
chart_sheet.xlsx
|
||||
chart_sheet.xlsx.pending
|
||||
comments.xlsx
|
||||
formula_stress_test.xlsx.pending
|
||||
interview.xlsx
|
||||
@ -105,7 +105,7 @@ mixed_sheets.xlsx
|
||||
named_ranges_2011.xlsx
|
||||
picture.xlsx
|
||||
reordered_sheets.xlsx
|
||||
sample-beta.xlsx
|
||||
sample-beta.xlsx.pending
|
||||
sample.xlsx
|
||||
shared_formulas.xlsx
|
||||
sheetProtection_allLocked.xlsx
|
||||
|
40
xlsx.js
40
xlsx.js
@ -529,6 +529,7 @@ var _ssfopts = {}; // spreadsheet formatting options
|
||||
|
||||
/* 18.3 Worksheets */
|
||||
function parseSheet(data) {
|
||||
if(!data) return data;
|
||||
/* 18.3.1.99 worksheet CT_Worksheet */
|
||||
var s = {};
|
||||
|
||||
@ -577,7 +578,7 @@ function parseSheet(data) {
|
||||
} break;
|
||||
case 'str': if(p.v) p.v = utf8read(p.v); break; // normal string
|
||||
case 'inlineStr':
|
||||
p.t = 'str'; p.v = unescapexml(d.match(matchtag('t'))[1]);
|
||||
p.t = 'str'; p.v = unescapexml((d.match(matchtag('t'))||["",""])[1]);
|
||||
break; // inline string
|
||||
case 'b':
|
||||
switch(p.v) {
|
||||
@ -591,7 +592,7 @@ function parseSheet(data) {
|
||||
}
|
||||
|
||||
/* formatting */
|
||||
if(cell.s) {
|
||||
if(cell.s && styles.CellXf) { /* TODO: second check is a hacked guard */
|
||||
var cf = styles.CellXf[cell.s];
|
||||
if(cf && cf.numFmtId && cf.numFmtId !== 0) {
|
||||
p.raw = p.v;
|
||||
@ -669,7 +670,7 @@ function parseDeps(data) {
|
||||
var ctext = {};
|
||||
|
||||
function parseCT(data) {
|
||||
if(!data) return data;
|
||||
if(!data || !data.match) return data;
|
||||
var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [],
|
||||
coreprops: [], extprops: [], strs:[], xmlns: "" };
|
||||
(data.match(/<[^>]*>/g)||[]).forEach(function(x) {
|
||||
@ -877,29 +878,36 @@ function parseStyles(data) {
|
||||
}
|
||||
|
||||
/**
 * Extract the text payload of a zip entry object.
 *
 * The rendered block carried both the old `return {}` lines and their
 * `return null` replacements; as code the first return wins, so the null
 * returns were unreachable and callers received `{}`.  This version keeps
 * only the replacement behaviour: a missing or unreadable entry yields null.
 *
 * @param {Object} data entry object; may be null/undefined
 * @returns {?string} `data.data` when it is truthy; otherwise the values
 *   from `data._data.getContent()` converted one element per character with
 *   String.fromCharCode; otherwise null
 */
function getdata(data) {
	if(!data) return null;
	if(data.data) return data.data;
	if(data._data && data._data.getContent) {
		/* getContent() may hand back an array-like, so copy it into a real array first */
		var bytes = Array.prototype.slice.call(data._data.getContent(), 0);
		return bytes.map(function(x) { return String.fromCharCode(x); }).join("");
	}
	return null;
}
|
||||
|
||||
/**
 * Find an entry in `zip.files`, tolerating path spellings that differ from
 * the requested name in letter case or in the slash direction.
 *
 * Lookup order (the first three steps are the historical behaviour):
 *   1. the name exactly as given
 *   2. the lower-cased name
 *   3. the lower-cased name with '/' replaced by '\'
 *   4. any entry whose name equals the request once both are lower-cased
 *      and have '\' normalised to '/'
 *
 * Only own properties of `zip.files` are considered, so a request such as
 * "constructor" cannot resolve to an inherited Object.prototype member.
 *
 * @param {Object} zip object whose `files` property maps entry names to entries
 * @param {string} file requested entry name
 * @returns {Object} the matching entry
 * @throws {Error} "Cannot find file <file> in zip" when nothing matches
 */
function getzipfile(zip, file) {
	var files = zip.files;
	var owns = Object.prototype.hasOwnProperty;
	function pick(name) { return owns.call(files, name) && files[name] ? files[name] : null; }

	var lower = file.toLowerCase();
	var hit = pick(file) || pick(lower) || pick(lower.replace(/\//g,'\\'));
	if(hit) return hit;

	/* fallback: compare every entry name after normalising case and separators */
	var want = lower.replace(/\\/g,'/');
	var names = Object.keys(files);
	for(var i = 0; i != names.length; ++i) {
		if(files[names[i]] && names[i].toLowerCase().replace(/\\/g,'/') === want) return files[names[i]];
	}
	throw new Error("Cannot find file " + file + " in zip");
}
|
||||
|
||||
function parseZip(zip) {
|
||||
var entries = Object.keys(zip.files);
|
||||
var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
|
||||
var dir = parseCT(getdata(zip.files['[Content_Types].xml']));
|
||||
var dir = parseCT(getdata(getzipfile(zip, '[Content_Types].xml')));
|
||||
|
||||
strs = {};
|
||||
if(dir.sst) strs=parse_sst(getdata(zip.files[dir.sst.replace(/^\//,'')]));
|
||||
if(dir.sst) strs=parse_sst(getdata(getzipfile(zip, dir.sst.replace(/^\//,''))));
|
||||
|
||||
styles = {};
|
||||
if(dir.style) styles = parseStyles(getdata(zip.files[dir.style.replace(/^\//,'')]));
|
||||
if(dir.style) styles = parseStyles(getdata(getzipfile(zip, dir.style.replace(/^\//,''))));
|
||||
|
||||
var wb = parseWB(getdata(zip.files[dir.workbooks[0].replace(/^\//,'')]));
|
||||
var propdata = dir.coreprops.length !== 0 ? getdata(zip.files[dir.coreprops[0].replace(/^\//,'')]) : "";
|
||||
propdata += dir.extprops.length !== 0 ? getdata(zip.files[dir.extprops[0].replace(/^\//,'')]) : "";
|
||||
var wb = parseWB(getdata(getzipfile(zip, dir.workbooks[0].replace(/^\//,''))));
|
||||
var propdata = dir.coreprops.length !== 0 ? getdata(getzipfile(zip, dir.coreprops[0].replace(/^\//,''))) : "";
|
||||
propdata += dir.extprops.length !== 0 ? getdata(getzipfile(zip, dir.extprops[0].replace(/^\//,''))) : "";
|
||||
var props = propdata !== "" ? parseProps(propdata) : {};
|
||||
var deps = {};
|
||||
if(dir.calcchain) deps=parseDeps(getdata(zip.files[dir.calcchain.replace(/^\//,'')]));
|
||||
if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
|
||||
var sheets = {}, i=0;
|
||||
if(!props.Worksheets) {
|
||||
/* Google Docs doesn't generate the appropriate metadata, so we impute: */
|
||||
@ -910,12 +918,16 @@ function parseZip(zip) {
|
||||
props.SheetNames[j] = wbsheets[j].name;
|
||||
}
|
||||
for(i = 0; i != props.Worksheets; ++i) {
|
||||
sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files['xl/worksheets/sheet' + (i+1) + '.xml']));
|
||||
try { /* TODO: remove these guards */
|
||||
sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml')));
|
||||
} catch(e) {}
|
||||
}
|
||||
}
|
||||
else {
|
||||
for(i = 0; i != props.Worksheets; ++i) {
|
||||
sheets[props.SheetNames[i]]=parseSheet(getdata(zip.files[dir.sheets[i].replace(/^\//,'')]));
|
||||
try {
|
||||
sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,''))));
|
||||
} catch(e) {}
|
||||
}
|
||||
}
|
||||
return {
|
||||
|
Loading…
Reference in New Issue
Block a user