forked from sheetjs/sheetjs
Handling empty typeless cells
@nathanathan presented a case generated from Excel that contained the cell `<c r="E1" s="1"/>`. The schema is incorrect here: it says a cell with no type defaults to numeric ("n"), which makes the empty cell parse as NaN. The right thing to do is to treat its value as `undefined`.
This commit is contained in:
parent
125713bba7
commit
077056d984
37
xlsx.js
37
xlsx.js
@ -102,13 +102,15 @@ function parseSheet(data) { //TODO: use a real xml parser
|
|||||||
if(!data.match(/<sheetData *\/>/))
|
if(!data.match(/<sheetData *\/>/))
|
||||||
data.match(/<sheetData>(.*)<\/sheetData>/)[1].split("</row>").forEach(function(x) { if(x === "") return;
|
data.match(/<sheetData>(.*)<\/sheetData>/)[1].split("</row>").forEach(function(x) { if(x === "") return;
|
||||||
var row = parsexmltag(x.match(/<row[^>]*>/)[0]); //s.rows[row.r]=row.spans;
|
var row = parsexmltag(x.match(/<row[^>]*>/)[0]); //s.rows[row.r]=row.spans;
|
||||||
var cells = x.substr(x.indexOf('>')+1).split("</c>");
|
var cells = x.substr(x.indexOf('>')+1).split(/<\/c>|\/>/);
|
||||||
cells.forEach(function(c) { if(c === "") return;
|
cells.forEach(function(c) { if(c === "") return;
|
||||||
var cell = parsexmltag(c.match(/<c[^>]*>/)[0]); delete cell[0];
|
var cell = parsexmltag((c.match(/<c[^>]*>/)||[c])[0]); delete cell[0];
|
||||||
var d = c.substr(c.indexOf('>')+1);
|
var d = c.substr(c.indexOf('>')+1);
|
||||||
var p = {};
|
var p = {};
|
||||||
q.forEach(function(f){var x=d.match(matchtag(f));if(x)p[f]=unescapexml(x[1]);});
|
q.forEach(function(f){var x=d.match(matchtag(f));if(x)p[f]=unescapexml(x[1]);});
|
||||||
p.t = (cell.t ? cell.t : "n"); // default is "n" in schema
|
/* SCHEMA IS ACTUALLY INCORRECT HERE. IF A CELL HAS NO T, EMIT "" */
|
||||||
|
if(cell.t === undefined) { p.t = "str"; p.v = undefined; }
|
||||||
|
else p.t = (cell.t ? cell.t : "n"); // default is "n" in schema
|
||||||
switch(p.t) {
|
switch(p.t) {
|
||||||
case 'n': p.v = parseFloat(p.v); break;
|
case 'n': p.v = parseFloat(p.v); break;
|
||||||
case 's': p.v = strs[parseInt(p.v, 10)].t; break;
|
case 's': p.v = strs[parseInt(p.v, 10)].t; break;
|
||||||
@ -125,14 +127,14 @@ function parseSheet(data) { //TODO: use a real xml parser
|
|||||||
}
|
}
|
||||||
|
|
||||||
// matches <foo>...</foo> extracts content
|
// matches <foo>...</foo> extracts content
|
||||||
function matchtag(f,g) {return new RegExp('<' + f + '>(.*)</' + f + '>',g||"");}
|
/**
 * Builds a RegExp that matches `<f>...</f>` and captures the text between
 * the opening and closing tags in group 1.
 *
 * The capture uses `[\s\S]*`, so it spans newlines and is greedy: when the
 * tag occurs several times, group 1 runs from the first opening tag to the
 * last closing tag.
 *
 * @param {string} f - tag name, inserted into the pattern verbatim (not escaped).
 * @param {string} [g] - RegExp flags; defaults to no flags.
 * @returns {RegExp} the compiled pattern.
 */
function matchtag(f,g) {
	var flags = g || "";
	var opening = '<' + f + '>';
	var closing = '</' + f + '>';
	return new RegExp(opening + '([\\s\\S]*)' + closing, flags);
}
|
||||||
|
|
||||||
function parseStrs(data) {
|
function parseStrs(data) {
|
||||||
var s = [];
|
var s = [];
|
||||||
var sst = data.match(new RegExp("<sst ([^>]*)>(.*)<\/sst>"));
|
var sst = data.match(new RegExp("<sst ([^>]*)>([\\s\\S]*)<\/sst>","m"));
|
||||||
if(sst) {
|
if(sst) {
|
||||||
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(function(x) { var z = {};
|
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(function(x) { var z = {};
|
||||||
var y=x.match(/<(.*)>(.*)<\/.*/); if(x) z[y[1]]=unescapexml(y[2]); return z;});
|
var y=x.match(/<(.*)>([\s\S]*)<\/.*/); if(y) z[y[1]]=unescapexml(y[2]); return z;});
|
||||||
|
|
||||||
sst = parsexmltag(sst[1]); s.count = sst.count; s.uniqueCount = sst.uniqueCount;
|
sst = parsexmltag(sst[1]); s.count = sst.count; s.uniqueCount = sst.uniqueCount;
|
||||||
}
|
}
|
||||||
@ -147,7 +149,7 @@ function parseProps(data) {
|
|||||||
var xtra = ["HeadingPairs", "TitlesOfParts","dc:creator","cp:lastModifiedBy","dcterms:created", "dcterms:modified"];
|
var xtra = ["HeadingPairs", "TitlesOfParts","dc:creator","cp:lastModifiedBy","dcterms:created", "dcterms:modified"];
|
||||||
|
|
||||||
strings.forEach(function(f){p[f] = (data.match(matchtag(f))||[])[1];});
|
strings.forEach(function(f){p[f] = (data.match(matchtag(f))||[])[1];});
|
||||||
bools.forEach(function(f){p[f] = data.match(matchtag(f))[1] == "true";});
|
bools.forEach(function(f){p[f] = (data.match(matchtag(f))||[])[1] == "true";});
|
||||||
xtra.forEach(function(f) {
|
xtra.forEach(function(f) {
|
||||||
var cur = data.match(new RegExp("<" + f + "[^>]*>(.*)<\/" + f + ">"));
|
var cur = data.match(new RegExp("<" + f + "[^>]*>(.*)<\/" + f + ">"));
|
||||||
if(cur && cur.length > 0) q[f] = cur[1];
|
if(cur && cur.length > 0) q[f] = cur[1];
|
||||||
@ -215,6 +217,7 @@ function parseWB(data) {
|
|||||||
if(y.appName != "xl") throw "Unexpected workbook.appName: "+y.appName;
|
if(y.appName != "xl") throw "Unexpected workbook.appName: "+y.appName;
|
||||||
delete y[0]; wb.AppVersion = y; break;
|
delete y[0]; wb.AppVersion = y; break;
|
||||||
case '<workbookPr': delete y[0]; wb.WBProps = y; break;
|
case '<workbookPr': delete y[0]; wb.WBProps = y; break;
|
||||||
|
case '<workbookPr/>': delete y[0]; wb.WBProps = y; break;
|
||||||
case '<bookViews>': case '</bookViews>': break; // aggregate workbookView
|
case '<bookViews>': case '</bookViews>': break; // aggregate workbookView
|
||||||
case '<workbookView': delete y[0]; wb.WBView.push(y); break;
|
case '<workbookView': delete y[0]; wb.WBView.push(y); break;
|
||||||
case '<sheets>': case '</sheets>': break; // aggregate sheet
|
case '<sheets>': case '</sheets>': break; // aggregate sheet
|
||||||
@ -222,6 +225,7 @@ function parseWB(data) {
|
|||||||
case '</ext>': case '</extLst>': case '</workbook>': break;
|
case '</ext>': case '</extLst>': case '</workbook>': break;
|
||||||
case '<extLst>': break;
|
case '<extLst>': break;
|
||||||
case '<calcPr': delete y[0]; wb.CalcPr = y; break;
|
case '<calcPr': delete y[0]; wb.CalcPr = y; break;
|
||||||
|
case '<calcPr/>': delete y[0]; wb.CalcPr = y; break;
|
||||||
|
|
||||||
case '<mx:ArchID': break;
|
case '<mx:ArchID': break;
|
||||||
case '<ext': break;//TODO: check with different versions of excel
|
case '<ext': break;//TODO: check with different versions of excel
|
||||||
@ -244,7 +248,9 @@ function parseZip(zip) {
|
|||||||
var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
|
var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
|
||||||
var dir = parseCT((zip.files['[Content_Types].xml']||{}).data);
|
var dir = parseCT((zip.files['[Content_Types].xml']||{}).data);
|
||||||
var wb = parseWB(zip.files[dir.workbooks[0].replace(/^\//,'')].data);
|
var wb = parseWB(zip.files[dir.workbooks[0].replace(/^\//,'')].data);
|
||||||
var props = parseProps(zip.files[dir.coreprops[0].replace(/^\//,'')].data + zip.files[dir.extprops[0].replace(/^\//,'')].data);
|
var propdata = dir.coreprops.length !== 0 ? zip.files[dir.coreprops[0].replace(/^\//,'')].data : "";
|
||||||
|
propdata += dir.extprops.length !== 0 ? zip.files[dir.extprops[0].replace(/^\//,'')].data : "";
|
||||||
|
var props = propdata !== "" ? parseProps(propdata) : {};
|
||||||
var deps = {};
|
var deps = {};
|
||||||
if(dir.calcchain) deps=parseDeps(zip.files[dir.calcchain.replace(/^\//,'')].data);
|
if(dir.calcchain) deps=parseDeps(zip.files[dir.calcchain.replace(/^\//,'')].data);
|
||||||
if(dir.strs[0]) strs=parseStrs(zip.files[dir.strs[0].replace(/^\//,'')].data);
|
if(dir.strs[0]) strs=parseStrs(zip.files[dir.strs[0].replace(/^\//,'')].data);
|
||||||
@ -339,13 +345,14 @@ function sheet_to_row_object_array(sheet){
|
|||||||
c: C,
|
c: C,
|
||||||
r: R
|
r: R
|
||||||
})];
|
})];
|
||||||
if(val){
|
if(val !== undefined) switch(val.t){
|
||||||
if(val.t === "s"){
|
case 's': case 'str':
|
||||||
rowObject[columnHeaders[C]] = val.v;
|
if(val.v !== undefined) {
|
||||||
} else {
|
rowObject[columnHeaders[C]] = val.v;
|
||||||
throw 'unrecognized type ' + val.t;
|
emptyRow = false;
|
||||||
}
|
}
|
||||||
emptyRow = false;
|
break;
|
||||||
|
default: throw 'unrecognized type ' + val.t;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if(!emptyRow) {
|
if(!emptyRow) {
|
||||||
|
Loading…
Reference in New Issue
Block a user