Handling empty typeless cells

@nathanathan presented a file generated by Excel that contained this cell:

    <c r="E1" s="1"/>

The schema is incorrect here: it says a cell with no `t` attribute defaults to
type "n" (number), so this empty cell would be parsed as NaN.  The right thing
to do is to treat its value as `undefined`.
This commit is contained in:
SheetJS 2013-02-20 10:10:42 -05:00
parent 125713bba7
commit 077056d984

37
xlsx.js

@ -102,13 +102,15 @@ function parseSheet(data) { //TODO: use a real xml parser
if(!data.match(/<sheetData *\/>/)) if(!data.match(/<sheetData *\/>/))
data.match(/<sheetData>(.*)<\/sheetData>/)[1].split("</row>").forEach(function(x) { if(x === "") return; data.match(/<sheetData>(.*)<\/sheetData>/)[1].split("</row>").forEach(function(x) { if(x === "") return;
var row = parsexmltag(x.match(/<row[^>]*>/)[0]); //s.rows[row.r]=row.spans; var row = parsexmltag(x.match(/<row[^>]*>/)[0]); //s.rows[row.r]=row.spans;
var cells = x.substr(x.indexOf('>')+1).split("</c>"); var cells = x.substr(x.indexOf('>')+1).split(/<\/c>|\/>/);
cells.forEach(function(c) { if(c === "") return; cells.forEach(function(c) { if(c === "") return;
var cell = parsexmltag(c.match(/<c[^>]*>/)[0]); delete cell[0]; var cell = parsexmltag((c.match(/<c[^>]*>/)||[c])[0]); delete cell[0];
var d = c.substr(c.indexOf('>')+1); var d = c.substr(c.indexOf('>')+1);
var p = {}; var p = {};
q.forEach(function(f){var x=d.match(matchtag(f));if(x)p[f]=unescapexml(x[1]);}); q.forEach(function(f){var x=d.match(matchtag(f));if(x)p[f]=unescapexml(x[1]);});
p.t = (cell.t ? cell.t : "n"); // default is "n" in schema /* SCHEMA IS ACTUALLY INCORRECT HERE. IF A CELL HAS NO T, EMIT "" */
if(cell.t === undefined) { p.t = "str"; p.v = undefined; }
else p.t = (cell.t ? cell.t : "n"); // default is "n" in schema
switch(p.t) { switch(p.t) {
case 'n': p.v = parseFloat(p.v); break; case 'n': p.v = parseFloat(p.v); break;
case 's': p.v = strs[parseInt(p.v, 10)].t; break; case 's': p.v = strs[parseInt(p.v, 10)].t; break;
@ -125,14 +127,14 @@ function parseSheet(data) { //TODO: use a real xml parser
} }
// matches <foo>...</foo> extracts content // matches <foo>...</foo> extracts content
function matchtag(f,g) {return new RegExp('<' + f + '>(.*)</' + f + '>',g||"");} function matchtag(f,g) {return new RegExp('<' + f + '>([\\s\\S]*)</' + f + '>',g||"");}
function parseStrs(data) { function parseStrs(data) {
var s = []; var s = [];
var sst = data.match(new RegExp("<sst ([^>]*)>(.*)<\/sst>")); var sst = data.match(new RegExp("<sst ([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(sst) { if(sst) {
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(function(x) { var z = {}; s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(function(x) { var z = {};
var y=x.match(/<(.*)>(.*)<\/.*/); if(x) z[y[1]]=unescapexml(y[2]); return z;}); var y=x.match(/<(.*)>([\s\S]*)<\/.*/); if(y) z[y[1]]=unescapexml(y[2]); return z;});
sst = parsexmltag(sst[1]); s.count = sst.count; s.uniqueCount = sst.uniqueCount; sst = parsexmltag(sst[1]); s.count = sst.count; s.uniqueCount = sst.uniqueCount;
} }
@ -147,7 +149,7 @@ function parseProps(data) {
var xtra = ["HeadingPairs", "TitlesOfParts","dc:creator","cp:lastModifiedBy","dcterms:created", "dcterms:modified"]; var xtra = ["HeadingPairs", "TitlesOfParts","dc:creator","cp:lastModifiedBy","dcterms:created", "dcterms:modified"];
strings.forEach(function(f){p[f] = (data.match(matchtag(f))||[])[1];}); strings.forEach(function(f){p[f] = (data.match(matchtag(f))||[])[1];});
bools.forEach(function(f){p[f] = data.match(matchtag(f))[1] == "true";}); bools.forEach(function(f){p[f] = (data.match(matchtag(f))||[])[1] == "true";});
xtra.forEach(function(f) { xtra.forEach(function(f) {
var cur = data.match(new RegExp("<" + f + "[^>]*>(.*)<\/" + f + ">")); var cur = data.match(new RegExp("<" + f + "[^>]*>(.*)<\/" + f + ">"));
if(cur && cur.length > 0) q[f] = cur[1]; if(cur && cur.length > 0) q[f] = cur[1];
@ -215,6 +217,7 @@ function parseWB(data) {
if(y.appName != "xl") throw "Unexpected workbook.appName: "+y.appName; if(y.appName != "xl") throw "Unexpected workbook.appName: "+y.appName;
delete y[0]; wb.AppVersion = y; break; delete y[0]; wb.AppVersion = y; break;
case '<workbookPr': delete y[0]; wb.WBProps = y; break; case '<workbookPr': delete y[0]; wb.WBProps = y; break;
case '<workbookPr/>': delete y[0]; wb.WBProps = y; break;
case '<bookViews>': case '</bookViews>': break; // aggregate workbookView case '<bookViews>': case '</bookViews>': break; // aggregate workbookView
case '<workbookView': delete y[0]; wb.WBView.push(y); break; case '<workbookView': delete y[0]; wb.WBView.push(y); break;
case '<sheets>': case '</sheets>': break; // aggregate sheet case '<sheets>': case '</sheets>': break; // aggregate sheet
@ -222,6 +225,7 @@ function parseWB(data) {
case '</ext>': case '</extLst>': case '</workbook>': break; case '</ext>': case '</extLst>': case '</workbook>': break;
case '<extLst>': break; case '<extLst>': break;
case '<calcPr': delete y[0]; wb.CalcPr = y; break; case '<calcPr': delete y[0]; wb.CalcPr = y; break;
case '<calcPr/>': delete y[0]; wb.CalcPr = y; break;
case '<mx:ArchID': break; case '<mx:ArchID': break;
case '<ext': break;//TODO: check with different versions of excel case '<ext': break;//TODO: check with different versions of excel
@ -244,7 +248,9 @@ function parseZip(zip) {
var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort(); var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
var dir = parseCT((zip.files['[Content_Types].xml']||{}).data); var dir = parseCT((zip.files['[Content_Types].xml']||{}).data);
var wb = parseWB(zip.files[dir.workbooks[0].replace(/^\//,'')].data); var wb = parseWB(zip.files[dir.workbooks[0].replace(/^\//,'')].data);
var props = parseProps(zip.files[dir.coreprops[0].replace(/^\//,'')].data + zip.files[dir.extprops[0].replace(/^\//,'')].data); var propdata = dir.coreprops.length !== 0 ? zip.files[dir.coreprops[0].replace(/^\//,'')].data : "";
propdata += dir.extprops.length !== 0 ? zip.files[dir.extprops[0].replace(/^\//,'')].data : "";
var props = propdata !== "" ? parseProps(propdata) : {};
var deps = {}; var deps = {};
if(dir.calcchain) deps=parseDeps(zip.files[dir.calcchain.replace(/^\//,'')].data); if(dir.calcchain) deps=parseDeps(zip.files[dir.calcchain.replace(/^\//,'')].data);
if(dir.strs[0]) strs=parseStrs(zip.files[dir.strs[0].replace(/^\//,'')].data); if(dir.strs[0]) strs=parseStrs(zip.files[dir.strs[0].replace(/^\//,'')].data);
@ -339,13 +345,14 @@ function sheet_to_row_object_array(sheet){
c: C, c: C,
r: R r: R
})]; })];
if(val){ if(val !== undefined) switch(val.t){
if(val.t === "s"){ case 's': case 'str':
rowObject[columnHeaders[C]] = val.v; if(val.v !== undefined) {
} else { rowObject[columnHeaders[C]] = val.v;
throw 'unrecognized type ' + val.t; emptyRow = false;
} }
emptyRow = false; break;
default: throw 'unrecognized type ' + val.t;
} }
} }
if(!emptyRow) { if(!emptyRow) {