sheetjs/xlsx.js
2013-10-10 02:44:18 -04:00

596 lines
19 KiB
JavaScript

/* vim: set ts=2:*/
/*jshint eqnull:true */
var XLSX = (function(){
/* Content type (as listed in [Content_Types].xml) -> name of the bucket that
 * parseCT appends the matching part name to. */
var ct2type = {
  /* workbook and worksheets */
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml": "workbooks",

  /* document properties */
  "application/vnd.openxmlformats-package.core-properties+xml": "coreprops",
  "application/vnd.openxmlformats-officedocument.extended-properties+xml": "extprops",

  /* supporting parts */
  "application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml": "calcchains",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml": "sheets",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml": "strs",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml": "styles",
  "application/vnd.openxmlformats-officedocument.theme+xml": "themes",

  /* NOTE(review): looks like a placeholder entry -- confirm whether it is still needed */
  "foo": "bar"
};
/* Table 18.2.28 Defaults */
/* parseWB copies each entry onto wb.WBProps when the workbook did not supply
 * that attribute. Values are kept as attribute-style strings, except
 * refreshAllConnections which is a real boolean. */
var WBPropsDef = {
  allowRefreshQuery: "0",
  autoCompressPictures: "1",
  backupFile: "0",
  checkCompatibility: "0",
  codeName: "",
  date1904: "0",
  dateCompatibility: "1",
  //defaultThemeVersion: '0',
  filterPrivacy: "0",
  hidePivotFieldList: "0",
  promptedSolutions: "0",
  publishItems: "0",
  refreshAllConnections: false,
  saveExternalLinkValues: "1",
  showBorderUnselectedTables: "1",
  showInkAnnotation: "1",
  showObjects: "all",
  showPivotChartFilter: "0"
  //updateLinks: 'userSet'
};
/* Fallback attribute values applied by parseWB to every workbookView entry
 * that omits them. */
var WBViewDef = {
  activeTab: "0",
  autoFilterDateGrouping: "1",
  firstSheet: "0",
  minimized: "0",
  showHorizontalScroll: "1",
  showSheetTabs: "1",
  showVerticalScroll: "1",
  tabRatio: "600",
  visibility: "visible"
  //window{Height,Width}, {x,y}Window
};
/* Fallback attribute values applied by parseWB to every sheet entry. */
var SheetDef = {
  state: "visible"
};
/* Table 18.2.2 Defaults */
/* parseWB copies each entry onto wb.CalcPr when the workbook's calcPr tag did
 * not supply that attribute. */
var CalcPrDef = {
  calcCompleted: "1",
  calcMode: "auto",
  calcOnSave: "1",
  concurrentCalc: "1",
  fullCalcOnLoad: "0",
  iterate: "false",
  iterateCount: "100",
  iterateDelta: "0.001",
  refMode: "A1"
};
/* Table 18.2.3 Defaults */
/* NOTE(review): nothing in the visible part of this file reads this table;
 * parseWB currently skips customWorkbookView tags. */
var CustomWBViewDef = {
  autoUpdate: "false",
  changesSavedWin: "false",
  includeHiddenRowCol: "true",
  includePrintSettings: "true",
  maximized: "false",
  minimized: "false",
  onlySync: "false",
  personalView: "false",
  showHorizontalScroll: "true",
  showObjects: "all",
  showSheetTabs: "true",
  showStatusbar: "true",
  showVerticalScroll: "true",
  tabRatio: "600"
};
/* Namespaces the parsers insist on: parseCT compares the Types element's
 * xmlns against XMLNS_CT, parseWB compares the workbook element's against
 * XMLNS_WB, and both throw on a mismatch. */
var XMLNS_CT = "http://schemas.openxmlformats.org/package/2006/content-types",
    XMLNS_WB = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
/* XML predefined entity -> literal character, consumed by unescapexml.
 *
 * The keys must be the escaped entity text. Key order matters: unescapexml
 * walks this table with for..in, so '&amp;' has to stay last -- otherwise a
 * doubly escaped sequence such as '&amp;lt;' would be decoded twice. */
var encodings = {
  '&quot;': '"',
  '&apos;': "'",
  '&gt;': '>',
  '&lt;': '<',
  '&amp;': '&'
};
// TODO: CP remap (need to read file version to determine OS)
/**
 * Decodes XML entities and _xHHHH_ character escapes in a piece of text.
 *
 * @param {*} text value to decode; it is coerced to a string first
 * @returns {string} the text with every entity listed in `encodings` replaced
 *   by its literal character and every `_xHHHH_` escape replaced by the
 *   UTF-16 code unit HHHH
 */
function unescapexml(text){
  var s = text + '';
  for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]);
  // Require exactly four hex digits. With a `*` quantifier a literal "_x_"
  // matched with an empty capture, parseInt gave NaN, and the text was
  // silently replaced by a NUL character.
  return s.replace(/_x([0-9a-fA-F]{4})_/g, function(m,c) { return _chr(parseInt(c,16)); });
}
/**
 * Splits the text of a single XML tag into its name and attributes.
 *
 * @param {string} tag tag text, e.g. '<c r="A1" t="s">'
 * @returns {Object} key '0' holds the first whitespace-delimited word (the
 *   tag name including its leading '<'); every name="value" pair becomes a
 *   property. Attribute names are matched with \w+, so a namespace prefix is
 *   dropped (r:id is stored under "id"). Only double-quoted values are seen.
 */
function parsexmltag(tag) {
  var words = tag.split(/\s+/);
  var z = {'0': words[0]};
  if(words.length === 1) return z;
  // exec() simply stops when nothing matches, whereas String#match returned
  // null for tags such as '<row >' and the old code crashed calling .map on it.
  var attr = /(\w+)="([^"]*)"/g, m;
  while((m = attr.exec(tag)) !== null) z[m[1]] = m[2];
  return z;
}
var strs = {}; // shared strings
/* 18.3 Worksheets */
/**
 * Parses the XML of one worksheet part into a map of cell records.
 *
 * @param {string} data worksheet XML
 * @returns {Object} keys are cell references (the cell's `r` attribute) and
 *   values are {t, v[, f][, err]} records; "!ref" holds the range taken from
 *   the dimension tag, or a range computed from the parsed rows and cells
 *   when that tag is missing.
 * @throws {string} "Unrecognized boolean: ..." / "Unrecognized cell type: ..."
 */
function parseSheet(data) {
  var s = {};
  var ref = data.match(/<dimension ref="([^"]*)"\s*\/>/);
  if(ref) s["!ref"] = ref[1];
  var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} };
  var q = ["v","f"];
  // A missing or self-closing sheetData yields no match and therefore no rows.
  var sheetData = data.match(/<sheetData>([^]*)<\/sheetData>/m);
  if(sheetData) sheetData[1].split("</row>").forEach(function(x) {
    // The chunk after the last </row> (empty or whitespace) has no row tag.
    var rowtag = x.match(/<row[^>]*>/);
    if(!rowtag) return;
    var row = parsexmltag(rowtag[0]);
    if(refguess.s.r > row.r - 1) refguess.s.r = row.r - 1;
    if(refguess.e.r < row.r - 1) refguess.e.r = row.r - 1;
    var cells = x.substr(x.indexOf('>')+1).split(/<c/);
    cells.forEach(function(c, idx) {
      // Element 0 is whatever precedes the first "<c" (possibly whitespace);
      // it is never a cell.
      if(idx === 0 || c === "") return;
      c = "<c" + c;
      var cell = parsexmltag((c.match(/<c[^>]*>/)||[c])[0]); delete cell[0];
      // Take the column from the cell reference so sparse rows (e.g. a row
      // whose first cell is C2) produce a correct guessed range; fall back to
      // the position in the row when the cell carries no reference.
      var col = cell.r ? decode_cell(cell.r).c : idx - 1;
      if(refguess.s.c > col) refguess.s.c = col;
      if(refguess.e.c < col) refguess.e.c = col;
      var d = c.substr(c.indexOf('>')+1);
      var p = {};
      q.forEach(function(f){var x=d.match(matchtag(f));if(x)p[f]=unescapexml(x[1]);});
      /* SCHEMA IS ACTUALLY INCORRECT HERE. IF A CELL HAS NO T, EMIT "" */
      if(cell.t === undefined && p.v === undefined) { p.t = "str"; p.v = undefined; }
      else p.t = (cell.t ? cell.t : "n"); // default is "n" in schema
      switch(p.t) {
        case 'n': p.v = parseFloat(p.v); break;
        case 's': p.v = strs[parseInt(p.v, 10)].t; break; // index into the shared string table
        case 'str': if(p.v) p.v = utf8read(p.v); break; // normal string
        case 'b':
          switch(p.v) {
            case '0': case 'FALSE': case "false": case false: p.v=false; break;
            case '1': case 'TRUE': case "true": case true: p.v=true; break;
            default: throw "Unrecognized boolean: " + p.v;
          } break;
        /* in case of error, stick value in .err */
        case 'e': p.err = p.v; p.v = undefined; break;
        default: throw "Unrecognized cell type: " + p.t;
      }
      s[cell.r] = p;
    });
  });
  if(!s["!ref"]) s["!ref"] = encode_range(refguess);
  return s;
}
/**
 * Builds a regex that matches <f>...</f> and captures the content in group 1.
 * The opening tag may carry an xml:space="preserve" attribute.
 *
 * @param {string} f tag name, inserted into the pattern verbatim
 * @param {string} [g] extra regex flags, placed in front of the fixed "m" flag
 * @returns {RegExp}
 */
function matchtag(f,g) {
  var opening = '<' + f + '(?: xml:space="preserve")?>';
  var closing = '</' + f + '>';
  var flags = (g || "") + "m";
  return new RegExp(opening + '([^]*)' + closing, flags);
}
/**
 * Parses a vt:vector element into a list of {v, t} entries.
 *
 * @param {string} data vector XML; its opening tag supplies the `baseType`
 *   and `size` attributes
 * @returns {Array} one {v: text, t: vt type name} per element
 * @throws {string} "unexpected vector length ..." when the number of elements
 *   found differs from the declared size
 */
function parseVector(data) {
  var h = parsexmltag(data);
  // `|| []` lets an empty vector flow through the length check below instead
  // of crashing on a null match.
  var matches = data.match(new RegExp("<vt:" + h.baseType + ">(.*?)</vt:" + h.baseType + ">", 'g')) || [];
  if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size;
  return matches.map(function(x) {
    // For variant vectors, peel off the wrapper to reach the typed element.
    var v = x.replace(/<[/]?vt:variant>/g,"").match(/<vt:([^>]*)>(.*)</);
    return {v:v[2], t:v[1]};
  });
}
/**
 * Decodes a "binary string" (one UTF-8 byte per character) into a normal
 * JavaScript string.
 *
 * @param {string} orig string whose char codes are UTF-8 byte values
 * @returns {string} decoded text; 4-byte sequences become surrogate pairs
 */
var utf8read = function(orig) {
  var out = "", i = 0, c = 0, c2 = 0, c3 = 0, c4 = 0, w = 0;
  while (i < orig.length) {
    c = orig.charCodeAt(i++);
    if (c < 128) { out += _chr(c); continue; }
    c2 = orig.charCodeAt(i++);
    if (c > 191 && c < 224) { out += _chr((c & 31) << 6 | c2 & 63); continue; }
    c3 = orig.charCodeAt(i++);
    // Only lead bytes 0xF0..0xF7 start a 4-byte sequence; every other value
    // keeps the historical 3-byte treatment.
    if (c < 240 || c > 247) { out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63); continue; }
    c4 = orig.charCodeAt(i++);
    // Code point above U+FFFF: emit it as a UTF-16 surrogate pair.
    w = ((c & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63) - 65536;
    out += _chr(0xD800 + ((w >>> 10) & 1023)) + _chr(0xDC00 + (w & 1023));
  }
  return out;
};
/* 18.4 Shared String Table */
/**
 * Parses the shared string part.
 *
 * @param {string} data shared-strings XML
 * @returns {Array} one object per chunk obtained by splitting the sst body on
 *   the closing si tag (so the text after the last one contributes a trailing
 *   empty object); each object maps the first tag name found in the chunk to
 *   its decoded content. The array also carries the `count` and `uniqueCount`
 *   attributes of the sst tag. Returns a plain empty array when no sst element
 *   with attributes is found.
 */
function parseStrs(data) {
  var found = data.match(new RegExp("<sst ([^>]*)>([\\s\\S]*)<\/sst>","m"));
  if(!found) return [];
  var chunks = found[2].replace(/<si>/g,"").split(/<\/si>/);
  var table = chunks.map(function(chunk) {
    var entry = {};
    var parts = chunk.match(/<(.*)>([\s\S]*)<\/.*/);
    if(parts) {
      var tagName = parts[1].split(" ")[0];
      entry[tagName] = utf8read(unescapexml(parts[2]));
    }
    return entry;
  });
  var attrs = parsexmltag(found[1]);
  table.count = attrs.count;
  table.uniqueCount = attrs.uniqueCount;
  return table;
}
/**
 * Extracts document properties from the concatenated core and extended
 * property XML.
 *
 * @param {string} data property XML
 * @returns {Object} Application, DocSecurity, Company, AppVersion (strings or
 *   undefined), HyperlinksChanged, SharedDoc, LinksUpToDate, ScaleCrop
 *   (booleans), Creator, LastModifiedBy, CreatedDate, ModifiedDate and, when
 *   both HeadingPairs and TitlesOfParts are present, Worksheets (count) and
 *   SheetNames.
 */
function parseProps(data) {
  var p = { Company:'' }, q = {};
  var strings = ["Application", "DocSecurity", "Company", "AppVersion"];
  var bools = ["HyperlinksChanged","SharedDoc","LinksUpToDate","ScaleCrop"];
  var xtra = ["HeadingPairs", "TitlesOfParts","dc:creator","cp:lastModifiedBy","dcterms:created", "dcterms:modified"];
  strings.forEach(function(f){p[f] = (data.match(matchtag(f))||[])[1];});
  bools.forEach(function(f){p[f] = (data.match(matchtag(f))||[])[1] == "true";});
  xtra.forEach(function(f) {
    var cur = data.match(new RegExp("<" + f + "[^>]*>(.*)<\/" + f + ">"));
    if(cur && cur.length > 0) q[f] = cur[1];
  });
  if(q["HeadingPairs"] && q["TitlesOfParts"]) {
    // HeadingPairs is a flat list of (group name, entry count) pairs and
    // TitlesOfParts lists the entries of every group back to back, so `j`
    // tracks where the current group starts inside TitlesOfParts.
    var v = parseVector(q["HeadingPairs"]);
    var j = 0, widx = 0;
    for(var i = 0; i < v.length; i += 2) {
      // parseVector yields {v, t} records: the count lives in `.v`.
      var count = v[i+1] ? +v[i+1].v : 0;
      switch(v[i].v) {
        case "Worksheets": widx = j; p["Worksheets"] = count; break;
        case "Named Ranges": break; // TODO: Handle Named Ranges
        default: console.error("Unrecognized key in Heading Pairs: " + v[i].v);
      }
      j += count || 0;
    }
    var parts = parseVector(q["TitlesOfParts"]).map(function(x) { return utf8read(x.v); });
    p["SheetNames"] = parts.slice(widx, widx + p["Worksheets"]);
  }
  p["Creator"] = q["dc:creator"];
  p["LastModifiedBy"] = q["cp:lastModifiedBy"];
  p["CreatedDate"] = new Date(q["dcterms:created"]);
  p["ModifiedDate"] = new Date(q["dcterms:modified"]);
  return p;
}
/* 18.6 Calculation Chain */
/**
 * Collects the c entries of a calculation chain part.
 *
 * @param {string} data calcChain XML
 * @returns {Array} attribute maps of every c tag, in document order. An entry
 *   without an `i` attribute inherits the most recent `i` seen (starting
 *   at 1). All other tags are ignored.
 */
function parseDeps(data) {
  var chain = [];
  var current = 1;
  var tags = data.match(/<[^>]*>/g);
  for(var k = 0; k < tags.length; ++k) {
    var attrs = parsexmltag(tags[k]);
    /* 18.6.1 c CT_CalcCell 1 -- anything else (xml prolog, calcChain) is skipped */
    if(attrs[0] !== '<c') continue;
    delete attrs[0];
    if(attrs.i) current = attrs.i;
    else attrs.i = current;
    chain.push(attrs);
  }
  return chain;
}
/* Extension -> content type, filled from Default entries by parseCT. */
var ctext = {};
/**
 * Parses [Content_Types].xml into lists of part names grouped by kind.
 *
 * @param {string} data content-types XML; null/undefined is returned as is
 * @returns {Object} workbooks, sheets, themes, styles, coreprops, extprops,
 *   strs (arrays of part names), xmlns, plus `calcchain` and `sst` holding
 *   the first calc-chain / shared-string part name or "".
 * @throws {string} "Unknown Namespace: ..." when the Types element's xmlns is
 *   not XMLNS_CT
 */
function parseCT(data) {
  var ct = {
    workbooks: [], sheets: [], calcchains: [], themes: [], styles: [],
    coreprops: [], extprops: [], strs: [], xmlns: ""
  };
  if(data == null) return data;
  var tags = data.match(/<[^>]*>/g);
  tags.forEach(function(tag) {
    var attrs = parsexmltag(tag);
    var name = attrs[0];
    if(name === '<Types') {
      ct.xmlns = attrs.xmlns;
    } else if(name === '<Default') {
      ctext[attrs.Extension] = attrs.ContentType;
    } else if(name === '<Override') {
      // Only content types listed in ct2type are recorded.
      if(attrs.ContentType in ct2type) {
        ct[ct2type[attrs.ContentType]].push(attrs.PartName);
      }
    }
  });
  if(ct.xmlns !== XMLNS_CT) throw "Unknown Namespace: " + ct.xmlns;
  ct.calcchain = ct.calcchains.length > 0 ? ct.calcchains[0] : "";
  ct.sst = ct.strs.length > 0 ? ct.strs[0] : "";
  delete ct.calcchains;
  return ct;
}
/* 18.2 Workbook */
/**
 * Parses the workbook part by walking every tag in document order.
 *
 * Only a handful of tags contribute data: fileVersion (AppVersion),
 * workbookPr (WBProps), workbookView (appended to WBView), sheet (appended to
 * Sheets, with its name run through utf8read), calcPr (CalcPr) and the
 * workbook tag itself (xmlns). The remaining known tags are listed so that
 * they are skipped silently; any tag not listed is reported through
 * console.error unless the `pass` flag is set.
 *
 * After the walk, attributes missing from WBProps, CalcPr, each WBView entry
 * and each Sheets entry are filled in from WBPropsDef, CalcPrDef, WBViewDef
 * and SheetDef.
 *
 * @param {string} data workbook XML
 * @returns {Object} {AppVersion, WBProps, WBView, Sheets, CalcPr, xmlns}
 * @throws {string} "Unknown Namespace: ..." when the workbook tag's xmlns is
 *   not XMLNS_WB
 */
function parseWB(data) {
var wb = { AppVersion:{}, WBProps:{}, WBView:[], Sheets:[], CalcPr:{}, xmlns: "" };
// While `pass` is true, unrecognized tags are not reported. It is switched on
// inside definedNames, ext and mc:AlternateContent, and off at their end tags.
var pass = false;
data.match(/<[^>]*>/g).forEach(function(x) {
var y = parsexmltag(x);
// y[0] is the first whitespace-delimited word of the tag, so a tag written
// without attributes shows up with its trailing '>' or '/>' attached.
switch(y[0]) {
case '<?xml': break;
/* 18.2.27 workbook CT_Workbook 1 */
case '<workbook': wb.xmlns = y.xmlns; break;
case '</workbook>': break;
/* 18.2.13 fileVersion CT_FileVersion ? */
case '<fileVersion': delete y[0]; wb.AppVersion = y; break;
case '<fileVersion/>': break;
/* 18.2.12 fileSharing CT_FileSharing ? */
case '<fileSharing': case '<fileSharing/>': break;
/* 18.2.28 workbookPr CT_WorkbookPr ? */
case '<workbookPr': delete y[0]; wb.WBProps = y; break;
case '<workbookPr/>': delete y[0]; wb.WBProps = y; break;
/* 18.2.29 workbookProtection CT_WorkbookProtection ? */
case '<workbookProtection/>': break;
/* 18.2.1 bookViews CT_BookViews ? */
case '<bookViews>': case '</bookViews>': break;
/* 18.2.30 workbookView CT_BookView + */
case '<workbookView': delete y[0]; wb.WBView.push(y); break;
/* 18.2.20 sheets CT_Sheets 1 */
case '<sheets>': case '</sheets>': break; // aggregate sheet
/* 18.2.19 sheet CT_Sheet + */
case '<sheet': delete y[0]; y.name = utf8read(y.name); wb.Sheets.push(y); break;
/* 18.2.15 functionGroups CT_FunctionGroups ? */
case '<functionGroups': case '<functionGroups/>': break;
/* 18.2.14 functionGroup CT_FunctionGroup + */
case '<functionGroup': break;
/* 18.2.9 externalReferences CT_ExternalReferences ? */
case '<externalReferences': case '</externalReferences>': break;
/* 18.2.8 externalReference CT_ExternalReference + */
case '<externalReference': break;
/* 18.2.6 definedNames CT_DefinedNames ? */
case '<definedNames/>': break;
case '<definedNames>': pass=true; break;
case '</definedNames>': pass=false; break;
/* 18.2.5 definedName CT_DefinedName + */
case '<definedName': case '<definedName/>': case '</definedName>': break;
/* 18.2.2 calcPr CT_CalcPr ? */
case '<calcPr': delete y[0]; wb.CalcPr = y; break;
case '<calcPr/>': delete y[0]; wb.CalcPr = y; break;
/* 18.2.16 oleSize CT_OleSize ? (ref required) */
case '<oleSize': break;
/* 18.2.4 customWorkbookViews CT_CustomWorkbookViews ? */
case '<customWorkbookViews>': case '</customWorkbookViews>': case '<customWorkbookViews': break;
/* 18.2.3 customWorkbookView CT_CustomWorkbookView + */
case '<customWorkbookView': case '</customWorkbookView>': break;
/* 18.2.18 pivotCaches CT_PivotCaches ? */
case '<pivotCaches>': case '</pivotCaches>': case '<pivotCaches': break;
/* 18.2.17 pivotCache CT_PivotCache ? */
case '<pivotCache': break;
/* 18.2.21 smartTagPr CT_SmartTagPr ? */
case '<smartTagPr': case '<smartTagPr/>': break;
/* 18.2.23 smartTagTypes CT_SmartTagTypes ? */
case '<smartTagTypes': case '<smartTagTypes>': case '</smartTagTypes>': break;
/* 18.2.22 smartTagType CT_SmartTagType ? */
case '<smartTagType': break;
/* 18.2.24 webPublishing CT_WebPublishing ? */
case '<webPublishing': case '<webPublishing/>': break;
/* 18.2.11 fileRecoveryPr CT_FileRecoveryPr ? */
case '<fileRecoveryPr': case '<fileRecoveryPr/>': break;
/* 18.2.26 webPublishObjects CT_WebPublishObjects ? */
case '<webPublishObjects>': case '<webPublishObjects': case '</webPublishObjects>': break;
/* 18.2.25 webPublishObject CT_WebPublishObject ? */
case '<webPublishObject': break;
/* 18.2.10 extLst CT_ExtensionList ? */
case '<extLst>': case '</extLst>': case '<extLst/>': break;
/* 18.2.7 ext CT_Extension + */
case '<ext': pass=true; break; //TODO: check with versions of excel
case '</ext>': pass=false; break;
/* Others */
case '<mx:ArchID': break;
case '<mc:AlternateContent': pass=true; break;
case '</mc:AlternateContent>': pass=false; break;
// Anything else is logged, not thrown, so unknown tags never abort parsing.
default: if(!pass) console.error("WB Tag",x,y);
}
});
if(wb.xmlns !== XMLNS_WB) throw "Unknown Namespace: " + wb.xmlns;
var z;
/* defaults */
// `null == x` is true for both null and undefined, i.e. "attribute absent".
for(z in WBPropsDef) if(null == wb.WBProps[z]) wb.WBProps[z] = WBPropsDef[z];
for(z in CalcPrDef) if(null == wb.CalcPr[z]) wb.CalcPr[z] = CalcPrDef[z];
wb.WBView.forEach(function(w){for(var z in WBViewDef) if(null==w[z]) w[z]=WBViewDef[z]; });
wb.Sheets.forEach(function(w){for(var z in SheetDef) if(null==w[z]) w[z]=SheetDef[z]; });
return wb;
}
/**
 * Turns an opened zip archive into the parsed workbook structure.
 *
 * @param {Object} zip archive object exposing `files`, a map of entry name to
 *   an object whose `data` property holds the entry's content
 * @returns {Object} {Directory, Workbook, Props, Deps, Sheets, SheetNames,
 *   Strings, keys, files}; `keys` is the sorted list of non-directory entry
 *   names and `files` is `zip.files` itself.
 */
function parseZip(zip) {
  var entries = Object.keys(zip.files);
  var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
  var dir = parseCT((zip.files['[Content_Types].xml']||{}).data);
  // Part names in the content-types file start with '/', entry names do not.
  var wb = parseWB(zip.files[dir.workbooks[0].replace(/^\//,'')].data);
  var propdata = dir.coreprops.length !== 0 ? zip.files[dir.coreprops[0].replace(/^\//,'')].data : "";
  propdata += dir.extprops.length !== 0 ? zip.files[dir.extprops[0].replace(/^\//,'')].data : "";
  var props = propdata !== "" ? parseProps(propdata) : {};
  var deps = {};
  if(dir.calcchain) deps=parseDeps(zip.files[dir.calcchain.replace(/^\//,'')].data);
  // `strs` is shared module state read by parseSheet. Reset it first so a
  // workbook without a shared-string part does not see (or return) the table
  // left behind by a previously parsed workbook.
  strs = {};
  if(dir.sst) strs=parseStrs(zip.files[dir.sst.replace(/^\//,'')].data);
  var sheets = {}, i=0;
  if(!props.Worksheets) {
    /* Google Docs doesn't generate the appropriate metadata, so we impute: */
    var wbsheets = wb.Sheets;
    props.Worksheets = wbsheets.length;
    props.SheetNames = [];
    for(var j = 0; j != wbsheets.length; ++j) {
      props.SheetNames[j] = wbsheets[j].name;
    }
    for(i = 0; i != props.Worksheets; ++i) {
      sheets[props.SheetNames[i]]=parseSheet(zip.files['xl/worksheets/sheet' + (i+1) + '.xml'].data);
    }
  }
  else {
    for(i = 0; i != props.Worksheets; ++i) {
      sheets[props.SheetNames[i]]=parseSheet(zip.files[dir.sheets[i].replace(/^\//,'')].data);
    }
  }
  return {
    Directory: dir,
    Workbook: wb,
    Props: props,
    Deps: deps,
    Sheets: sheets,
    SheetNames: props.SheetNames,
    Strings: strs,
    keys: keys,
    files: zip.files
  };
}
/* Resolve the zip implementation and, under a CommonJS loader, the fs module.
 * A global JSZip wins; otherwise the bundled ./jszip is required. _fs stays
 * undefined when no CommonJS `module.exports` is available. */
var _fs, jszip;
if(typeof JSZip !== "undefined") {
  jszip = JSZip;
}
if(typeof exports !== 'undefined' && typeof module !== 'undefined' && module.exports) {
  if(typeof jszip === 'undefined') {
    jszip = require('./jszip').JSZip;
  }
  _fs = require('fs');
}
/**
 * Opens a workbook and parses it.
 *
 * @param {*} data base64 string, binary string, or file path, depending on
 *   `options.type`
 * @param {Object} [options] `type` is "base64" (the default), "binary" or
 *   "file"; with "file" the path is read through fs and base64-encoded first
 * @returns {Object} whatever parseZip returns for the opened archive. For any
 *   other `type` no archive is opened and parseZip receives undefined.
 */
function readSync(data, options) {
  var o = options||{};
  var kind = o.type||"base64";
  var zip;
  if(kind === "file") {
    zip = new jszip(_fs.readFileSync(data).toString('base64'), { base64:true });
  } else if(kind === "base64") {
    zip = new jszip(data, { base64:true });
  } else if(kind === "binary") {
    zip = new jszip(data, { base64:false });
  }
  return parseZip(zip);
}
/**
 * Reads and parses a workbook from a file path.
 *
 * @param {string} data path handed to readSync
 * @param {Object} [options] extra options forwarded to readSync; `type` is
 *   always overridden with 'file'
 * @returns {Object} the result of readSync
 */
function readFileSync(data, options) {
  // Work on a copy so the caller's options object is not modified.
  var o = {}, k;
  if(options) for(k in options) o[k] = options[k];
  o.type = 'file';
  return readSync(data, o);
}
// Publish the public entry points on `this` and return `this` as the value
// assigned to XLSX.
// NOTE(review): the enclosing function is invoked as a plain call, so `this`
// is presumably the global object in a sloppy-mode script (and would be
// undefined under strict mode / ES modules, where these assignments throw).
// Confirm what callers rely on before changing how XLSX is constructed.
this.read = readSync;
this.readFile = readFileSync;
this.parseZip = parseZip;
return this;
})();
/* Shorthand: UTF-16 code unit -> one-character string. */
var _chr = function(code) {
  return String.fromCharCode(code);
};
/* 0-based column index -> column letters (0 -> "A", 25 -> "Z", 26 -> "AA"). */
function encode_col(col) {
  var letters = "";
  var n = col;
  ++n; // switch to 1-based so each step is a bijective base-26 digit
  while(n) {
    letters = _chr(((n - 1) % 26) + 65) + letters;
    n = Math.floor((n - 1) / 26);
  }
  return letters;
}
/* 0-based row index -> 1-based row label as a string. */
function encode_row(row) {
  var oneBased = row + 1;
  return String(oneBased);
}
/* {c, r} with 0-based indices -> A1-style reference. */
function encode_cell(cell) {
  var colPart = encode_col(cell.c);
  var rowPart = encode_row(cell.r);
  return colPart + rowPart;
}
/**
 * Column letters -> 0-based column index ("A" -> 0, "AA" -> 26).
 * A leading "$" (absolute-reference marker, which split_cell keeps attached
 * to the column part) is ignored.
 */
function decode_col(c) {
  var letters = c.replace(/^\$/, "");
  var d = 0;
  for(var i = 0; i !== letters.length; ++i) d = 26*d + letters.charCodeAt(i) - 64;
  return d - 1;
}
/**
 * 1-based row label -> 0-based row index ("1" -> 0).
 * A leading "$" (absolute-reference marker, which split_cell keeps attached
 * to the row part) is ignored; non-string input is converted as before.
 */
function decode_row(rowstr) {
  var label = typeof rowstr === "string" ? rowstr.replace(/^\$/, "") : rowstr;
  return Number(label) - 1;
}
/* Splits a cell reference into [column part, row part]; each part keeps an
 * optional leading "$". Works by inserting a comma between the two parts and
 * splitting on it. */
function split_cell(cstr) {
  var marked = cstr.replace(/(\$?[A-Z]*)(\$?[0-9]*)/, "$1,$2");
  return marked.split(",");
}
/* A1-style reference -> {c, r} with 0-based indices. */
function decode_cell(cstr) {
  var pieces = split_cell(cstr);
  var col = decode_col(pieces[0]);
  var row = decode_row(pieces[1]);
  return { c: col, r: row };
}
/* "A1:B2" -> {s: start cell, e: end cell}. A single-cell range yields the
 * same decoded cell object for both s and e. */
function decode_range(range) {
  var corners = range.split(":").map(decode_cell);
  var last = corners.length - 1;
  return { s: corners[0], e: corners[last] };
}
/* {s, e} of 0-based cells -> "A1:B2" style range string. */
function encode_range(range) {
  var first = encode_cell(range.s);
  var last = encode_cell(range.e);
  return first + ":" + last;
}
/**
 * Convert a sheet into an array of objects where the column headers are keys.
 *
 * The first row of the sheet's "!ref" range supplies the headers (string and
 * numeric cells only). Every later row that has at least one defined value
 * becomes one object; rows without any value are left out. The 0-based row
 * index is stored as __rowNum__ on each object's prototype.
 *
 * @param {Object} sheet map of cell reference -> {t, v}, plus "!ref"
 * @returns {Array} row objects; empty when the sheet has no "!ref"
 * @throws {string} 'unrecognized type ...' for an unknown cell type
 **/
function sheet_to_row_object_array(sheet){
  var outSheet = [];
  if(!sheet["!ref"]) return outSheet;

  var range = decode_range(sheet["!ref"]);
  var columnHeaders = {};
  var C, R, val, rowObject, emptyRow;

  // Header row: remember the value of every string or number cell.
  for(C = range.s.c; C <= range.e.c; ++C) {
    val = sheet[encode_cell({ c: C, r: range.s.r })];
    if(!val) continue;
    if(val.t === 's' || val.t === 'str' || val.t === 'n') columnHeaders[C] = val.v;
  }

  for(R = range.s.r + 1; R <= range.e.r; ++R) {
    emptyRow = true;
    //Row number is recorded in the prototype
    //so that it doesn't appear when stringified.
    rowObject = Object.create({ __rowNum__ : R });
    for(C = range.s.c; C <= range.e.c; ++C) {
      val = sheet[encode_cell({ c: C, r: R })];
      if(val === undefined) continue;
      switch(val.t) {
        case 's': case 'str': case 'b': case 'n':
          if(val.v !== undefined) {
            rowObject[columnHeaders[C]] = val.v;
            emptyRow = false;
          }
          break;
        case 'e': break; /* error cells are skipped rather than thrown */
        default: throw 'unrecognized type ' + val.t;
      }
    }
    if(!emptyRow) outSheet.push(rowObject);
  }
  return outSheet;
}
/**
 * Renders a sheet as comma-separated text, one line per row of the "!ref"
 * range, each terminated by "\n".
 *
 * Numbers are emitted as is, strings go through JSON.stringify, booleans
 * become TRUE/FALSE, and error cells and missing cells become empty fields.
 *
 * @param {Object} sheet map of cell reference -> {t, v}, plus "!ref"
 * @returns {string} the text; "" when the sheet has no "!ref"
 * @throws {string} 'unrecognized type ...' for an unknown cell type
 */
function sheet_to_csv(sheet) {
  function stringify(val) {
    if(val.t === 'n') return val.v;
    if(val.t === 's' || val.t === 'str') return JSON.stringify(val.v);
    if(val.t === 'b') return val.v ? "TRUE" : "FALSE";
    if(val.t === 'e') return ""; /* throw out value in case of error */
    throw 'unrecognized type ' + val.t;
  }
  if(!sheet["!ref"]) return "";
  var r = utils.decode_range(sheet["!ref"]);
  var lines = [];
  for(var R = r.s.r; R <= r.e.r; ++R) {
    var fields = [];
    for(var C = r.s.c; C <= r.e.c; ++C) {
      var val = sheet[utils.encode_cell({c:C,r:R})];
      fields.push(val ? stringify(val) : "");
    }
    lines.push(fields.join(",") + "\n");
  }
  return lines.join("");
}
/* Public helper namespace. sheet_to_csv also reaches decode_range and
 * encode_cell through this object. */
var utils = {
  /* address encoding */
  encode_col: encode_col,
  encode_row: encode_row,
  encode_cell: encode_cell,
  encode_range: encode_range,

  /* address decoding */
  decode_col: decode_col,
  decode_row: decode_row,
  split_cell: split_cell,
  decode_cell: decode_cell,
  decode_range: decode_range,

  /* sheet converters */
  sheet_to_csv: sheet_to_csv,
  sheet_to_row_object_array: sheet_to_row_object_array
};
if(typeof require !== 'undefined' && typeof exports !== 'undefined') {
exports.read = XLSX.read;
exports.readFile = XLSX.readFile;
exports.utils = utils;
exports.main = function(args) {
var zip = XLSX.read(args[0], {type:'file'});
console.log(zip.Sheets);
};
if(typeof module !== 'undefined' && require.main === module)
exports.main(process.argv.slice(2));
}