This commit is contained in:
SheetJS 2012-12-04 14:27:20 -05:00
parent 13fbe76d6b
commit 5806249485
6 changed files with 2589 additions and 3 deletions

14
LICENSE Normal file

@ -0,0 +1,14 @@
Copyright (C) 2012 Niggler
The MIT License (MIT)
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
Except where noted, this license applies to any and all software programs and associated documentation files created by the Original Author and distributed with the Software:
'jszip.js' is a modified version of JSZip, Copyright (c) Stuart Knightley, David Duponchel, Franz Buchinger, António Afonso. JSZip is dual licensed and is used according to the terms of the MIT License.

@ -1,4 +1,54 @@
js-xlsx
=======
# xlsx
Currently a parser for XLSX files. Cleanroom implementation from the ISO 29500 Office Open XML specifications.
This has been tested on some very basic XLSX files generated from Excel 2011.
*THIS WAS WHIPPED UP VERY QUICKLY TO SATISFY A VERY SPECIFIC NEED*. If you need something that is not currently supported, file an issue and attach a sample file. I will get to it :)
## Installation
In node:
npm install xlsx
In the browser:
<script lang="javascript" src="/path/to/jszip.js"></script>
<script lang="javascript" src="/path/to/xlsx.js"></script>
## Usage
See `xlsx2csv.njs` in the bin directory for usage in node.
See http://niggler.github.com/js-xlsx/ for a browser example.
## Notes
`.SheetNames` is an ordered list of the sheets in the workbook
`.Sheets[sheetname]` returns a data structure representing the sheet. Each key
that does not start with `!` corresponds to a cell (using `A-1` notation).
`.Sheets[sheetname][address].v` returns the value of the cell and `.Sheets[sheetname][address].t` returns the type of the cell (constrained to the enumeration `ST_CellType` as documented in page 4215 of ISO/IEC 29500-1:2012(E) )
Simple usage:
var XLSX = require('xlsx')
var xlsx = XLSX.readFile('test.xlsx');
var sheet_name_list = xlsx.SheetNames;
xlsx.SheetNames.forEach(function(y) {
for (var z in xlsx.Sheets[y]) {
if(z[0] === '!') continue;
console.log(y + "!" + z + "=" + JSON.stringify(xlsx.Sheets[y][z].v));
}
});
## License
Please consult the attached LICENSE file for details. All rights not explicitly granted by the MIT license are reserved by the Original Author.
## References
ISO/IEC 29500:2012(E) "Information technology — Document description and processing languages — Office Open XML File Formats"
Javascript XLSX parser and (one day) writer

46
bin/xlsx2csv.njs Executable file

@ -0,0 +1,46 @@
#!/usr/bin/env node
/* Command-line front end: xlsx2csv <workbook> [sheet] */
var XLSX = require('../xlsx');
var utils = XLSX.utils;
var fs = require('fs');

/* first positional argument: path of the workbook to convert */
var filename = process.argv[2];
var wantsHelp = !filename || ["-h", "--help"].indexOf(filename) !== -1;
if(wantsHelp) {
	console.log("usage:",process.argv[1],"<workbook> [sheet]");
	console.log(" when sheet = :list, print a list of sheets in the workbook");
	process.exit(0);
}

/* bail out early when the workbook path does not exist */
if(!fs.existsSync(filename)) {
	console.error("error:",filename,"does not exist!");
	process.exit(1);
}

var xlsx = XLSX.readFile(filename);

/* second positional argument: sheet name (defaults to the first sheet) */
var sheetname = process.argv[3] || xlsx.SheetNames[0];

/* the pseudo sheet name ":list" prints every sheet name and exits */
if(sheetname === ":list") {
	xlsx.SheetNames.forEach(function(name) { console.log(name); });
	process.exit(0);
}

/* unknown sheet: report the available names on stderr and fail */
if(xlsx.SheetNames.indexOf(sheetname) < 0) {
	console.error("Sheet", sheetname, "not found in", filename, ". I see:");
	xlsx.SheetNames.forEach(function(name) { console.error(" - " + name); });
	process.exit(1);
}
/**
 * Render one parsed cell as a CSV field.
 *
 * Numeric cells are returned as-is. String cells are always wrapped in double
 * quotes, and embedded double quotes are doubled (the CSV quoting convention)
 * instead of being backslash-escaped the way JSON.stringify would do.
 *
 * @param {{t: string, v: *}} val cell object with a type tag `t` and a value `v`
 * @returns {number|string} the field text (or the raw number for numeric cells)
 * @throws {Error} when the cell type is neither 'n', 's' nor 'str'
 */
function stringify(val) {
	switch(val.t){
		case 'n': return val.v;
		case 's': case 'str':
			/* a string cell without a value becomes an empty field */
			if(val.v == null) return "";
			return '"' + String(val.v).replace(/"/g, '""') + '"';
		default: throw new Error('unrecognized type ' + val.t);
	}
}
/* Walk the sheet's declared range row by row and print each row as one CSV line. */
var sheet = xlsx.Sheets[sheetname];
var ref = sheet["!ref"];
if(ref) {
	var range = utils.decode_range(ref);
	for(var rowIdx = range.s.r; rowIdx <= range.e.r; ++rowIdx) {
		var fields = [];
		for(var colIdx = range.s.c; colIdx <= range.e.c; ++colIdx) {
			/* cells absent from the sheet object become empty fields */
			var cell = sheet[utils.encode_cell({c:colIdx,r:rowIdx})];
			fields.push(cell ? stringify(cell) : "");
		}
		console.log(fields.join(","));
	}
}

2128
jszip.js Normal file

File diff suppressed because it is too large Load Diff

23
package.json Normal file

@ -0,0 +1,23 @@
{
"name": "xlsx",
"version": "0.0.3",
"author": "Niggler",
"description": "(one day) a full-featured XLSX parser and writer. For now, primitive parser",
"keywords": [
"xlsx", "office", "excel", "spreadsheet"
],
"bin": {
"xlsx2csv": "./bin/xlsx2csv.njs"
},
"main": "./xlsx",
"repository": {
"type":"git",
"url": "git://github.com/Niggler/js-xlsx.git"
},
"bugs": {
"url": "https://github.com/Niggler/js-xlsx/issues"
},
"engines": {
"node": ">=0.8"
}
}

325
xlsx.js Normal file

@ -0,0 +1,325 @@
/* vim: set ts=2:*/
/*jshint eqnull:true */
var XLSX = (function(){
// When truthy, each parser attaches its unparsed input as `rawdata` on the result.
var debug = 0;
// Maps a part's content type to the name of the list in parseCT's result
// that collects the matching part names.
// NOTE(review): the "foo" entry looks like a placeholder; parseCT's result has no "bar" list.
var ct2type = {
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml": "workbooks",
"application/vnd.openxmlformats-package.core-properties+xml": "coreprops",
"application/vnd.openxmlformats-officedocument.extended-properties+xml": "extprops",
"application/vnd.openxmlformats-officedocument.spreadsheetml.calcChain+xml": "calcchains",
"application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml":"sheets",
"application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml": "strs",
"application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml":"styles",
"application/vnd.openxmlformats-officedocument.theme+xml":"themes",
"foo": "bar"
};
// Defaults that parseWB copies into WBProps for attributes missing from <workbookPr>.
// NOTE(review): refreshAllConnections is a boolean while every other default is a string — confirm intent.
var WBPropsDef = {
allowRefreshQuery: '0',
autoCompressPictures: '1',
backupFile: '0',
checkCompatibility: '0',
codeName: '',
date1904: '0',
dateCompatibility: '1',
//defaultThemeVersion: '0',
filterPrivacy: '0',
hidePivotFieldList: '0',
promptedSolutions: '0',
publishItems: '0',
refreshAllConnections: false,
saveExternalLinkValues: '1',
showBorderUnselectedTables: '1',
showInkAnnotation: '1',
showObjects: 'all',
showPivotChartFilter: '0'
//updateLinks: 'userSet'
};
// Defaults that parseWB copies into every <workbookView> entry for missing attributes.
var WBViewDef = {
activeTab: '0',
autoFilterDateGrouping: '1',
firstSheet: '0',
minimized: '0',
showHorizontalScroll: '1',
showSheetTabs: '1',
showVerticalScroll: '1',
tabRatio: '600',
visibility: 'visible'
//window{Height,Width}, {x,y}Window
};
// Defaults that parseWB copies into every <sheet> entry for missing attributes.
var SheetDef = {
state: 'visible'
};
// Defaults that parseWB copies into CalcPr for attributes missing from <calcPr>.
var CalcPrDef = {
calcCompleted: '1',
calcMode: 'auto',
calcOnSave: '1',
concurrentCalc: '1',
fullCalcOnLoad: '0',
iterate: 'false',
iterateCount: '100',
iterateDelta: '0.001',
refMode: 'A1'
};
// Namespaces that parseCT and parseWB require on the <Types> and <workbook> root tags.
var XMLNS_CT = 'http://schemas.openxmlformats.org/package/2006/content-types';
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
// XML entity -> literal character; unescapexml uses each key as a RegExp source.
var encodings = {
'&gt;': '>',
'&lt;': '<',
'&amp;': '&'
};
/**
 * Decode the five predefined XML entities (&gt; &lt; &amp; &quot; &apos;).
 *
 * The input is coerced to a string first. Decoding happens in a single pass,
 * so the output of one replacement is never decoded a second time
 * (e.g. "&amp;lt;" becomes "&lt;", not "<").
 *
 * @param {*} text value to decode
 * @returns {string} the decoded text
 */
function unescapexml(text){
	var entities = { gt: '>', lt: '<', amp: '&', quot: '"', apos: "'" };
	return (text + '').replace(/&(gt|lt|amp|quot|apos);/g, function(match, name) {
		return entities[name];
	});
}
/**
 * Split a single XML tag into its leading word and its attributes.
 *
 * The result always carries the first whitespace-delimited word (including
 * the leading "<") under key '0'. Every `name="value"` pair with a
 * double-quoted value is added under `name`; only the word characters directly
 * before `=` are used, so a prefixed attribute such as `r:id` is stored as `id`.
 * Tags whose attributes do not match that shape simply yield no attribute
 * keys instead of raising a TypeError.
 *
 * @param {string} tag text of one tag, e.g. '<c r="A1" t="s">'
 * @returns {Object} map with key '0' plus one key per attribute
 */
function parsexmltag(tag) {
	var words = tag.split(/\s+/);
	var z = {'0': words[0]};
	if(words.length === 1) return z;
	var attr = /(\w+)="([^"]*)"/g;
	var m;
	while((m = attr.exec(tag)) !== null) z[m[1]] = m[2];
	return z;
}
var strs = {}; // shared strings
/**
 * Parse worksheet XML into a map from cell address to cell object.
 *
 * Each cell object has `t` (the cell type, "n" when the tag has no `t`
 * attribute), `v` (the value) and, when present, `f` (the formula text).
 * "!ref" holds the `ref` attribute of <dimension> when that tag exists.
 * Shared-string cells ('s') are resolved through the `strs` table.
 *
 * Sheets without <dimension>, with an empty or multi-line <sheetData>, and
 * self-closing (value-less) cells are tolerated; self-closing cells are
 * skipped rather than picking up the value of the cell that follows them.
 *
 * @param {string} data raw worksheet XML
 * @returns {Object} the parsed sheet
 * @throws {string} "Unrecognized cell type: ..." for types other than n / s / str
 */
function parseSheet(data) { //TODO: use a real xml parser
	var s = {};
	var dim = data.match(/<dimension ref="([^"]*)"\s*\/>/);
	if(dim) s["!ref"] = dim[1];
	var q = ["v","f"];
	/* [\s\S] instead of . so that line breaks inside the XML do not defeat the match */
	var sheetdata = data.match(/<sheetData>([\s\S]*)<\/sheetData>/);
	var rows = sheetdata ? sheetdata[1].split("</row>") : [];
	rows.forEach(function(x) {
		/* pieces without a row tag (trailing whitespace, empty tail) carry no cells */
		if(!x.match(/<row[^>]*>/)) return;
		/* self-closing cells hold no value; drop them so they cannot swallow the next cell */
		var body = x.substr(x.indexOf('>')+1).replace(/<c\b[^>]*\/>/g, "");
		body.split("</c>").forEach(function(c) {
			var celltag = c.match(/<c[^>]*>/);
			if(!celltag) return;
			var cell = parsexmltag(celltag[0]); delete cell[0];
			var d = c.substr(celltag.index + celltag[0].length);
			var p = {};
			q.forEach(function(f){var m=d.match(matchtag(f));if(m)p[f]=unescapexml(m[1]);});
			p.t = (cell.t ? cell.t : "n"); // default is "n" in schema
			switch(p.t) {
				case 'n': p.v = parseFloat(p.v); break;
				case 's': p.v = strs[parseInt(p.v, 10)].t; break;
				case 'str': break; // normal string
				default: throw "Unrecognized cell type: " + p.t;
			}
			s[cell.r] = p;
		});
	});
	if(debug) s.rawdata = data;
	return s;
}
// Builds a RegExp that matches <f>...</f> (attribute-less opening tag) and
// captures the content in group 1. The content may span several lines.
// `g` is an optional RegExp flags string (e.g. "g").
function matchtag(f,g) {return new RegExp('<' + f + '>([\\s\\S]*)</' + f + '>',g||"");}
/**
 * Parse the shared string table.
 *
 * Returns an array with one `{t: text}` object per <si> item, in document
 * order. The text of an item is the concatenation of all of its <t> elements,
 * so items using `<t xml:space="preserve">` or split into rich-text runs still
 * produce a usable `.t`. Content inside <rPh> elements is left out
 * (presumably phonetic hints per the SpreadsheetML schema — confirm).
 * The array also carries the `count` and `uniqueCount` attributes of <sst>.
 * Input without a non-empty <sst> element yields an empty array.
 *
 * @param {string} data raw sharedStrings XML
 * @returns {Array} shared strings, indexable by the number stored in 's' cells
 */
function parseStrs(data) {
	var s = [];
	/* [\s\S] instead of . so that a table spanning several lines still matches */
	var sst = data.match(/<sst ([^>]*)>([\s\S]*)<\/sst>/);
	if(sst) {
		var items = sst[2].match(/<si>[\s\S]*?<\/si>/g) || [];
		s = items.map(function(si) {
			var body = si.replace(/<rPh\b[\s\S]*?<\/rPh>/g, "");
			/* <t> with optional attributes, but not a self-closing <t/> */
			var tre = /<t(?:\s(?:[^>]*[^\/>])?)?>([\s\S]*?)<\/t>/g;
			var text = "", m;
			while((m = tre.exec(body)) !== null) text += m[1];
			return { t: unescapexml(text) };
		});
		var attrs = parsexmltag(sst[1]);
		s.count = attrs.count; s.uniqueCount = attrs.uniqueCount;
	}
	if(debug) s.rawdata = data;
	return s;
}
/**
 * Extract document properties from the concatenated core and extended
 * property XML.
 *
 * Every tag is optional: a missing string or boolean tag leaves the
 * corresponding key unset (`Company` defaults to ''), a missing HeadingPairs
 * count yields `Worksheets` = 0 and a missing TitlesOfParts yields an empty
 * `SheetNames` list, instead of raising a TypeError.
 *
 * @param {string} data core properties XML followed by extended properties XML
 * @returns {Object} properties, including Worksheets, SheetNames, Creator,
 *   LastModifiedBy, CreatedDate and ModifiedDate
 */
function parseProps(data) {
	var p = { Company:'' }, q = {};
	var strings = ["Application", "DocSecurity", "Company", "AppVersion"];
	var bools = ["HyperlinksChanged","SharedDoc","LinksUpToDate","ScaleCrop"];
	var xtra = ["HeadingPairs", "TitlesOfParts","dc:creator","cp:lastModifiedBy","dcterms:created", "dcterms:modified"];
	/* first capture group of `re` within data, or undefined when the tag is absent */
	function first(re) { var m = data.match(re); return m ? m[1] : undefined; }
	strings.forEach(function(f){ var v = first(matchtag(f)); if(v !== undefined) p[f] = v; });
	bools.forEach(function(f){ var v = first(matchtag(f)); if(v !== undefined) p[f] = v === "true"; });
	/* these tags may carry attributes, hence the [^>]* in the opening tag */
	xtra.forEach(function(f){ q[f] = first(new RegExp("<" + f + "[^>]*>(.*)</" + f + ">")); });
	var count = q["HeadingPairs"] ? q["HeadingPairs"].match(/<vt:i4>(.*)<\/vt:i4>/) : null;
	p["Worksheets"] = count ? parseInt(count[1], 10) : 0;
	var titles = (q["TitlesOfParts"] ? q["TitlesOfParts"].match(/<vt:lpstr>([^<]*)<\/vt:lpstr>/g) : null) || [];
	p["SheetNames"] = titles.map(function(x){ return x.match(/<vt:lpstr>([^<]*)<\/vt:lpstr>/)[1]; });
	p["Creator"] = q["dc:creator"];
	p["LastModifiedBy"] = q["cp:lastModifiedBy"];
	p["CreatedDate"] = new Date(q["dcterms:created"]);
	p["ModifiedDate"] = new Date(q["dcterms:modified"]);
	if(debug) p.rawdata = data;
	return p;
}
/**
 * Parse calculation chain XML into a list of its <c> entries.
 *
 * Each entry is the attribute map of one <c> tag. An entry without its own
 * `i` attribute receives the `i` of the closest preceding entry that had one
 * (the number 1 before any such entry has been seen).
 *
 * @param {string} data raw calcChain XML
 * @returns {Array} one attribute object per <c> tag, in document order
 */
function parseDeps(data) {
	var chain = [];
	var carried = 1;
	var tags = data.match(/<[^>]*>/g);
	tags.forEach(function(tag) {
		var node = parsexmltag(tag);
		/* every other tag (<?xml, <calcChain, closing tags) is irrelevant here */
		if(node[0] !== '<c') return;
		delete node[0];
		if(node.i) {
			carried = node.i;
		} else {
			node.i = carried;
		}
		chain.push(node);
	});
	if(debug) chain.rawdata = data;
	return chain;
}
// Extension -> content type, filled in from <Default> entries by parseCT.
var ctext = {};
/**
 * Parse [Content_Types].xml into lists of part names grouped by kind.
 *
 * <Override> entries whose content type is listed in ct2type are appended to
 * the list named there; <Default> entries are recorded in `ctext`. The
 * returned object exposes the first calculation chain part (or "") as
 * `calcchain`.
 *
 * @param {string} data raw content types XML; null/undefined is returned as-is
 * @returns {Object} the directory of parts
 * @throws {string} "Unknown Namespace: ..." when <Types> has an unexpected xmlns
 */
function parseCT(data) {
	if(data == null) return data;
	var ct = {
		workbooks: [], sheets: [], calcchains: [], themes: [], styles: [],
		coreprops: [], extprops: [], strs:[], xmlns: ""
	};
	data.match(/<[^>]*>/g).forEach(function(tag) {
		var y = parsexmltag(tag);
		var kind = y[0];
		if(kind === '<Types') {
			ct.xmlns = y.xmlns;
		} else if(kind === '<Default') {
			ctext[y.Extension] = y.ContentType;
		} else if(kind === '<Override') {
			if(y.ContentType in ct2type) ct[ct2type[y.ContentType]].push(y.PartName);
		}
	});
	if(ct.xmlns !== XMLNS_CT) throw "Unknown Namespace: " + ct.xmlns;
	/* collapse the list of calc chains into a single entry */
	var chains = ct.calcchains;
	delete ct.calcchains;
	ct.calcchain = chains.length > 0 ? chains[0] : "";
	if(debug) ct.rawdata = data;
	return ct;
}
/**
 * Parse workbook XML into its file version, workbook properties, views,
 * sheet entries and calculation properties.
 *
 * Attributes missing from the XML are filled in from WBPropsDef, WBViewDef,
 * CalcPrDef and SheetDef. Tags this parser does not know are ignored; they
 * are only logged when `debug` is on, so that parsing never writes to stdout
 * during normal use.
 *
 * @param {string} data raw workbook XML
 * @returns {Object} {AppVersion, WBProps, WBView, Sheets, CalcPr, xmlns}
 * @throws {string} when <fileVersion> names an application other than "xl",
 *   or when <workbook> has an unexpected xmlns
 */
function parseWB(data) {
	var wb = { AppVersion:{}, WBProps:{}, WBView:[], Sheets:[], CalcPr:{}, xmlns: "" };
	data.match(/<[^>]*>/g).forEach(function(x) {
		var y = parsexmltag(x);
		switch(y[0]) {
			case '<?xml': break;
			case '<workbook': wb.xmlns = y.xmlns; break;
			case '<fileVersion':
				if(y.appName != "xl") throw "Unexpected workbook.appName: "+y.appName;
				delete y[0]; wb.AppVersion = y; break;
			case '<workbookPr': delete y[0]; wb.WBProps = y; break;
			case '<bookViews>': case '</bookViews>': break; // aggregate workbookView
			case '<workbookView': delete y[0]; wb.WBView.push(y); break;
			case '<sheets>': case '</sheets>': break; // aggregate sheet
			case '<sheet': delete y[0]; wb.Sheets.push(y); break;
			case '</ext>': case '</extLst>': case '</workbook>': break;
			case '<extLst>': break;
			case '<calcPr': delete y[0]; wb.CalcPr = y; break;
			case '<mx:ArchID': break;
			case '<ext': break;//TODO: check with different versions of excel
			default: if(debug) console.log(y); // unknown tag: diagnostics only
		}
	});
	if(wb.xmlns !== XMLNS_WB) throw "Unknown Namespace: " + wb.xmlns;
	/* fill in defaults for every attribute the file left out */
	var z;
	for(z in WBPropsDef) if(null == wb.WBProps[z]) wb.WBProps[z] = WBPropsDef[z];
	wb.WBView.forEach(function(w){for(var z in WBViewDef) if(null==w[z]) w[z]=WBViewDef[z]; });
	for(z in CalcPrDef) if(null == wb.CalcPr[z]) wb.CalcPr[z] = CalcPrDef[z];
	wb.Sheets.forEach(function(w){for(var z in SheetDef) if(null==w[z]) w[z]=SheetDef[z]; });
	if(debug) wb.rawdata = data;
	return wb;
}
/**
 * Parse an opened zip archive as an XLSX package.
 *
 * Reads [Content_Types].xml to locate the parts, then parses the workbook,
 * the core and extended properties, the optional calculation chain, the
 * optional shared strings and one worksheet per sheet counted in the
 * properties. Worksheet parts are paired with `Props.SheetNames` by position.
 *
 * @param {Object} zip archive exposing `files[name].data`
 * @returns {Object} {Directory, Workbook, Props, Deps, Sheets, SheetNames,
 *   Strings, keys, files}
 * @throws {Error} when the content types file or a required part is missing
 */
function parseZip(zip) {
	/* fetch the data of a part by its (optionally "/"-prefixed) name */
	function partdata(path, what) {
		var file = path == null ? undefined : zip.files[path.replace(/^\//,'')];
		if(!file) throw new Error("Missing " + what + " in XLSX package");
		return file.data;
	}
	var entries = Object.keys(zip.files);
	/* names ending in "/" are directory entries */
	var keys = entries.filter(function(x){return x.substr(-1) != '/';}).sort();
	var dir = parseCT((zip.files['[Content_Types].xml']||{}).data);
	if(dir == null) throw new Error("Missing [Content_Types].xml in XLSX package");
	var wb = parseWB(partdata(dir.workbooks[0], "workbook part"));
	var props = parseProps(partdata(dir.coreprops[0], "core properties part") + partdata(dir.extprops[0], "extended properties part"));
	var deps = {};
	if(dir.calcchain) deps = parseDeps(partdata(dir.calcchain, "calculation chain part"));
	/* shared strings must be loaded before the sheets that reference them */
	if(dir.strs[0]) strs = parseStrs(partdata(dir.strs[0], "shared strings part"));
	var sheets = {};
	for(var i = 0; i < props.Worksheets; ++i) {
		sheets[props.SheetNames[i]] = parseSheet(partdata(dir.sheets[i], "worksheet part #" + (i + 1)));
	}
	return {
		Directory: dir,
		Workbook: wb,
		Props: props,
		Deps: deps,
		Sheets: sheets,
		SheetNames: props.SheetNames,
		Strings: strs,
		keys: keys,
		files: zip.files
	};
}
// Resolve the zip implementation and the file-system module for the current environment.
var fs, jszip;
// a JSZip that is already defined globally (e.g. loaded through a <script> tag) is used as-is
if(typeof JSZip !== "undefined") jszip = JSZip;
// when require() is available: fall back to the bundled ./jszip and load fs for file reads
if(typeof require !== "undefined") {
if(typeof jszip === 'undefined') jszip = require('./jszip').JSZip;
fs = require('fs');
}
/**
 * Open an XLSX package and parse it.
 *
 * `options.type` selects how `data` is interpreted:
 *   "base64" (default) - data is the base64-encoded archive
 *   "binary"           - data is the archive as a binary string
 *   "file"             - data is a path; the file is read and base64-encoded
 *
 * @param {string} data archive contents or file path, depending on the type
 * @param {Object} [options] may carry `type`
 * @returns {Object} the result of parseZip
 * @throws {Error} for an unknown type, or for "file" when no fs module is available
 */
function readSync(data, options) {
	var zip, d = data;
	var o = options||{};
	var type = o.type||"base64";
	switch(type){
		case "file":
			if(!fs) throw new Error("Cannot read files: no fs module available");
			d = fs.readFileSync(data).toString('base64');
			/* falls through */
		case "base64": zip = new jszip(d, { base64:true }); break;
		case "binary": zip = new jszip(d, { base64:false }); break;
		default: throw new Error("Unrecognized type: " + type);
	}
	return parseZip(zip);
}
/**
 * Read and parse the XLSX file at the given path.
 *
 * Works on a copy of `options`, so the caller's object is left untouched;
 * the copy's `type` is forced to 'file'.
 *
 * @param {string} data path of the workbook
 * @param {Object} [options] extra options forwarded to readSync
 * @returns {Object} the result of readSync
 */
function readFileSync(data, options) {
	var src = options||{};
	var o = {};
	for(var k in src) if(Object.prototype.hasOwnProperty.call(src, k)) o[k] = src[k];
	o.type = 'file';
	return readSync(data, o);
}
// Publish the entry points on `this` and hand `this` back as the value of XLSX.
// NOTE(review): this function is invoked as a plain call, so `this` is presumably the
// global object in sloppy mode (read/readFile/parseZip would then also become globals)
// and undefined in strict mode — confirm, and consider returning a dedicated object.
this.read = readSync;
this.readFile = readFileSync;
this.parseZip = parseZip;
return this;
})();
/* Zero-based column index -> column letters (0 -> "A", 25 -> "Z", 26 -> "AA"). */
function encode_col(col) {
	var label = "";
	var n = Number(col) + 1;
	while(n) {
		/* peel off the least significant letter, then move on to the next one */
		label = String.fromCharCode(((n - 1) % 26) + 65) + label;
		n = Math.floor((n - 1) / 26);
	}
	return label;
}
/* Zero-based row index -> one-based row label (0 -> "1"). */
function encode_row(row) {
	return String(row + 1);
}
/* {c: column index, r: row index} -> cell address such as "C10". */
function encode_cell(cell) {
	var colPart = encode_col(cell.c);
	var rowPart = encode_row(cell.r);
	return colPart + rowPart;
}
/* Column letters -> zero-based column index ("A" -> 0, "AA" -> 26); "$" markers are ignored. */
function decode_col(c) {
	var name = String(c).replace(/\$/g, "");
	var d = 0;
	for(var i = 0; i !== name.length; ++i) d = 26*d + name.charCodeAt(i) - 64;
	return d - 1;
}
/* One-based row label -> zero-based row index ("1" -> 0); "$" markers are ignored. */
function decode_row(rowstr) { return Number(String(rowstr).replace(/\$/g, "")) - 1; }
/* Cell address -> [column part, row part]; each part keeps its optional "$" prefix. */
function split_cell(cstr) { return cstr.replace(/(\$?[A-Z]*)(\$?[0-9]*)/,"$1,$2").split(","); }
/* Cell address (plain or "$"-anchored) -> {c: column index, r: row index}. */
function decode_cell(cstr) { var splt = split_cell(cstr); return { c:decode_col(splt[0]), r:decode_row(splt[1]) }; }
/* Range "A1:C3" -> {s: start cell, e: end cell}; a single address is both start and end. */
function decode_range(range) { var x = range.split(":").map(function(part) { return decode_cell(part); }); return {s:x[0],e:x[x.length-1]}; }
// Cell-address helpers bundled under one object; the CommonJS block below publishes it as `exports.utils`.
var utils = {
encode_col: encode_col,
encode_row: encode_row,
encode_cell: encode_cell,
decode_col: decode_col,
decode_row: decode_row,
split_cell: split_cell,
decode_cell: decode_cell,
decode_range: decode_range
};
// CommonJS entry point: publish the API only when both require and exports exist.
if(typeof require !== 'undefined' && typeof exports !== 'undefined') {
exports.read = XLSX.read;
exports.readFile = XLSX.readFile;
exports.utils = utils;
// minimal command-line driver: parse the workbook named by the first argument and print its sheets
exports.main = function(args) {
var zip = XLSX.read(args[0], {type:'file'});
console.log(zip.Sheets);
};
// run the driver only when this file is the script being executed, not when it is required
if(typeof module !== 'undefined' && require.main === module)
exports.main(process.argv.slice(2));
}