version bump 0.7.3: performance

- require cpexcel instead of full codepage library
- hardcode the unescapexml regexp
- utf8read short circuit for ASCII strings
- cellFormula also acts on XLSX/XLSM
- bin/xlsx.njs: do not process formulae unless requested
This commit is contained in:
SheetJS 2014-05-25 02:04:08 -07:00
parent 86f21d76ce
commit 489b9cdde4
17 changed files with 43 additions and 29 deletions

@ -125,7 +125,7 @@ The exported `read` and `readFile` functions accept an options argument:
| Option Name | Default | Description |
| :---------- | ------: | :---------- |
| cellFormula | true | Save formulae to the .f field ** |
| cellFormula | true | Save formulae to the .f field |
| cellHTML | true | Parse rich text and save HTML to the .h field |
| cellNF | false | Save number format string to the .z field |
| sheetStubs | false | Create cell objects for stub cells |
@ -136,8 +136,6 @@ The exported `read` and `readFile` functions accept an options argument:
| bookSheets | false | If true, only parse enough to get the sheet names |
| bookVBA | false | If true, expose vbaProject.bin to `vbaraw` field ** |
- `cellFormula` only applies to constructing XLSB formulae. XLSX/XLSM formulae
are stored in plaintext, but XLSB formulae are stored in a binary format.
- Even if `cellNF` is false, formatted text (.w) will be generated
- In some cases, sheets may be parsed even if `bookSheets` is false.
- `bookSheets` and `bookProps` combine to give both sets of information

@ -20,6 +20,8 @@ program
.option('-F, --field-sep <sep>', 'CSV field separator', ",")
.option('-R, --row-sep <sep>', 'CSV row separator', "\n")
.option('-n, --sheet-rows <num>', 'Number of rows to process (0=all rows)')
.option('--no-sst', 'do not generate sst')
.option('--perf', 'do not generate output')
.option('--dev', 'development mode')
.option('--read', 'read but do not print out contents')
.option('-q, --quiet', 'quiet mode');
@ -57,6 +59,9 @@ if(program.xlsx || program.xlsm || program.xlsb) {
opts.cellNF = true;
if(program.output) sheetname = program.output;
}
else if(program.formulae);
else opts.cellFormula = false;
if(program.dev) {
X.verbose = 2;
opts.WTF = true;
@ -77,7 +82,7 @@ if(program.listSheets) {
process.exit(0);
}
var wopts = {WTF:opts.WTF};
var wopts = {WTF:opts.WTF, bookSST:program.sst};
if(program.xlsx) return X.writeFile(wb, sheetname || (filename + ".xlsx"), wopts);
if(program.xlsm) return X.writeFile(wb, sheetname || (filename + ".xlsm"), wopts);
@ -95,6 +100,8 @@ try {
process.exit(4);
}
if(program.perf) return;
var oo = "";
if(!program.quiet) console.error(target_sheet);
if(program.formulae) oo = X.utils.get_formulae(ws).join("\n");

@ -1 +1 @@
XLSX.version = '0.7.2';
XLSX.version = '0.7.3';

@ -1,6 +1,6 @@
var current_codepage = 1252, current_cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
if(typeof cptable === 'undefined') cptable = require('./dist/cpexcel');
current_cptable = cptable[current_codepage];
}
/* Switch the active codepage back to 1252 by delegating to set_cp. */
function reset_cp() {
	set_cp(1252);
}

@ -27,7 +27,8 @@ var rencstr = "&<>'\"".split("");
// TODO: CP remap (need to read file version to determine OS)
/*
 * Decode XML-escaped text: the five predefined entities (&quot; &apos; &gt;
 * &lt; &amp;) followed by Excel-style _xHHHH_ character escapes.
 *
 * text: value to decode; it is coerced to a string first.
 * Returns the decoded string.
 */
function unescapexml(text){
	var s = text + '';
	/* one pass per entity; &amp; goes last so "&amp;lt;" decodes to "&lt;" and not "<" */
	s = s.replace(/&quot;/g, '"').replace(/&apos;/g, "'").replace(/&gt;/g, ">").replace(/&lt;/g, "<").replace(/&amp;/g, "&");
	/* exactly four hex digits form an escape; "_x_" or a longer run stays literal text */
	return s.replace(/_x([0-9a-fA-F]{4})_/g,function(m,c) {return _chr(parseInt(c,16));});
}
function escapexml(text){
@ -48,6 +49,7 @@ function parsexmlbool(value, tag) {
var utf8read = function(orig) {
var out = [], i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
if(!orig.match(/[\u0080-\uffff]/)) return orig;
while (i < orig.length) {
c = orig.charCodeAt(i++);
if (c < 128) out.push(_chr(c));

@ -18,7 +18,7 @@ function parse_ws_xml(data, opts, rels) {
}
var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} };
var q = ["v","f"];
var q = (opts.cellFormula ? ["v","f"] : ["v"]);
var sidx = 0;
/* 18.3.1.80 sheetData CT_SheetData ? */

2
dist/cpexcel.js vendored

@ -959,10 +959,12 @@ if (typeof module !== 'undefined' && module.exports) module.exports = cptable;
var encache = function(cp) {
if(typeof Buffer !== 'undefined') {
sbcs_cache.forEach(function(s) {
if(!cpt[s]) return;
cpdcache[s] = sbcs_decode(s);
cpecache[s] = sbcs_encode(s);
});
dbcs_cache.forEach(function(s) {
if(!cpt[s]) return;
cpdcache[s] = dbcs_decode(s);
cpecache[s] = dbcs_encode(s);
});

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

10
dist/xlsx.js vendored

@ -2,10 +2,10 @@
/* vim: set ts=2: */
var XLSX = {};
(function(XLSX){
XLSX.version = '0.7.2';
XLSX.version = '0.7.3';
var current_codepage = 1252, current_cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
if(typeof cptable === 'undefined') cptable = require('./dist/cpexcel');
current_cptable = cptable[current_codepage];
}
/* Switch the active codepage back to 1252 by delegating to set_cp. */
function reset_cp() {
	set_cp(1252);
}
@ -675,7 +675,8 @@ var rencstr = "&<>'\"".split("");
// TODO: CP remap (need to read file version to determine OS)
/*
 * Decode XML-escaped text: the five predefined entities (&quot; &apos; &gt;
 * &lt; &amp;) followed by Excel-style _xHHHH_ character escapes.
 *
 * text: value to decode; it is coerced to a string first.
 * Returns the decoded string.
 */
function unescapexml(text){
	var s = text + '';
	/* one pass per entity; &amp; goes last so "&amp;lt;" decodes to "&lt;" and not "<" */
	s = s.replace(/&quot;/g, '"').replace(/&apos;/g, "'").replace(/&gt;/g, ">").replace(/&lt;/g, "<").replace(/&amp;/g, "&");
	/* exactly four hex digits form an escape; "_x_" or a longer run stays literal text */
	return s.replace(/_x([0-9a-fA-F]{4})_/g,function(m,c) {return _chr(parseInt(c,16));});
}
function escapexml(text){
@ -696,6 +697,7 @@ function parsexmlbool(value, tag) {
var utf8read = function(orig) {
var out = [], i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
if(!orig.match(/[\u0080-\uffff]/)) return orig;
while (i < orig.length) {
c = orig.charCodeAt(i++);
if (c < 128) out.push(_chr(c));
@ -2165,7 +2167,7 @@ function parse_ws_xml(data, opts, rels) {
}
var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} };
var q = ["v","f"];
var q = (opts.cellFormula ? ["v","f"] : ["v"]);
var sidx = 0;
/* 18.3.1.80 sheetData CT_SheetData ? */

4
dist/xlsx.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/xlsx.min.map vendored

File diff suppressed because one or more lines are too long

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.7.2-a",
"version": "0.7.3",
"author": "sheetjs",
"description": "XLSB/XLSX/XLSM (Excel 2007+ Spreadsheet) parser and writer",
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],

@ -82,6 +82,7 @@ apachepoi_53568.xlsx
apachepoi_53734.xlsx
apachepoi_53798.xlsx
apachepoi_53798_shiftNegative_TMPL.xlsx
apachepoi_54034.xlsx
apachepoi_54071.xlsx
apachepoi_54084 - Greek - beyond BMP.xlsx
apachepoi_54206.xlsx

10
xlsx.js

@ -2,10 +2,10 @@
/* vim: set ts=2: */
var XLSX = {};
(function(XLSX){
XLSX.version = '0.7.2';
XLSX.version = '0.7.3';
var current_codepage = 1252, current_cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
if(typeof cptable === 'undefined') cptable = require('./dist/cpexcel');
current_cptable = cptable[current_codepage];
}
/* Switch the active codepage back to 1252 by delegating to set_cp. */
function reset_cp() {
	set_cp(1252);
}
@ -675,7 +675,8 @@ var rencstr = "&<>'\"".split("");
// TODO: CP remap (need to read file version to determine OS)
/*
 * Decode XML-escaped text: the five predefined entities (&quot; &apos; &gt;
 * &lt; &amp;) followed by Excel-style _xHHHH_ character escapes.
 *
 * text: value to decode; it is coerced to a string first.
 * Returns the decoded string.
 */
function unescapexml(text){
	var s = text + '';
	/* one pass per entity; &amp; goes last so "&amp;lt;" decodes to "&lt;" and not "<" */
	s = s.replace(/&quot;/g, '"').replace(/&apos;/g, "'").replace(/&gt;/g, ">").replace(/&lt;/g, "<").replace(/&amp;/g, "&");
	/* exactly four hex digits form an escape; "_x_" or a longer run stays literal text */
	return s.replace(/_x([0-9a-fA-F]{4})_/g,function(m,c) {return _chr(parseInt(c,16));});
}
function escapexml(text){
@ -696,6 +697,7 @@ function parsexmlbool(value, tag) {
var utf8read = function(orig) {
var out = [], i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
if(!orig.match(/[\u0080-\uffff]/)) return orig;
while (i < orig.length) {
c = orig.charCodeAt(i++);
if (c < 128) out.push(_chr(c));
@ -2165,7 +2167,7 @@ function parse_ws_xml(data, opts, rels) {
}
var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} };
var q = ["v","f"];
var q = (opts.cellFormula ? ["v","f"] : ["v"]);
var sidx = 0;
/* 18.3.1.80 sheetData CT_SheetData ? */