version bump 0.5.9: sheetRows partial processing

- opts.sheetRows limits parsing; default (0) parses all rows
- added -n mode to xlsx2csv to control number of rows
- !ref will be adjusted; !fullref holds full range
This commit is contained in:
SheetJS 2014-02-18 22:03:28 -05:00
parent 509f7bf9c7
commit 5c4b5827b5
12 changed files with 168 additions and 22 deletions

View File

@ -1,7 +1,7 @@
LIB=xlsx
DEPS=$(wildcard bits/*.js)
TARGET=$(LIB).js
FMT=xlsx xlsm xlsb
FMT=xlsx xlsm xlsb misc
$(TARGET): $(DEPS)
cat $^ > $@

View File

@ -79,7 +79,9 @@ The exported `read` and `readFile` functions accept an options argument:
| cellHTML | true | Parse rich text and save HTML to the .h field |
| cellNF | false | Save number format string to the .z field |
| sheetStubs | false | Create cell objects for stub cells |
| sheetRows | 0 | If >0, read the first `sheetRows` rows ** |
| bookDeps | false | If true, parse calculation chains |
| bookFiles | false | If true, add raw files to book object ** |
| bookProps | false | If true, only parse enough to get book metadata ** |
| bookSheets | false | If true, only parse enough to get the sheet names |
@ -89,13 +91,17 @@ The exported `read` and `readFile` functions accept an options argument:
- In some cases, sheets may be parsed even if `bookSheets` is false.
- `bookSheets` and `bookProps` combine to give both sets of information
- `Deps` will be an empty object if `bookDeps` is falsy
- `bookFiles` adds a `keys` array (paths in the ZIP) and a `files` hash (whose
keys are paths and values are objects representing the files)
- When `sheetRows` is set, the JSON object output will contain at most `sheetRows-1` records
(since the header row is counted as one of the rows read when parsing the data)
The defaults are enumerated in bits/84_defaults.js
## Tested Environments
- Node 0.8.14, 0.10.1
- IE 6/7/8/9/10 using Base64 mode (IE10 using HTML5 mode)
- IE 6/7/8/9/10 using Base64 mode (IE10/11 using HTML5 mode)
- FF 18 using Base64 or HTML5 mode
- Chrome 24 using Base64 or HTML5 mode

View File

@ -15,6 +15,7 @@ program
.option('-J, --raw-js', 'emit raw JS object rather than CSV (raw numbers)')
.option('-F, --field-sep <sep>', 'CSV field separator', ",")
.option('-R, --row-sep <sep>', 'CSV row separator', "\n")
.option('-n, --sheet-rows <num>', 'Number of rows to process (0=all rows)')
.option('--dev', 'development mode')
.option('--read', 'read but do not print out contents')
.option('-q, --quiet', 'quiet mode');
@ -46,6 +47,7 @@ if(!fs.existsSync(filename)) {
var opts = {}, wb;
if(program.listSheets) opts.bookSheets = true;
if(program.sheetRows) opts.sheetRows = program.sheetRows;
if(program.dev) {
X.verbose = 2;

View File

@ -1 +1 @@
XLSX.version = '0.5.8';
XLSX.version = '0.5.9';

View File

@ -11,6 +11,8 @@ function parse_comments_xml(data, opts) {
if(x === "" || x.trim() === "") return;
var y = parsexmltag(x.match(/<comment[^>]*>/)[0]);
var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid };
var cell = decode_cell(y.ref);
if(opts.sheetRows && opts.sheetRows <= cell.r) return;
var textMatch = x.match(/<text>([^\u2603]*)<\/text>/m);
if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag.
var rt = parse_si(textMatch[1]);
@ -26,7 +28,7 @@ function parse_comments(zip, dirComments, sheets, sheetRels, opts) {
for(var i = 0; i != dirComments.length; ++i) {
var canonicalpath=dirComments[i];
var comments=parse_comments_xml(getzipdata(zip, canonicalpath.replace(/^\//,''), true), opts);
if(!comments || !comments.length) return;
if(!comments || !comments.length) continue;
// find the sheets targeted by these comments
var sheetNames = Object.keys(sheets);
for(var j = 0; j != sheetNames.length; ++j) {

View File

@ -18,9 +18,9 @@ function parse_ws_xml(data, opts) {
/* 18.3.1.73 row CT_Row */
var row = parsexmltag(x.match(/<row[^>]*>/)[0]);
if(opts.sheetRows && opts.sheetRows < +row.r) return;
if(refguess.s.r > row.r - 1) refguess.s.r = row.r - 1;
if(refguess.e.r < row.r - 1) refguess.e.r = row.r - 1;
/* 18.3.1.4 c CT_Cell */
var cells = x.substr(x.indexOf('>')+1).split(/<c /);
cells.forEach(function(c, idx) { if(c === "" || c.trim() === "") return;
@ -82,6 +82,18 @@ function parse_ws_xml(data, opts) {
});
});
if(!s["!ref"]) s["!ref"] = encode_range(refguess);
if(opts.sheetRows) {
var tmpref = decode_range(s["!ref"]);
if(opts.sheetRows < +tmpref.e.r) {
tmpref.e.r = opts.sheetRows - 1;
if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r;
if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r;
if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c;
if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c;
s["!fullref"] = s["!ref"];
s["!ref"] = encode_range(tmpref);
}
}
return s;
}

View File

@ -123,13 +123,18 @@ var parse_ws_bin = function(data, opts) {
var s = {};
var ref;
var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} };
var pass = false;
var pass = false, end = false;
var row, p, cf;
recordhopper(data, function(val, R) {
if(end) return;
switch(R.n) {
case 'BrtWsDim': ref = val; break;
case 'BrtRowHdr': row = val; break;
case 'BrtRowHdr':
row = val;
if(opts.sheetRows && opts.sheetRows <= row.r) end=true;
break;
case 'BrtFmlaBool':
case 'BrtFmlaError':
@ -154,7 +159,11 @@ var parse_ws_bin = function(data, opts) {
if(opts.cellNF) p.z = SSF._table[cf.ifmt];
} catch(e) { if(opts.WTF) throw e; }
s[encode_cell({c:val[0].c,r:row.r})] = p;
break; // TODO
if(refguess.s.r > row.r) refguess.s.r = row.r;
if(refguess.s.c > val[0].c) refguess.s.c = val[0].c;
if(refguess.e.r < row.r) refguess.e.r = row.r;
if(refguess.e.c < val[0].c) refguess.e.c = val[0].c;
break;
case 'BrtCellBlank': break; // (blank cell)
@ -192,6 +201,18 @@ var parse_ws_bin = function(data, opts) {
}
}, opts);
s["!ref"] = encode_range(ref);
if(opts.sheetRows) {
var tmpref = decode_range(s["!ref"]);
if(opts.sheetRows < +tmpref.e.r) {
tmpref.e.r = opts.sheetRows - 1;
if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r;
if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r;
if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c;
if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c;
s["!fullref"] = s["!ref"];
s["!ref"] = encode_range(tmpref);
}
}
return s;
};

View File

@ -5,7 +5,7 @@ function fixopts(opts) {
['cellFormula', true], /* emit formulae as .h */
['sheetStubs', false], /* emit empty cells */
['sheetRows', 0, 'n'], /* read n rows (0 = read all rows) */
['bookDeps', false], /* parse calculation chains */
['bookSheets', false], /* only try to get sheet names (no Sheets) */
['bookProps', false], /* only try to get properties (no Sheets) */
@ -13,5 +13,8 @@ function fixopts(opts) {
['WTF', false] /* WTF mode (throws errors) */
];
defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; });
defaults.forEach(function(d) {
if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1];
if(d[2] === 'n') opts[d[0]] = Number(opts[d[0]]);
});
}

View File

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.5.8",
"version": "0.5.9",
"author": "sheetjs",
"description": "XLSB / XLSX / XLSM parser",
"keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ],

66
test.js
View File

@ -5,7 +5,6 @@ describe('source',function(){it('should load',function(){XLSX=require('./');});}
var opts = {};
if(process.env.WTF) opts.WTF = true;
var ex = [".xlsb", ".xlsm", ".xlsx"];
if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
var exp = ex.map(function(x){ return x + ".pending"; });
@ -154,6 +153,42 @@ describe('options', function() {
var wb = XLSX.readFile(dir+'merge_cells.xlsx', {sheetStubs:true});
assert(typeof wb.Sheets.Merge.A2.t !== 'undefined');
});
it('should read all cells by default', function() {
var wb = XLSX.readFile(dir+'formula_stress_test.xlsb');
assert(typeof wb.Sheets.Text.A46 !== 'undefined');
assert(typeof wb.Sheets.Text.B26 !== 'undefined');
assert(typeof wb.Sheets.Text.C16 !== 'undefined');
assert(typeof wb.Sheets.Text.D2 !== 'undefined');
wb = XLSX.readFile(dir+'formula_stress_test.xlsx');
assert(typeof wb.Sheets.Text.A46 !== 'undefined');
assert(typeof wb.Sheets.Text.B26 !== 'undefined');
assert(typeof wb.Sheets.Text.C16 !== 'undefined');
assert(typeof wb.Sheets.Text.D2 !== 'undefined');
});
/* With sheetRows:20, cells in rows 46 and 26 must be absent while cells in rows 16 and 2 */
/* must still be present; checked against the XLSX copy first, then the XLSB copy. */
it('sheetRows n=20', function() {
/* XLSX copy limited to 20 rows */
var wb = XLSX.readFile(dir+'formula_stress_test.xlsx', {sheetRows:20});
assert(typeof wb.Sheets.Text.A46 === 'undefined');
assert(typeof wb.Sheets.Text.B26 === 'undefined');
assert(typeof wb.Sheets.Text.C16 !== 'undefined');
assert(typeof wb.Sheets.Text.D2 !== 'undefined');
/* XLSB copy limited to 20 rows: identical expectations */
wb = XLSX.readFile(dir+'formula_stress_test.xlsb', {sheetRows:20});
assert(typeof wb.Sheets.Text.A46 === 'undefined');
assert(typeof wb.Sheets.Text.B26 === 'undefined');
assert(typeof wb.Sheets.Text.C16 !== 'undefined');
assert(typeof wb.Sheets.Text.D2 !== 'undefined');
});
/* With sheetRows:10, cells in rows 46, 26 and 16 must be absent and only the row-2 cell */
/* must remain; checked against the XLSB copy first, then the XLSX copy. */
it('sheetRows n=10', function() {
/* XLSB copy limited to 10 rows */
var wb = XLSX.readFile(dir+'formula_stress_test.xlsb', {sheetRows:10});
assert(typeof wb.Sheets.Text.A46 === 'undefined');
assert(typeof wb.Sheets.Text.B26 === 'undefined');
assert(typeof wb.Sheets.Text.C16 === 'undefined');
assert(typeof wb.Sheets.Text.D2 !== 'undefined');
/* XLSX copy limited to 10 rows: identical expectations */
wb = XLSX.readFile(dir+'formula_stress_test.xlsx', {sheetRows:10});
assert(typeof wb.Sheets.Text.A46 === 'undefined');
assert(typeof wb.Sheets.Text.B26 === 'undefined');
assert(typeof wb.Sheets.Text.C16 === 'undefined');
assert(typeof wb.Sheets.Text.D2 !== 'undefined');
});
});
describe('book', function() {
it('bookSheets should not generate sheets', function() {
@ -224,7 +259,7 @@ describe('features', function() {
});
});
describe('should have core properties and custom properties parsed', function() {
describe('should parse core properties and custom properties', function() {
var wb;
before(function() {
XLSX = require('./');
@ -242,7 +277,7 @@ describe('features', function() {
});
});
describe('should parse cells with date type', function() {
describe('should parse cells with date type (XLSX/XLSB)', function() {
var wb, ws;
before(function() {
XLSX = require('./');
@ -255,4 +290,29 @@ describe('features', function() {
assert.equal(sheet[3]['てすと'], '2/14/14');
});
});
describe('sheetRows', function() {
it('should use original range if not set', function() {
var wb = XLSX.readFile(dir+'formula_stress_test.xlsb');
assert.equal(wb.Sheets.Text["!ref"],"A1:F49");
wb = XLSX.readFile(dir+'formula_stress_test.xlsx');
assert.equal(wb.Sheets.Text["!ref"],"A1:F49");
});
it('should adjust range if set', function() {
var wb = XLSX.readFile(dir+'formula_stress_test.xlsx', {sheetRows:10});
assert.equal(wb.Sheets.Text["!fullref"],"A1:F49");
assert.equal(wb.Sheets.Text["!ref"],"A1:F10");
wb = XLSX.readFile(dir+'formula_stress_test.xlsb', {sheetRows:10});
assert.equal(wb.Sheets.Text["!fullref"],"A1:F49");
assert.equal(wb.Sheets.Text["!ref"],"A1:F10");
});
it('should not generate comment cells', function() {
var wb = XLSX.readFile(dir+'comments_stress_test.xlsx', {sheetRows:10});
assert.equal(wb.Sheets.Sheet7["!fullref"],"A1:N34");
assert.equal(wb.Sheets.Sheet7["!ref"],"A1:A1");
wb = XLSX.readFile(dir+'comments_stress_test.xlsb', {sheetRows:10});
assert.equal(wb.Sheets.Sheet7["!fullref"],"A1:N34");
assert.equal(wb.Sheets.Sheet7["!ref"],"A1:A1");
});
});
});

View File

@ -8,6 +8,7 @@ named_ranges_2011.xlsb
number_format.xlsb
rich_text_stress.xlsb
time_stress_test_1.xlsb
xlsx-stream-d-date-cell.xlsb
LONumbers-2010.xlsx
LONumbers-2011.xlsx
LONumbers.xlsx
@ -233,6 +234,7 @@ xlrd_test_comments_excel.xlsx
xlrd_test_comments_gdocs.xlsx
xlrd_text_bar.xlsx
חישוב_נקודות_זיכוי.xlsx
xlsx-stream-d-date-cell.xlsx
apachepoi_45431.xlsm
apachepoi_47026.xlsm
apachepoi_47089.xlsm

54
xlsx.js
View File

@ -423,7 +423,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa
make_ssf(SSF);
var XLSX = {};
(function(XLSX){
XLSX.version = '0.5.8';
XLSX.version = '0.5.9';
var current_codepage, current_cptable, cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('codepage');
@ -1242,6 +1242,8 @@ function parse_comments_xml(data, opts) {
if(x === "" || x.trim() === "") return;
var y = parsexmltag(x.match(/<comment[^>]*>/)[0]);
var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid };
var cell = decode_cell(y.ref);
if(opts.sheetRows && opts.sheetRows <= cell.r) return;
var textMatch = x.match(/<text>([^\u2603]*)<\/text>/m);
if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag.
var rt = parse_si(textMatch[1]);
@ -1257,7 +1259,7 @@ function parse_comments(zip, dirComments, sheets, sheetRels, opts) {
for(var i = 0; i != dirComments.length; ++i) {
var canonicalpath=dirComments[i];
var comments=parse_comments_xml(getzipdata(zip, canonicalpath.replace(/^\//,''), true), opts);
if(!comments || !comments.length) return;
if(!comments || !comments.length) continue;
// find the sheets targeted by these comments
var sheetNames = Object.keys(sheets);
for(var j = 0; j != sheetNames.length; ++j) {
@ -1322,9 +1324,9 @@ function parse_ws_xml(data, opts) {
/* 18.3.1.73 row CT_Row */
var row = parsexmltag(x.match(/<row[^>]*>/)[0]);
if(opts.sheetRows && opts.sheetRows < +row.r) return;
if(refguess.s.r > row.r - 1) refguess.s.r = row.r - 1;
if(refguess.e.r < row.r - 1) refguess.e.r = row.r - 1;
/* 18.3.1.4 c CT_Cell */
var cells = x.substr(x.indexOf('>')+1).split(/<c /);
cells.forEach(function(c, idx) { if(c === "" || c.trim() === "") return;
@ -1386,6 +1388,18 @@ function parse_ws_xml(data, opts) {
});
});
if(!s["!ref"]) s["!ref"] = encode_range(refguess);
if(opts.sheetRows) {
var tmpref = decode_range(s["!ref"]);
if(opts.sheetRows < +tmpref.e.r) {
tmpref.e.r = opts.sheetRows - 1;
if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r;
if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r;
if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c;
if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c;
s["!fullref"] = s["!ref"];
s["!ref"] = encode_range(tmpref);
}
}
return s;
}
@ -1514,13 +1528,18 @@ var parse_ws_bin = function(data, opts) {
var s = {};
var ref;
var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} };
var pass = false;
var pass = false, end = false;
var row, p, cf;
recordhopper(data, function(val, R) {
if(end) return;
switch(R.n) {
case 'BrtWsDim': ref = val; break;
case 'BrtRowHdr': row = val; break;
case 'BrtRowHdr':
row = val;
if(opts.sheetRows && opts.sheetRows <= row.r) end=true;
break;
case 'BrtFmlaBool':
case 'BrtFmlaError':
@ -1545,7 +1564,11 @@ var parse_ws_bin = function(data, opts) {
if(opts.cellNF) p.z = SSF._table[cf.ifmt];
} catch(e) { if(opts.WTF) throw e; }
s[encode_cell({c:val[0].c,r:row.r})] = p;
break; // TODO
if(refguess.s.r > row.r) refguess.s.r = row.r;
if(refguess.s.c > val[0].c) refguess.s.c = val[0].c;
if(refguess.e.r < row.r) refguess.e.r = row.r;
if(refguess.e.c < val[0].c) refguess.e.c = val[0].c;
break;
case 'BrtCellBlank': break; // (blank cell)
@ -1583,6 +1606,18 @@ var parse_ws_bin = function(data, opts) {
}
}, opts);
s["!ref"] = encode_range(ref);
if(opts.sheetRows) {
var tmpref = decode_range(s["!ref"]);
if(opts.sheetRows < +tmpref.e.r) {
tmpref.e.r = opts.sheetRows - 1;
if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r;
if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r;
if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c;
if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c;
s["!fullref"] = s["!ref"];
s["!ref"] = encode_range(tmpref);
}
}
return s;
};
@ -2698,7 +2733,7 @@ function fixopts(opts) {
['cellFormula', true], /* emit formulae as .h */
['sheetStubs', false], /* emit empty cells */
['sheetRows', 0, 'n'], /* read n rows (0 = read all rows) */
['bookDeps', false], /* parse calculation chains */
['bookSheets', false], /* only try to get sheet names (no Sheets) */
['bookProps', false], /* only try to get properties (no Sheets) */
@ -2706,7 +2741,10 @@ function fixopts(opts) {
['WTF', false] /* WTF mode (throws errors) */
];
defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; });
defaults.forEach(function(d) {
if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1];
if(d[2] === 'n') opts[d[0]] = Number(opts[d[0]]);
});
}
function parseZip(zip, opts) {
opts = opts || {};