From 5c4b5827b5d4bfc5867f714abfd79dcf2cf494d8 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Tue, 18 Feb 2014 22:03:28 -0500 Subject: [PATCH] version bump 0.5.9: sheetRows partial processing - opts.sheetRows limits parsing; default (0) parses all rows - added -n mode to xlsx2csv to control number of rows - !ref will be adjusted; !fullref holds full range --- Makefile | 2 +- README.md | 8 +++++- bin/xlsx2csv.njs | 2 ++ bits/31_version.js | 2 +- bits/65_comments.js | 4 ++- bits/72_wsxml.js | 14 +++++++++- bits/73_wsbin.js | 27 ++++++++++++++++--- bits/84_defaults.js | 7 +++-- package.json | 2 +- test.js | 66 ++++++++++++++++++++++++++++++++++++++++++--- tests.lst | 2 ++ xlsx.js | 54 +++++++++++++++++++++++++++++++------ 12 files changed, 168 insertions(+), 22 deletions(-) diff --git a/Makefile b/Makefile index a0035ab..4b1c5b0 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ LIB=xlsx DEPS=$(wildcard bits/*.js) TARGET=$(LIB).js -FMT=xlsx xlsm xlsb +FMT=xlsx xlsm xlsb misc $(TARGET): $(DEPS) cat $^ > $@ diff --git a/README.md b/README.md index bd35219..09f5e1d 100644 --- a/README.md +++ b/README.md @@ -79,7 +79,9 @@ The exported `read` and `readFile` functions accept an options argument: | cellHTML | true | Parse rich text and save HTML to the .h field | | cellNF | false | Save number format string to the .z field | | sheetStubs | false | Create cell objects for stub cells | +| sheetRows | 0 | If >0, read the first `sheetRows` rows ** | | bookDeps | false | If true, parse calculation chains | +| bookFiles | false | If true, add raw files to book object ** | | bookProps | false | If true, only parse enough to get book metadata ** | | bookSheets | false | If true, only parse enough to get the sheet names | @@ -89,13 +91,17 @@ The exported `read` and `readFile` functions accept an options argument: - In some cases, sheets may be parsed even if `bookSheets` is false. - `bookSheets` and `bookProps` combine to give both sets of information - `Deps` will be an empty object if `bookDeps` is falsy +- `bookFiles` adds a `keys` array (paths in the ZIP) and a `files` hash (whose + keys are paths and values are objects representing the files) +- `sheetRows-1` rows will be generated when looking at the JSON object output + (since the header row is counted as a row when parsing the data) The defaults are enumerated in bits/84_defaults.js ## Tested Environments - Node 0.8.14, 0.10.1 - - IE 6/7/8/9/10 using Base64 mode (IE10 using HTML5 mode) + - IE 6/7/8/9/10 using Base64 mode (IE10/11 using HTML5 mode) - FF 18 using Base64 or HTML5 mode - Chrome 24 using Base64 or HTML5 mode diff --git a/bin/xlsx2csv.njs b/bin/xlsx2csv.njs index de34e91..aa86184 100755 --- a/bin/xlsx2csv.njs +++ b/bin/xlsx2csv.njs @@ -15,6 +15,7 @@ program .option('-J, --raw-js', 'emit raw JS object rather than CSV (raw numbers)') .option('-F, --field-sep ', 'CSV field separator', ",") .option('-R, --row-sep ', 'CSV row separator', "\n") + .option('-n, --sheet-rows ', 'Number of rows to process (0=all rows)') .option('--dev', 'development mode') .option('--read', 'read but do not print out contents') .option('-q, --quiet', 'quiet mode'); @@ -46,6 +47,7 @@ if(!fs.existsSync(filename)) { var opts = {}, wb; if(program.listSheets) opts.bookSheets = true; +if(program.sheetRows) opts.sheetRows = program.sheetRows; if(program.dev) { X.verbose = 2; diff --git a/bits/31_version.js b/bits/31_version.js index 3573c69..ddf5d6f 100644 --- a/bits/31_version.js +++ b/bits/31_version.js @@ -1 +1 @@ -XLSX.version = '0.5.8'; +XLSX.version = '0.5.9'; diff --git a/bits/65_comments.js b/bits/65_comments.js index 83df24e..25e0113 100644 --- a/bits/65_comments.js +++ b/bits/65_comments.js @@ -11,6 +11,8 @@ function parse_comments_xml(data, opts) { if(x === "" || x.trim() === "") return; var y = parsexmltag(x.match(/]*>/)[0]); var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid }; + var cell = decode_cell(y.ref); + if(opts.sheetRows && opts.sheetRows <= cell.r) return; var textMatch = x.match(/([^\u2603]*)<\/text>/m); if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag. var rt = parse_si(textMatch[1]); @@ -26,7 +28,7 @@ function parse_comments(zip, dirComments, sheets, sheetRels, opts) { for(var i = 0; i != dirComments.length; ++i) { var canonicalpath=dirComments[i]; var comments=parse_comments_xml(getzipdata(zip, canonicalpath.replace(/^\//,''), true), opts); - if(!comments || !comments.length) return; + if(!comments || !comments.length) continue; // find the sheets targeted by these comments var sheetNames = Object.keys(sheets); for(var j = 0; j != sheetNames.length; ++j) { diff --git a/bits/72_wsxml.js b/bits/72_wsxml.js index dcc342b..e0c57a1 100644 --- a/bits/72_wsxml.js +++ b/bits/72_wsxml.js @@ -18,9 +18,9 @@ function parse_ws_xml(data, opts) { /* 18.3.1.73 row CT_Row */ var row = parsexmltag(x.match(/]*>/)[0]); + if(opts.sheetRows && opts.sheetRows < +row.r) return; if(refguess.s.r > row.r - 1) refguess.s.r = row.r - 1; if(refguess.e.r < row.r - 1) refguess.e.r = row.r - 1; - /* 18.3.1.4 c CT_Cell */ var cells = x.substr(x.indexOf('>')+1).split(/ refguess.e.r) tmpref.e.r = refguess.e.r; + if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r; + if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c; + if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c; + s["!fullref"] = s["!ref"]; + s["!ref"] = encode_range(tmpref); + } + } return s; } diff --git a/bits/73_wsbin.js b/bits/73_wsbin.js index 17bdf92..6a0f58d 100644 --- a/bits/73_wsbin.js +++ b/bits/73_wsbin.js @@ -123,13 +123,18 @@ var parse_ws_bin = function(data, opts) { var s = {}; var ref; + var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} }; - var pass = false; + var pass = false, end = false; var row, p, cf; recordhopper(data, function(val, R) { + if(end) return; switch(R.n) { case 'BrtWsDim': ref = val; break; - case 'BrtRowHdr': row = val; break; + case 'BrtRowHdr': + row = val; + if(opts.sheetRows && opts.sheetRows <= row.r) end=true; + break; case 'BrtFmlaBool': case 'BrtFmlaError': @@ -154,7 +159,11 @@ var parse_ws_bin = function(data, opts) { if(opts.cellNF) p.z = SSF._table[cf.ifmt]; } catch(e) { if(opts.WTF) throw e; } s[encode_cell({c:val[0].c,r:row.r})] = p; - break; // TODO + if(refguess.s.r > row.r) refguess.s.r = row.r; + if(refguess.s.c > val[0].c) refguess.s.c = val[0].c; + if(refguess.e.r < row.r) refguess.e.r = row.r; + if(refguess.e.c < val[0].c) refguess.e.c = val[0].c; + break; case 'BrtCellBlank': break; // (blank cell) @@ -192,6 +201,18 @@ var parse_ws_bin = function(data, opts) { } }, opts); s["!ref"] = encode_range(ref); + if(opts.sheetRows) { + var tmpref = decode_range(s["!ref"]); + if(opts.sheetRows < +tmpref.e.r) { + tmpref.e.r = opts.sheetRows - 1; + if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r; + if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r; + if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c; + if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c; + s["!fullref"] = s["!ref"]; + s["!ref"] = encode_range(tmpref); + } + } return s; }; diff --git a/bits/84_defaults.js b/bits/84_defaults.js index 762f681..fddaa04 100644 --- a/bits/84_defaults.js +++ b/bits/84_defaults.js @@ -5,7 +5,7 @@ function fixopts(opts) { ['cellFormula', true], /* emit formulae as .h */ ['sheetStubs', false], /* emit empty cells */ - + ['sheetRows', 0, 'n'], /* read n rows (0 = read all rows) */ ['bookDeps', false], /* parse calculation chains */ ['bookSheets', false], /* only try to get sheet names (no Sheets) */ ['bookProps', false], /* only try to get properties (no Sheets) */ @@ -13,5 +13,8 @@ function fixopts(opts) { ['WTF', false] /* WTF mode (throws errors) */ ]; - defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; }); + defaults.forEach(function(d) { + if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; + if(d[2] === 'n') opts[d[0]] = Number(opts[d[0]]); + }); } diff --git a/package.json b/package.json index a14e054..d431b1d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "xlsx", - "version": "0.5.8", + "version": "0.5.9", "author": "sheetjs", "description": "XLSB / XLSX / XLSM parser", "keywords": [ "xlsx", "xlsb", "xlsm", "office", "excel", "spreadsheet" ], diff --git a/test.js b/test.js index 05380ed..5be532b 100644 --- a/test.js +++ b/test.js @@ -5,7 +5,6 @@ describe('source',function(){it('should load',function(){XLSX=require('./');});} var opts = {}; if(process.env.WTF) opts.WTF = true; - var ex = [".xlsb", ".xlsm", ".xlsx"]; if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;}); var exp = ex.map(function(x){ return x + ".pending"; }); @@ -154,6 +153,42 @@ describe('options', function() { var wb = XLSX.readFile(dir+'merge_cells.xlsx', {sheetStubs:true}); assert(typeof wb.Sheets.Merge.A2.t !== 'undefined'); }); + it('should read all cells by default', function() { + var wb = XLSX.readFile(dir+'formula_stress_test.xlsb'); + assert(typeof wb.Sheets.Text.A46 !== 'undefined'); + assert(typeof wb.Sheets.Text.B26 !== 'undefined'); + assert(typeof wb.Sheets.Text.C16 !== 'undefined'); + assert(typeof wb.Sheets.Text.D2 !== 'undefined'); + wb = XLSX.readFile(dir+'formula_stress_test.xlsx'); + assert(typeof wb.Sheets.Text.A46 !== 'undefined'); + assert(typeof wb.Sheets.Text.B26 !== 'undefined'); + assert(typeof wb.Sheets.Text.C16 !== 'undefined'); + assert(typeof wb.Sheets.Text.D2 !== 'undefined'); + }); + it('sheetRows n=20', function() { + var wb = XLSX.readFile(dir+'formula_stress_test.xlsx', {sheetRows:20}); + assert(typeof wb.Sheets.Text.A46 === 'undefined'); + assert(typeof wb.Sheets.Text.B26 === 'undefined'); + assert(typeof wb.Sheets.Text.C16 !== 'undefined'); + assert(typeof wb.Sheets.Text.D2 !== 'undefined'); + wb = XLSX.readFile(dir+'formula_stress_test.xlsb', {sheetRows:20}); + assert(typeof wb.Sheets.Text.A46 === 'undefined'); + assert(typeof wb.Sheets.Text.B26 === 'undefined'); + assert(typeof wb.Sheets.Text.C16 !== 'undefined'); + assert(typeof wb.Sheets.Text.D2 !== 'undefined'); + }); + it('sheetRows n=10', function() { + var wb = XLSX.readFile(dir+'formula_stress_test.xlsb', {sheetRows:10}); + assert(typeof wb.Sheets.Text.A46 === 'undefined'); + assert(typeof wb.Sheets.Text.B26 === 'undefined'); + assert(typeof wb.Sheets.Text.C16 === 'undefined'); + assert(typeof wb.Sheets.Text.D2 !== 'undefined'); + wb = XLSX.readFile(dir+'formula_stress_test.xlsx', {sheetRows:10}); + assert(typeof wb.Sheets.Text.A46 === 'undefined'); + assert(typeof wb.Sheets.Text.B26 === 'undefined'); + assert(typeof wb.Sheets.Text.C16 === 'undefined'); + assert(typeof wb.Sheets.Text.D2 !== 'undefined'); + }); }); describe('book', function() { it('bookSheets should not generate sheets', function() { @@ -224,7 +259,7 @@ describe('features', function() { }); }); - describe('should have core properties and custom properties parsed', function() { + describe('should parse core properties and custom properties', function() { var wb; before(function() { XLSX = require('./'); @@ -242,7 +277,7 @@ describe('features', function() { }); }); - describe('should parse cells with date type', function() { + describe('should parse cells with date type (XLSX/XLSB)', function() { var wb, ws; before(function() { XLSX = require('./'); @@ -255,4 +290,29 @@ describe('features', function() { assert.equal(sheet[3]['てすと'], '2/14/14'); }); }); + + describe('sheetRows', function() { + it('should use original range if not set', function() { + var wb = XLSX.readFile(dir+'formula_stress_test.xlsb'); + assert.equal(wb.Sheets.Text["!ref"],"A1:F49"); + wb = XLSX.readFile(dir+'formula_stress_test.xlsx'); + assert.equal(wb.Sheets.Text["!ref"],"A1:F49"); + }); + it('should adjust range if set', function() { + var wb = XLSX.readFile(dir+'formula_stress_test.xlsx', {sheetRows:10}); + assert.equal(wb.Sheets.Text["!fullref"],"A1:F49"); + assert.equal(wb.Sheets.Text["!ref"],"A1:F10"); + wb = XLSX.readFile(dir+'formula_stress_test.xlsb', {sheetRows:10}); + assert.equal(wb.Sheets.Text["!fullref"],"A1:F49"); + assert.equal(wb.Sheets.Text["!ref"],"A1:F10"); + }); + it('should not generate comment cells', function() { + var wb = XLSX.readFile(dir+'comments_stress_test.xlsx', {sheetRows:10}); + assert.equal(wb.Sheets.Sheet7["!fullref"],"A1:N34"); + assert.equal(wb.Sheets.Sheet7["!ref"],"A1:A1"); + wb = XLSX.readFile(dir+'comments_stress_test.xlsb', {sheetRows:10}); + assert.equal(wb.Sheets.Sheet7["!fullref"],"A1:N34"); + assert.equal(wb.Sheets.Sheet7["!ref"],"A1:A1"); + }); + }); }); diff --git a/tests.lst b/tests.lst index 4b304fd..86a9eaf 100644 --- a/tests.lst +++ b/tests.lst @@ -8,6 +8,7 @@ named_ranges_2011.xlsb number_format.xlsb rich_text_stress.xlsb time_stress_test_1.xlsb +xlsx-stream-d-date-cell.xlsb LONumbers-2010.xlsx LONumbers-2011.xlsx LONumbers.xlsx @@ -233,6 +234,7 @@ xlrd_test_comments_excel.xlsx xlrd_test_comments_gdocs.xlsx xlrd_text_bar.xlsx חישוב_נקודות_זיכוי.xlsx +xlsx-stream-d-date-cell.xlsx apachepoi_45431.xlsm apachepoi_47026.xlsm apachepoi_47089.xlsm diff --git a/xlsx.js b/xlsx.js index d1ebfe5..1078f48 100644 --- a/xlsx.js +++ b/xlsx.js @@ -423,7 +423,7 @@ SSF.load_table = function(tbl) { for(var i=0; i!=0x0188; ++i) if(tbl[i]) SSF.loa make_ssf(SSF); var XLSX = {}; (function(XLSX){ -XLSX.version = '0.5.8'; +XLSX.version = '0.5.9'; var current_codepage, current_cptable, cptable; if(typeof module !== "undefined" && typeof require !== 'undefined') { if(typeof cptable === 'undefined') cptable = require('codepage'); @@ -1242,6 +1242,8 @@ function parse_comments_xml(data, opts) { if(x === "" || x.trim() === "") return; var y = parsexmltag(x.match(/]*>/)[0]); var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid }; + var cell = decode_cell(y.ref); + if(opts.sheetRows && opts.sheetRows <= cell.r) return; var textMatch = x.match(/([^\u2603]*)<\/text>/m); if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag. var rt = parse_si(textMatch[1]); @@ -1257,7 +1259,7 @@ function parse_comments(zip, dirComments, sheets, sheetRels, opts) { for(var i = 0; i != dirComments.length; ++i) { var canonicalpath=dirComments[i]; var comments=parse_comments_xml(getzipdata(zip, canonicalpath.replace(/^\//,''), true), opts); - if(!comments || !comments.length) return; + if(!comments || !comments.length) continue; // find the sheets targeted by these comments var sheetNames = Object.keys(sheets); for(var j = 0; j != sheetNames.length; ++j) { @@ -1322,9 +1324,9 @@ function parse_ws_xml(data, opts) { /* 18.3.1.73 row CT_Row */ var row = parsexmltag(x.match(/]*>/)[0]); + if(opts.sheetRows && opts.sheetRows < +row.r) return; if(refguess.s.r > row.r - 1) refguess.s.r = row.r - 1; if(refguess.e.r < row.r - 1) refguess.e.r = row.r - 1; - /* 18.3.1.4 c CT_Cell */ var cells = x.substr(x.indexOf('>')+1).split(/ refguess.e.r) tmpref.e.r = refguess.e.r; + if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r; + if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c; + if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c; + s["!fullref"] = s["!ref"]; + s["!ref"] = encode_range(tmpref); + } + } return s; } @@ -1514,13 +1528,18 @@ var parse_ws_bin = function(data, opts) { var s = {}; var ref; + var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} }; - var pass = false; + var pass = false, end = false; var row, p, cf; recordhopper(data, function(val, R) { + if(end) return; switch(R.n) { case 'BrtWsDim': ref = val; break; - case 'BrtRowHdr': row = val; break; + case 'BrtRowHdr': + row = val; + if(opts.sheetRows && opts.sheetRows <= row.r) end=true; + break; case 'BrtFmlaBool': case 'BrtFmlaError': @@ -1545,7 +1564,11 @@ var parse_ws_bin = function(data, opts) { if(opts.cellNF) p.z = SSF._table[cf.ifmt]; } catch(e) { if(opts.WTF) throw e; } s[encode_cell({c:val[0].c,r:row.r})] = p; - break; // TODO + if(refguess.s.r > row.r) refguess.s.r = row.r; + if(refguess.s.c > val[0].c) refguess.s.c = val[0].c; + if(refguess.e.r < row.r) refguess.e.r = row.r; + if(refguess.e.c < val[0].c) refguess.e.c = val[0].c; + break; case 'BrtCellBlank': break; // (blank cell) @@ -1583,6 +1606,18 @@ var parse_ws_bin = function(data, opts) { } }, opts); s["!ref"] = encode_range(ref); + if(opts.sheetRows) { + var tmpref = decode_range(s["!ref"]); + if(opts.sheetRows < +tmpref.e.r) { + tmpref.e.r = opts.sheetRows - 1; + if(tmpref.e.r > refguess.e.r) tmpref.e.r = refguess.e.r; + if(tmpref.e.r < tmpref.s.r) tmpref.s.r = tmpref.e.r; + if(tmpref.e.c > refguess.e.c) tmpref.e.c = refguess.e.c; + if(tmpref.e.c < tmpref.s.c) tmpref.s.c = tmpref.e.c; + s["!fullref"] = s["!ref"]; + s["!ref"] = encode_range(tmpref); + } + } return s; }; @@ -2698,7 +2733,7 @@ function fixopts(opts) { ['cellFormula', true], /* emit formulae as .h */ ['sheetStubs', false], /* emit empty cells */ - + ['sheetRows', 0, 'n'], /* read n rows (0 = read all rows) */ ['bookDeps', false], /* parse calculation chains */ ['bookSheets', false], /* only try to get sheet names (no Sheets) */ ['bookProps', false], /* only try to get properties (no Sheets) */ @@ -2706,7 +2741,10 @@ function fixopts(opts) { ['WTF', false] /* WTF mode (throws errors) */ ]; - defaults.forEach(function(d) { if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; }); + defaults.forEach(function(d) { + if(typeof opts[d[0]] === 'undefined') opts[d[0]] = d[1]; + if(d[2] === 'n') opts[d[0]] = Number(opts[d[0]]); + }); } function parseZip(zip, opts) { opts = opts || {};