improved plaintext parsing

- fuzzy number check disregards commas (fixes #709 h/t @mz121star)
- updated to CFB 0.12.1
- parse_dom_table and html_to_sheet raw option
- canvas-datagrid demo
- docs and TS definition update
This commit is contained in:
SheetJS 2017-08-09 18:38:23 -04:00
parent 5855bcb678
commit 9ba09bab5e
23 changed files with 609 additions and 133 deletions

View File

@ -41,7 +41,6 @@ enhancements, additional features by request, and dedicated support.
[![Build Status](https://travis-ci.org/SheetJS/js-xlsx.svg?branch=master)](https://travis-ci.org/SheetJS/js-xlsx)
[![Build Status](https://semaphoreci.com/api/v1/sheetjs/js-xlsx/branches/master/shields_badge.svg)](https://semaphoreci.com/sheetjs/js-xlsx)
[![Coverage Status](http://img.shields.io/coveralls/SheetJS/js-xlsx/master.svg)](https://coveralls.io/r/SheetJS/js-xlsx?branch=master)
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx.svg?type=shield)](https://app.fossa.io/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx?ref=badge_shield)
[![Dependencies Status](https://david-dm.org/sheetjs/js-xlsx/status.svg)](https://david-dm.org/sheetjs/js-xlsx)
[![NPM Downloads](https://img.shields.io/npm/dt/xlsx.svg)](https://npmjs.org/package/xlsx)
[![ghit.me](https://ghit.me/badge.svg?repo=sheetjs/js-xlsx)](https://ghit.me/repo/sheetjs/js-xlsx)
@ -171,6 +170,7 @@ The `demos` directory includes sample projects for:
- [`angular 1.x`](demos/angular/)
- [`angular 2.x / 4.x`](demos/angular2/)
- [`browserify`](demos/browserify/)
- [`canvas-datagrid`](demos/datagrid/)
- [`Adobe ExtendScript`](demos/extendscript/)
- [`meteor`](demos/meteor/)
- [`phantomjs`](demos/phantomjs/)
@ -1639,6 +1639,15 @@ as strings.
`XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet.
Both functions accept options arguments:
| Option Name | Default | Description |
| :---------- | :------: | :-------------------------------------------------- |
| dateNF | fmt 14 | Use specified date format in string output |
| cellDates | false | Store dates as type `d` (default is `n`) |
| raw | | If true, every cell will hold raw strings |
<details>
<summary><b>Examples</b> (click to show)</summary>

View File

@ -35,7 +35,7 @@ type CFBFiles = {[n:string]:CFBEntry};
/* [MS-CFB] v20130118 */
var CFB = (function _CFB(){
var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/;
exports.version = '0.12.0';
exports.version = '0.12.1';
function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ {
var mver = 3; // major version
var ssz = 512; // sector size
@ -206,7 +206,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra
if(FI[i].type === 0 /* unknown */) continue;
j = dad[i];
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
else while(j !== 0) {
else while(j !== 0 && j !== dad[j]) {
FP[i] = FP[j] + "/" + FP[i];
j = dad[j];
}
@ -307,7 +307,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/;
prep_blob(blob, 64);
namelen = blob.read_shift(2);
if(namelen === 0) continue;
name = __utf16le(blob,0,namelen-pl);
Paths.push(name);
var o/*:CFBEntry*/ = ({
@ -328,6 +327,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
if(mtime !== 0) o.mt = read_date(blob, blob.l-8);
o.start = blob.read_shift(4, 'i');
o.size = blob.read_shift(4, 'i');
if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; }
if(o.type === 5) { /* root */
minifat_store = o.start;
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
@ -340,7 +340,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
prep_blob(o.content, 0);
} else {
o.storage = 'minifat';
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) {
o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/);
prep_blob(o.content, 0);
}
@ -369,6 +369,9 @@ function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) {
return parse(/*::typeof blob == 'string' ? new Buffer(blob, 'utf-8') : */blob, options);
}
function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ {
return cfb.find(path);
}
/** CFB Constants */
var MSSZ = 64; /* Mini Sector Size = 1<<6 */
//var MSCSZ = 4096; /* Mini Stream Cutoff Size */
@ -394,6 +397,7 @@ var consts = {
EntryTypes: ['unknown','storage','stream','lockbytes','property','root']
};
exports.find = find;
exports.read = readSync;
exports.parse = parse;
exports.utils = {

View File

@ -114,6 +114,13 @@ function dup(o/*:any*/)/*:any*/ {
function fill(c/*:string*/,l/*:number*/)/*:string*/ { var o = ""; while(o.length < l) o+=c; return o; }
/* TODO: stress test */
function fuzzynum(s/*:string*/)/*:number*/ {
var v/*:number*/ = Number(s);
if(!isNaN(v)) return v;
var ss = s.replace(/([\d]),([\d])/g,"$1$2").replace(/[$]/g,"");
if(!isNaN(v = Number(ss))) return v;
return v;
}
function fuzzydate(s/*:string*/)/*:Date*/ {
var o = new Date(s), n = new Date(NaN);
var y = o.getYear(), m = o.getMonth(), d = o.getDate();

View File

@ -234,8 +234,8 @@ var SYLK = (function() {
if(val.charAt(0) === '"') val = val.substr(1,val.length - 2);
else if(val === 'TRUE') val = true;
else if(val === 'FALSE') val = false;
else if(+val === +val) {
val = +val;
else if(!isNaN(fuzzynum(val))) {
val = fuzzynum(val);
if(next_cell_format !== null && SSF.is_date(next_cell_format)) val = numdate(val);
} else if(!isNaN(fuzzydate(val).getDate())) {
val = parseDate(val);
@ -396,7 +396,7 @@ var DIF = (function() {
case 0:
if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
else if(+value == +value) arr[R][C] = +value;
else if(!isNaN(fuzzynum(value))) arr[R][C] = fuzzynum(value);
else if(!isNaN(fuzzydate(value).getDate())) arr[R][C] = parseDate(value);
else arr[R][C] = value;
++C; break;
@ -482,7 +482,7 @@ var PRN = (function() {
else if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
else if(data === ""){/* empty */}
else if(+data == +data) arr[R][C] = +data;
else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data);
else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data);
else arr[R][C] = data;
}
@ -533,7 +533,7 @@ var PRN = (function() {
else if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); }
else if(s == "TRUE") { cell.t = 'b'; cell.v = true; }
else if(s == "FALSE") { cell.t = 'b'; cell.v = false; }
else if(!isNaN(v = +s)) { cell.t = 'n'; cell.w = s; cell.v = v; }
else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; cell.w = s; cell.v = v; }
else if(!isNaN(fuzzydate(s).getDate()) || _re && s.match(_re)) {
cell.z = o.dateNF || SSF._table[14];
var k = 0;

View File

@ -811,11 +811,11 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
/* TODO: WTF */
function parse_props(cfb) {
/* [MS-OSHARED] 2.3.3.2.2 Document Summary Information Property Set */
var DSI = cfb.find('!DocumentSummaryInformation');
var DSI = CFB.find(cfb, '!DocumentSummaryInformation');
if(DSI) try { cfb.DocSummary = parse_PropertySetStream(DSI, DocSummaryPIDDSI); } catch(e) {/* empty */}
/* [MS-OSHARED] 2.3.3.2.1 Summary Information Property Set*/
var SI = cfb.find('!SummaryInformation');
var SI = CFB.find(cfb, '!SummaryInformation');
if(SI) try { cfb.Summary = parse_PropertySetStream(SI, SummaryPIDSI); } catch(e) {/* empty */}
}
@ -823,27 +823,28 @@ function parse_xlscfb(cfb/*:any*/, options/*:?ParseOpts*/)/*:Workbook*/ {
if(!options) options = {};
fix_read_opts(options);
reset_cp();
var CompObj, Summary, Workbook/*:?any*/;
var CompObj, Summary, WB/*:?any*/;
if(cfb.FullPaths) {
CompObj = cfb.find('!CompObj');
Summary = cfb.find('!SummaryInformation');
Workbook = cfb.find('/Workbook');
CompObj = CFB.find(cfb, '!CompObj');
Summary = CFB.find(cfb, '!SummaryInformation');
WB = CFB.find(cfb, '/Workbook');
} else {
prep_blob(cfb, 0);
Workbook = ({content: cfb}/*:any*/);
WB = ({content: cfb}/*:any*/);
}
if(!Workbook) Workbook = cfb.find('/Book');
if(!WB) WB = CFB.find(cfb, '/Book');
var CompObjP, SummaryP, WorkbookP/*:Workbook*/;
var _data/*:?any*/;
if(CompObj) CompObjP = parse_compobj(CompObj);
if(options.bookProps && !options.bookSheets) WorkbookP = ({}/*:any*/);
else {
if(Workbook) WorkbookP = parse_workbook(Workbook.content, options);
if(WB && WB.content) WorkbookP = parse_workbook(WB.content, options);
/* Quattro Pro 7-8 */
else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options);
else if((_data=CFB.find(cfb, 'PerfectOffice_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options);
/* Quattro Pro 9 */
else if(cfb.find('NativeContent_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('NativeContent_MAIN').content, options);
else if((_data=CFB.find(cfb, 'NativeContent_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options);
else throw new Error("Cannot find Workbook stream");
}

View File

@ -37,12 +37,14 @@ var HTML_ = (function() {
if(range.e.c < C) range.e.c = C;
if(opts.dense) {
if(!ws[R]) ws[R] = [];
if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m};
if(opts.raw) ws[R][C] = {t:'s', v:m};
else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
else ws[R][C] = {t:'s', v:m};
} else {
var coord/*:string*/ = encode_cell({r:R, c:C});
/* TODO: value parsing */
if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m};
if(opts.raw) ws[coord] = {t:'s', v:m};
else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
else ws[coord] = {t:'s', v:m};
}
C += CS;
@ -134,7 +136,8 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o/*:Cell*/ = {t:'s', v:v};
if(v != null && v.length) {
if(!isNaN(Number(v))) o = {t:'n', v:Number(v)};
if(opts.raw) o = {t:'s', v:v};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
else if(!isNaN(fuzzydate(v).getDate())) {
o = ({t:'d', v:parseDate(v)}/*:any*/);
if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);

View File

@ -172,20 +172,20 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
/* references to [MS-OFFCRYPTO] */
function parse_xlsxcfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ {
var f = 'Version';
var data = cfb.find(f);
var data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var version = parse_DataSpaceVersionInfo(data.content);
/* 2.3.4.1 */
f = 'DataSpaceMap';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var dsm = parse_DataSpaceMap(data.content);
if(dsm.length != 1 || dsm[0].comps.length != 1 || dsm[0].comps[0].t != 0 || dsm[0].name != "StrongEncryptionDataSpace" || dsm[0].comps[0].v != "EncryptedPackage")
throw new Error("ECMA-376 Encrypted file bad " + f);
f = 'StrongEncryptionDataSpace';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var seds = parse_DataSpaceDefinition(data.content);
if(seds.length != 1 || seds[0] != "StrongEncryptionTransform")
@ -193,12 +193,12 @@ function parse_xlsxcfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ {
/* 2.3.4.3 */
f = '!Primary';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var hdr = parse_Primary(data.content);
f = 'EncryptionInfo';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var einfo = parse_EncryptionInfo(data.content);

View File

@ -10,8 +10,8 @@ function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:Array<number>*/ {
return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)];
}
function read_cfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(cfb.find("EncryptedPackage")) return parse_xlsxcfb(cfb, opts);
function read_cfb(cfb/*:CFBContainer*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(CFB.find(cfb, "EncryptedPackage")) return parse_xlsxcfb(cfb, opts);
return parse_xlscfb(cfb, opts);
}

View File

@ -1,8 +1,8 @@
function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/) {
if(sheet == null || sheet["!ref"] == null) return [];
var val = {t:'n',v:0}, header = 0, offset = 1, hdr/*:Array<any>*/ = [], isempty = true, v=0, vv="";
var r = {s:{r:0,c:0},e:{r:0,c:0}};
var o = opts != null ? opts : {};
var o = opts || {};
var raw = o.raw;
var defval = o.defval;
var range = o.range != null ? o.range : sheet["!ref"];
@ -16,8 +16,8 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
}
if(header > 0) offset = 0;
var rr = encode_row(r.s.r);
var cols = new Array(r.e.c-r.s.c+1);
var out = new Array(r.e.r-r.s.r-offset+1);
var cols/*:Array<string>*/ = [];
var out/*:Array<any>*/ = [];
var outi = 0, counter = 0;
var dense = Array.isArray(sheet);
var R = r.s.r, C = 0, CC = 0;
@ -37,7 +37,7 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
hdr[C] = vv;
}
}
var row = (header === 1) ? [] : {};
var row/*:any*/ = (header === 1) ? [] : {};
for (R = r.s.r + offset; R <= r.e.r; ++R) {
rr = encode_row(R);
isempty = true;
@ -81,7 +81,7 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
var qreg = /"/g;
function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array<string>*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, o/*:Sheet2CSVOpts*/)/*:?string*/ {
var isempty = true;
var row = [], txt = "", rr = encode_row(R);
var row/*:Array<string>*/ = [], txt = "", rr = encode_row(R);
for(var C = r.s.c; C <= r.e.c; ++C) {
if (!cols[C]) continue;
var val = o.dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr];

67
demos/datagrid/README.md Normal file
View File

@ -0,0 +1,67 @@
# canvas-datagrid
The `sheet_to_json` utility function generates output arrays suitable for use
with other JS libraries such as data grids for previewing data. After extensive
testing, [`canvas-datagrid`](https://tonygermaneri.github.io/canvas-datagrid/)
stood out as a very high-performance grid with an incredibly simple API.
## Obtaining the Library
The [`canvas-datagrid` npm nodule](http://npm.im/canvas-datagrid) includes a
minified script `dist/canvas-datagrid.js` that can be directly inserted as a
script tag. The unpkg CDN also exposes the latest version:
```html
<script src="https://unpkg.com/canvas-datagrid/dist/canvas-datagrid.js"></script>
```
## Previewing Data
The HTML document needs a container element:
```html
<div id="gridctr"></div>
```
Grid initialization is a one-liner:
```js
var grid = canvasDatagrid({
parentNode: document.getElementById('gridctr'),
data: []
});
```
Once the workbook is read and the worksheet is selected, assigning the data
variable automatically updates the view:
```js
grid.data = XLSX.utils.sheet_to_json(ws, {header:1});
```
This demo previews the first worksheet, but it is easy to add buttons and other
features to support multiple worksheets.
## Editing
The library handles the whole edit cycle. No intervention is necessary.
## Saving Data
`grid.data` is immediately readable and can be converted back to a worksheet:
```js
/* build worksheet from the grid data */
var ws = XLSX.utils.aoa_to_sheet(grid.data);
/* build up workbook */
var wb = XLSX.utils.book_new();
XLSX.utils.book_append_sheet(wb, ws, 'SheetJS');
/* .. generate download (see documentation for examples) .. */
```
## Additional Features
This demo barely scratches the surface. The underlying grid component includes
many additional features including massive data streaming, sorting and styling.

205
demos/datagrid/index.html Normal file
View File

@ -0,0 +1,205 @@
<!DOCTYPE html>
<!-- xlsx.js (C) 2013-present SheetJS http://sheetjs.com -->
<!-- vim: set ts=2: -->
<html>
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<title>SheetJS + canvas-datagrid Live Demo</title>
<style>
#drop{
border:2px dashed #bbb;
-moz-border-radius:5px;
-webkit-border-radius:5px;
border-radius:5px;
padding:25px;
text-align:center;
font:20pt bold,"Vollkorn";color:#bbb
}
#b64data{
width:100%;
}
a { text-decoration: none }
</style>
</head>
<body>
<pre>
<b><a href="http://sheetjs.com">SheetJS Data Preview Live Demo</a></b>
<a href="https://github.com/TonyGermaneri/canvas-datagrid">canvas-datagrid component library</a>
<a href="https://github.com/SheetJS/js-xlsx">Source Code Repo</a>
<a href="https://github.com/SheetJS/js-xlsx/issues">Issues? Something look weird? Click here and report an issue</a>
<div id="drop">Drop a spreadsheet file here to see sheet data</div>
<input type="file" name="xlfile" id="xlf" /> ... or click here to select a file
<textarea id="b64data">... or paste a base64-encoding here</textarea>
<input type="button" id="dotext" value="Click here to process the base64 text" onclick="b64it();"/><br />
<b>Advanced Demo Options:</b>
Use readAsBinaryString: (when available) <input type="checkbox" name="userabs" checked>
</pre>
<p><input type="submit" value="Export to XLSX!" id="xport" onclick="doit();" disabled="true"></p>
<div id="htmlout"></div>
<br />
<script src="https://unpkg.com/canvas-datagrid/dist/canvas-datagrid.js"></script>
<script type="text/javascript" src="https://rawgit.com/eligrey/Blob.js/master/Blob.js"></script>
<script type="text/javascript" src="https://rawgit.com/eligrey/FileSaver.js/master/FileSaver.js"></script>
<script src="xlsx.full.min.js"></script>
<script>
/*jshint browser:true */
/* eslint-env browser */
/* eslint no-use-before-define:0 */
/*global Uint8Array, Uint16Array, ArrayBuffer */
/*global XLSX */
var X = XLSX;
var rABS = typeof FileReader !== "undefined" && typeof FileReader.prototype !== "undefined" && typeof FileReader.prototype.readAsBinaryString !== "undefined";
if(!rABS) {
document.getElementsByName("userabs")[0].disabled = true;
document.getElementsByName("userabs")[0].checked = false;
}
var wtf_mode = false;
function fixdata(data) {
var o = "", l = 0, w = 10240;
for(; l<data.byteLength/w; ++l) o+=String.fromCharCode.apply(null,new Uint8Array(data.slice(l*w,l*w+w)));
o+=String.fromCharCode.apply(null, new Uint8Array(data.slice(l*w)));
return o;
}
function ab2str(data) {
var o = "", l = 0, w = 10240;
for(; l<data.byteLength/w; ++l) o+=String.fromCharCode.apply(null,new Uint16Array(data.slice(l*w,l*w+w)));
o+=String.fromCharCode.apply(null, new Uint16Array(data.slice(l*w)));
return o;
}
function s2ab(s) {
var b = new ArrayBuffer(s.length), v = new Uint8Array(b);
for (var i=0; i != s.length; ++i) v[i] = s.charCodeAt(i) & 0xFF;
return b;
}
function get_radio_value( radioName ) {
var radios = document.getElementsByName( radioName );
for( var i = 0; i < radios.length; i++ ) {
if( radios[i].checked || radios.length === 1 ) {
return radios[i].value;
}
}
}
var tarea = document.getElementById('b64data');
function b64it() {
if(typeof console !== 'undefined') console.log("onload", new Date());
var wb = X.read(tarea.value, {type: 'base64',WTF:wtf_mode});
process_wb(wb);
}
window.b64it = b64it;
var global_wb;
var cDg;
function process_wb(wb) {
global_wb = wb;
var ws = wb.Sheets[wb.SheetNames[0]];
var data = XLSX.utils.sheet_to_json(ws, {header:1});
if(!cDg) cDg = canvasDatagrid({ parentNode:document.getElementById('htmlout'), data:data });
else cDg.data = data;
document.getElementById('xport').disabled = false;
if(typeof console !== 'undefined') console.log("output", new Date());
}
function doit() {
var new_wb = XLSX.utils.book_new();
var new_ws = XLSX.utils.aoa_to_sheet(cDg.data);
XLSX.utils.book_append_sheet(new_wb, new_ws, 'SheetJS');
var wbout = XLSX.write(new_wb, {bookType:'xlsx', bookSST:true, type:'binary'});
var fname = 'sheetjs.xlsx';
try {
saveAs(new Blob([s2ab(wbout)],{type:"application/octet-stream"}), fname);
} catch(e) { if(typeof console != 'undefined') console.log(e, wbout); }
}
var drop = document.getElementById('drop');
function handleDrop(e) {
e.stopPropagation();
e.preventDefault();
rABS = document.getElementsByName("userabs")[0].checked;
var files = e.dataTransfer.files;
var f = files[0];
{
var reader = new FileReader();
//var name = f.name;
reader.onload = function(e) {
if(typeof console !== 'undefined') console.log("onload", new Date(), rABS);
var data = e.target.result;
{
var wb;
if(rABS) {
wb = X.read(data, {type: 'binary'});
} else {
var arr = fixdata(data);
wb = X.read(btoa(arr), {type: 'base64'});
}
process_wb(wb);
}
};
if(rABS) reader.readAsBinaryString(f);
else reader.readAsArrayBuffer(f);
}
}
function handleDragover(e) {
e.stopPropagation();
e.preventDefault();
e.dataTransfer.dropEffect = 'copy';
}
if(drop.addEventListener) {
drop.addEventListener('dragenter', handleDragover, false);
drop.addEventListener('dragover', handleDragover, false);
drop.addEventListener('drop', handleDrop, false);
}
var xlf = document.getElementById('xlf');
function handleFile(e) {
rABS = document.getElementsByName("userabs")[0].checked;
var files = e.target.files;
var f = files[0];
{
var reader = new FileReader();
//var name = f.name;
reader.onload = function(e) {
if(typeof console !== 'undefined') console.log("onload", new Date(), rABS);
var data = e.target.result;
{
var wb;
if(rABS) {
wb = X.read(data, {type: 'binary'});
} else {
var arr = fixdata(data);
wb = X.read(btoa(arr), {type: 'base64'});
}
process_wb(wb);
}
};
if(rABS) reader.readAsBinaryString(f);
else reader.readAsArrayBuffer(f);
}
}
if(xlf.addEventListener) xlf.addEventListener('change', handleFile, false);
</script>
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-36810333-1']);
_gaq.push(['_trackPageview']);
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
</body>
</html>

1
demos/datagrid/xlsx.full.min.js vendored Symbolic link
View File

@ -0,0 +1 @@
../../dist/xlsx.full.min.js

View File

@ -41,7 +41,6 @@ enhancements, additional features by request, and dedicated support.
[![Build Status](https://travis-ci.org/SheetJS/js-xlsx.svg?branch=master)](https://travis-ci.org/SheetJS/js-xlsx)
[![Build Status](https://semaphoreci.com/api/v1/sheetjs/js-xlsx/branches/master/shields_badge.svg)](https://semaphoreci.com/sheetjs/js-xlsx)
[![Coverage Status](http://img.shields.io/coveralls/SheetJS/js-xlsx/master.svg)](https://coveralls.io/r/SheetJS/js-xlsx?branch=master)
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx.svg?type=shield)](https://app.fossa.io/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx?ref=badge_shield)
[![Dependencies Status](https://david-dm.org/sheetjs/js-xlsx/status.svg)](https://david-dm.org/sheetjs/js-xlsx)
[![NPM Downloads](https://img.shields.io/npm/dt/xlsx.svg)](https://npmjs.org/package/xlsx)
[![ghit.me](https://ghit.me/badge.svg?repo=sheetjs/js-xlsx)](https://ghit.me/repo/sheetjs/js-xlsx)

View File

@ -28,6 +28,7 @@ The `demos` directory includes sample projects for:
- [`angular 1.x`](demos/angular/)
- [`angular 2.x / 4.x`](demos/angular2/)
- [`browserify`](demos/browserify/)
- [`canvas-datagrid`](demos/datagrid/)
- [`Adobe ExtendScript`](demos/extendscript/)
- [`meteor`](demos/meteor/)
- [`phantomjs`](demos/phantomjs/)

View File

@ -77,6 +77,15 @@ as strings.
`XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet.
Both functions accept options arguments:
| Option Name | Default | Description |
| :---------- | :------: | :-------------------------------------------------- |
| dateNF | fmt 14 | Use specified date format in string output |
| cellDates | false | Store dates as type `d` (default is `n`) |
| raw | | If true, every cell will hold raw strings |
<details>
<summary><b>Examples</b> (click to show)</summary>

View File

@ -38,7 +38,6 @@ enhancements, additional features by request, and dedicated support.
[![Build Status](https://travis-ci.org/SheetJS/js-xlsx.svg?branch=master)](https://travis-ci.org/SheetJS/js-xlsx)
[![Build Status](https://semaphoreci.com/api/v1/sheetjs/js-xlsx/branches/master/shields_badge.svg)](https://semaphoreci.com/sheetjs/js-xlsx)
[![Coverage Status](http://img.shields.io/coveralls/SheetJS/js-xlsx/master.svg)](https://coveralls.io/r/SheetJS/js-xlsx?branch=master)
[![FOSSA Status](https://app.fossa.io/api/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx.svg?type=shield)](https://app.fossa.io/projects/git%2Bhttps%3A%2F%2Fgithub.com%2FSheetJS%2Fjs-xlsx?ref=badge_shield)
[![Dependencies Status](https://david-dm.org/sheetjs/js-xlsx/status.svg)](https://david-dm.org/sheetjs/js-xlsx)
[![NPM Downloads](https://img.shields.io/npm/dt/xlsx.svg)](https://npmjs.org/package/xlsx)
[![ghit.me](https://ghit.me/badge.svg?repo=sheetjs/js-xlsx)](https://ghit.me/repo/sheetjs/js-xlsx)
@ -165,6 +164,7 @@ The `demos` directory includes sample projects for:
- [`angular 1.x`](demos/angular/)
- [`angular 2.x / 4.x`](demos/angular2/)
- [`browserify`](demos/browserify/)
- [`canvas-datagrid`](demos/datagrid/)
- [`Adobe ExtendScript`](demos/extendscript/)
- [`meteor`](demos/meteor/)
- [`phantomjs`](demos/phantomjs/)
@ -1516,6 +1516,15 @@ as strings.
`XLSX.utils.table_to_book` produces a minimal workbook based on the worksheet.
Both functions accept options arguments:
| Option Name | Default | Description |
| :---------- | :------: | :-------------------------------------------------- |
| dateNF | fmt 14 | Use specified date format in string output |
| cellDates | false | Store dates as type `d` (default is `n`) |
| raw | | If true, every cell will hold raw strings |
To generate the example sheet, start with the HTML table:

View File

@ -45,6 +45,7 @@ type SSFModule = {
type CFBModule = {
version:string;
find:(cfb:CFBContainer, path:string)=>?CFBEntry;
read:(blob:RawBytes|string, opts:CFBReadOpts)=>CFBContainer;
parse:(file:RawBytes, opts:CFBReadOpts)=>CFBContainer;
utils:CFBUtils;

View File

@ -20,7 +20,7 @@
"exit-on-epipe":"~1.0.1",
"ssf":"~0.10.1",
"codepage":"~1.11.0",
"cfb":"~0.12.0",
"cfb":"~0.12.1",
"crc-32":"~1.1.0",
"adler-32":"~1.1.0",
"commander":"~2.11.0"

26
test.js
View File

@ -1845,6 +1845,32 @@ describe('csv', function() {
});
});
describe('HTML', function() {
describe('input', function(){
var b = "<table><tr><td>1</td><td>4,001</td></tr><tr><td>$41.08</td></tr></table>";
it('should generate numbers by default', function() {
var sheet = X.read(b, {type:"binary"}).Sheets.Sheet1
var cell = get_cell(sheet, "A1");
assert.equal(cell.v, 1);
assert.equal(cell.t, 'n');
cell = get_cell(sheet, "B1");
assert.equal(cell.v, 4001);
cell = get_cell(sheet, "A2");
assert.equal(cell.v, 41.08);
});
it('should generate strings if raw option is passed', function() {
var sheet = X.read(b, {type:"binary", raw:true}).Sheets.Sheet1
var cell = get_cell(sheet, "A1");
assert.equal(cell.v, "1");
assert.equal(cell.t, 's');
cell = get_cell(sheet, "B1");
assert.equal(cell.v, "4,001");
cell = get_cell(sheet, "A2");
assert.equal(cell.v, "$41.08");
});
});
});
describe('js -> file -> js', function() {
var data, ws, wb, BIN="binary";
var bef = (function() {

111
types/doc.ts Normal file
View File

@ -0,0 +1,111 @@
import * as XLSX from 'xlsx';
import * as fs from 'fs';
const version: string = XLSX.version;
const SSF = XLSX.SSF;
let read_opts: XLSX.ParsingOptions = {
type: "buffer",
raw: false,
cellFormula: false,
cellHTML: false,
cellNF: false,
cellStyles: false,
cellText: false,
cellDates: false,
dateNF: "yyyy-mm-dd",
sheetStubs: false,
sheetRows: 3,
bookDeps: false,
bookFiles: false,
bookProps: false,
bookSheets: false,
bookVBA: false,
password: "",
WTF: false
};
let write_opts: XLSX.WritingOptions = {
type: "buffer",
cellDates: false,
bookSST: false,
bookType: "xlsx",
sheet: "Sheet1",
compression: false,
Props: {
Author: "Someone",
Company: "SheetJS LLC"
}
};
const wb1 = XLSX.readFile("sheetjs.xls", read_opts);
XLSX.writeFile(wb1, "sheetjs.new.xlsx", write_opts);
read_opts.type = "binary";
const wb2 = XLSX.read("1,2,3\n4,5,6", read_opts);
write_opts.type = "binary";
const out2 = XLSX.write(wb2, write_opts);
read_opts.type = "buffer";
const wb3 = XLSX.read(fs.readFileSync("sheetjs.xlsx"), read_opts);
write_opts.type = "base64";
const out3 = XLSX.write(wb3, write_opts);
const ws1 = XLSX.utils.aoa_to_sheet([
"SheetJS".split(""),
[1,2,3,4,5,6,7],
[2,3,4,5,6,7,8]
], {
dateNF: "yyyy-mm-dd",
cellDates: true,
sheetStubs: false
});
const ws2 = XLSX.utils.json_to_sheet([
{S:1,h:2,e:3,e_1:4,t:5,J:6,S_1:7},
{S:2,h:3,e:4,e_1:5,t:6,J:7,S_1:8}
], {
header:["S","h","e","e_1","t","J","S_1"],
cellDates: true,
dateNF: "yyyy-mm-dd"
});
const tbl = {}; /* document.getElementById('table'); */
const ws3 = XLSX.utils.table_to_sheet(tbl, {
raw: true,
cellDates: true,
dateNF: "yyyy-mm-dd"
});
const obj1 = XLSX.utils.sheet_to_formulae(ws1);
const str1: string = XLSX.utils.sheet_to_csv(ws2, {
FS: "\t",
RS: "|",
dateNF: "yyyy-mm-dd",
strip: true,
blankrows: true,
skipHidden: true
});
const html1: string = XLSX.utils.sheet_to_html(ws3, {
editable: false
});
const arr1: object[] = XLSX.utils.sheet_to_json(ws1, {
raw: true,
range: 1,
header: "A",
dateNF: "yyyy-mm-dd",
defval: 0,
blankrows: true
});
const arr2: any[][] = XLSX.utils.sheet_to_json<any[][]>(ws2, {
header: 1
});
const arr3: any[] = XLSX.utils.sheet_to_json(ws3, {
header: ["Sheet", "JS", "Rocks"]
});

41
types/index.d.ts vendored
View File

@ -83,6 +83,11 @@ export interface CommonOptions {
cellDates?: boolean;
}
export interface DateNFOption {
/** Use specified date format */
dateNF?: NumberFormat;
}
/** Options for read and readFile */
export interface ParsingOptions extends CommonOptions {
/** Input data encoding */
@ -168,6 +173,9 @@ export interface ParsingOptions extends CommonOptions {
* @default ''
*/
password?: string;
/* If true, plaintext parsing will not parse values */
raw?: boolean;
}
/** Options for write and writeFile */
@ -541,16 +549,13 @@ export interface Range {
e: CellAddress;
}
export interface Sheet2CSVOpts {
export interface Sheet2CSVOpts extends DateNFOption {
/** Field Separator ("delimiter") */
FS?: string;
/** Record Separator ("row separator") */
RS?: string;
/** Use specified date format */
dateNF?: NumberFormat;
/** Remove trailing field separators in each record */
strip?: boolean;
@ -572,10 +577,7 @@ export interface Sheet2HTMLOpts {
footer?: string;
}
export interface Sheet2JSONOpts {
/** Use specified format for date cells */
dateNF?: NumberFormat;
export interface Sheet2JSONOpts extends DateNFOption {
/** Output format */
header?: "A"|number|string[];
@ -592,16 +594,7 @@ export interface Sheet2JSONOpts {
raw?: boolean;
}
export interface AOA2SheetOpts {
/** Use specified format for date cells */
dateNF?: NumberFormat;
/**
* Store dates as type d (default is n)
* @default false
*/
cellDates?: boolean;
export interface AOA2SheetOpts extends CommonOptions, DateNFOption {
/**
* Create cell objects for stub cells
* @default false
@ -609,14 +602,14 @@ export interface AOA2SheetOpts {
sheetStubs?: boolean;
}
export interface JSON2SheetOpts {
/** Use specified date format */
dateNF?: NumberFormat;
export interface JSON2SheetOpts extends CommonOptions, DateNFOption {
/** Use specified column order */
header?: string[];
}
export interface Table2SheetOpts {
/** Use specified date format */
dateNF?: NumberFormat;
export interface Table2SheetOpts extends CommonOptions, DateNFOption {
/* If true, plaintext parsing will not parse values */
raw?: boolean;
}
/** General utilities */

View File

@ -1088,7 +1088,7 @@ type CFBFiles = {[n:string]:CFBEntry};
/* [MS-CFB] v20130118 */
var CFB = (function _CFB(){
var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/;
exports.version = '0.12.0';
exports.version = '0.12.1';
function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ {
var mver = 3; // major version
var ssz = 512; // sector size
@ -1259,7 +1259,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra
if(FI[i].type === 0 /* unknown */) continue;
j = dad[i];
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
else while(j !== 0) {
else while(j !== 0 && j !== dad[j]) {
FP[i] = FP[j] + "/" + FP[i];
j = dad[j];
}
@ -1360,7 +1360,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/;
prep_blob(blob, 64);
namelen = blob.read_shift(2);
if(namelen === 0) continue;
name = __utf16le(blob,0,namelen-pl);
Paths.push(name);
var o/*:CFBEntry*/ = ({
@ -1381,6 +1380,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
if(mtime !== 0) o.mt = read_date(blob, blob.l-8);
o.start = blob.read_shift(4, 'i');
o.size = blob.read_shift(4, 'i');
if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; }
if(o.type === 5) { /* root */
minifat_store = o.start;
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
@ -1393,7 +1393,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
prep_blob(o.content, 0);
} else {
o.storage = 'minifat';
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) {
o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/);
prep_blob(o.content, 0);
}
@ -1422,6 +1422,9 @@ function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) {
return parse(/*::typeof blob == 'string' ? new Buffer(blob, 'utf-8') : */blob, options);
}
function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ {
return cfb.find(path);
}
/** CFB Constants */
var MSSZ = 64; /* Mini Sector Size = 1<<6 */
//var MSCSZ = 4096; /* Mini Stream Cutoff Size */
@ -1447,6 +1450,7 @@ var consts = {
EntryTypes: ['unknown','storage','stream','lockbytes','property','root']
};
exports.find = find;
exports.read = readSync;
exports.parse = parse;
exports.utils = {
@ -1577,6 +1581,13 @@ function dup(o/*:any*/)/*:any*/ {
function fill(c/*:string*/,l/*:number*/)/*:string*/ { var o = ""; while(o.length < l) o+=c; return o; }
/* TODO: stress test */
function fuzzynum(s/*:string*/)/*:number*/ {
var v/*:number*/ = Number(s);
if(!isNaN(v)) return v;
var ss = s.replace(/([\d]),([\d])/g,"$1$2").replace(/[$]/g,"");
if(!isNaN(v = Number(ss))) return v;
return v;
}
function fuzzydate(s/*:string*/)/*:Date*/ {
var o = new Date(s), n = new Date(NaN);
var y = o.getYear(), m = o.getMonth(), d = o.getDate();
@ -5483,8 +5494,8 @@ var SYLK = (function() {
if(val.charAt(0) === '"') val = val.substr(1,val.length - 2);
else if(val === 'TRUE') val = true;
else if(val === 'FALSE') val = false;
else if(+val === +val) {
val = +val;
else if(!isNaN(fuzzynum(val))) {
val = fuzzynum(val);
if(next_cell_format !== null && SSF.is_date(next_cell_format)) val = numdate(val);
} else if(!isNaN(fuzzydate(val).getDate())) {
val = parseDate(val);
@ -5645,7 +5656,7 @@ var DIF = (function() {
case 0:
if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
else if(+value == +value) arr[R][C] = +value;
else if(!isNaN(fuzzynum(value))) arr[R][C] = fuzzynum(value);
else if(!isNaN(fuzzydate(value).getDate())) arr[R][C] = parseDate(value);
else arr[R][C] = value;
++C; break;
@ -5731,7 +5742,7 @@ var PRN = (function() {
else if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
else if(data === ""){/* empty */}
else if(+data == +data) arr[R][C] = +data;
else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data);
else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data);
else arr[R][C] = data;
}
@ -5782,7 +5793,7 @@ var PRN = (function() {
else if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); }
else if(s == "TRUE") { cell.t = 'b'; cell.v = true; }
else if(s == "FALSE") { cell.t = 'b'; cell.v = false; }
else if(!isNaN(v = +s)) { cell.t = 'n'; cell.w = s; cell.v = v; }
else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; cell.w = s; cell.v = v; }
else if(!isNaN(fuzzydate(s).getDate()) || _re && s.match(_re)) {
cell.z = o.dateNF || SSF._table[14];
var k = 0;
@ -14940,11 +14951,11 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
/* TODO: WTF */
function parse_props(cfb) {
/* [MS-OSHARED] 2.3.3.2.2 Document Summary Information Property Set */
var DSI = cfb.find('!DocumentSummaryInformation');
var DSI = CFB.find(cfb, '!DocumentSummaryInformation');
if(DSI) try { cfb.DocSummary = parse_PropertySetStream(DSI, DocSummaryPIDDSI); } catch(e) {/* empty */}
/* [MS-OSHARED] 2.3.3.2.1 Summary Information Property Set*/
var SI = cfb.find('!SummaryInformation');
var SI = CFB.find(cfb, '!SummaryInformation');
if(SI) try { cfb.Summary = parse_PropertySetStream(SI, SummaryPIDSI); } catch(e) {/* empty */}
}
@ -14952,27 +14963,28 @@ function parse_xlscfb(cfb/*:any*/, options/*:?ParseOpts*/)/*:Workbook*/ {
if(!options) options = {};
fix_read_opts(options);
reset_cp();
var CompObj, Summary, Workbook/*:?any*/;
var CompObj, Summary, WB/*:?any*/;
if(cfb.FullPaths) {
CompObj = cfb.find('!CompObj');
Summary = cfb.find('!SummaryInformation');
Workbook = cfb.find('/Workbook');
CompObj = CFB.find(cfb, '!CompObj');
Summary = CFB.find(cfb, '!SummaryInformation');
WB = CFB.find(cfb, '/Workbook');
} else {
prep_blob(cfb, 0);
Workbook = ({content: cfb}/*:any*/);
WB = ({content: cfb}/*:any*/);
}
if(!Workbook) Workbook = cfb.find('/Book');
if(!WB) WB = CFB.find(cfb, '/Book');
var CompObjP, SummaryP, WorkbookP/*:Workbook*/;
var _data/*:?any*/;
if(CompObj) CompObjP = parse_compobj(CompObj);
if(options.bookProps && !options.bookSheets) WorkbookP = ({}/*:any*/);
else {
if(Workbook) WorkbookP = parse_workbook(Workbook.content, options);
if(WB && WB.content) WorkbookP = parse_workbook(WB.content, options);
/* Quattro Pro 7-8 */
else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options);
else if((_data=CFB.find(cfb, 'PerfectOffice_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options);
/* Quattro Pro 9 */
else if(cfb.find('NativeContent_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('NativeContent_MAIN').content, options);
else if((_data=CFB.find(cfb, 'NativeContent_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options);
else throw new Error("Cannot find Workbook stream");
}
@ -16386,12 +16398,14 @@ var HTML_ = (function() {
if(range.e.c < C) range.e.c = C;
if(opts.dense) {
if(!ws[R]) ws[R] = [];
if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m};
if(opts.raw) ws[R][C] = {t:'s', v:m};
else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
else ws[R][C] = {t:'s', v:m};
} else {
var coord/*:string*/ = encode_cell({r:R, c:C});
/* TODO: value parsing */
if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m};
if(opts.raw) ws[coord] = {t:'s', v:m};
else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
else ws[coord] = {t:'s', v:m};
}
C += CS;
@ -16483,7 +16497,8 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o/*:Cell*/ = {t:'s', v:v};
if(v != null && v.length) {
if(!isNaN(Number(v))) o = {t:'n', v:Number(v)};
if(opts.raw) o = {t:'s', v:v};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
else if(!isNaN(fuzzydate(v).getDate())) {
o = ({t:'d', v:parseDate(v)}/*:any*/);
if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);
@ -17407,20 +17422,20 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
/* references to [MS-OFFCRYPTO] */
function parse_xlsxcfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ {
var f = 'Version';
var data = cfb.find(f);
var data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var version = parse_DataSpaceVersionInfo(data.content);
/* 2.3.4.1 */
f = 'DataSpaceMap';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var dsm = parse_DataSpaceMap(data.content);
if(dsm.length != 1 || dsm[0].comps.length != 1 || dsm[0].comps[0].t != 0 || dsm[0].name != "StrongEncryptionDataSpace" || dsm[0].comps[0].v != "EncryptedPackage")
throw new Error("ECMA-376 Encrypted file bad " + f);
f = 'StrongEncryptionDataSpace';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var seds = parse_DataSpaceDefinition(data.content);
if(seds.length != 1 || seds[0] != "StrongEncryptionTransform")
@ -17428,12 +17443,12 @@ function parse_xlsxcfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ {
/* 2.3.4.3 */
f = '!Primary';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var hdr = parse_Primary(data.content);
f = 'EncryptionInfo';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var einfo = parse_EncryptionInfo(data.content);
@ -17579,8 +17594,8 @@ function firstbyte(f/*:RawData*/,o/*:?TypeOpts*/)/*:Array<number>*/ {
return [x.charCodeAt(0), x.charCodeAt(1), x.charCodeAt(2), x.charCodeAt(3)];
}
function read_cfb(cfb, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(cfb.find("EncryptedPackage")) return parse_xlsxcfb(cfb, opts);
function read_cfb(cfb/*:CFBContainer*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(CFB.find(cfb, "EncryptedPackage")) return parse_xlsxcfb(cfb, opts);
return parse_xlscfb(cfb, opts);
}
@ -17778,11 +17793,11 @@ function writeFileAsync(filename/*:string*/, wb/*:Workbook*/, opts/*:?WriteFileO
var _cb = cb; if(!(_cb instanceof Function)) _cb = (opts/*:any*/);
return _fs.writeFile(filename, writeSync(wb, o), _cb);
}
function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/) {
if(sheet == null || sheet["!ref"] == null) return [];
var val = {t:'n',v:0}, header = 0, offset = 1, hdr/*:Array<any>*/ = [], isempty = true, v=0, vv="";
var r = {s:{r:0,c:0},e:{r:0,c:0}};
var o = opts != null ? opts : {};
var o = opts || {};
var raw = o.raw;
var defval = o.defval;
var range = o.range != null ? o.range : sheet["!ref"];
@ -17796,8 +17811,8 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
}
if(header > 0) offset = 0;
var rr = encode_row(r.s.r);
var cols = new Array(r.e.c-r.s.c+1);
var out = new Array(r.e.r-r.s.r-offset+1);
var cols/*:Array<string>*/ = [];
var out/*:Array<any>*/ = [];
var outi = 0, counter = 0;
var dense = Array.isArray(sheet);
var R = r.s.r, C = 0, CC = 0;
@ -17817,7 +17832,7 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
hdr[C] = vv;
}
}
var row = (header === 1) ? [] : {};
var row/*:any*/ = (header === 1) ? [] : {};
for (R = r.s.r + offset; R <= r.e.r; ++R) {
rr = encode_row(R);
isempty = true;
@ -17861,7 +17876,7 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
var qreg = /"/g;
function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array<string>*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, o/*:Sheet2CSVOpts*/)/*:?string*/ {
var isempty = true;
var row = [], txt = "", rr = encode_row(R);
var row/*:Array<string>*/ = [], txt = "", rr = encode_row(R);
for(var C = r.s.c; C <= r.e.c; ++C) {
if (!cols[C]) continue;
var val = o.dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr];

81
xlsx.js
View File

@ -1026,7 +1026,7 @@ var DO_NOT_EXPORT_CFB = true;
/* [MS-CFB] v20130118 */
var CFB = (function _CFB(){
var exports = {};
exports.version = '0.12.0';
exports.version = '0.12.1';
function parse(file, options) {
var mver = 3; // major version
var ssz = 512; // sector size
@ -1197,7 +1197,7 @@ function build_full_paths(FI, FPD, FP, Paths) {
if(FI[i].type === 0 /* unknown */) continue;
j = dad[i];
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
else while(j !== 0) {
else while(j !== 0 && j !== dad[j]) {
FP[i] = FP[j] + "/" + FP[i];
j = dad[j];
}
@ -1298,7 +1298,6 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
var blob = sector.slice(i, i+128);
prep_blob(blob, 64);
namelen = blob.read_shift(2);
if(namelen === 0) continue;
name = __utf16le(blob,0,namelen-pl);
Paths.push(name);
var o = ({
@ -1319,6 +1318,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
if(mtime !== 0) o.mt = read_date(blob, blob.l-8);
o.start = blob.read_shift(4, 'i');
o.size = blob.read_shift(4, 'i');
if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; }
if(o.type === 5) { /* root */
minifat_store = o.start;
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
@ -1331,7 +1331,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
prep_blob(o.content, 0);
} else {
o.storage = 'minifat';
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) {
o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size));
prep_blob(o.content, 0);
}
@ -1360,6 +1360,9 @@ function readSync(blob, options) {
return parse(blob, options);
}
function find(cfb, path) {
return cfb.find(path);
}
/** CFB Constants */
var MSSZ = 64; /* Mini Sector Size = 1<<6 */
//var MSCSZ = 4096; /* Mini Stream Cutoff Size */
@ -1385,6 +1388,7 @@ var consts = {
EntryTypes: ['unknown','storage','stream','lockbytes','property','root']
};
exports.find = find;
exports.read = readSync;
exports.parse = parse;
exports.utils = {
@ -1514,6 +1518,13 @@ function dup(o) {
function fill(c,l) { var o = ""; while(o.length < l) o+=c; return o; }
/* TODO: stress test */
function fuzzynum(s) {
var v = Number(s);
if(!isNaN(v)) return v;
var ss = s.replace(/([\d]),([\d])/g,"$1$2").replace(/[$]/g,"");
if(!isNaN(v = Number(ss))) return v;
return v;
}
function fuzzydate(s) {
var o = new Date(s), n = new Date(NaN);
var y = o.getYear(), m = o.getMonth(), d = o.getDate();
@ -5409,8 +5420,8 @@ var SYLK = (function() {
if(val.charAt(0) === '"') val = val.substr(1,val.length - 2);
else if(val === 'TRUE') val = true;
else if(val === 'FALSE') val = false;
else if(+val === +val) {
val = +val;
else if(!isNaN(fuzzynum(val))) {
val = fuzzynum(val);
if(next_cell_format !== null && SSF.is_date(next_cell_format)) val = numdate(val);
} else if(!isNaN(fuzzydate(val).getDate())) {
val = parseDate(val);
@ -5571,7 +5582,7 @@ var DIF = (function() {
case 0:
if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
else if(+value == +value) arr[R][C] = +value;
else if(!isNaN(fuzzynum(value))) arr[R][C] = fuzzynum(value);
else if(!isNaN(fuzzydate(value).getDate())) arr[R][C] = parseDate(value);
else arr[R][C] = value;
++C; break;
@ -5657,7 +5668,7 @@ var PRN = (function() {
else if(data === 'TRUE') arr[R][C] = true;
else if(data === 'FALSE') arr[R][C] = false;
else if(data === ""){/* empty */}
else if(+data == +data) arr[R][C] = +data;
else if(!isNaN(fuzzynum(data))) arr[R][C] = fuzzynum(data);
else if(!isNaN(fuzzydate(data).getDate())) arr[R][C] = parseDate(data);
else arr[R][C] = data;
}
@ -5708,7 +5719,7 @@ var PRN = (function() {
else if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); }
else if(s == "TRUE") { cell.t = 'b'; cell.v = true; }
else if(s == "FALSE") { cell.t = 'b'; cell.v = false; }
else if(!isNaN(v = +s)) { cell.t = 'n'; cell.w = s; cell.v = v; }
else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; cell.w = s; cell.v = v; }
else if(!isNaN(fuzzydate(s).getDate()) || _re && s.match(_re)) {
cell.z = o.dateNF || SSF._table[14];
var k = 0;
@ -14854,11 +14865,11 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
/* TODO: WTF */
function parse_props(cfb) {
/* [MS-OSHARED] 2.3.3.2.2 Document Summary Information Property Set */
var DSI = cfb.find('!DocumentSummaryInformation');
var DSI = CFB.find(cfb, '!DocumentSummaryInformation');
if(DSI) try { cfb.DocSummary = parse_PropertySetStream(DSI, DocSummaryPIDDSI); } catch(e) {/* empty */}
/* [MS-OSHARED] 2.3.3.2.1 Summary Information Property Set*/
var SI = cfb.find('!SummaryInformation');
var SI = CFB.find(cfb, '!SummaryInformation');
if(SI) try { cfb.Summary = parse_PropertySetStream(SI, SummaryPIDSI); } catch(e) {/* empty */}
}
@ -14866,27 +14877,28 @@ function parse_xlscfb(cfb, options) {
if(!options) options = {};
fix_read_opts(options);
reset_cp();
var CompObj, Summary, Workbook;
var CompObj, Summary, WB;
if(cfb.FullPaths) {
CompObj = cfb.find('!CompObj');
Summary = cfb.find('!SummaryInformation');
Workbook = cfb.find('/Workbook');
CompObj = CFB.find(cfb, '!CompObj');
Summary = CFB.find(cfb, '!SummaryInformation');
WB = CFB.find(cfb, '/Workbook');
} else {
prep_blob(cfb, 0);
Workbook = ({content: cfb});
WB = ({content: cfb});
}
if(!Workbook) Workbook = cfb.find('/Book');
if(!WB) WB = CFB.find(cfb, '/Book');
var CompObjP, SummaryP, WorkbookP;
var _data;
if(CompObj) CompObjP = parse_compobj(CompObj);
if(options.bookProps && !options.bookSheets) WorkbookP = ({});
else {
if(Workbook) WorkbookP = parse_workbook(Workbook.content, options);
if(WB && WB.content) WorkbookP = parse_workbook(WB.content, options);
/* Quattro Pro 7-8 */
else if(cfb.find('PerfectOffice_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('PerfectOffice_MAIN').content, options);
else if((_data=CFB.find(cfb, 'PerfectOffice_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options);
/* Quattro Pro 9 */
else if(cfb.find('NativeContent_MAIN')) WorkbookP = WK_.to_workbook(cfb.find('NativeContent_MAIN').content, options);
else if((_data=CFB.find(cfb, 'NativeContent_MAIN')) && _data.content) WorkbookP = WK_.to_workbook(_data.content, options);
else throw new Error("Cannot find Workbook stream");
}
@ -16300,12 +16312,14 @@ var HTML_ = (function() {
if(range.e.c < C) range.e.c = C;
if(opts.dense) {
if(!ws[R]) ws[R] = [];
if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m};
if(opts.raw) ws[R][C] = {t:'s', v:m};
else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
else ws[R][C] = {t:'s', v:m};
} else {
var coord = encode_cell({r:R, c:C});
/* TODO: value parsing */
if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m};
if(opts.raw) ws[coord] = {t:'s', v:m};
else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
else ws[coord] = {t:'s', v:m};
}
C += CS;
@ -16397,7 +16411,8 @@ function parse_dom_table(table, _opts) {
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o = {t:'s', v:v};
if(v != null && v.length) {
if(!isNaN(Number(v))) o = {t:'n', v:Number(v)};
if(opts.raw) o = {t:'s', v:v};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
else if(!isNaN(fuzzydate(v).getDate())) {
o = ({t:'d', v:parseDate(v)});
if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)});
@ -17320,20 +17335,20 @@ function parse_zip(zip, opts) {
/* references to [MS-OFFCRYPTO] */
function parse_xlsxcfb(cfb, opts) {
var f = 'Version';
var data = cfb.find(f);
var data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var version = parse_DataSpaceVersionInfo(data.content);
/* 2.3.4.1 */
f = 'DataSpaceMap';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var dsm = parse_DataSpaceMap(data.content);
if(dsm.length != 1 || dsm[0].comps.length != 1 || dsm[0].comps[0].t != 0 || dsm[0].name != "StrongEncryptionDataSpace" || dsm[0].comps[0].v != "EncryptedPackage")
throw new Error("ECMA-376 Encrypted file bad " + f);
f = 'StrongEncryptionDataSpace';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var seds = parse_DataSpaceDefinition(data.content);
if(seds.length != 1 || seds[0] != "StrongEncryptionTransform")
@ -17341,12 +17356,12 @@ function parse_xlsxcfb(cfb, opts) {
/* 2.3.4.3 */
f = '!Primary';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var hdr = parse_Primary(data.content);
f = 'EncryptionInfo';
data = cfb.find(f);
data = CFB.find(cfb, f);
if(!data || !data.content) throw new Error("ECMA-376 Encrypted file missing " + f);
var einfo = parse_EncryptionInfo(data.content);
@ -17491,7 +17506,7 @@ function firstbyte(f,o) {
}
function read_cfb(cfb, opts) {
if(cfb.find("EncryptedPackage")) return parse_xlsxcfb(cfb, opts);
if(CFB.find(cfb, "EncryptedPackage")) return parse_xlsxcfb(cfb, opts);
return parse_xlscfb(cfb, opts);
}
@ -17688,11 +17703,11 @@ function writeFileAsync(filename, wb, opts, cb) {
var _cb = cb; if(!(_cb instanceof Function)) _cb = (opts);
return _fs.writeFile(filename, writeSync(wb, o), _cb);
}
function sheet_to_json(sheet, opts){
function sheet_to_json(sheet, opts) {
if(sheet == null || sheet["!ref"] == null) return [];
var val = {t:'n',v:0}, header = 0, offset = 1, hdr = [], isempty = true, v=0, vv="";
var r = {s:{r:0,c:0},e:{r:0,c:0}};
var o = opts != null ? opts : {};
var o = opts || {};
var raw = o.raw;
var defval = o.defval;
var range = o.range != null ? o.range : sheet["!ref"];
@ -17706,8 +17721,8 @@ function sheet_to_json(sheet, opts){
}
if(header > 0) offset = 0;
var rr = encode_row(r.s.r);
var cols = new Array(r.e.c-r.s.c+1);
var out = new Array(r.e.r-r.s.r-offset+1);
var cols = [];
var out = [];
var outi = 0, counter = 0;
var dense = Array.isArray(sheet);
var R = r.s.r, C = 0, CC = 0;