parsers expose original book type

This commit is contained in:
SheetJS 2022-07-08 18:31:08 -04:00
parent 61262617ec
commit ad1ce0d9b0
28 changed files with 1010 additions and 602 deletions

@ -235,7 +235,11 @@ function dbf_to_sheet(buf, opts)/*:Worksheet*/ {
}
function dbf_to_workbook(buf, opts)/*:Workbook*/ {
try { return sheet_to_workbook(dbf_to_sheet(buf, opts), opts); }
try {
var o = sheet_to_workbook(dbf_to_sheet(buf, opts), opts);
o.bookType = "dbf";
return o;
}
catch(e) { if(opts && opts.WTF) throw e; }
return ({SheetNames:[],Sheets:{}});
}
@ -546,6 +550,7 @@ var SYLK = /*#__PURE__*/(function() {
keys(ws).forEach(function(k) { o[k] = ws[k]; });
var outwb = sheet_to_workbook(o, opts);
keys(wb).forEach(function(k) { outwb[k] = wb[k]; });
outwb.bookType = "sylk";
return outwb;
}
@ -664,7 +669,11 @@ var DIF = /*#__PURE__*/(function() {
}
function dif_to_sheet(str/*:string*/, opts)/*:Worksheet*/ { return aoa_to_sheet(dif_to_aoa(str, opts), opts); }
function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { return sheet_to_workbook(dif_to_sheet(str, opts), opts); }
function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
var o = sheet_to_workbook(dif_to_sheet(str, opts), opts);
o.bookType = "dif";
return o;
}
var sheet_to_dif = /*#__PURE__*/(function() {
var push_field = function pf(o/*:Array<string>*/, topic/*:string*/, v/*:number*/, n/*:number*/, s/*:string*/) {

@ -1,78 +1,97 @@
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ {
switch(opts.type) {
case 'base64': return rtf_to_book_str(Base64_decode(d), opts);
case 'binary': return rtf_to_book_str(d, opts);
case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
case 'array': return rtf_to_book_str(cc2str(d), opts);
}
throw new Error("Unrecognized type " + opts.type);
function rtf_to_sheet(d, opts) {
switch (opts.type) {
case "base64":
return rtf_to_sheet_str(Base64_decode(d), opts);
case "binary":
return rtf_to_sheet_str(d, opts);
case "buffer":
return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d), opts);
case "array":
return rtf_to_sheet_str(cc2str(d), opts);
}
throw new Error("Unrecognized type " + opts.type);
}
/* TODO: RTF technically can store multiple tables, even if Excel does not */
function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ {
var o = opts || {};
var sname = o.sheet || "Sheet1";
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} };
wb.Sheets[sname] = ws;
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: payload.join(""), t:"s"};
if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; }
else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); }
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
}
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return wb;
function rtf_to_sheet_str(str, opts) {
var o = opts || {};
var ws = o.dense ? [] : {};
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if (!rows)
throw new Error("RTF missing table");
var range = { s: { c: 0, r: 0 }, e: { c: 0, r: rows.length - 1 } };
rows.forEach(function(rowtf, R) {
if (Array.isArray(ws))
ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while ((res = rtfre.exec(rowtf)) != null) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if (data.charCodeAt(0) == 32)
data = data.slice(1);
if (data.length)
payload.push(data);
switch (res[0]) {
case "\\cell":
++C;
if (payload.length) {
var cell = { v: payload.join(""), t: "s" };
if (cell.v == "TRUE" || cell.v == "FALSE") {
cell.v = cell.v == "TRUE";
cell.t = "b";
} else if (!isNaN(fuzzynum(cell.v))) {
cell.t = "n";
if (o.cellText !== false)
cell.w = cell.v;
cell.v = fuzzynum(cell.v);
}
if (Array.isArray(ws))
ws[R][C] = cell;
else
ws[encode_cell({ r: R, c: C })] = cell;
}
payload = [];
break;
case "\\par":
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
}
if (C > range.e.c)
range.e.c = C;
});
ws["!ref"] = encode_range(range);
return ws;
}
/* TODO: standardize sheet names as titles for tables */
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
var o = ["{\\rtf1\\ansi"];
var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
var dense = Array.isArray(ws);
for(var R = r.s.r; R <= r.e.r; ++R) {
o.push("\\trowd\\trautofit1");
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
o.push("\\pard\\intbl");
for(C = r.s.c; C <= r.e.c; ++C) {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
}
return o.join("") + "}";
function rtf_to_workbook(d, opts) {
var wb = sheet_to_workbook(rtf_to_sheet(d, opts), opts);
wb.bookType = "rtf";
return wb;
}
function sheet_to_rtf(ws, opts) {
var o = ["{\\rtf1\\ansi"];
if (!ws["!ref"])
return o[0] + "}";
var r = safe_decode_range(ws["!ref"]), cell;
var dense = Array.isArray(ws);
for (var R = r.s.r; R <= r.e.r; ++R) {
o.push("\\trowd\\trautofit1");
for (var C = r.s.c; C <= r.e.c; ++C)
o.push("\\cellx" + (C + 1));
o.push("\\pard\\intbl");
for (C = r.s.c; C <= r.e.c; ++C) {
var coord = encode_cell({ r: R, c: C });
cell = dense ? (ws[R] || [])[C] : ws[coord];
if (!cell || cell.v == null && (!cell.f || cell.F)) {
o.push(" \\cell");
continue;
}
o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
}
return o.join("") + "}";
}

@ -919,6 +919,7 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ {
out.SSF = dup(table_fmt);
out.Props = Props;
out.Custprops = Custprops;
out.bookType = "xlml";
return out;
}

@ -97,9 +97,14 @@ var HTML_END = '</body></html>';
function html_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
var mtch = str.match(/<table[\s\S]*?>[\s\S]*?<\/table>/gi);
if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find <table>");
if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
if(mtch.length == 1) {
var w = sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
w.bookType = "html";
return w;
}
var wb = book_new();
mtch.forEach(function(s, idx) { book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); });
wb.bookType = "html";
return wb;
}
@ -215,7 +220,9 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
}
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
return sheet_to_workbook(parse_dom_table(table, opts), opts);
var o = sheet_to_workbook(parse_dom_table(table, opts), opts);
//o.bookType = "dom"; // TODO: define a type for this
return o;
}
function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ {

@ -770,9 +770,12 @@ function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(!content) throw new Error("Missing content.xml in ODS / UOF file");
var wb = parse_content_xml(utf8read(content), opts, Styles);
if(safegetzipfile(zip, 'meta.xml')) wb.Props = parse_core_props(getzipdata(zip, 'meta.xml'));
wb.bookType = "ods";
return wb;
}
function parse_fods(data/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
return parse_content_xml(data, opts);
var wb = parse_content_xml(data, opts);
wb.bookType = "fods";
return wb;
}

@ -396,7 +396,7 @@ function parse_old_storage(buf, sst, rsst, v) {
var ret;
switch (buf[2]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: ieee };
break;
@ -456,7 +456,7 @@ function parse_new_storage(buf, sst, rsst) {
var ret;
switch (buf[1]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: d128 };
break;
@ -761,6 +761,7 @@ function parse_TN_DocumentArchive(M, root) {
});
if (out.SheetNames.length == 0)
throw new Error("Empty NUMBERS file");
out.bookType = "numbers";
return out;
}
function parse_numbers_iwa(cfb) {
@ -961,6 +962,8 @@ function write_numbers_iwa(wb, opts) {
throw new Error("Too many messages");
}
var entry = CFB.find(cfb, dependents[1].location);
if (!entry)
throw "Could not find ".concat(dependents[1].location, " in Numbers template");
var x = parse_iwa_file(decompress_iwa_file(entry.content));
var docroot;
for (var xi = 0; xi < x.length; ++xi) {
@ -968,8 +971,12 @@ function write_numbers_iwa(wb, opts) {
if (packet.id == 1)
docroot = packet;
}
if (docroot == null)
throw "Could not find message ".concat(1, " in Numbers template");
var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -985,6 +992,8 @@ function write_numbers_iwa(wb, opts) {
entry.size = entry.content.length;
sheetrootref = parse_TSP_Reference(sheetref[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -993,6 +1002,8 @@ function write_numbers_iwa(wb, opts) {
}
sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -1005,6 +1016,8 @@ function write_numbers_iwa(wb, opts) {
pb[7][0].data = write_varint49(range.e.c + 1);
var cruidsref = parse_TSP_Reference(pb[46][0].data);
var oldbucket = CFB.find(cfb, dependents[cruidsref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
for (var j = 0; j < _x.length; ++j) {
@ -1047,6 +1060,8 @@ function write_numbers_iwa(wb, opts) {
var row_headers = parse_shallow(store[1][0].data);
var row_header_ref = parse_TSP_Reference(row_headers[2][0].data);
oldbucket = CFB.find(cfb, dependents[row_header_ref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != row_header_ref)
@ -1065,6 +1080,8 @@ function write_numbers_iwa(wb, opts) {
oldbucket.size = oldbucket.content.length;
var col_header_ref = parse_TSP_Reference(store[2][0].data);
oldbucket = CFB.find(cfb, dependents[col_header_ref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != col_header_ref)
@ -1109,6 +1126,8 @@ function write_numbers_iwa(wb, opts) {
var sstref = parse_TSP_Reference(store[4][0].data);
(function() {
var sentry = CFB.find(cfb, dependents[sstref].location);
if (!sentry)
throw "Could not find ".concat(dependents[sstref].location, " in Numbers template");
var sx = parse_iwa_file(decompress_iwa_file(sentry.content));
var sstroot;
for (var sxi = 0; sxi < sx.length; ++sxi) {
@ -1116,6 +1135,8 @@ function write_numbers_iwa(wb, opts) {
if (packet2.id == sstref)
sstroot = packet2;
}
if (sstroot == null)
throw "Could not find message ".concat(sstref, " in Numbers template");
var sstdata = parse_shallow(sstroot.messages[0].data);
{
sstdata[3] = [];
@ -1141,6 +1162,8 @@ function write_numbers_iwa(wb, opts) {
var tileref = parse_TSP_Reference(tl[2][0].data);
(function() {
var tentry = CFB.find(cfb, dependents[tileref].location);
if (!tentry)
throw "Could not find ".concat(dependents[tileref].location, " in Numbers template");
var tx = parse_iwa_file(decompress_iwa_file(tentry.content));
var tileroot;
for (var sxi = 0; sxi < tx.length; ++sxi) {

@ -247,6 +247,8 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(dir.vba.length > 0) out.vbaraw = getzipdata(zip,strip_front_slash(dir.vba[0]),true);
else if(dir.defaults && dir.defaults.bin === CT_VBA) out.vbaraw = getzipdata(zip, 'xl/vbaProject.bin',true);
}
// TODO: pass back content types metadata for xlsm/xlsx resolution
out.bookType = xlsb ? "xlsb" : "xlsx";
return out;
}

@ -20,7 +20,7 @@ can be installed with Bash on Windows or with `cygwin`.
**JavaScript APIs**
- [`XMLHttpRequest and fetch`](xhr/)
- [`Clipboard Data`](https://docs.sheetjs.com/docs/getting-started/demos/clipboard)
- [`Typed Arrays and Math`](array/)
- [`Typed Arrays for Machine Learning`](https://docs.sheetjs.com/docs/getting-started/demos/ml)
**Frameworks**
- [`angularjs`](angular/)

@ -1,7 +0,0 @@
.PHONY: init
init:
npm i
.PHONY: tfjs
tfjs: init
node tf.js

@ -1,131 +1,6 @@
# Typed Arrays and Math
ECMAScript version 6 introduced Typed Arrays, array-like objects designed for
low-level optimizations and predictable operations. They are supported in most
modern browsers and form the basis of various APIs, including NodeJS Buffers,
WebGL buffers, WebAssembly, and tensors in linear algebra and math libraries.
This demo covers conversions between worksheets and Typed Arrays. It also tries
to cover common numerical libraries that work with data arrays.
Excel supports a subset of the IEEE754 Double precision floating point numbers,
but many libraries only support `Float32` Single precision values. `Math.fround`
rounds `Number` values to the nearest single-precision floating point value.
## Working with Data in Typed Arrays
Typed arrays are not true Array objects. The array of array utility functions
like `aoa_to_sheet` will not handle arrays of Typed Arrays.
#### Exporting Typed Arrays to a Worksheet
A single typed array can be converted to a pure JS array with `Array.from`:
```js
var column = Array.from(dataset_typedarray);
```
`aoa_to_sheet` expects a row-major array of arrays. To export multiple data
sets, "transpose" the data:
```js
/* assuming data is an array of typed arrays */
var aoa = [];
for(var i = 0; i < data.length; ++i) {
for(var j = 0; j < data[i].length; ++j) {
if(!aoa[j]) aoa[j] = [];
aoa[j][i] = data[i][j];
}
}
/* aoa can be directly converted to a worksheet object */
var ws = XLSX.utils.aoa_to_sheet(aoa);
```
#### Importing Data from a Spreadsheet
`sheet_to_json` with the option `header:1` will generate a row-major array of
arrays that can be transposed. However, it is more efficient to walk the sheet
manually:
```js
/* find worksheet range */
var range = XLSX.utils.decode_range(ws['!ref']);
var out = []
/* walk the columns */
for(var C = range.s.c; C <= range.e.c; ++C) {
/* create the typed array */
var ta = new Float32Array(range.e.r - range.s.r + 1);
/* walk the rows */
for(var R = range.s.r; R <= range.e.r; ++R) {
/* find the cell, skip it if the cell isn't numeric or boolean */
var cell = ws[XLSX.utils.encode_cell({r:R, c:C})];
if(!cell || cell.t != 'n' && cell.t != 'b') continue;
/* assign to the typed array */
ta[R - range.s.r] = cell.v;
}
out.push(ta);
}
```
If the data set has a header row, the loop can be adjusted to skip that row.
## Demos
Each example focuses on single-variable linear regression. Sample worksheets
will start with a label row. The first column is the x-value and the second
column is the y-value. A sample spreadsheet can be generated randomly:
```js
var aoo = [];
for(var i = 0; i < 100; ++i) aoo.push({x:i, y:2 * i + Math.random()});
var ws = XLSX.utils.json_to_sheet(aoo);
var wb = XLSX.utils.book_new(); XLSX.utils.book_append_sheet(wb, ws, "Sheet1");
XLSX.writeFile(wb, "linreg.xlsx");
```
Some libraries provide utility functions that work with plain arrays of numbers.
When possible, they should be preferred over manual conversion.
Reshaping raw float arrays and exporting to a worksheet is straightforward:
```js
function array_to_sheet(farray, shape, headers) {
/* generate new AOA from the float array */
var aoa = [];
for(var j = 0; j < shape[0]; ++j) {
aoa[j] = [];
for(var i = 0; i < shape[1]; ++i) aoa[j][i] = farray[j * shape[1] + i];
}
/* add headers and generate worksheet */
if(headers) aoa.unshift(headers);
return XLSX.utils.aoa_to_sheet(aoa);
}
```
#### TensorFlow
[TensorFlow](https://js.tensorflow.org/) `tensor` objects can be created from
arrays of arrays:
```js
var tensor = tf.tensor2d(aoa).transpose();
var col1 = tensor.slice([0,0], [1,tensor.shape[1]]).flatten();
var col2 = tensor.slice([1,0], [1,tensor.shape[1]]).flatten();
```
`stack` should be used to create the 2-d tensor for export:
```js
var tensor = tf.stack([col1, col2]).transpose();
var shape = tensor.shape;
var farray = tensor.dataSync();
var ws = array_to_sheet(farray, shape, ["header1", "header2"]);
```
The demo generates a sample dataset and uses a simple linear predictor with
least-squares scoring to calculate regression coefficients. The tensors are
exported to a new file.
[The new demo](https://docs.sheetjs.com/docs/getting-started/demos/ml) includes
interactive examples as well as strategies for CSV and JS Array interchange.
[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/js-xlsx?pixel)](https://github.com/SheetJS/js-xlsx)

@ -1,27 +0,0 @@
/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */
/*global module, require, XLSX:true */
if(typeof require !== 'undefined' && typeof XLSX === 'undefined') XLSX = require('xlsx');
/**
 * Generate `linreg.xlsx`, a sample workbook for linear-regression demos.
 *
 * Sheet1 holds `n` points (x = i, y = fround(2*i + random)) below a header row.
 * Sheet2 holds an array formula that asks the spreadsheet app to compute LINEST
 * over the generated data. The least-squares slope and intercept computed in JS
 * are logged with the label "JS Pre".
 *
 * @param {number} [n=100] number of data points to generate
 */
function generate_random_file(n) {
	if(!n) n = 100;
	var aoo = [];
	/* running means (x_, y_) and raw sums (xx, xy) for the closed-form fit */
	var x_ = 0, y_ = 0, xx = 0, xy = 0;
	for(var i = 0; i < n; ++i) {
		var y = Math.fround(2 * i + Math.random());
		aoo.push({x:i, y:y});
		x_ += i / n; y_ += y / n; xx += i*i; xy += i * y;
	}
	var m = Math.fround((xy - n * x_ * y_)/(xx - n * x_ * x_));
	console.log(m, Math.fround(y_ - m * x_), "JS Pre");
	var ws = XLSX.utils.json_to_sheet(aoo);
	var wb = XLSX.utils.book_new();
	XLSX.utils.book_append_sheet(wb, ws, "Sheet1");
	ws = XLSX.utils.aoa_to_sheet([[2, 0]]);
	/* row 1 is the header, so the data occupies rows 2 .. n+1; size the ranges from n */
	var last_row = n + 1;
	XLSX.utils.sheet_set_array_formula(ws, "A1:B1", "LINEST(Sheet1!B2:B" + last_row + ",Sheet1!A2:A" + last_row + ")");
	XLSX.utils.book_append_sheet(wb, ws, "Sheet2");
	XLSX.writeFile(wb, "linreg.xlsx");
}
if(typeof module !== 'undefined') module.exports = {
generate_random_file: generate_random_file
};

@ -1,6 +0,0 @@
{
"dependencies": {
"@tensorflow/tfjs": "^3.16.0",
"xlsx": "https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz"
}
}

@ -1,78 +0,0 @@
/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */
/* eslint-env node */
/* Demo script: generate linreg.xlsx, read it back, fit y = a*x + b twice */
/* (closed-form in plain JS, then SGD with TensorFlow.js) and export the   */
/* x / y / prediction columns to tfjs.xls.                                 */
var XLSX = require('xlsx');
var tf = require('@tensorflow/tfjs');
var linest = require('./linest');
/* generate linreg.xlsx with 100 random points */
var N = 100;
linest.generate_random_file(N);
/* get the first worksheet as an array of arrays, skip the first row */
var wb = XLSX.readFile('linreg.xlsx');
var ws = wb.Sheets[wb.SheetNames[0]];
var aoa = XLSX.utils.sheet_to_json(ws, {header:1, raw:true}).slice(1);
/* calculate the coefficients in JS */
(function(aoa) {
/* x_ / y_ accumulate the means; xx / xy accumulate the raw sums */
var x_ = 0, y_ = 0, xx = 0, xy = 0, n = aoa.length;
for(var i = 0; i < n; ++i) {
x_ += aoa[i][0] / n;
y_ += aoa[i][1] / n;
xx += aoa[i][0] * aoa[i][0];
xy += aoa[i][0] * aoa[i][1];
}
/* least-squares slope; the intercept is logged alongside it */
var m = Math.fround((xy - n * x_ * y_)/(xx - n * x_ * x_));
console.log(m, Math.fround(y_ - m * x_), "JS Post");
})(aoa);
/* build X and Y vectors */
/* after the transpose, row 0 holds the first column (x) and row 1 the second (y) */
var tensor = tf.tensor2d(aoa).transpose();
console.log(tensor.shape);
var xs = tensor.slice([0,0], [1,tensor.shape[1]]).flatten();
var ys = tensor.slice([1,0], [1,tensor.shape[1]]).flatten();
/* set up variables with initial guess */
/* slope starts at mean(y)/mean(x); intercept starts at a random value */
var x_ = xs.mean().dataSync()[0];
var y_ = ys.mean().dataSync()[0];
var a = tf.variable(tf.scalar(y_/x_));
var b = tf.variable(tf.scalar(Math.random()));
/* linear predictor */
function predict(x) { return tf.tidy(function() { return a.mul(x).add(b); }); }
/* mean square scoring */
function loss(yh, y) { return yh.sub(y).square().mean(); }
/* train */
/* 5 rounds of 1000 SGD steps; the learning rate shrinks as 0.0001/(j+1) */
for(var j = 0; j < 5; ++j) {
var learning_rate = 0.0001 /(j+1), iterations = 1000;
var optimizer = tf.train.sgd(learning_rate);
for(var i = 0; i < iterations; ++i) optimizer.minimize(function() {
var pred = predict(xs);
var L = loss(pred, ys);
return L
});
/* compute the coefficient */
/* the log label carries the cumulative number of iterations so far */
var m = a.dataSync()[0], b_ = b.dataSync()[0];
console.log(m, b_, "TF " + iterations * (j+1));
}
/* export data to aoa */
var yh = predict(xs);
var tfdata = tf.stack([xs, ys, yh]).transpose();
var shape = tfdata.shape;
var tfarr = tfdata.dataSync();
var tfaoa = [];
/* reshape the flat data array into shape[0] rows of shape[1] values */
for(j = 0; j < shape[0]; ++j) {
tfaoa[j] = [];
for(i = 0; i < shape[1]; ++i) tfaoa[j][i] = tfarr[j * shape[1] + i];
}
/* add headers and export */
tfaoa.unshift(["x", "y", "pred"]);
var new_ws = XLSX.utils.aoa_to_sheet(tfaoa);
var new_wb = XLSX.utils.book_new();
XLSX.utils.book_append_sheet(new_wb, new_ws, "Sheet1");
XLSX.writeFile(new_wb, "tfjs.xls");

97
modules/45_rtf.js Normal file

@ -0,0 +1,97 @@
/**
 * Convert raw RTF input to a string according to `opts.type` and parse it
 * into a worksheet with rtf_to_sheet_str.
 *
 * Supported types: "base64", "binary", "buffer" and "array".
 * Throws an Error ("Unrecognized type ...") for anything else.
 */
function rtf_to_sheet(d, opts) {
	var kind = opts.type;
	var str;
	if (kind === "base64") str = Base64_decode(d);
	else if (kind === "binary") str = d;
	else if (kind === "buffer") str = has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d);
	else if (kind === "array") str = cc2str(d);
	else throw new Error("Unrecognized type " + kind);
	return rtf_to_sheet_str(str, opts);
}
/**
 * Parse the table rows found in an RTF string into a worksheet.
 *
 * Each span from \trowd to \row is one row, and each \cell control word
 * closes one cell. "TRUE" / "FALSE" become boolean cells, text accepted by
 * fuzzynum becomes a numeric cell (keeping the text in `w` unless
 * `opts.cellText` is false), and everything else is stored as a string.
 *
 * Returns an array-of-rows sheet when `opts.dense` is set, otherwise an
 * object keyed by cell address. Throws "RTF missing table" if no row matches.
 */
function rtf_to_sheet_str(str, opts) {
	var o = opts || {};
	var ws = o.dense ? [] : {};
	var dense = Array.isArray(ws);
	var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
	if (!rows) throw new Error("RTF missing table");
	var range = { s: { c: 0, r: 0 }, e: { c: 0, r: rows.length - 1 } };
	rows.forEach(function(rowtf, R) {
		if (dense) ws[R] = [];
		var rtfre = /\\[\w\-]+\b/g;
		var prev_end = 0, C = -1, payload = [], m;
		for (m = rtfre.exec(rowtf); m != null; m = rtfre.exec(rowtf)) {
			/* literal text between the previous control word and this one */
			var chunk = rowtf.slice(prev_end, m.index);
			/* drop the single space that delimits a control word from its text */
			if (chunk.charCodeAt(0) == 32) chunk = chunk.slice(1);
			if (chunk.length) payload.push(chunk);
			var word = m[0];
			if (word === "\\cell") {
				++C;
				if (payload.length) {
					var text = payload.join("");
					var cell = { v: text, t: "s" };
					if (text == "TRUE" || text == "FALSE") {
						cell.v = text == "TRUE";
						cell.t = "b";
					} else if (!isNaN(fuzzynum(text))) {
						cell.t = "n";
						if (o.cellText !== false) cell.w = text;
						cell.v = fuzzynum(text);
					}
					if (dense) ws[R][C] = cell;
					else ws[encode_cell({ r: R, c: C })] = cell;
				}
				payload = [];
			} else if (word === "\\par") {
				/* paragraph breaks inside a cell are kept as newlines */
				payload.push("\n");
			}
			prev_end = rtfre.lastIndex;
		}
		/* widen the sheet range to the widest row seen */
		if (C > range.e.c) range.e.c = C;
	});
	ws["!ref"] = encode_range(range);
	return ws;
}
/**
 * Parse raw RTF data into a workbook: the parsed worksheet is wrapped with
 * sheet_to_workbook and the result is tagged with bookType "rtf".
 */
function rtf_to_workbook(d, opts) {
	var sheet = rtf_to_sheet(d, opts);
	var book = sheet_to_workbook(sheet, opts);
	book.bookType = "rtf";
	return book;
}
/**
 * Serialize a worksheet as an RTF document containing one table.
 *
 * Every row in the sheet range becomes a \trowd ... \row group with one
 * \cellx marker per column. Cells that are missing, or that have no value
 * and no plain formula, are written as empty cells so columns stay aligned.
 * Line breaks inside cell text are written as "\par ".
 * A sheet without "!ref" yields an empty RTF document.
 * `opts` is accepted but not read here.
 */
function sheet_to_rtf(ws, opts) {
	var HEADER = "{\\rtf1\\ansi";
	if (!ws["!ref"]) return HEADER + "}";
	var out = [HEADER];
	var rng = safe_decode_range(ws["!ref"]);
	var is_dense = Array.isArray(ws);
	var R, C, addr, cell, text;
	for (R = rng.s.r; R <= rng.e.r; ++R) {
		/* row header followed by the column boundary markers */
		out.push("\\trowd\\trautofit1");
		for (C = rng.s.c; C <= rng.e.c; ++C) out.push("\\cellx" + (C + 1));
		out.push("\\pard\\intbl");
		for (C = rng.s.c; C <= rng.e.c; ++C) {
			addr = encode_cell({ r: R, c: C });
			cell = is_dense ? (ws[R] || [])[C] : ws[addr];
			if (!cell || (cell.v == null && (!cell.f || cell.F))) {
				out.push(" \\cell");
				continue;
			}
			/* prefer the cached text; otherwise have format_cell populate cell.w */
			text = cell.w;
			if (!text) {
				format_cell(cell);
				text = cell.w || "";
			}
			out.push(" " + text.replace(/[\r\n]/g, "\\par ") + "\\cell");
		}
		out.push("\\pard\\intbl\\row");
	}
	return out.join("") + "}";
}

98
modules/45_rtf.ts Normal file

@ -0,0 +1,98 @@
import { WorkBook, WorkSheet, Range, CellObject } from '../';
import type { utils } from "../";
declare var encode_cell: typeof utils.encode_cell;
declare var encode_range: typeof utils.encode_range;
declare var format_cell: typeof utils.format_cell;
declare var safe_decode_range: typeof utils.decode_range;
declare function sheet_to_workbook(s: WorkSheet, o?: any): WorkBook;
declare function cc2str(d: any): string;
declare function a2s(a: any): string;
declare var has_buf: boolean;
declare function Base64_decode(s: string): string;
declare function fuzzynum(s: string): number;
/* Convert raw RTF data to a string based on opts.type, then parse it with rtf_to_sheet_str. */
/* Throws "Unrecognized type ..." when opts.type is not base64 / binary / buffer / array. */
function rtf_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ {
switch(opts.type) {
case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts);
case 'binary': return rtf_to_sheet_str(d, opts);
case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
case 'array': return rtf_to_sheet_str(cc2str(d), opts);
}
throw new Error("Unrecognized type " + opts.type);
}
/* TODO: this is a stub */
/* Parse the table rows (\trowd ... \row) of an RTF string into a worksheet. */
/* Returns an array-of-rows sheet when opts.dense is set, otherwise an object keyed by cell address. */
/* Throws "RTF missing table" when no row is found. */
function rtf_to_sheet_str(str: string, opts)/*:Worksheet*/ {
var o = opts || {};
// ESBuild issue 2375
var ws: WorkSheet = o.dense ? [] : ({}/*:any*/);
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows) throw new Error("RTF missing table");
var range: Range = {s: {c:0, r:0}, e: {c:0, r:rows.length - 1}};
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
// matches one RTF control word at a time
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload: string[] = [];
while((res = rtfre.exec(rowtf)) != null) {
// text between the previous control word and the current one
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
// drop a single leading space (0x20)
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
// every \cell advances the column, even when no text was collected
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell: CellObject = {v: payload.join(""), t:"s"};
if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; }
else if(!isNaN(fuzzynum(cell.v as string))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v as string; cell.v = fuzzynum(cell.v as string); }
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
}
// widen the range to the widest row seen so far
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return ws;
}
/* Parse raw RTF data into a workbook and tag it with bookType "rtf". */
function rtf_to_workbook(d/*:RawData*/, opts): WorkBook {
var wb: WorkBook = sheet_to_workbook(rtf_to_sheet(d, opts), opts);
wb.bookType = "rtf";
return wb;
}
/* TODO: this is a stub */
/* Serialize a worksheet as an RTF document with one table row per sheet row. */
/* A sheet without "!ref" produces an empty RTF document. opts is not read here. */
function sheet_to_rtf(ws: WorkSheet, opts): string {
var o: string[] = ["{\\rtf1\\ansi"];
if(!ws["!ref"]) return o[0] + "}";
var r = safe_decode_range(ws['!ref']), cell: CellObject;
var dense = Array.isArray(ws);
for(var R = r.s.r; R <= r.e.r; ++R) {
o.push("\\trowd\\trautofit1");
// one \cellx marker per column in the range
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
o.push("\\pard\\intbl");
for(C = r.s.c; C <= r.e.c; ++C) {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
// missing or valueless cells still emit an empty \cell so later columns stay in place
if(!cell || cell.v == null && (!cell.f || cell.F)) { o.push(" \\cell"); continue; }
// use cached text if present, otherwise let format_cell populate cell.w; newlines become \par
o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
}
return o.join("") + "}";
}

@ -396,7 +396,7 @@ function parse_old_storage(buf, sst, rsst, v) {
var ret;
switch (buf[2]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: ieee };
break;
@ -456,7 +456,7 @@ function parse_new_storage(buf, sst, rsst) {
var ret;
switch (buf[1]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: d128 };
break;
@ -761,6 +761,7 @@ function parse_TN_DocumentArchive(M, root) {
});
if (out.SheetNames.length == 0)
throw new Error("Empty NUMBERS file");
out.bookType = "numbers";
return out;
}
function parse_numbers_iwa(cfb) {
@ -961,6 +962,8 @@ function write_numbers_iwa(wb, opts) {
throw new Error("Too many messages");
}
var entry = CFB.find(cfb, dependents[1].location);
if (!entry)
throw "Could not find ".concat(dependents[1].location, " in Numbers template");
var x = parse_iwa_file(decompress_iwa_file(entry.content));
var docroot;
for (var xi = 0; xi < x.length; ++xi) {
@ -968,8 +971,12 @@ function write_numbers_iwa(wb, opts) {
if (packet.id == 1)
docroot = packet;
}
if (docroot == null)
throw "Could not find message ".concat(1, " in Numbers template");
var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -985,6 +992,8 @@ function write_numbers_iwa(wb, opts) {
entry.size = entry.content.length;
sheetrootref = parse_TSP_Reference(sheetref[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -993,6 +1002,8 @@ function write_numbers_iwa(wb, opts) {
}
sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -1005,6 +1016,8 @@ function write_numbers_iwa(wb, opts) {
pb[7][0].data = write_varint49(range.e.c + 1);
var cruidsref = parse_TSP_Reference(pb[46][0].data);
var oldbucket = CFB.find(cfb, dependents[cruidsref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
for (var j = 0; j < _x.length; ++j) {
@ -1047,6 +1060,8 @@ function write_numbers_iwa(wb, opts) {
var row_headers = parse_shallow(store[1][0].data);
var row_header_ref = parse_TSP_Reference(row_headers[2][0].data);
oldbucket = CFB.find(cfb, dependents[row_header_ref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != row_header_ref)
@ -1065,6 +1080,8 @@ function write_numbers_iwa(wb, opts) {
oldbucket.size = oldbucket.content.length;
var col_header_ref = parse_TSP_Reference(store[2][0].data);
oldbucket = CFB.find(cfb, dependents[col_header_ref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != col_header_ref)
@ -1109,6 +1126,8 @@ function write_numbers_iwa(wb, opts) {
var sstref = parse_TSP_Reference(store[4][0].data);
(function() {
var sentry = CFB.find(cfb, dependents[sstref].location);
if (!sentry)
throw "Could not find ".concat(dependents[sstref].location, " in Numbers template");
var sx = parse_iwa_file(decompress_iwa_file(sentry.content));
var sstroot;
for (var sxi = 0; sxi < sx.length; ++sxi) {
@ -1116,6 +1135,8 @@ function write_numbers_iwa(wb, opts) {
if (packet2.id == sstref)
sstroot = packet2;
}
if (sstroot == null)
throw "Could not find message ".concat(sstref, " in Numbers template");
var sstdata = parse_shallow(sstroot.messages[0].data);
{
sstdata[3] = [];
@ -1141,6 +1162,8 @@ function write_numbers_iwa(wb, opts) {
var tileref = parse_TSP_Reference(tl[2][0].data);
(function() {
var tentry = CFB.find(cfb, dependents[tileref].location);
if (!tentry)
throw "Could not find ".concat(dependents[tileref].location, " in Numbers template");
var tx = parse_iwa_file(decompress_iwa_file(tentry.content));
var tileroot;
for (var sxi = 0; sxi < tx.length; ++sxi) {

@ -184,9 +184,9 @@ interface IWAMessage {
data: Uint8Array;
}
interface IWAArchiveInfo {
id?: number;
id: number;
merge?: boolean;
messages?: IWAMessage[];
messages: IWAMessage[];
}
/** Extract all messages from a IWA file */
function parse_iwa_file(buf: Uint8Array): IWAArchiveInfo[] {
@ -248,7 +248,7 @@ function parse_snappy_chunk(type: number, buf: Uint8Array): Uint8Array {
var ptr: Ptr = [0];
var usz = parse_varint49(buf, ptr);
var chunks = [];
var chunks: Uint8Array[] = [];
while(ptr[0] < buf.length) {
var tag = buf[ptr[0]] & 0x3;
if(tag == 0) {
@ -295,7 +295,7 @@ function parse_snappy_chunk(type: number, buf: Uint8Array): Uint8Array {
/** Decompress IWA file */
function decompress_iwa_file(buf: Uint8Array): Uint8Array {
var out = [];
var out: Uint8Array[] = [];
var l = 0;
while(l < buf.length) {
var t = buf[l++];
@ -336,7 +336,7 @@ function compress_iwa_file(buf: Uint8Array): Uint8Array {
//<<export { decompress_iwa_file, compress_iwa_file };
/** Parse "old storage" (version 0..3) */
function parse_old_storage(buf: Uint8Array, sst: string[], rsst: string[], v: 0|1|2|3): CellObject {
function parse_old_storage(buf: Uint8Array, sst: string[], rsst: string[], v: 0|1|2|3): CellObject | void {
var dv = u8_to_dataview(buf);
var flags = dv.getUint32(4, true);
@ -352,7 +352,7 @@ function parse_old_storage(buf: Uint8Array, sst: string[], rsst: string[], v: 0|
var ret: CellObject;
switch(buf[2]) {
case 0: break; // return { t: "z" }; // blank?
case 0: return void 0; // return { t: "z" }; // blank?
case 2: ret = { t: "n", v: ieee }; break; // number
case 3: ret = { t: "s", v: sst[sidx] }; break; // string
case 5: ret = { t: "d", v: dt }; break; // date-time
@ -371,7 +371,7 @@ function parse_old_storage(buf: Uint8Array, sst: string[], rsst: string[], v: 0|
}
/** Parse "new storage" (version 5) */
function parse_new_storage(buf: Uint8Array, sst: string[], rsst: string[]): CellObject {
function parse_new_storage(buf: Uint8Array, sst: string[], rsst: string[]): CellObject | void {
var dv = u8_to_dataview(buf);
var flags = dv.getUint32(8, true);
@ -388,7 +388,7 @@ function parse_new_storage(buf: Uint8Array, sst: string[], rsst: string[]): Cell
var ret: CellObject;
switch(buf[1]) {
case 0: break; // return { t: "z" }; // blank?
case 0: return void 0; // return { t: "z" }; // blank?
case 2: ret = { t: "n", v: d128 }; break; // number
case 3: ret = { t: "s", v: sst[sidx] }; break; // string
case 5: ret = { t: "d", v: dt }; break; // date-time
@ -438,7 +438,7 @@ function write_old_storage(cell: CellObject, sst: string[]): Uint8Array {
return out.slice(0, l);
}
//<<export { write_new_storage, write_old_storage };
function parse_cell_storage(buf: Uint8Array, sst: string[], rsst: string[]): CellObject {
function parse_cell_storage(buf: Uint8Array, sst: string[], rsst: string[]): CellObject | void {
switch(buf[0]) {
case 0: case 1:
case 2: case 3: return parse_old_storage(buf, sst, rsst, buf[0]);
@ -475,7 +475,7 @@ function parse_TST_TableDataList(M: MessageSpace, root: IWAMessage): string[] {
var type = varint_to_i32(pb[1][0].data);
var entries = pb[3];
var data = [];
var data: Array<string> = [];
(entries||[]).forEach(entry => {
// .TST.TableDataList.ListEntry
var le = parse_shallow(entry.data);
@ -505,7 +505,7 @@ interface TileRowInfo {
/** Row Index */
R: number;
/** Cell Storage */
cells?: Uint8Array[];
cells: Uint8Array[];
}
/** Parse .TSP.TileRowInfo */
function parse_TST_TileRowInfo(u8: Uint8Array, type: TileStorageType): TileRowInfo {
@ -673,6 +673,7 @@ function parse_TN_DocumentArchive(M: MessageSpace, root: IWAMessage): WorkBook {
});
});
if(out.SheetNames.length == 0) throw new Error("Empty NUMBERS file");
out.bookType = "numbers";
return out;
}
@ -694,7 +695,7 @@ function parse_numbers_iwa(cfb: CFB$Container): WorkBook {
/* find document root */
if(M?.[1]?.[0]?.meta?.[1]?.[0].data && varint_to_i32(M[1][0].meta[1][0].data) == 10000) throw new Error("Pages documents are not supported");
var docroot: IWAMessage = M?.[1]?.[0]?.meta?.[1]?.[0].data && varint_to_i32(M[1][0].meta[1][0].data) == 1 && M[1][0];
var docroot: IWAMessage | false = M?.[1]?.[0]?.meta?.[1]?.[0].data && varint_to_i32(M[1][0].meta[1][0].data) == 1 && M[1][0];
if(!docroot) indices.forEach((idx) => {
M[idx].forEach((iwam) => {
var mtype = varint_to_i32(iwam.meta[1][0].data) >>> 0;
@ -789,7 +790,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container {
/* TODO: support multiple worksheets, larger ranges, more data types, etc */
var ws = wb.Sheets[wb.SheetNames[0]];
if(wb.SheetNames.length > 1) console.error("The Numbers writer currently writes only the first table");
var range = decode_range(ws["!ref"]);
var range = decode_range(ws["!ref"] as string);
range.s.r = range.s.c = 0;
/* Actual NUMBERS 12.0 limit ALL1000000 */
@ -853,16 +854,19 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container {
/* .TN.DocumentArchive */
var entry = CFB.find(cfb, dependents[1].location);
if(!entry) throw `Could not find ${dependents[1].location} in Numbers template`;
var x = parse_iwa_file(decompress_iwa_file(entry.content as Uint8Array));
var docroot: IWAArchiveInfo;
var docroot!: IWAArchiveInfo;
for(var xi = 0; xi < x.length; ++xi) {
var packet = x[xi];
if(packet.id == 1) docroot = packet;
}
if(docroot == null) throw `Could not find message ${1} in Numbers template`;
/* .TN.SheetArchive */
var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if(!entry) throw `Could not find ${dependents[sheetrootref].location} in Numbers template`;
x = parse_iwa_file(decompress_iwa_file(entry.content as Uint8Array));
for(xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -880,6 +884,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container {
/* .TST.TableInfoArchive */
sheetrootref = parse_TSP_Reference(sheetref[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if(!entry) throw `Could not find ${dependents[sheetrootref].location} in Numbers template`;
x = parse_iwa_file(decompress_iwa_file(entry.content as Uint8Array));
for(xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -889,6 +894,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container {
/* .TST.TableModelArchive */
sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if(!entry) throw `Could not find ${dependents[sheetrootref].location} in Numbers template`;
x = parse_iwa_file(decompress_iwa_file(entry.content as Uint8Array));
for(xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -903,6 +909,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container {
var cruidsref = parse_TSP_Reference(pb[46][0].data);
var oldbucket = CFB.find(cfb, dependents[cruidsref].location);
if(!oldbucket) throw `Could not find ${dependents[cruidsref].location} in Numbers template`;
var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content as Uint8Array));
{
for(var j = 0; j < _x.length; ++j) {
@ -940,6 +947,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container {
/* Row header storage: resolve the referenced bucket inside the template container */
var row_headers = parse_shallow(store[1][0].data);
var row_header_ref = parse_TSP_Reference(row_headers[2][0].data);
oldbucket = CFB.find(cfb, dependents[row_header_ref].location);
/* report the location that was actually looked up (row_header_ref), not the earlier cruidsref lookup */
if(!oldbucket) throw `Could not find ${dependents[row_header_ref].location} in Numbers template`;
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content as Uint8Array));
{
if(_x[0].id != row_header_ref) throw "Bad HeaderStorageBucket";
@ -956,6 +964,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container {
/* Column header storage: resolve the referenced bucket inside the template container */
var col_header_ref = parse_TSP_Reference(store[2][0].data);
oldbucket = CFB.find(cfb, dependents[col_header_ref].location);
/* report the location that was actually looked up (col_header_ref), not the earlier cruidsref lookup */
if(!oldbucket) throw `Could not find ${dependents[col_header_ref].location} in Numbers template`;
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content as Uint8Array));
{
if(_x[0].id != col_header_ref) throw "Bad HeaderStorageBucket";
@ -996,12 +1005,14 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container {
var sstref = parse_TSP_Reference(store[4][0].data);
(() => {
var sentry = CFB.find(cfb, dependents[sstref].location);
if(!sentry) throw `Could not find ${dependents[sstref].location} in Numbers template`;
var sx = parse_iwa_file(decompress_iwa_file(sentry.content as Uint8Array));
var sstroot: IWAArchiveInfo;
var sstroot!: IWAArchiveInfo;
for(var sxi = 0; sxi < sx.length; ++sxi) {
var packet = sx[sxi];
if(packet.id == sstref) sstroot = packet;
}
if(sstroot == null) throw `Could not find message ${sstref} in Numbers template`;
var sstdata = parse_shallow(sstroot.messages[0].data);
{
@ -1028,8 +1039,9 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container {
var tileref = parse_TSP_Reference(tl[2][0].data);
(() => {
var tentry = CFB.find(cfb, dependents[tileref].location);
if(!tentry) throw `Could not find ${dependents[tileref].location} in Numbers template`;
var tx = parse_iwa_file(decompress_iwa_file(tentry.content as Uint8Array));
var tileroot: IWAArchiveInfo;
var tileroot!: IWAArchiveInfo;
for(var sxi = 0; sxi < tx.length; ++sxi) {
var packet = tx[sxi];
if(packet.id == tileref) tileroot = packet;

@ -3,7 +3,7 @@ LIBFILES=src/types.ts
TSFILES=$(wildcard *.ts)
ENTRIES=$(subst .ts,.js,$(TSFILES))
BAREJS=04_base64.js 51_xlsxmeta.js 51_xlsbmeta.js 59_vba.js 64_ftab.js 83_numbers.js
BAREJS=04_base64.js 45_rtf.js 51_xlsxmeta.js 51_xlsbmeta.js 59_vba.js 64_ftab.js 83_numbers.js
.PHONY: all
all: $(ENTRIES) xlsx.zahl.js

12
test.js

@ -1430,6 +1430,18 @@ describe('parse features', function() {
"11111,1934-06-03,1934-06-03,1934-06-03"
].join("\n"));
}); });
it('bookType metadata', function() {
[
// TODO: keep in sync with BookType, support other formats
"xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers"
].forEach(function(r) {
if(!XLSX_ZAHL) XLSX_ZAHL=require("./dist/xlsx.zahl");
var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]);
var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1");
var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL });
assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r);
}); });
});
describe('write features', function() {

11
test.mjs generated

@ -1421,6 +1421,17 @@ describe('parse features', function() {
"11111,1934-06-03,1934-06-03,1934-06-03"
].join("\n"));
}); });
it('bookType metadata', function() {
[
// TODO: keep in sync with BookType, support other formats
"xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers"
].forEach(function(r) {
var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]);
var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1");
var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL });
assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r);
}); });
});
describe('write features', function() {

@ -1386,6 +1386,17 @@ describe('parse features', function() {
"11111,1934-06-03,1934-06-03,1934-06-03"
].join("\n"));
}); });
it('bookType metadata', function() {
([
// TODO: keep in sync with BookType, support other formats
"xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers"
] as X.BookType[]).forEach(function(r: X.BookType) {
var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]);
var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1");
var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL });
assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r);
}); });
});
describe('write features', function() {
@ -1754,6 +1765,33 @@ describe('roundtrip features', function() {
}
}); });
it('should preserve date system', function() {([
"biff5", "ods", "slk", "xls", "xlsb", "xlsx", "xml"
] as X.BookType[]).forEach(function(ext) {
// TODO: check actual date codes and actual date values
var wb0 = X.read(fs.readFileSync("./test_files/1904/1900." + ext), {type: TYPE});
assert.ok(!wb0.Workbook?.WBProps?.date1904);
var wb1 = X.read(X.write(wb0, {type: TYPE, bookType: ext}), {type: TYPE});
assert.ok(!wb1.Workbook?.WBProps?.date1904);
var wb2 = X.utils.book_new(); X.utils.book_append_sheet(wb2, X.utils.aoa_to_sheet([[1]]), "Sheet1");
wb2.Workbook = { WBProps: { date1904: false } };
assert.ok(!wb2.Workbook?.WBProps?.date1904);
var wb3 = X.read(X.write(wb2, {type: TYPE, bookType: ext}), {type: TYPE});
assert.ok(!wb3.Workbook?.WBProps?.date1904);
var wb4 = X.read(fs.readFileSync("./test_files/1904/1904." + ext), {type: TYPE});
assert.ok(wb4.Workbook?.WBProps?.date1904);
var wb5 = X.read(X.write(wb4, {type: TYPE, bookType: ext}), {type: TYPE});
assert.ok(wb5.Workbook?.WBProps?.date1904); // xlsb, xml
var wb6 = X.utils.book_new(); X.utils.book_append_sheet(wb6, X.utils.aoa_to_sheet([[1]]), "Sheet1");
wb6.Workbook = { WBProps: { date1904: true } };
assert.ok(wb6.Workbook?.WBProps?.date1904);
var wb7 = X.read(X.write(wb6, {type: TYPE, bookType: ext}), {type: TYPE});
assert.ok(wb7.Workbook?.WBProps?.date1904);
}); });
});
//function password_file(x){return x.match(/^password.*\.xls$/); }

38
test.ts

@ -1386,6 +1386,17 @@ Deno.test('parse features', async function(t) {
"11111,1934-06-03,1934-06-03,1934-06-03"
].join("\n"));
}); });
await t.step('bookType metadata', async function(t) {
([
// TODO: keep in sync with BookType, support other formats
"xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers"
] as X.BookType[]).forEach(function(r: X.BookType) {
var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]);
var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1");
var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL });
assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r);
}); });
});
Deno.test('write features', async function(t) {
@ -1754,6 +1765,33 @@ Deno.test('roundtrip features', async function(t) {
}
}); });
await t.step('should preserve date system', async function(t) {([
"biff5", "ods", "slk", "xls", "xlsb", "xlsx", "xml"
] as X.BookType[]).forEach(function(ext) {
// TODO: check actual date codes and actual date values
var wb0 = X.read(fs.readFileSync("./test_files/1904/1900." + ext), {type: TYPE});
assert.assert(!wb0.Workbook?.WBProps?.date1904);
var wb1 = X.read(X.write(wb0, {type: TYPE, bookType: ext}), {type: TYPE});
assert.assert(!wb1.Workbook?.WBProps?.date1904);
var wb2 = X.utils.book_new(); X.utils.book_append_sheet(wb2, X.utils.aoa_to_sheet([[1]]), "Sheet1");
wb2.Workbook = { WBProps: { date1904: false } };
assert.assert(!wb2.Workbook?.WBProps?.date1904);
var wb3 = X.read(X.write(wb2, {type: TYPE, bookType: ext}), {type: TYPE});
assert.assert(!wb3.Workbook?.WBProps?.date1904);
var wb4 = X.read(fs.readFileSync("./test_files/1904/1904." + ext), {type: TYPE});
assert.assert(wb4.Workbook?.WBProps?.date1904);
var wb5 = X.read(X.write(wb4, {type: TYPE, bookType: ext}), {type: TYPE});
assert.assert(wb5.Workbook?.WBProps?.date1904); // xlsb, xml
var wb6 = X.utils.book_new(); X.utils.book_append_sheet(wb6, X.utils.aoa_to_sheet([[1]]), "Sheet1");
wb6.Workbook = { WBProps: { date1904: true } };
assert.assert(wb6.Workbook?.WBProps?.date1904);
var wb7 = X.read(X.write(wb6, {type: TYPE, bookType: ext}), {type: TYPE});
assert.assert(wb7.Workbook?.WBProps?.date1904);
}); });
});
//function password_file(x){return x.match(/^password.*\.xls$/); }

@ -1385,6 +1385,17 @@ Deno.test('parse features', async function(t) {
"11111,1934-06-03,1934-06-03,1934-06-03"
].join("\n"));
}); });
await t.step('bookType metadata', async function(t) {
([
// TODO: keep in sync with BookType, support other formats
"xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers"
] as X.BookType[]).forEach(function(r: X.BookType) {
var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]);
var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1");
var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL });
assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r);
}); });
});
Deno.test('write features', async function(t) {
@ -1753,6 +1764,33 @@ Deno.test('roundtrip features', async function(t) {
}
}); });
await t.step('should preserve date system', async function(t) {([
"biff5", "ods", "slk", "xls", "xlsb", "xlsx", "xml"
] as X.BookType[]).forEach(function(ext) {
// TODO: check actual date codes and actual date values
var wb0 = X.read(fs.readFileSync("./test_files/1904/1900." + ext), {type: TYPE});
assert.assert(!wb0.Workbook?.WBProps?.date1904);
var wb1 = X.read(X.write(wb0, {type: TYPE, bookType: ext}), {type: TYPE});
assert.assert(!wb1.Workbook?.WBProps?.date1904);
var wb2 = X.utils.book_new(); X.utils.book_append_sheet(wb2, X.utils.aoa_to_sheet([[1]]), "Sheet1");
wb2.Workbook = { WBProps: { date1904: false } };
assert.assert(!wb2.Workbook?.WBProps?.date1904);
var wb3 = X.read(X.write(wb2, {type: TYPE, bookType: ext}), {type: TYPE});
assert.assert(!wb3.Workbook?.WBProps?.date1904);
var wb4 = X.read(fs.readFileSync("./test_files/1904/1904." + ext), {type: TYPE});
assert.assert(wb4.Workbook?.WBProps?.date1904);
var wb5 = X.read(X.write(wb4, {type: TYPE, bookType: ext}), {type: TYPE});
assert.assert(wb5.Workbook?.WBProps?.date1904); // xlsb, xml
var wb6 = X.utils.book_new(); X.utils.book_append_sheet(wb6, X.utils.aoa_to_sheet([[1]]), "Sheet1");
wb6.Workbook = { WBProps: { date1904: true } };
assert.assert(wb6.Workbook?.WBProps?.date1904);
var wb7 = X.read(X.write(wb6, {type: TYPE, bookType: ext}), {type: TYPE});
assert.assert(wb7.Workbook?.WBProps?.date1904);
}); });
});
//function password_file(x){return x.match(/^password.*\.xls$/); }

29
tests/core.js generated

@ -723,6 +723,7 @@ describe('output formats', function() {
["fods", true, true],
["csv", true, true],
["txt", true, true],
["rtf", false, true],
["sylk", false, true],
["eth", false, true],
["html", true, true],
@ -1429,6 +1430,17 @@ describe('parse features', function() {
"11111,1934-06-03,1934-06-03,1934-06-03"
].join("\n"));
}); });
it('bookType metadata', function() {
[
// TODO: keep in sync with BookType, support other formats
"xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers"
].forEach(function(r) {
var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]);
var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1");
var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL });
assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r);
}); });
});
describe('write features', function() {
@ -2531,6 +2543,23 @@ describe('js -> file -> js', function() {
});
});
// RTF writer/reader round-trip checks.
describe('rtf', function() {
// Writing a sheet to RTF, reading that RTF back, and writing it again
// must produce exactly the same RTF text as the first write.
it('roundtrip should be idempotent', function() {
// Mixed fixture: numbers, booleans, null gaps, strings, a date value,
// a numeric-looking string, an embedded double quote, and ragged row lengths.
var ws = X.utils.aoa_to_sheet([
[1,2,3],
[true, false, null, "sheetjs"],
["foo", "bar", fixdate, "0.3"],
["baz", null, "q\"ux"]
]);
var wb1 = X.utils.book_new();
X.utils.book_append_sheet(wb1, ws, "Sheet1");
// first pass: workbook -> RTF string
var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"});
// second pass: RTF string -> workbook -> RTF string
var wb2 = X.read(rtf1, {type: "string"});
var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"});
assert.equal(rtf1, rtf2);
});
});
describe('corner cases', function() {
it('output functions', function() {
var ws = X.utils.aoa_to_sheet([

3
types/index.d.ts vendored

@ -285,6 +285,9 @@ export interface WorkBook {
Workbook?: WBProps;
vbaraw?: any;
/** Original file type (when parsed with `read` or `readFile`) */
bookType?: BookType;
}
export interface SheetProps {

@ -7845,7 +7845,11 @@ function dbf_to_sheet(buf, opts)/*:Worksheet*/ {
}
function dbf_to_workbook(buf, opts)/*:Workbook*/ {
try { return sheet_to_workbook(dbf_to_sheet(buf, opts), opts); }
try {
var o = sheet_to_workbook(dbf_to_sheet(buf, opts), opts);
o.bookType = "dbf";
return o;
}
catch(e) { if(opts && opts.WTF) throw e; }
return ({SheetNames:[],Sheets:{}});
}
@ -8156,6 +8160,7 @@ var SYLK = /*#__PURE__*/(function() {
keys(ws).forEach(function(k) { o[k] = ws[k]; });
var outwb = sheet_to_workbook(o, opts);
keys(wb).forEach(function(k) { outwb[k] = wb[k]; });
outwb.bookType = "sylk";
return outwb;
}
@ -8274,7 +8279,11 @@ var DIF = /*#__PURE__*/(function() {
}
function dif_to_sheet(str/*:string*/, opts)/*:Worksheet*/ { return aoa_to_sheet(dif_to_aoa(str, opts), opts); }
function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { return sheet_to_workbook(dif_to_sheet(str, opts), opts); }
/* Parse a DIF string into a single-sheet workbook and tag it with its source format */
function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
	var sheet = dif_to_sheet(str, opts);
	var book = sheet_to_workbook(sheet, opts);
	book.bookType = "dif";
	return book;
}
var sheet_to_dif = /*#__PURE__*/(function() {
var push_field = function pf(o/*:Array<string>*/, topic/*:string*/, v/*:number*/, n/*:number*/, s/*:string*/) {
@ -10322,83 +10331,102 @@ function parse_FilePass(blob, length/*:number*/, opts) {
}
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ {
switch(opts.type) {
case 'base64': return rtf_to_book_str(Base64_decode(d), opts);
case 'binary': return rtf_to_book_str(d, opts);
case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
case 'array': return rtf_to_book_str(cc2str(d), opts);
}
throw new Error("Unrecognized type " + opts.type);
/**
 * Normalize raw RTF input to a binary string according to `opts.type`
 * and hand it to `rtf_to_sheet_str`.
 *
 * @param d    raw data; its expected representation is selected by `opts.type`
 * @param opts parse options; `opts.type` must be "base64", "binary", "buffer" or "array"
 * @returns the worksheet produced by `rtf_to_sheet_str`
 * @throws {Error} "Unrecognized type ..." for any other `opts.type`
 */
function rtf_to_sheet(d, opts) {
  var kind = opts.type;
  var text;
  if (kind === "base64") {
    text = Base64_decode(d);
  } else if (kind === "binary") {
    // already a binary string
    text = d;
  } else if (kind === "buffer") {
    // use the Buffer fast path when available, otherwise convert the array-like
    text = has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d);
  } else if (kind === "array") {
    text = cc2str(d);
  } else {
    throw new Error("Unrecognized type " + kind);
  }
  return rtf_to_sheet_str(text, opts);
}
/* TODO: RTF technically can store multiple tables, even if Excel does not */
function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ {
var o = opts || {};
var sname = o.sheet || "Sheet1";
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} };
wb.Sheets[sname] = ws;
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: payload.join(""), t:"s"};
if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; }
else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); }
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
}
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return wb;
/**
 * Build a worksheet from the table rows found in an RTF string.
 *
 * Every `\trowd ... \row` span becomes one worksheet row and every `\cell`
 * control word closes one cell. The worksheet is an array when `opts.dense`
 * is truthy and a plain object keyed by cell address otherwise.
 *
 * @param str  RTF document text
 * @param opts optional parse options (`dense`, `cellText` are read here)
 * @returns the worksheet, with `!ref` set to the covered range
 * @throws {Error} "RTF missing table" when no row span is found
 */
function rtf_to_sheet_str(str, opts) {
var o = opts || {};
var ws = o.dense ? [] : {};
// one match per table row; `match` yields null when nothing is found
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if (!rows)
throw new Error("RTF missing table");
// the row extent is known up front; the column extent grows as cells are seen
var range = { s: { c: 0, r: 0 }, e: { c: 0, r: rows.length - 1 } };
rows.forEach(function(rowtf, R) {
if (Array.isArray(ws))
ws[R] = [];
// fresh global regex per row: exec() advances through control words via lastIndex
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
// C is the index of the most recently closed cell (-1 = none yet)
var C = -1;
// text fragments collected for the cell currently being read
var payload = [];
while ((res = rtfre.exec(rowtf)) != null) {
// text between the previous control word and the start of this one
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
// drop a single leading space (char code 32)
if (data.charCodeAt(0) == 32)
data = data.slice(1);
if (data.length)
payload.push(data);
switch (res[0]) {
case "\\cell":
++C;
// cells with no collected text are left unset
if (payload.length) {
var cell = { v: payload.join(""), t: "s" };
// literal TRUE / FALSE become booleans
if (cell.v == "TRUE" || cell.v == "FALSE") {
cell.v = cell.v == "TRUE";
cell.t = "b";
} else if (!isNaN(fuzzynum(cell.v))) {
// numeric text: keep the original text as `w` unless cellText is false
cell.t = "n";
if (o.cellText !== false)
cell.w = cell.v;
cell.v = fuzzynum(cell.v);
}
if (Array.isArray(ws))
ws[R][C] = cell;
else
ws[encode_cell({ r: R, c: C })] = cell;
}
payload = [];
break;
case "\\par":
// paragraph break inside a cell is stored as a newline
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
}
// widen the range to the widest row seen so far
if (C > range.e.c)
range.e.c = C;
});
ws["!ref"] = encode_range(range);
return ws;
}
/* TODO: standardize sheet names as titles for tables */
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
var o = ["{\\rtf1\\ansi"];
var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
var dense = Array.isArray(ws);
for(var R = r.s.r; R <= r.e.r; ++R) {
o.push("\\trowd\\trautofit1");
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
o.push("\\pard\\intbl");
for(C = r.s.c; C <= r.e.c; ++C) {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
}
return o.join("") + "}";
/**
 * Parse raw RTF data into a single-sheet workbook.
 *
 * @param d    raw data, interpreted according to `opts.type` by `rtf_to_sheet`
 * @param opts parse options, forwarded to `rtf_to_sheet` and `sheet_to_workbook`
 * @returns the workbook, with `bookType` set to "rtf"
 */
function rtf_to_workbook(d, opts) {
  var sheet = rtf_to_sheet(d, opts);
  var book = sheet_to_workbook(sheet, opts);
  book.bookType = "rtf";
  return book;
}
/**
 * Serialize a worksheet as an RTF document containing one table.
 *
 * A worksheet without `!ref` yields an RTF document with no table rows.
 * Every cell position inside the range emits a `\cell`, including blank ones,
 * so column positions are kept.
 *
 * @param ws   worksheet (array form or address-keyed object form)
 * @param opts unused here
 * @returns the RTF text
 */
function sheet_to_rtf(ws, opts) {
  var header = "{\\rtf1\\ansi";
  if (!ws["!ref"]) return header + "}";
  var bounds = safe_decode_range(ws["!ref"]);
  var is_dense = Array.isArray(ws);
  var parts = [header];
  for (var row = bounds.s.r; row <= bounds.e.r; ++row) {
    var col;
    // row definition: one \cellx entry per column in the range
    parts.push("\\trowd\\trautofit1");
    for (col = bounds.s.c; col <= bounds.e.c; ++col) {
      parts.push("\\cellx" + (col + 1));
    }
    parts.push("\\pard\\intbl");
    // row content
    for (col = bounds.s.c; col <= bounds.e.c; ++col) {
      var addr = encode_cell({ r: row, c: col });
      var cell = is_dense ? (ws[row] || [])[col] : ws[addr];
      var is_blank = !cell || (cell.v == null && (!cell.f || cell.F));
      if (is_blank) {
        parts.push(" \\cell");
        continue;
      }
      // prefer the existing formatted text; otherwise let format_cell populate cell.w
      var text = cell.w;
      if (!text) {
        format_cell(cell);
        text = cell.w || "";
      }
      // each CR or LF character becomes a paragraph control word
      parts.push(" " + text.replace(/[\r\n]/g, "\\par ") + "\\cell");
    }
    parts.push("\\pard\\intbl\\row");
  }
  return parts.join("") + "}";
}
function hex2RGB(h) {
var o = h.slice(h[0]==="#"?1:0).slice(0,6);
@ -18728,6 +18756,7 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ {
out.SSF = dup(table_fmt);
out.Props = Props;
out.Custprops = Custprops;
out.bookType = "xlml";
return out;
}
@ -21619,9 +21648,14 @@ var HTML_END = '</body></html>';
function html_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
var mtch = str.match(/<table[\s\S]*?>[\s\S]*?<\/table>/gi);
if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find <table>");
if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
if(mtch.length == 1) {
var w = sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
w.bookType = "html";
return w;
}
var wb = book_new();
mtch.forEach(function(s, idx) { book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); });
wb.bookType = "html";
return wb;
}
@ -21737,7 +21771,9 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
}
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
return sheet_to_workbook(parse_dom_table(table, opts), opts);
var o = sheet_to_workbook(parse_dom_table(table, opts), opts);
//o.bookType = "dom"; // TODO: define a type for this
return o;
}
function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ {
@ -22528,10 +22564,13 @@ function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(!content) throw new Error("Missing content.xml in ODS / UOF file");
var wb = parse_content_xml(utf8read(content), opts, Styles);
if(safegetzipfile(zip, 'meta.xml')) wb.Props = parse_core_props(getzipdata(zip, 'meta.xml'));
wb.bookType = "ods";
return wb;
}
function parse_fods(data/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
return parse_content_xml(data, opts);
var wb = parse_content_xml(data, opts);
wb.bookType = "fods";
return wb;
}
/* OpenDocument */
@ -23413,7 +23452,7 @@ function parse_old_storage(buf, sst, rsst, v) {
var ret;
switch (buf[2]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: ieee };
break;
@ -23473,7 +23512,7 @@ function parse_new_storage(buf, sst, rsst) {
var ret;
switch (buf[1]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: d128 };
break;
@ -23778,6 +23817,7 @@ function parse_TN_DocumentArchive(M, root) {
});
if (out.SheetNames.length == 0)
throw new Error("Empty NUMBERS file");
out.bookType = "numbers";
return out;
}
function parse_numbers_iwa(cfb) {
@ -23978,6 +24018,8 @@ function write_numbers_iwa(wb, opts) {
throw new Error("Too many messages");
}
var entry = CFB.find(cfb, dependents[1].location);
if (!entry)
throw "Could not find ".concat(dependents[1].location, " in Numbers template");
var x = parse_iwa_file(decompress_iwa_file(entry.content));
var docroot;
for (var xi = 0; xi < x.length; ++xi) {
@ -23985,8 +24027,12 @@ function write_numbers_iwa(wb, opts) {
if (packet.id == 1)
docroot = packet;
}
if (docroot == null)
throw "Could not find message ".concat(1, " in Numbers template");
var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -24002,6 +24048,8 @@ function write_numbers_iwa(wb, opts) {
entry.size = entry.content.length;
sheetrootref = parse_TSP_Reference(sheetref[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -24010,6 +24058,8 @@ function write_numbers_iwa(wb, opts) {
}
sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -24022,6 +24072,8 @@ function write_numbers_iwa(wb, opts) {
pb[7][0].data = write_varint49(range.e.c + 1);
var cruidsref = parse_TSP_Reference(pb[46][0].data);
var oldbucket = CFB.find(cfb, dependents[cruidsref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
for (var j = 0; j < _x.length; ++j) {
@ -24064,6 +24116,8 @@ function write_numbers_iwa(wb, opts) {
// Row header storage: resolve the referenced bucket inside the template container.
var row_headers = parse_shallow(store[1][0].data);
var row_header_ref = parse_TSP_Reference(row_headers[2][0].data);
oldbucket = CFB.find(cfb, dependents[row_header_ref].location);
// Report the location that was actually looked up (row_header_ref), not the earlier cruidsref lookup.
if (!oldbucket)
  throw "Could not find ".concat(dependents[row_header_ref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != row_header_ref)
@ -24082,6 +24136,8 @@ function write_numbers_iwa(wb, opts) {
oldbucket.size = oldbucket.content.length;
// Column header storage: resolve the referenced bucket inside the template container.
var col_header_ref = parse_TSP_Reference(store[2][0].data);
oldbucket = CFB.find(cfb, dependents[col_header_ref].location);
// Report the location that was actually looked up (col_header_ref), not the earlier cruidsref lookup.
if (!oldbucket)
  throw "Could not find ".concat(dependents[col_header_ref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != col_header_ref)
@ -24126,6 +24182,8 @@ function write_numbers_iwa(wb, opts) {
var sstref = parse_TSP_Reference(store[4][0].data);
(function() {
var sentry = CFB.find(cfb, dependents[sstref].location);
if (!sentry)
throw "Could not find ".concat(dependents[sstref].location, " in Numbers template");
var sx = parse_iwa_file(decompress_iwa_file(sentry.content));
var sstroot;
for (var sxi = 0; sxi < sx.length; ++sxi) {
@ -24133,6 +24191,8 @@ function write_numbers_iwa(wb, opts) {
if (packet2.id == sstref)
sstroot = packet2;
}
if (sstroot == null)
throw "Could not find message ".concat(sstref, " in Numbers template");
var sstdata = parse_shallow(sstroot.messages[0].data);
{
sstdata[3] = [];
@ -24158,6 +24218,8 @@ function write_numbers_iwa(wb, opts) {
var tileref = parse_TSP_Reference(tl[2][0].data);
(function() {
var tentry = CFB.find(cfb, dependents[tileref].location);
if (!tentry)
throw "Could not find ".concat(dependents[tileref].location, " in Numbers template");
var tx = parse_iwa_file(decompress_iwa_file(tentry.content));
var tileroot;
for (var sxi = 0; sxi < tx.length; ++sxi) {
@ -24496,6 +24558,8 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(dir.vba.length > 0) out.vbaraw = getzipdata(zip,strip_front_slash(dir.vba[0]),true);
else if(dir.defaults && dir.defaults.bin === CT_VBA) out.vbaraw = getzipdata(zip, 'xl/vbaProject.bin',true);
}
// TODO: pass back content types metadata for xlsm/xlsx resolution
out.bookType = xlsb ? "xlsb" : "xlsx";
return out;
}

242
xlsx.js generated

@ -7755,7 +7755,11 @@ function dbf_to_sheet(buf, opts) {
}
function dbf_to_workbook(buf, opts) {
try { return sheet_to_workbook(dbf_to_sheet(buf, opts), opts); }
try {
var o = sheet_to_workbook(dbf_to_sheet(buf, opts), opts);
o.bookType = "dbf";
return o;
}
catch(e) { if(opts && opts.WTF) throw e; }
return ({SheetNames:[],Sheets:{}});
}
@ -8066,6 +8070,7 @@ var SYLK = (function() {
keys(ws).forEach(function(k) { o[k] = ws[k]; });
var outwb = sheet_to_workbook(o, opts);
keys(wb).forEach(function(k) { outwb[k] = wb[k]; });
outwb.bookType = "sylk";
return outwb;
}
@ -8184,7 +8189,11 @@ var DIF = (function() {
}
function dif_to_sheet(str, opts) { return aoa_to_sheet(dif_to_aoa(str, opts), opts); }
function dif_to_workbook(str, opts) { return sheet_to_workbook(dif_to_sheet(str, opts), opts); }
function dif_to_workbook(str, opts) {
var o = sheet_to_workbook(dif_to_sheet(str, opts), opts);
o.bookType = "dif";
return o;
}
var sheet_to_dif = (function() {
var push_field = function pf(o, topic, v, n, s) {
@ -10231,88 +10240,103 @@ function parse_FilePass(blob, length, opts) {
}
var RTF = (function() {
	/*
	 * Decode raw RTF input according to `opts.type` ('base64', 'binary',
	 * 'buffer' or 'array') and parse the resulting string into a worksheet.
	 * Throws an Error for any other `opts.type`.
	 */
	function rtf_to_sheet(d, opts) {
		switch(opts.type) {
			case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts);
			case 'binary': return rtf_to_sheet_str(d, opts);
			case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
			case 'array': return rtf_to_sheet_str(cc2str(d), opts);
		}
		throw new Error("Unrecognized type " + opts.type);
	}

	/* TODO: this is a stub */
	/*
	 * Parse every `\trowd ... \row` run in `str` into one worksheet row.
	 * Text preceding each `\cell` control word becomes a string cell.
	 * `opts.dense` selects array-of-arrays storage instead of address keys.
	 * Throws Error("RTF missing table") when the input contains no table row.
	 */
	function rtf_to_sheet_str(str, opts) {
		var o = opts || {};
		var ws = o.dense ? ([]) : ({});
		var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
		/* String#match returns null (not an empty array) when nothing matches */
		if(!rows || !rows.length) throw new Error("RTF missing table");
		var range = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}});
		rows.forEach(function(rowtf, R) {
			if(Array.isArray(ws)) ws[R] = [];
			var rtfre = /\\[\w\-]+\b/g;
			var last_index = 0;
			var res;
			var C = -1;
			var payload = [];
			while((res = rtfre.exec(rowtf))) {
				/* text between the previous control word and this one */
				var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
				/* drop the single space that delimits a control word from its text */
				if(data.charCodeAt(0) == 0x20) data = data.slice(1);
				if(data.length) payload.push(data);
				switch(res[0]) {
					case "\\cell":
						++C;
						if(payload.length) {
							// TODO: value parsing, including codepage adjustments
							var cell = {v: payload.join(""), t:"s"};
							if(Array.isArray(ws)) ws[R][C] = cell;
							else ws[encode_cell({r:R, c:C})] = cell;
						}
						payload = [];
						break;
					case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
						payload.push("\n");
						break;
				}
				last_index = rtfre.lastIndex;
			}
			if(C > range.e.c) range.e.c = C;
		});
		ws['!ref'] = encode_range(range);
		return ws;
	}

	/* Parse RTF input and wrap the resulting worksheet with sheet_to_workbook. */
	function rtf_to_workbook(d, opts) { return sheet_to_workbook(rtf_to_sheet(d, opts), opts); }

	/* TODO: this is a stub */
	/*
	 * Serialize the cells inside `ws['!ref']` as an RTF table, one
	 * `\trowd ... \row` run per worksheet row.
	 */
	function sheet_to_rtf(ws) {
		var o = ["{\\rtf1\\ansi"];
		/* a worksheet without a range has nothing to emit: return an empty document */
		if(!ws['!ref']) return o[0] + "}";
		var r = safe_decode_range(ws['!ref']), cell;
		var dense = Array.isArray(ws);
		for(var R = r.s.r; R <= r.e.r; ++R) {
			o.push("\\trowd\\trautofit1");
			for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
			o.push("\\pard\\intbl");
			for(C = r.s.c; C <= r.e.c; ++C) {
				var coord = encode_cell({r:R,c:C});
				cell = dense ? (ws[R]||[])[C]: ws[coord];
				if(!cell || cell.v == null && (!cell.f || cell.F)) {
					/* emit an empty cell so later cells stay in their own columns */
					o.push(" \\cell");
					continue;
				}
				/* fall back to "" when formatting produces no text */
				o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par "));
				o.push("\\cell");
			}
			o.push("\\pard\\intbl\\row");
		}
		return o.join("") + "}";
	}

	return {
		to_workbook: rtf_to_workbook,
		to_sheet: rtf_to_sheet,
		from_sheet: sheet_to_rtf
	};
})();
/*
 * Convert raw RTF input to a string according to `opts.type`
 * ("base64", "binary", "buffer" or "array") and hand it to rtf_to_sheet_str.
 * Throws an Error for any other `opts.type`.
 */
function rtf_to_sheet(d, opts) {
	var kind = opts.type;
	var text;
	if (kind === "base64") text = Base64_decode(d);
	else if (kind === "binary") text = d;
	else if (kind === "buffer") text = has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d);
	else if (kind === "array") text = cc2str(d);
	else throw new Error("Unrecognized type " + kind);
	return rtf_to_sheet_str(text, opts);
}
/*
 * Parse every `\trowd ... \row` run of an RTF string into one worksheet row.
 * Text collected before each `\cell` control word becomes a cell: "TRUE" and
 * "FALSE" become booleans, text accepted by fuzzynum becomes a number (keeping
 * the original text in `w` unless `opts.cellText` is false), anything else
 * stays a string. `opts.dense` selects array-of-arrays storage.
 * Throws Error("RTF missing table") when no table row is present.
 */
function rtf_to_sheet_str(str, opts) {
	var options = opts || {};
	var sheet = options.dense ? [] : {};
	var is_dense = Array.isArray(sheet);
	var row_chunks = str.match(/\\trowd[\s\S]*?\\row\b/g);
	if (!row_chunks) throw new Error("RTF missing table");
	var last_col = 0;
	for (var R = 0; R < row_chunks.length; ++R) {
		var rowtf = row_chunks[R];
		if (is_dense) sheet[R] = [];
		var control_re = /\\[\w\-]+\b/g;
		var prev_end = 0;
		var C = -1;
		var pieces = [];
		var hit;
		while ((hit = control_re.exec(rowtf)) != null) {
			/* text sitting between the previous control word and this one */
			var text = rowtf.slice(prev_end, hit.index);
			/* a single leading space only delimits the preceding control word */
			if (text.charCodeAt(0) == 32) text = text.slice(1);
			if (text.length) pieces.push(text);
			prev_end = control_re.lastIndex;
			if (hit[0] === "\\par") {
				pieces.push("\n");
				continue;
			}
			if (hit[0] !== "\\cell") continue;
			++C;
			if (pieces.length) {
				var raw = pieces.join("");
				var cell;
				if (raw == "TRUE" || raw == "FALSE") {
					cell = { v: raw == "TRUE", t: "b" };
				} else {
					var num = fuzzynum(raw);
					if (isNaN(num)) {
						cell = { v: raw, t: "s" };
					} else {
						cell = { v: num, t: "n" };
						if (options.cellText !== false) cell.w = raw;
					}
				}
				if (is_dense) sheet[R][C] = cell;
				else sheet[encode_cell({ r: R, c: C })] = cell;
			}
			pieces = [];
		}
		if (C > last_col) last_col = C;
	}
	sheet["!ref"] = encode_range({ s: { c: 0, r: 0 }, e: { c: last_col, r: row_chunks.length - 1 } });
	return sheet;
}
/*
 * Parse RTF input into a worksheet, wrap it with sheet_to_workbook and
 * record "rtf" as the workbook's original book type.
 */
function rtf_to_workbook(d, opts) {
	var sheet = rtf_to_sheet(d, opts);
	var book = sheet_to_workbook(sheet, opts);
	book.bookType = "rtf";
	return book;
}
/*
 * Serialize the cells inside `ws["!ref"]` as an RTF table, one
 * `\trowd ... \row` run per worksheet row. A worksheet without a range
 * produces an empty RTF document. Empty cells are written as a bare `\cell`
 * so that later cells keep their column. `opts` is currently unused.
 */
function sheet_to_rtf(ws, opts) {
	var header = "{\\rtf1\\ansi";
	if (!ws["!ref"]) return header + "}";
	var range = safe_decode_range(ws["!ref"]);
	var is_dense = Array.isArray(ws);
	var out = [header];
	var C;
	for (var R = range.s.r; R <= range.e.r; ++R) {
		out.push("\\trowd\\trautofit1");
		/* one \cellxN boundary per column in the range */
		for (C = range.s.c; C <= range.e.c; ++C) out.push("\\cellx" + (C + 1));
		out.push("\\pard\\intbl");
		for (C = range.s.c; C <= range.e.c; ++C) {
			var cell = is_dense ? (ws[R] || [])[C] : ws[encode_cell({ r: R, c: C })];
			var is_blank = !cell || (cell.v == null && (!cell.f || cell.F));
			if (is_blank) {
				out.push(" \\cell");
				continue;
			}
			/* prefer cached text; otherwise ask format_cell to populate cell.w */
			var text = cell.w || (format_cell(cell), cell.w) || "";
			out.push(" " + text.replace(/[\r\n]/g, "\\par ") + "\\cell");
		}
		out.push("\\pard\\intbl\\row");
	}
	return out.join("") + "}";
}
function hex2RGB(h) {
var o = h.slice(h[0]==="#"?1:0).slice(0,6);
return [parseInt(o.slice(0,2),16),parseInt(o.slice(2,4),16),parseInt(o.slice(4,6),16)];
@ -18631,6 +18655,7 @@ Workbook.WBProps.date1904 = true;
out.SSF = dup(table_fmt);
out.Props = Props;
out.Custprops = Custprops;
out.bookType = "xlml";
return out;
}
@ -21513,9 +21538,14 @@ var HTML_END = '</body></html>';
function html_to_workbook(str, opts) {
var mtch = str.match(/<table[\s\S]*?>[\s\S]*?<\/table>/gi);
if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find <table>");
if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
if(mtch.length == 1) {
var w = sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
w.bookType = "html";
return w;
}
var wb = book_new();
mtch.forEach(function(s, idx) { book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); });
wb.bookType = "html";
return wb;
}
@ -21631,7 +21661,9 @@ function parse_dom_table(table, _opts) {
}
function table_to_book(table, opts) {
return sheet_to_workbook(parse_dom_table(table, opts), opts);
var o = sheet_to_workbook(parse_dom_table(table, opts), opts);
//o.bookType = "dom"; // TODO: define a type for this
return o;
}
function is_dom_element_hidden(element) {
@ -22422,10 +22454,13 @@ function parse_ods(zip, opts) {
if(!content) throw new Error("Missing content.xml in ODS / UOF file");
var wb = parse_content_xml(utf8read(content), opts, Styles);
if(safegetzipfile(zip, 'meta.xml')) wb.Props = parse_core_props(getzipdata(zip, 'meta.xml'));
wb.bookType = "ods";
return wb;
}
function parse_fods(data, opts) {
return parse_content_xml(data, opts);
var wb = parse_content_xml(data, opts);
wb.bookType = "fods";
return wb;
}
/* OpenDocument */
@ -23307,7 +23342,7 @@ function parse_old_storage(buf, sst, rsst, v) {
var ret;
switch (buf[2]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: ieee };
break;
@ -23367,7 +23402,7 @@ function parse_new_storage(buf, sst, rsst) {
var ret;
switch (buf[1]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: d128 };
break;
@ -23672,6 +23707,7 @@ function parse_TN_DocumentArchive(M, root) {
});
if (out.SheetNames.length == 0)
throw new Error("Empty NUMBERS file");
out.bookType = "numbers";
return out;
}
function parse_numbers_iwa(cfb) {
@ -23872,6 +23908,8 @@ function write_numbers_iwa(wb, opts) {
throw new Error("Too many messages");
}
var entry = CFB.find(cfb, dependents[1].location);
if (!entry)
throw "Could not find ".concat(dependents[1].location, " in Numbers template");
var x = parse_iwa_file(decompress_iwa_file(entry.content));
var docroot;
for (var xi = 0; xi < x.length; ++xi) {
@ -23879,8 +23917,12 @@ function write_numbers_iwa(wb, opts) {
if (packet.id == 1)
docroot = packet;
}
if (docroot == null)
throw "Could not find message ".concat(1, " in Numbers template");
var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -23896,6 +23938,8 @@ function write_numbers_iwa(wb, opts) {
entry.size = entry.content.length;
sheetrootref = parse_TSP_Reference(sheetref[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -23904,6 +23948,8 @@ function write_numbers_iwa(wb, opts) {
}
sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -23916,6 +23962,8 @@ function write_numbers_iwa(wb, opts) {
pb[7][0].data = write_varint49(range.e.c + 1);
var cruidsref = parse_TSP_Reference(pb[46][0].data);
var oldbucket = CFB.find(cfb, dependents[cruidsref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
for (var j = 0; j < _x.length; ++j) {
@ -23958,6 +24006,8 @@ function write_numbers_iwa(wb, opts) {
var row_headers = parse_shallow(store[1][0].data);
var row_header_ref = parse_TSP_Reference(row_headers[2][0].data);
oldbucket = CFB.find(cfb, dependents[row_header_ref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != row_header_ref)
@ -23976,6 +24026,8 @@ function write_numbers_iwa(wb, opts) {
oldbucket.size = oldbucket.content.length;
var col_header_ref = parse_TSP_Reference(store[2][0].data);
oldbucket = CFB.find(cfb, dependents[col_header_ref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != col_header_ref)
@ -24020,6 +24072,8 @@ function write_numbers_iwa(wb, opts) {
var sstref = parse_TSP_Reference(store[4][0].data);
(function() {
var sentry = CFB.find(cfb, dependents[sstref].location);
if (!sentry)
throw "Could not find ".concat(dependents[sstref].location, " in Numbers template");
var sx = parse_iwa_file(decompress_iwa_file(sentry.content));
var sstroot;
for (var sxi = 0; sxi < sx.length; ++sxi) {
@ -24027,6 +24081,8 @@ function write_numbers_iwa(wb, opts) {
if (packet2.id == sstref)
sstroot = packet2;
}
if (sstroot == null)
throw "Could not find message ".concat(sstref, " in Numbers template");
var sstdata = parse_shallow(sstroot.messages[0].data);
{
sstdata[3] = [];
@ -24052,6 +24108,8 @@ function write_numbers_iwa(wb, opts) {
var tileref = parse_TSP_Reference(tl[2][0].data);
(function() {
var tentry = CFB.find(cfb, dependents[tileref].location);
if (!tentry)
throw "Could not find ".concat(dependents[tileref].location, " in Numbers template");
var tx = parse_iwa_file(decompress_iwa_file(tentry.content));
var tileroot;
for (var sxi = 0; sxi < tx.length; ++sxi) {
@ -24390,6 +24448,8 @@ function parse_zip(zip, opts) {
if(dir.vba.length > 0) out.vbaraw = getzipdata(zip,strip_front_slash(dir.vba[0]),true);
else if(dir.defaults && dir.defaults.bin === CT_VBA) out.vbaraw = getzipdata(zip, 'xl/vbaProject.bin',true);
}
// TODO: pass back content types metadata for xlsm/xlsx resolution
out.bookType = xlsb ? "xlsb" : "xlsx";
return out;
}
@ -24831,7 +24891,7 @@ function readSync(data, opts) {
}
break;
case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o);
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break;
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return rtf_to_workbook(d, o); break;
case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o);
case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break;
case 0x08: if(n[1] === 0xE7) throw new Error("Unsupported Multiplan 1.x file!"); break;
@ -24991,7 +25051,7 @@ function writeSync(wb, opts) {
case 'dif': return write_string_type(DIF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'dbf': return write_binary_type(DBF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'prn': return write_string_type(PRN.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'rtf': return write_string_type(RTF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'rtf': return write_string_type(sheet_to_rtf(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'eth': return write_string_type(ETH.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'fods': return write_string_type(write_ods(wb, o), o);
case 'wk1': return write_binary_type(WK_.sheet_to_wk1(wb.Sheets[wb.SheetNames[idx]], o), o);

228
xlsx.mjs generated

@ -7840,7 +7840,11 @@ function dbf_to_sheet(buf, opts)/*:Worksheet*/ {
}
function dbf_to_workbook(buf, opts)/*:Workbook*/ {
try { return sheet_to_workbook(dbf_to_sheet(buf, opts), opts); }
try {
var o = sheet_to_workbook(dbf_to_sheet(buf, opts), opts);
o.bookType = "dbf";
return o;
}
catch(e) { if(opts && opts.WTF) throw e; }
return ({SheetNames:[],Sheets:{}});
}
@ -8151,6 +8155,7 @@ var SYLK = /*#__PURE__*/(function() {
keys(ws).forEach(function(k) { o[k] = ws[k]; });
var outwb = sheet_to_workbook(o, opts);
keys(wb).forEach(function(k) { outwb[k] = wb[k]; });
outwb.bookType = "sylk";
return outwb;
}
@ -8269,7 +8274,11 @@ var DIF = /*#__PURE__*/(function() {
}
function dif_to_sheet(str/*:string*/, opts)/*:Worksheet*/ { return aoa_to_sheet(dif_to_aoa(str, opts), opts); }
function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { return sheet_to_workbook(dif_to_sheet(str, opts), opts); }
function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
var o = sheet_to_workbook(dif_to_sheet(str, opts), opts);
o.bookType = "dif";
return o;
}
var sheet_to_dif = /*#__PURE__*/(function() {
var push_field = function pf(o/*:Array<string>*/, topic/*:string*/, v/*:number*/, n/*:number*/, s/*:string*/) {
@ -10317,83 +10326,102 @@ function parse_FilePass(blob, length/*:number*/, opts) {
}
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ {
switch(opts.type) {
case 'base64': return rtf_to_book_str(Base64_decode(d), opts);
case 'binary': return rtf_to_book_str(d, opts);
case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
case 'array': return rtf_to_book_str(cc2str(d), opts);
}
throw new Error("Unrecognized type " + opts.type);
/*
 * Convert raw RTF input to a string according to `opts.type`
 * ("base64", "binary", "buffer" or "array") and hand it to rtf_to_sheet_str.
 * Throws an Error for any other `opts.type`.
 */
function rtf_to_sheet(d, opts) {
	var kind = opts.type;
	var text;
	if (kind === "base64") text = Base64_decode(d);
	else if (kind === "binary") text = d;
	else if (kind === "buffer") text = has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d);
	else if (kind === "array") text = cc2str(d);
	else throw new Error("Unrecognized type " + kind);
	return rtf_to_sheet_str(text, opts);
}
/* TODO: RTF technically can store multiple tables, even if Excel does not */
function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ {
var o = opts || {};
var sname = o.sheet || "Sheet1";
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} };
wb.Sheets[sname] = ws;
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: payload.join(""), t:"s"};
if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; }
else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); }
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
}
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return wb;
/*
 * Parse every `\trowd ... \row` run of an RTF string into one worksheet row.
 * Text collected before each `\cell` control word becomes a cell: "TRUE" and
 * "FALSE" become booleans, text accepted by fuzzynum becomes a number (keeping
 * the original text in `w` unless `opts.cellText` is false), anything else
 * stays a string. `opts.dense` selects array-of-arrays storage.
 * Throws Error("RTF missing table") when no table row is present.
 */
function rtf_to_sheet_str(str, opts) {
	var options = opts || {};
	var sheet = options.dense ? [] : {};
	var is_dense = Array.isArray(sheet);
	var row_chunks = str.match(/\\trowd[\s\S]*?\\row\b/g);
	if (!row_chunks) throw new Error("RTF missing table");
	var last_col = 0;
	for (var R = 0; R < row_chunks.length; ++R) {
		var rowtf = row_chunks[R];
		if (is_dense) sheet[R] = [];
		var control_re = /\\[\w\-]+\b/g;
		var prev_end = 0;
		var C = -1;
		var pieces = [];
		var hit;
		while ((hit = control_re.exec(rowtf)) != null) {
			/* text sitting between the previous control word and this one */
			var text = rowtf.slice(prev_end, hit.index);
			/* a single leading space only delimits the preceding control word */
			if (text.charCodeAt(0) == 32) text = text.slice(1);
			if (text.length) pieces.push(text);
			prev_end = control_re.lastIndex;
			if (hit[0] === "\\par") {
				pieces.push("\n");
				continue;
			}
			if (hit[0] !== "\\cell") continue;
			++C;
			if (pieces.length) {
				var raw = pieces.join("");
				var cell;
				if (raw == "TRUE" || raw == "FALSE") {
					cell = { v: raw == "TRUE", t: "b" };
				} else {
					var num = fuzzynum(raw);
					if (isNaN(num)) {
						cell = { v: raw, t: "s" };
					} else {
						cell = { v: num, t: "n" };
						if (options.cellText !== false) cell.w = raw;
					}
				}
				if (is_dense) sheet[R][C] = cell;
				else sheet[encode_cell({ r: R, c: C })] = cell;
			}
			pieces = [];
		}
		if (C > last_col) last_col = C;
	}
	sheet["!ref"] = encode_range({ s: { c: 0, r: 0 }, e: { c: last_col, r: row_chunks.length - 1 } });
	return sheet;
}
/* TODO: standardize sheet names as titles for tables */
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
var o = ["{\\rtf1\\ansi"];
var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
var dense = Array.isArray(ws);
for(var R = r.s.r; R <= r.e.r; ++R) {
o.push("\\trowd\\trautofit1");
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
o.push("\\pard\\intbl");
for(C = r.s.c; C <= r.e.c; ++C) {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
}
return o.join("") + "}";
/*
 * Parse RTF input into a worksheet, wrap it with sheet_to_workbook and
 * record "rtf" as the workbook's original book type.
 */
function rtf_to_workbook(d, opts) {
	var sheet = rtf_to_sheet(d, opts);
	var book = sheet_to_workbook(sheet, opts);
	book.bookType = "rtf";
	return book;
}
/*
 * Serialize the cells inside `ws["!ref"]` as an RTF table, one
 * `\trowd ... \row` run per worksheet row. A worksheet without a range
 * produces an empty RTF document. Empty cells are written as a bare `\cell`
 * so that later cells keep their column. `opts` is currently unused.
 */
function sheet_to_rtf(ws, opts) {
	var header = "{\\rtf1\\ansi";
	if (!ws["!ref"]) return header + "}";
	var range = safe_decode_range(ws["!ref"]);
	var is_dense = Array.isArray(ws);
	var out = [header];
	var C;
	for (var R = range.s.r; R <= range.e.r; ++R) {
		out.push("\\trowd\\trautofit1");
		/* one \cellxN boundary per column in the range */
		for (C = range.s.c; C <= range.e.c; ++C) out.push("\\cellx" + (C + 1));
		out.push("\\pard\\intbl");
		for (C = range.s.c; C <= range.e.c; ++C) {
			var cell = is_dense ? (ws[R] || [])[C] : ws[encode_cell({ r: R, c: C })];
			var is_blank = !cell || (cell.v == null && (!cell.f || cell.F));
			if (is_blank) {
				out.push(" \\cell");
				continue;
			}
			/* prefer cached text; otherwise ask format_cell to populate cell.w */
			var text = cell.w || (format_cell(cell), cell.w) || "";
			out.push(" " + text.replace(/[\r\n]/g, "\\par ") + "\\cell");
		}
		out.push("\\pard\\intbl\\row");
	}
	return out.join("") + "}";
}
function hex2RGB(h) {
var o = h.slice(h[0]==="#"?1:0).slice(0,6);
@ -18723,6 +18751,7 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ {
out.SSF = dup(table_fmt);
out.Props = Props;
out.Custprops = Custprops;
out.bookType = "xlml";
return out;
}
@ -21614,9 +21643,14 @@ var HTML_END = '</body></html>';
function html_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
var mtch = str.match(/<table[\s\S]*?>[\s\S]*?<\/table>/gi);
if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find <table>");
if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
if(mtch.length == 1) {
var w = sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
w.bookType = "html";
return w;
}
var wb = book_new();
mtch.forEach(function(s, idx) { book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); });
wb.bookType = "html";
return wb;
}
@ -21732,7 +21766,9 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
}
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
return sheet_to_workbook(parse_dom_table(table, opts), opts);
var o = sheet_to_workbook(parse_dom_table(table, opts), opts);
//o.bookType = "dom"; // TODO: define a type for this
return o;
}
function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ {
@ -22523,10 +22559,13 @@ function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(!content) throw new Error("Missing content.xml in ODS / UOF file");
var wb = parse_content_xml(utf8read(content), opts, Styles);
if(safegetzipfile(zip, 'meta.xml')) wb.Props = parse_core_props(getzipdata(zip, 'meta.xml'));
wb.bookType = "ods";
return wb;
}
function parse_fods(data/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
return parse_content_xml(data, opts);
var wb = parse_content_xml(data, opts);
wb.bookType = "fods";
return wb;
}
/* OpenDocument */
@ -23408,7 +23447,7 @@ function parse_old_storage(buf, sst, rsst, v) {
var ret;
switch (buf[2]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: ieee };
break;
@ -23468,7 +23507,7 @@ function parse_new_storage(buf, sst, rsst) {
var ret;
switch (buf[1]) {
case 0:
break;
return void 0;
case 2:
ret = { t: "n", v: d128 };
break;
@ -23773,6 +23812,7 @@ function parse_TN_DocumentArchive(M, root) {
});
if (out.SheetNames.length == 0)
throw new Error("Empty NUMBERS file");
out.bookType = "numbers";
return out;
}
function parse_numbers_iwa(cfb) {
@ -23973,6 +24013,8 @@ function write_numbers_iwa(wb, opts) {
throw new Error("Too many messages");
}
var entry = CFB.find(cfb, dependents[1].location);
if (!entry)
throw "Could not find ".concat(dependents[1].location, " in Numbers template");
var x = parse_iwa_file(decompress_iwa_file(entry.content));
var docroot;
for (var xi = 0; xi < x.length; ++xi) {
@ -23980,8 +24022,12 @@ function write_numbers_iwa(wb, opts) {
if (packet.id == 1)
docroot = packet;
}
if (docroot == null)
throw "Could not find message ".concat(1, " in Numbers template");
var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -23997,6 +24043,8 @@ function write_numbers_iwa(wb, opts) {
entry.size = entry.content.length;
sheetrootref = parse_TSP_Reference(sheetref[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -24005,6 +24053,8 @@ function write_numbers_iwa(wb, opts) {
}
sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data);
entry = CFB.find(cfb, dependents[sheetrootref].location);
if (!entry)
throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template");
x = parse_iwa_file(decompress_iwa_file(entry.content));
for (xi = 0; xi < x.length; ++xi) {
packet = x[xi];
@ -24017,6 +24067,8 @@ function write_numbers_iwa(wb, opts) {
pb[7][0].data = write_varint49(range.e.c + 1);
var cruidsref = parse_TSP_Reference(pb[46][0].data);
var oldbucket = CFB.find(cfb, dependents[cruidsref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
for (var j = 0; j < _x.length; ++j) {
@ -24059,6 +24111,8 @@ function write_numbers_iwa(wb, opts) {
var row_headers = parse_shallow(store[1][0].data);
var row_header_ref = parse_TSP_Reference(row_headers[2][0].data);
oldbucket = CFB.find(cfb, dependents[row_header_ref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != row_header_ref)
@ -24077,6 +24131,8 @@ function write_numbers_iwa(wb, opts) {
oldbucket.size = oldbucket.content.length;
var col_header_ref = parse_TSP_Reference(store[2][0].data);
oldbucket = CFB.find(cfb, dependents[col_header_ref].location);
if (!oldbucket)
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template");
_x = parse_iwa_file(decompress_iwa_file(oldbucket.content));
{
if (_x[0].id != col_header_ref)
@ -24121,6 +24177,8 @@ function write_numbers_iwa(wb, opts) {
var sstref = parse_TSP_Reference(store[4][0].data);
(function() {
var sentry = CFB.find(cfb, dependents[sstref].location);
if (!sentry)
throw "Could not find ".concat(dependents[sstref].location, " in Numbers template");
var sx = parse_iwa_file(decompress_iwa_file(sentry.content));
var sstroot;
for (var sxi = 0; sxi < sx.length; ++sxi) {
@ -24128,6 +24186,8 @@ function write_numbers_iwa(wb, opts) {
if (packet2.id == sstref)
sstroot = packet2;
}
if (sstroot == null)
throw "Could not find message ".concat(sstref, " in Numbers template");
var sstdata = parse_shallow(sstroot.messages[0].data);
{
sstdata[3] = [];
@ -24153,6 +24213,8 @@ function write_numbers_iwa(wb, opts) {
var tileref = parse_TSP_Reference(tl[2][0].data);
(function() {
var tentry = CFB.find(cfb, dependents[tileref].location);
if (!tentry)
throw "Could not find ".concat(dependents[tileref].location, " in Numbers template");
var tx = parse_iwa_file(decompress_iwa_file(tentry.content));
var tileroot;
for (var sxi = 0; sxi < tx.length; ++sxi) {
@ -24491,6 +24553,8 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(dir.vba.length > 0) out.vbaraw = getzipdata(zip,strip_front_slash(dir.vba[0]),true);
else if(dir.defaults && dir.defaults.bin === CT_VBA) out.vbaraw = getzipdata(zip, 'xl/vbaProject.bin',true);
}
// TODO: pass back content types metadata for xlsm/xlsx resolution
out.bookType = xlsb ? "xlsb" : "xlsx";
return out;
}