From ad1ce0d9b0d84afd0fb2737c87d45970972c2e66 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Fri, 8 Jul 2022 18:31:08 -0400 Subject: [PATCH] parsers expose original book type --- bits/40_harb.js | 13 ++- bits/45_rtf.js | 169 +++++++++++++++------------ bits/75_xlml.js | 1 + bits/79_html.js | 11 +- bits/80_parseods.js | 5 +- bits/83_numbers.js | 27 ++++- bits/85_parsezip.js | 2 + demos/README.md | 2 +- demos/array/Makefile | 7 -- demos/array/README.md | 129 +-------------------- demos/array/linest.js | 27 ----- demos/array/package.json | 6 - demos/array/tf.js | 78 ------------- modules/45_rtf.js | 97 ++++++++++++++++ modules/45_rtf.ts | 98 ++++++++++++++++ modules/83_numbers.js | 27 ++++- modules/83_numbers.ts | 44 ++++--- modules/Makefile | 2 +- test.js | 12 ++ test.mjs | 11 ++ test.mts | 38 ++++++ test.ts | 38 ++++++ testnocp.ts | 38 ++++++ tests/core.js | 29 +++++ types/index.d.ts | 3 + xlsx.flow.js | 228 +++++++++++++++++++++++------------- xlsx.js | 242 ++++++++++++++++++++++++--------------- xlsx.mjs | 228 +++++++++++++++++++++++------------- 28 files changed, 1010 insertions(+), 602 deletions(-) delete mode 100644 demos/array/Makefile delete mode 100644 demos/array/linest.js delete mode 100644 demos/array/package.json delete mode 100644 demos/array/tf.js create mode 100644 modules/45_rtf.js create mode 100644 modules/45_rtf.ts diff --git a/bits/40_harb.js b/bits/40_harb.js index de95b76..6e66051 100644 --- a/bits/40_harb.js +++ b/bits/40_harb.js @@ -235,7 +235,11 @@ function dbf_to_sheet(buf, opts)/*:Worksheet*/ { } function dbf_to_workbook(buf, opts)/*:Workbook*/ { - try { return sheet_to_workbook(dbf_to_sheet(buf, opts), opts); } + try { + var o = sheet_to_workbook(dbf_to_sheet(buf, opts), opts); + o.bookType = "dbf"; + return o; + } catch(e) { if(opts && opts.WTF) throw e; } return ({SheetNames:[],Sheets:{}}); } @@ -546,6 +550,7 @@ var SYLK = /*#__PURE__*/(function() { keys(ws).forEach(function(k) { o[k] = ws[k]; }); var outwb = sheet_to_workbook(o, opts); 
keys(wb).forEach(function(k) { outwb[k] = wb[k]; }); + outwb.bookType = "sylk"; return outwb; } @@ -664,7 +669,11 @@ var DIF = /*#__PURE__*/(function() { } function dif_to_sheet(str/*:string*/, opts)/*:Worksheet*/ { return aoa_to_sheet(dif_to_aoa(str, opts), opts); } - function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { return sheet_to_workbook(dif_to_sheet(str, opts), opts); } + function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { + var o = sheet_to_workbook(dif_to_sheet(str, opts), opts); + o.bookType = "dif"; + return o; + } var sheet_to_dif = /*#__PURE__*/(function() { var push_field = function pf(o/*:Array*/, topic/*:string*/, v/*:number*/, n/*:number*/, s/*:string*/) { diff --git a/bits/45_rtf.js b/bits/45_rtf.js index 3a1b7ee..dd4b42f 100644 --- a/bits/45_rtf.js +++ b/bits/45_rtf.js @@ -1,78 +1,97 @@ -function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { - switch(opts.type) { - case 'base64': return rtf_to_book_str(Base64_decode(d), opts); - case 'binary': return rtf_to_book_str(d, opts); - case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); - case 'array': return rtf_to_book_str(cc2str(d), opts); - } - throw new Error("Unrecognized type " + opts.type); +function rtf_to_sheet(d, opts) { + switch (opts.type) { + case "base64": + return rtf_to_sheet_str(Base64_decode(d), opts); + case "binary": + return rtf_to_sheet_str(d, opts); + case "buffer": + return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d), opts); + case "array": + return rtf_to_sheet_str(cc2str(d), opts); + } + throw new Error("Unrecognized type " + opts.type); } - -/* TODO: RTF technically can store multiple tables, even if Excel does not */ -function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ { - var o = opts || {}; - var sname = o.sheet || "Sheet1"; - var ws/*:Worksheet*/ = o.dense ? 
([]/*:any*/) : ({}/*:any*/); - var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} }; - wb.Sheets[sname] = ws; - - var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); - if(!rows.length) throw new Error("RTF missing table"); - var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); - rows.forEach(function(rowtf, R) { - if(Array.isArray(ws)) ws[R] = []; - var rtfre = /\\[\w\-]+\b/g; - var last_index = 0; - var res; - var C = -1; - var payload = []; - while((res = rtfre.exec(rowtf))) { - var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); - if(data.charCodeAt(0) == 0x20) data = data.slice(1); - if(data.length) payload.push(data); - switch(res[0]) { - case "\\cell": - ++C; - if(payload.length) { - // TODO: value parsing, including codepage adjustments - var cell = {v: payload.join(""), t:"s"}; - if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; } - else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); } - - if(Array.isArray(ws)) ws[R][C] = cell; - else ws[encode_cell({r:R, c:C})] = cell; - } - payload = []; - break; - case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" - payload.push("\n"); - break; - } - last_index = rtfre.lastIndex; - } - if(C > range.e.c) range.e.c = C; - }); - ws['!ref'] = encode_range(range); - return wb; +function rtf_to_sheet_str(str, opts) { + var o = opts || {}; + var ws = o.dense ? 
[] : {}; + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); + if (!rows) + throw new Error("RTF missing table"); + var range = { s: { c: 0, r: 0 }, e: { c: 0, r: rows.length - 1 } }; + rows.forEach(function(rowtf, R) { + if (Array.isArray(ws)) + ws[R] = []; + var rtfre = /\\[\w\-]+\b/g; + var last_index = 0; + var res; + var C = -1; + var payload = []; + while ((res = rtfre.exec(rowtf)) != null) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if (data.charCodeAt(0) == 32) + data = data.slice(1); + if (data.length) + payload.push(data); + switch (res[0]) { + case "\\cell": + ++C; + if (payload.length) { + var cell = { v: payload.join(""), t: "s" }; + if (cell.v == "TRUE" || cell.v == "FALSE") { + cell.v = cell.v == "TRUE"; + cell.t = "b"; + } else if (!isNaN(fuzzynum(cell.v))) { + cell.t = "n"; + if (o.cellText !== false) + cell.w = cell.v; + cell.v = fuzzynum(cell.v); + } + if (Array.isArray(ws)) + ws[R][C] = cell; + else + ws[encode_cell({ r: R, c: C })] = cell; + } + payload = []; + break; + case "\\par": + payload.push("\n"); + break; + } + last_index = rtfre.lastIndex; + } + if (C > range.e.c) + range.e.c = C; + }); + ws["!ref"] = encode_range(range); + return ws; } - -/* TODO: standardize sheet names as titles for tables */ -function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ { - var o = ["{\\rtf1\\ansi"]; - var r = safe_decode_range(ws['!ref']), cell/*:Cell*/; - var dense = Array.isArray(ws); - for(var R = r.s.r; R <= r.e.r; ++R) { - o.push("\\trowd\\trautofit1"); - for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); - o.push("\\pard\\intbl"); - for(C = r.s.c; C <= r.e.c; ++C) { - var coord = encode_cell({r:R,c:C}); - cell = dense ? 
(ws[R]||[])[C]: ws[coord]; - if(!cell || cell.v == null && (!cell.f || cell.F)) continue; - o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); - o.push("\\cell"); - } - o.push("\\pard\\intbl\\row"); - } - return o.join("") + "}"; +function rtf_to_workbook(d, opts) { + var wb = sheet_to_workbook(rtf_to_sheet(d, opts), opts); + wb.bookType = "rtf"; + return wb; +} +function sheet_to_rtf(ws, opts) { + var o = ["{\\rtf1\\ansi"]; + if (!ws["!ref"]) + return o[0] + "}"; + var r = safe_decode_range(ws["!ref"]), cell; + var dense = Array.isArray(ws); + for (var R = r.s.r; R <= r.e.r; ++R) { + o.push("\\trowd\\trautofit1"); + for (var C = r.s.c; C <= r.e.c; ++C) + o.push("\\cellx" + (C + 1)); + o.push("\\pard\\intbl"); + for (C = r.s.c; C <= r.e.c; ++C) { + var coord = encode_cell({ r: R, c: C }); + cell = dense ? (ws[R] || [])[C] : ws[coord]; + if (!cell || cell.v == null && (!cell.f || cell.F)) { + o.push(" \\cell"); + continue; + } + o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par ")); + o.push("\\cell"); + } + o.push("\\pard\\intbl\\row"); + } + return o.join("") + "}"; } diff --git a/bits/75_xlml.js b/bits/75_xlml.js index ee8e97d..6dad076 100644 --- a/bits/75_xlml.js +++ b/bits/75_xlml.js @@ -919,6 +919,7 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ { out.SSF = dup(table_fmt); out.Props = Props; out.Custprops = Custprops; + out.bookType = "xlml"; return out; } diff --git a/bits/79_html.js b/bits/79_html.js index a821ced..fbd4df4 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -97,9 +97,14 @@ var HTML_END = ''; function html_to_workbook(str/*:string*/, opts)/*:Workbook*/ { var mtch = str.match(/[\s\S]*?<\/table>/gi); if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find "); - if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts); + if(mtch.length == 1) { + var w = sheet_to_workbook(html_to_sheet(mtch[0], opts), opts); + w.bookType = "html"; 
+ return w; + } var wb = book_new(); mtch.forEach(function(s, idx) { book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); }); + wb.bookType = "html"; return wb; } @@ -215,7 +220,9 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { } function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ { - return sheet_to_workbook(parse_dom_table(table, opts), opts); + var o = sheet_to_workbook(parse_dom_table(table, opts), opts); + //o.bookType = "dom"; // TODO: define a type for this + return o; } function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ { diff --git a/bits/80_parseods.js b/bits/80_parseods.js index f6423a9..7daa790 100644 --- a/bits/80_parseods.js +++ b/bits/80_parseods.js @@ -770,9 +770,12 @@ function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/)/*:Workbook*/ { if(!content) throw new Error("Missing content.xml in ODS / UOF file"); var wb = parse_content_xml(utf8read(content), opts, Styles); if(safegetzipfile(zip, 'meta.xml')) wb.Props = parse_core_props(getzipdata(zip, 'meta.xml')); + wb.bookType = "ods"; return wb; } function parse_fods(data/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ { - return parse_content_xml(data, opts); + var wb = parse_content_xml(data, opts); + wb.bookType = "fods"; + return wb; } diff --git a/bits/83_numbers.js b/bits/83_numbers.js index c7594f6..e43e8e4 100644 --- a/bits/83_numbers.js +++ b/bits/83_numbers.js @@ -396,7 +396,7 @@ function parse_old_storage(buf, sst, rsst, v) { var ret; switch (buf[2]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: ieee }; break; @@ -456,7 +456,7 @@ function parse_new_storage(buf, sst, rsst) { var ret; switch (buf[1]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: d128 }; break; @@ -761,6 +761,7 @@ function parse_TN_DocumentArchive(M, root) { }); if (out.SheetNames.length == 0) throw new Error("Empty NUMBERS file"); + out.bookType = "numbers"; return out; } function parse_numbers_iwa(cfb) { @@ 
-961,6 +962,8 @@ function write_numbers_iwa(wb, opts) { throw new Error("Too many messages"); } var entry = CFB.find(cfb, dependents[1].location); + if (!entry) + throw "Could not find ".concat(dependents[1].location, " in Numbers template"); var x = parse_iwa_file(decompress_iwa_file(entry.content)); var docroot; for (var xi = 0; xi < x.length; ++xi) { @@ -968,8 +971,12 @@ function write_numbers_iwa(wb, opts) { if (packet.id == 1) docroot = packet; } + if (docroot == null) + throw "Could not find message ".concat(1, " in Numbers template"); var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -985,6 +992,8 @@ function write_numbers_iwa(wb, opts) { entry.size = entry.content.length; sheetrootref = parse_TSP_Reference(sheetref[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -993,6 +1002,8 @@ function write_numbers_iwa(wb, opts) { } sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -1005,6 +1016,8 @@ function write_numbers_iwa(wb, opts) { pb[7][0].data = write_varint49(range.e.c + 1); var cruidsref = parse_TSP_Reference(pb[46][0].data); var oldbucket = CFB.find(cfb, dependents[cruidsref].location); + if 
(!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { for (var j = 0; j < _x.length; ++j) { @@ -1047,6 +1060,8 @@ function write_numbers_iwa(wb, opts) { var row_headers = parse_shallow(store[1][0].data); var row_header_ref = parse_TSP_Reference(row_headers[2][0].data); oldbucket = CFB.find(cfb, dependents[row_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != row_header_ref) @@ -1065,6 +1080,8 @@ function write_numbers_iwa(wb, opts) { oldbucket.size = oldbucket.content.length; var col_header_ref = parse_TSP_Reference(store[2][0].data); oldbucket = CFB.find(cfb, dependents[col_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != col_header_ref) @@ -1109,6 +1126,8 @@ function write_numbers_iwa(wb, opts) { var sstref = parse_TSP_Reference(store[4][0].data); (function() { var sentry = CFB.find(cfb, dependents[sstref].location); + if (!sentry) + throw "Could not find ".concat(dependents[sstref].location, " in Numbers template"); var sx = parse_iwa_file(decompress_iwa_file(sentry.content)); var sstroot; for (var sxi = 0; sxi < sx.length; ++sxi) { @@ -1116,6 +1135,8 @@ function write_numbers_iwa(wb, opts) { if (packet2.id == sstref) sstroot = packet2; } + if (sstroot == null) + throw "Could not find message ".concat(sstref, " in Numbers template"); var sstdata = parse_shallow(sstroot.messages[0].data); { sstdata[3] = []; @@ -1141,6 +1162,8 @@ function write_numbers_iwa(wb, opts) { var tileref = parse_TSP_Reference(tl[2][0].data); (function() { var tentry = CFB.find(cfb, dependents[tileref].location); + if (!tentry) + throw "Could not find 
".concat(dependents[tileref].location, " in Numbers template"); var tx = parse_iwa_file(decompress_iwa_file(tentry.content)); var tileroot; for (var sxi = 0; sxi < tx.length; ++sxi) { diff --git a/bits/85_parsezip.js b/bits/85_parsezip.js index 9066232..6fc25d1 100644 --- a/bits/85_parsezip.js +++ b/bits/85_parsezip.js @@ -247,6 +247,8 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ { if(dir.vba.length > 0) out.vbaraw = getzipdata(zip,strip_front_slash(dir.vba[0]),true); else if(dir.defaults && dir.defaults.bin === CT_VBA) out.vbaraw = getzipdata(zip, 'xl/vbaProject.bin',true); } + // TODO: pass back content types metadata for xlsm/xlsx resolution + out.bookType = xlsb ? "xlsb" : "xlsx"; return out; } diff --git a/demos/README.md b/demos/README.md index 96c9d86..20d6926 100644 --- a/demos/README.md +++ b/demos/README.md @@ -20,7 +20,7 @@ can be installed with Bash on Windows or with `cygwin`. **JavaScript APIs** - [`XMLHttpRequest and fetch`](xhr/) - [`Clipboard Data`](https://docs.sheetjs.com/docs/getting-started/demos/clipboard) -- [`Typed Arrays and Math`](array/) +- [`Typed Arrays for Machine Learning`](https://docs.sheetjs.com/docs/getting-started/demos/ml) **Frameworks** - [`angularjs`](angular/) diff --git a/demos/array/Makefile b/demos/array/Makefile deleted file mode 100644 index abeba97..0000000 --- a/demos/array/Makefile +++ /dev/null @@ -1,7 +0,0 @@ -.PHONY: init -init: - npm i - -.PHONY: tfjs -tfjs: init - node tf.js diff --git a/demos/array/README.md b/demos/array/README.md index 18a50d3..dcf14b3 100644 --- a/demos/array/README.md +++ b/demos/array/README.md @@ -1,131 +1,6 @@ # Typed Arrays and Math -ECMAScript version 6 introduced Typed Arrays, array-like objects designed for -low-level optimizations and predictable operations. They are supported in most -modern browsers and form the basis of various APIs, including NodeJS Buffers, -WebGL buffers, WebAssembly, and tensors in linear algebra and math libraries. 
- -This demo covers conversions between worksheets and Typed Arrays. It also tries -to cover common numerical libraries that work with data arrays. - -Excel supports a subset of the IEEE754 Double precision floating point numbers, -but many libraries only support `Float32` Single precision values. `Math.fround` -rounds `Number` values to the nearest single-precision floating point value. - -## Working with Data in Typed Arrays - -Typed arrays are not true Array objects. The array of array utility functions -like `aoa_to_sheet` will not handle arrays of Typed Arrays. - -#### Exporting Typed Arrays to a Worksheet - -A single typed array can be converted to a pure JS array with `Array.from`: - -```js -var column = Array.from(dataset_typedarray); -``` - -`aoa_to_sheet` expects a row-major array of arrays. To export multiple data -sets, "transpose" the data: - -```js -/* assuming data is an array of typed arrays */ -var aoa = []; -for(var i = 0; i < data.length; ++i) { - for(var j = 0; j < data[i].length; ++j) { - if(!aoa[j]) aoa[j] = []; - aoa[j][i] = data[i][j]; - } -} -/* aoa can be directly converted to a worksheet object */ -var ws = XLSX.utils.aoa_to_sheet(aoa); -``` - -#### Importing Data from a Spreadsheet - -`sheet_to_json` with the option `header:1` will generate a row-major array of -arrays that can be transposed. 
However, it is more efficient to walk the sheet -manually: - -```js -/* find worksheet range */ -var range = XLSX.utils.decode_range(ws['!ref']); -var out = [] -/* walk the columns */ -for(var C = range.s.c; C <= range.e.c; ++C) { - /* create the typed array */ - var ta = new Float32Array(range.e.r - range.s.r + 1); - /* walk the rows */ - for(var R = range.s.r; R <= range.e.r; ++R) { - /* find the cell, skip it if the cell isn't numeric or boolean */ - var cell = ws[XLSX.utils.encode_cell({r:R, c:C})]; - if(!cell || cell.t != 'n' && cell.t != 'b') continue; - /* assign to the typed array */ - ta[R - range.s.r] = cell.v; - } - out.push(ta); -} -``` - -If the data set has a header row, the loop can be adjusted to skip those rows. - - -## Demos - -Each example focuses on single-variable linear regression. Sample worksheets -will start with a label row. The first column is the x-value and the second -column is the y-value. A sample spreadsheet can be generated randomly: - -```js -var aoo = []; -for(var i = 0; i < 100; ++i) aoo.push({x:i, y:2 * i + Math.random()}); -var ws = XLSX.utils.json_to_sheet(aoo); -var wb = XLSX.utils.book_new(); XLSX.utils.book_append_sheet(wb, ws, "Sheet1"); -XLSX.writeFile(wb, "linreg.xlsx"); -``` - -Some libraries provide utility functions that work with plain arrays of numbers. -When possible, they should be preferred over manual conversion. 
- -Reshaping raw float arrays and exporting to a worksheet is straightforward: - -```js -function array_to_sheet(farray, shape, headers) { - /* generate new AOA from the float array */ - var aoa = []; - for(var j = 0; j < shape[0]; ++j) { - aoa[j] = []; - for(var i = 0; i < shape[1]; ++i) aoa[j][i] = farray[j * shape[1] + i]; - } - - /* add headers and generate worksheet */ - if(headers) aoa.unshift(headers); - return XLSX.utils.aoa_to_sheet(aoa); -} -``` - -#### TensorFlow - -[TensorFlow](https://js.tensorflow.org/) `tensor` objects can be created from -arrays of arrays: - -```js -var tensor = tf.tensor2d(aoa).transpose(); -var col1 = tensor.slice([0,0], [1,tensor.shape[1]]).flatten(); -var col2 = tensor.slice([1,0], [1,tensor.shape[1]]).flatten(); -``` - -`stack` should be used to create the 2-d tensor for export: - -```js -var tensor = tf.stack([col1, col2]).transpose(); -var shape = tensor.shape; -var farray = tensor.dataSync(); -var ws = array_to_sheet(farray, shape, ["header1", "header2"]); -``` - -The demo generates a sample dataset and uses a simple linear predictor with -least-squares scoring to calculate regression coefficients. The tensors are -exported to a new file. +[The new demo](https://docs.sheetjs.com/docs/getting-started/demos/ml) includes +interactive examples as well as strategies for CSV and JS Array interchange. 
[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/js-xlsx?pixel)](https://github.com/SheetJS/js-xlsx) diff --git a/demos/array/linest.js b/demos/array/linest.js deleted file mode 100644 index 4d38b85..0000000 --- a/demos/array/linest.js +++ /dev/null @@ -1,27 +0,0 @@ -/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */ -/*global module, require, XLSX:true */ -if(typeof require !== 'undefined' && typeof XLSX === 'undefined') XLSX = require('xlsx'); - -function generate_random_file(n) { - if(!n) n = 100; - var aoo = []; - var x_ = 0, y_ = 0, xx = 0, xy = 0; - for(var i = 0; i < n; ++i) { - var y = Math.fround(2 * i + Math.random()); - aoo.push({x:i, y:y}); - x_ += i / n; y_ += y / n; xx += i*i; xy += i * y; - } - var m = Math.fround((xy - n * x_ * y_)/(xx - n * x_ * x_)); - console.log(m, Math.fround(y_ - m * x_), "JS Pre"); - var ws = XLSX.utils.json_to_sheet(aoo); - var wb = XLSX.utils.book_new(); - XLSX.utils.book_append_sheet(wb, ws, "Sheet1"); - ws = XLSX.utils.aoa_to_sheet([[2, 0]]); - XLSX.utils.sheet_set_array_formula(ws, "A1:B1", "LINEST(Sheet1!B2:B101,Sheet1!A2:A101)"); - XLSX.utils.book_append_sheet(wb, ws, "Sheet2"); - - XLSX.writeFile(wb, "linreg.xlsx"); -} -if(typeof module !== 'undefined') module.exports = { - generate_random_file: generate_random_file -}; diff --git a/demos/array/package.json b/demos/array/package.json deleted file mode 100644 index 2630a5b..0000000 --- a/demos/array/package.json +++ /dev/null @@ -1,6 +0,0 @@ -{ - "dependencies": { - "@tensorflow/tfjs": "^3.16.0", - "xlsx": "https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz" - } -} diff --git a/demos/array/tf.js b/demos/array/tf.js deleted file mode 100644 index 3452a77..0000000 --- a/demos/array/tf.js +++ /dev/null @@ -1,78 +0,0 @@ -/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */ -/* eslint-env node */ -var XLSX = require('xlsx'); -var tf = require('@tensorflow/tfjs'); -var linest = require('./linest'); - -/* generate linreg.xlsx with 100 
random points */ -var N = 100; -linest.generate_random_file(N); - -/* get the first worksheet as an array of arrays, skip the first row */ -var wb = XLSX.readFile('linreg.xlsx'); -var ws = wb.Sheets[wb.SheetNames[0]]; -var aoa = XLSX.utils.sheet_to_json(ws, {header:1, raw:true}).slice(1); - -/* calculate the coefficients in JS */ -(function(aoa) { - var x_ = 0, y_ = 0, xx = 0, xy = 0, n = aoa.length; - for(var i = 0; i < n; ++i) { - x_ += aoa[i][0] / n; - y_ += aoa[i][1] / n; - xx += aoa[i][0] * aoa[i][0]; - xy += aoa[i][0] * aoa[i][1]; - } - var m = Math.fround((xy - n * x_ * y_)/(xx - n * x_ * x_)); - console.log(m, Math.fround(y_ - m * x_), "JS Post"); -})(aoa); - -/* build X and Y vectors */ -var tensor = tf.tensor2d(aoa).transpose(); -console.log(tensor.shape); -var xs = tensor.slice([0,0], [1,tensor.shape[1]]).flatten(); -var ys = tensor.slice([1,0], [1,tensor.shape[1]]).flatten(); - -/* set up variables with initial guess */ -var x_ = xs.mean().dataSync()[0]; -var y_ = ys.mean().dataSync()[0]; -var a = tf.variable(tf.scalar(y_/x_)); -var b = tf.variable(tf.scalar(Math.random())); - -/* linear predictor */ -function predict(x) { return tf.tidy(function() { return a.mul(x).add(b); }); } -/* mean square scoring */ -function loss(yh, y) { return yh.sub(y).square().mean(); } - -/* train */ -for(var j = 0; j < 5; ++j) { - var learning_rate = 0.0001 /(j+1), iterations = 1000; - var optimizer = tf.train.sgd(learning_rate); - - for(var i = 0; i < iterations; ++i) optimizer.minimize(function() { - var pred = predict(xs); - var L = loss(pred, ys); - return L - }); - - /* compute the coefficient */ - var m = a.dataSync()[0], b_ = b.dataSync()[0]; - console.log(m, b_, "TF " + iterations * (j+1)); -} - -/* export data to aoa */ -var yh = predict(xs); -var tfdata = tf.stack([xs, ys, yh]).transpose(); -var shape = tfdata.shape; -var tfarr = tfdata.dataSync(); -var tfaoa = []; -for(j = 0; j < shape[0]; ++j) { - tfaoa[j] = []; - for(i = 0; i < shape[1]; ++i) tfaoa[j][i] = 
tfarr[j * shape[1] + i]; -} - -/* add headers and export */ -tfaoa.unshift(["x", "y", "pred"]); -var new_ws = XLSX.utils.aoa_to_sheet(tfaoa); -var new_wb = XLSX.utils.book_new(); -XLSX.utils.book_append_sheet(new_wb, new_ws, "Sheet1"); -XLSX.writeFile(new_wb, "tfjs.xls"); diff --git a/modules/45_rtf.js b/modules/45_rtf.js new file mode 100644 index 0000000..dd4b42f --- /dev/null +++ b/modules/45_rtf.js @@ -0,0 +1,97 @@ +function rtf_to_sheet(d, opts) { + switch (opts.type) { + case "base64": + return rtf_to_sheet_str(Base64_decode(d), opts); + case "binary": + return rtf_to_sheet_str(d, opts); + case "buffer": + return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d), opts); + case "array": + return rtf_to_sheet_str(cc2str(d), opts); + } + throw new Error("Unrecognized type " + opts.type); +} +function rtf_to_sheet_str(str, opts) { + var o = opts || {}; + var ws = o.dense ? [] : {}; + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); + if (!rows) + throw new Error("RTF missing table"); + var range = { s: { c: 0, r: 0 }, e: { c: 0, r: rows.length - 1 } }; + rows.forEach(function(rowtf, R) { + if (Array.isArray(ws)) + ws[R] = []; + var rtfre = /\\[\w\-]+\b/g; + var last_index = 0; + var res; + var C = -1; + var payload = []; + while ((res = rtfre.exec(rowtf)) != null) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if (data.charCodeAt(0) == 32) + data = data.slice(1); + if (data.length) + payload.push(data); + switch (res[0]) { + case "\\cell": + ++C; + if (payload.length) { + var cell = { v: payload.join(""), t: "s" }; + if (cell.v == "TRUE" || cell.v == "FALSE") { + cell.v = cell.v == "TRUE"; + cell.t = "b"; + } else if (!isNaN(fuzzynum(cell.v))) { + cell.t = "n"; + if (o.cellText !== false) + cell.w = cell.v; + cell.v = fuzzynum(cell.v); + } + if (Array.isArray(ws)) + ws[R][C] = cell; + else + ws[encode_cell({ r: R, c: C })] = cell; + } + payload = []; + break; + case "\\par": + payload.push("\n"); + break; + 
} + last_index = rtfre.lastIndex; + } + if (C > range.e.c) + range.e.c = C; + }); + ws["!ref"] = encode_range(range); + return ws; +} +function rtf_to_workbook(d, opts) { + var wb = sheet_to_workbook(rtf_to_sheet(d, opts), opts); + wb.bookType = "rtf"; + return wb; +} +function sheet_to_rtf(ws, opts) { + var o = ["{\\rtf1\\ansi"]; + if (!ws["!ref"]) + return o[0] + "}"; + var r = safe_decode_range(ws["!ref"]), cell; + var dense = Array.isArray(ws); + for (var R = r.s.r; R <= r.e.r; ++R) { + o.push("\\trowd\\trautofit1"); + for (var C = r.s.c; C <= r.e.c; ++C) + o.push("\\cellx" + (C + 1)); + o.push("\\pard\\intbl"); + for (C = r.s.c; C <= r.e.c; ++C) { + var coord = encode_cell({ r: R, c: C }); + cell = dense ? (ws[R] || [])[C] : ws[coord]; + if (!cell || cell.v == null && (!cell.f || cell.F)) { + o.push(" \\cell"); + continue; + } + o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par ")); + o.push("\\cell"); + } + o.push("\\pard\\intbl\\row"); + } + return o.join("") + "}"; +} diff --git a/modules/45_rtf.ts b/modules/45_rtf.ts new file mode 100644 index 0000000..8b206a5 --- /dev/null +++ b/modules/45_rtf.ts @@ -0,0 +1,98 @@ +import { WorkBook, WorkSheet, Range, CellObject } from '../'; +import type { utils } from "../"; + +declare var encode_cell: typeof utils.encode_cell; +declare var encode_range: typeof utils.encode_range; +declare var format_cell: typeof utils.format_cell; +declare var safe_decode_range: typeof utils.decode_range; +declare function sheet_to_workbook(s: WorkSheet, o?: any): WorkBook; +declare function cc2str(d: any): string; +declare function a2s(a: any): string; +declare var has_buf: boolean; +declare function Base64_decode(s: string): string; +declare function fuzzynum(s: string): number; + +function rtf_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ { + switch(opts.type) { + case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts); + case 'binary': return rtf_to_sheet_str(d, opts); + case 'buffer': return 
rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); + case 'array': return rtf_to_sheet_str(cc2str(d), opts); + } + throw new Error("Unrecognized type " + opts.type); +} + +/* TODO: this is a stub */ +function rtf_to_sheet_str(str: string, opts)/*:Worksheet*/ { + var o = opts || {}; + // ESBuild issue 2375 + var ws: WorkSheet = o.dense ? [] : ({}/*:any*/); + + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); + if(!rows) throw new Error("RTF missing table"); + var range: Range = {s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}; + rows.forEach(function(rowtf, R) { + if(Array.isArray(ws)) ws[R] = []; + var rtfre = /\\[\w\-]+\b/g; + var last_index = 0; + var res; + var C = -1; + var payload: string[] = []; + while((res = rtfre.exec(rowtf)) != null) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if(data.charCodeAt(0) == 0x20) data = data.slice(1); + if(data.length) payload.push(data); + switch(res[0]) { + case "\\cell": + ++C; + if(payload.length) { + // TODO: value parsing, including codepage adjustments + var cell: CellObject = {v: payload.join(""), t:"s"}; + if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; } + else if(!isNaN(fuzzynum(cell.v as string))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v as string; cell.v = fuzzynum(cell.v as string); } + + if(Array.isArray(ws)) ws[R][C] = cell; + else ws[encode_cell({r:R, c:C})] = cell; + } + payload = []; + break; + case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" + payload.push("\n"); + break; + } + last_index = rtfre.lastIndex; + } + if(C > range.e.c) range.e.c = C; + }); + ws['!ref'] = encode_range(range); + return ws; +} + +function rtf_to_workbook(d/*:RawData*/, opts): WorkBook { + var wb: WorkBook = sheet_to_workbook(rtf_to_sheet(d, opts), opts); + wb.bookType = "rtf"; + return wb; +} + +/* TODO: this is a stub */ +function sheet_to_rtf(ws: WorkSheet, opts): string { + var o: string[] = 
["{\\rtf1\\ansi"]; + if(!ws["!ref"]) return o[0] + "}"; + var r = safe_decode_range(ws['!ref']), cell: CellObject; + var dense = Array.isArray(ws); + for(var R = r.s.r; R <= r.e.r; ++R) { + o.push("\\trowd\\trautofit1"); + for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); + o.push("\\pard\\intbl"); + for(C = r.s.c; C <= r.e.c; ++C) { + var coord = encode_cell({r:R,c:C}); + cell = dense ? (ws[R]||[])[C]: ws[coord]; + if(!cell || cell.v == null && (!cell.f || cell.F)) { o.push(" \\cell"); continue; } + o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par ")); + o.push("\\cell"); + } + o.push("\\pard\\intbl\\row"); + } + return o.join("") + "}"; +} + diff --git a/modules/83_numbers.js b/modules/83_numbers.js index c7594f6..e43e8e4 100644 --- a/modules/83_numbers.js +++ b/modules/83_numbers.js @@ -396,7 +396,7 @@ function parse_old_storage(buf, sst, rsst, v) { var ret; switch (buf[2]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: ieee }; break; @@ -456,7 +456,7 @@ function parse_new_storage(buf, sst, rsst) { var ret; switch (buf[1]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: d128 }; break; @@ -761,6 +761,7 @@ function parse_TN_DocumentArchive(M, root) { }); if (out.SheetNames.length == 0) throw new Error("Empty NUMBERS file"); + out.bookType = "numbers"; return out; } function parse_numbers_iwa(cfb) { @@ -961,6 +962,8 @@ function write_numbers_iwa(wb, opts) { throw new Error("Too many messages"); } var entry = CFB.find(cfb, dependents[1].location); + if (!entry) + throw "Could not find ".concat(dependents[1].location, " in Numbers template"); var x = parse_iwa_file(decompress_iwa_file(entry.content)); var docroot; for (var xi = 0; xi < x.length; ++xi) { @@ -968,8 +971,12 @@ function write_numbers_iwa(wb, opts) { if (packet.id == 1) docroot = packet; } + if (docroot == null) + throw "Could not find message ".concat(1, " in Numbers template"); var sheetrootref = 
parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -985,6 +992,8 @@ function write_numbers_iwa(wb, opts) { entry.size = entry.content.length; sheetrootref = parse_TSP_Reference(sheetref[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -993,6 +1002,8 @@ function write_numbers_iwa(wb, opts) { } sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -1005,6 +1016,8 @@ function write_numbers_iwa(wb, opts) { pb[7][0].data = write_varint49(range.e.c + 1); var cruidsref = parse_TSP_Reference(pb[46][0].data); var oldbucket = CFB.find(cfb, dependents[cruidsref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { for (var j = 0; j < _x.length; ++j) { @@ -1047,6 +1060,8 @@ function write_numbers_iwa(wb, opts) { var row_headers = parse_shallow(store[1][0].data); var row_header_ref = parse_TSP_Reference(row_headers[2][0].data); oldbucket = CFB.find(cfb, dependents[row_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = 
parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != row_header_ref) @@ -1065,6 +1080,8 @@ function write_numbers_iwa(wb, opts) { oldbucket.size = oldbucket.content.length; var col_header_ref = parse_TSP_Reference(store[2][0].data); oldbucket = CFB.find(cfb, dependents[col_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != col_header_ref) @@ -1109,6 +1126,8 @@ function write_numbers_iwa(wb, opts) { var sstref = parse_TSP_Reference(store[4][0].data); (function() { var sentry = CFB.find(cfb, dependents[sstref].location); + if (!sentry) + throw "Could not find ".concat(dependents[sstref].location, " in Numbers template"); var sx = parse_iwa_file(decompress_iwa_file(sentry.content)); var sstroot; for (var sxi = 0; sxi < sx.length; ++sxi) { @@ -1116,6 +1135,8 @@ function write_numbers_iwa(wb, opts) { if (packet2.id == sstref) sstroot = packet2; } + if (sstroot == null) + throw "Could not find message ".concat(sstref, " in Numbers template"); var sstdata = parse_shallow(sstroot.messages[0].data); { sstdata[3] = []; @@ -1141,6 +1162,8 @@ function write_numbers_iwa(wb, opts) { var tileref = parse_TSP_Reference(tl[2][0].data); (function() { var tentry = CFB.find(cfb, dependents[tileref].location); + if (!tentry) + throw "Could not find ".concat(dependents[tileref].location, " in Numbers template"); var tx = parse_iwa_file(decompress_iwa_file(tentry.content)); var tileroot; for (var sxi = 0; sxi < tx.length; ++sxi) { diff --git a/modules/83_numbers.ts b/modules/83_numbers.ts index 0b6c276..7af3190 100644 --- a/modules/83_numbers.ts +++ b/modules/83_numbers.ts @@ -184,9 +184,9 @@ interface IWAMessage { data: Uint8Array; } interface IWAArchiveInfo { - id?: number; + id: number; merge?: boolean; - messages?: IWAMessage[]; + messages: IWAMessage[]; } /** Extract all messages from a IWA file */ function 
parse_iwa_file(buf: Uint8Array): IWAArchiveInfo[] { @@ -248,7 +248,7 @@ function parse_snappy_chunk(type: number, buf: Uint8Array): Uint8Array { var ptr: Ptr = [0]; var usz = parse_varint49(buf, ptr); - var chunks = []; + var chunks: Uint8Array[] = []; while(ptr[0] < buf.length) { var tag = buf[ptr[0]] & 0x3; if(tag == 0) { @@ -295,7 +295,7 @@ function parse_snappy_chunk(type: number, buf: Uint8Array): Uint8Array { /** Decompress IWA file */ function decompress_iwa_file(buf: Uint8Array): Uint8Array { - var out = []; + var out: Uint8Array[] = []; var l = 0; while(l < buf.length) { var t = buf[l++]; @@ -336,7 +336,7 @@ function compress_iwa_file(buf: Uint8Array): Uint8Array { //< = []; (entries||[]).forEach(entry => { // .TST.TableDataList.ListEntry var le = parse_shallow(entry.data); @@ -505,7 +505,7 @@ interface TileRowInfo { /** Row Index */ R: number; /** Cell Storage */ - cells?: Uint8Array[]; + cells: Uint8Array[]; } /** Parse .TSP.TileRowInfo */ function parse_TST_TileRowInfo(u8: Uint8Array, type: TileStorageType): TileRowInfo { @@ -673,6 +673,7 @@ function parse_TN_DocumentArchive(M: MessageSpace, root: IWAMessage): WorkBook { }); }); if(out.SheetNames.length == 0) throw new Error("Empty NUMBERS file"); + out.bookType = "numbers"; return out; } @@ -694,7 +695,7 @@ function parse_numbers_iwa(cfb: CFB$Container): WorkBook { /* find document root */ if(M?.[1]?.[0]?.meta?.[1]?.[0].data && varint_to_i32(M[1][0].meta[1][0].data) == 10000) throw new Error("Pages documents are not supported"); - var docroot: IWAMessage = M?.[1]?.[0]?.meta?.[1]?.[0].data && varint_to_i32(M[1][0].meta[1][0].data) == 1 && M[1][0]; + var docroot: IWAMessage | false = M?.[1]?.[0]?.meta?.[1]?.[0].data && varint_to_i32(M[1][0].meta[1][0].data) == 1 && M[1][0]; if(!docroot) indices.forEach((idx) => { M[idx].forEach((iwam) => { var mtype = varint_to_i32(iwam.meta[1][0].data) >>> 0; @@ -789,7 +790,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container { /* TODO: support 
multiple worksheets, larger ranges, more data types, etc */ var ws = wb.Sheets[wb.SheetNames[0]]; if(wb.SheetNames.length > 1) console.error("The Numbers writer currently writes only the first table"); - var range = decode_range(ws["!ref"]); + var range = decode_range(ws["!ref"] as string); range.s.r = range.s.c = 0; /* Actual NUMBERS 12.0 limit ALL1000000 */ @@ -853,16 +854,19 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container { /* .TN.DocumentArchive */ var entry = CFB.find(cfb, dependents[1].location); + if(!entry) throw `Could not find ${dependents[1].location} in Numbers template`; var x = parse_iwa_file(decompress_iwa_file(entry.content as Uint8Array)); - var docroot: IWAArchiveInfo; + var docroot!: IWAArchiveInfo; for(var xi = 0; xi < x.length; ++xi) { var packet = x[xi]; if(packet.id == 1) docroot = packet; } + if(docroot == null) throw `Could not find message ${1} in Numbers template`; /* .TN.SheetArchive */ var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if(!entry) throw `Could not find ${dependents[sheetrootref].location} in Numbers template`; x = parse_iwa_file(decompress_iwa_file(entry.content as Uint8Array)); for(xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -880,6 +884,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container { /* .TST.TableInfoArchive */ sheetrootref = parse_TSP_Reference(sheetref[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if(!entry) throw `Could not find ${dependents[sheetrootref].location} in Numbers template`; x = parse_iwa_file(decompress_iwa_file(entry.content as Uint8Array)); for(xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -889,6 +894,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container { /* .TST.TableModelArchive */ sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data); entry = CFB.find(cfb, 
dependents[sheetrootref].location); + if(!entry) throw `Could not find ${dependents[sheetrootref].location} in Numbers template`; x = parse_iwa_file(decompress_iwa_file(entry.content as Uint8Array)); for(xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -903,6 +909,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container { var cruidsref = parse_TSP_Reference(pb[46][0].data); var oldbucket = CFB.find(cfb, dependents[cruidsref].location); + if(!oldbucket) throw `Could not find ${dependents[cruidsref].location} in Numbers template`; var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content as Uint8Array)); { for(var j = 0; j < _x.length; ++j) { @@ -940,6 +947,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container { var row_headers = parse_shallow(store[1][0].data); var row_header_ref = parse_TSP_Reference(row_headers[2][0].data); oldbucket = CFB.find(cfb, dependents[row_header_ref].location); + if(!oldbucket) throw `Could not find ${dependents[cruidsref].location} in Numbers template`; _x = parse_iwa_file(decompress_iwa_file(oldbucket.content as Uint8Array)); { if(_x[0].id != row_header_ref) throw "Bad HeaderStorageBucket"; @@ -956,6 +964,7 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container { var col_header_ref = parse_TSP_Reference(store[2][0].data); oldbucket = CFB.find(cfb, dependents[col_header_ref].location); + if(!oldbucket) throw `Could not find ${dependents[cruidsref].location} in Numbers template`; _x = parse_iwa_file(decompress_iwa_file(oldbucket.content as Uint8Array)); { if(_x[0].id != col_header_ref) throw "Bad HeaderStorageBucket"; @@ -996,12 +1005,14 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container { var sstref = parse_TSP_Reference(store[4][0].data); (() => { var sentry = CFB.find(cfb, dependents[sstref].location); + if(!sentry) throw `Could not find ${dependents[sstref].location} in Numbers template`; var sx = parse_iwa_file(decompress_iwa_file(sentry.content as Uint8Array)); 
- var sstroot: IWAArchiveInfo; + var sstroot!: IWAArchiveInfo; for(var sxi = 0; sxi < sx.length; ++sxi) { var packet = sx[sxi]; if(packet.id == sstref) sstroot = packet; } + if(sstroot == null) throw `Could not find message ${sstref} in Numbers template`; var sstdata = parse_shallow(sstroot.messages[0].data); { @@ -1028,8 +1039,9 @@ function write_numbers_iwa(wb: WorkBook, opts: any): CFB$Container { var tileref = parse_TSP_Reference(tl[2][0].data); (() => { var tentry = CFB.find(cfb, dependents[tileref].location); + if(!tentry) throw `Could not find ${dependents[tileref].location} in Numbers template`; var tx = parse_iwa_file(decompress_iwa_file(tentry.content as Uint8Array)); - var tileroot: IWAArchiveInfo; + var tileroot!: IWAArchiveInfo; for(var sxi = 0; sxi < tx.length; ++sxi) { var packet = tx[sxi]; if(packet.id == tileref) tileroot = packet; diff --git a/modules/Makefile b/modules/Makefile index c0e2052..210764d 100644 --- a/modules/Makefile +++ b/modules/Makefile @@ -3,7 +3,7 @@ LIBFILES=src/types.ts TSFILES=$(wildcard *.ts) ENTRIES=$(subst .ts,.js,$(TSFILES)) -BAREJS=04_base64.js 51_xlsxmeta.js 51_xlsbmeta.js 59_vba.js 64_ftab.js 83_numbers.js +BAREJS=04_base64.js 45_rtf.js 51_xlsxmeta.js 51_xlsbmeta.js 59_vba.js 64_ftab.js 83_numbers.js .PHONY: all all: $(ENTRIES) xlsx.zahl.js diff --git a/test.js b/test.js index 7b23b2a..a76eccf 100644 --- a/test.js +++ b/test.js @@ -1430,6 +1430,18 @@ describe('parse features', function() { "11111,1934-06-03,1934-06-03,1934-06-03" ].join("\n")); }); }); + + it('bookType metadata', function() { + [ + // TODO: keep in sync with BookType, support other formats + "xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers" + ].forEach(function(r) { + if(!XLSX_ZAHL) XLSX_ZAHL=require("./dist/xlsx.zahl"); + var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]); + var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, 
ws, "Sheet1"); + var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL }); + assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r); + }); }); }); describe('write features', function() { diff --git a/test.mjs b/test.mjs index 862b3fa..b85edd5 100644 --- a/test.mjs +++ b/test.mjs @@ -1421,6 +1421,17 @@ describe('parse features', function() { "11111,1934-06-03,1934-06-03,1934-06-03" ].join("\n")); }); }); + + it('bookType metadata', function() { + [ + // TODO: keep in sync with BookType, support other formats + "xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers" + ].forEach(function(r) { + var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]); + var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1"); + var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL }); + assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r); + }); }); }); describe('write features', function() { diff --git a/test.mts b/test.mts index e9228ae..ee6731e 100644 --- a/test.mts +++ b/test.mts @@ -1386,6 +1386,17 @@ describe('parse features', function() { "11111,1934-06-03,1934-06-03,1934-06-03" ].join("\n")); }); }); + + it('bookType metadata', function() { + ([ + // TODO: keep in sync with BookType, support other formats + "xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers" + ] as X.BookType[]).forEach(function(r: X.BookType) { + var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]); + var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1"); + var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL }); + assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r); + }); }); }); describe('write features', function() { @@ -1754,6 +1765,33 @@ 
describe('roundtrip features', function() { } }); }); + it('should preserve date system', function() {([ + "biff5", "ods", "slk", "xls", "xlsb", "xlsx", "xml" + ] as X.BookType[]).forEach(function(ext) { + // TODO: check actual date codes and actual date values + var wb0 = X.read(fs.readFileSync("./test_files/1904/1900." + ext), {type: TYPE}); + assert.ok(!wb0.Workbook?.WBProps?.date1904); + var wb1 = X.read(X.write(wb0, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.ok(!wb1.Workbook?.WBProps?.date1904); + + var wb2 = X.utils.book_new(); X.utils.book_append_sheet(wb2, X.utils.aoa_to_sheet([[1]]), "Sheet1"); + wb2.Workbook = { WBProps: { date1904: false } }; + assert.ok(!wb2.Workbook?.WBProps?.date1904); + var wb3 = X.read(X.write(wb2, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.ok(!wb3.Workbook?.WBProps?.date1904); + + var wb4 = X.read(fs.readFileSync("./test_files/1904/1904." + ext), {type: TYPE}); + assert.ok(wb4.Workbook?.WBProps?.date1904); + var wb5 = X.read(X.write(wb4, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.ok(wb5.Workbook?.WBProps?.date1904); // xlsb, xml + + var wb6 = X.utils.book_new(); X.utils.book_append_sheet(wb6, X.utils.aoa_to_sheet([[1]]), "Sheet1"); + wb6.Workbook = { WBProps: { date1904: true } }; + assert.ok(wb6.Workbook?.WBProps?.date1904); + var wb7 = X.read(X.write(wb6, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.ok(wb7.Workbook?.WBProps?.date1904); + }); }); + }); //function password_file(x){return x.match(/^password.*\.xls$/); } diff --git a/test.ts b/test.ts index 9d2c8d8..ea02f33 100644 --- a/test.ts +++ b/test.ts @@ -1386,6 +1386,17 @@ Deno.test('parse features', async function(t) { "11111,1934-06-03,1934-06-03,1934-06-03" ].join("\n")); }); }); + + await t.step('bookType metadata', async function(t) { + ([ + // TODO: keep in sync with BookType, support other formats + "xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, 
"prn", "eth"*/, "dbf", "numbers" + ] as X.BookType[]).forEach(function(r: X.BookType) { + var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]); + var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1"); + var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL }); + assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r); + }); }); }); Deno.test('write features', async function(t) { @@ -1754,6 +1765,33 @@ Deno.test('roundtrip features', async function(t) { } }); }); + await t.step('should preserve date system', async function(t) {([ + "biff5", "ods", "slk", "xls", "xlsb", "xlsx", "xml" + ] as X.BookType[]).forEach(function(ext) { + // TODO: check actual date codes and actual date values + var wb0 = X.read(fs.readFileSync("./test_files/1904/1900." + ext), {type: TYPE}); + assert.assert(!wb0.Workbook?.WBProps?.date1904); + var wb1 = X.read(X.write(wb0, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.assert(!wb1.Workbook?.WBProps?.date1904); + + var wb2 = X.utils.book_new(); X.utils.book_append_sheet(wb2, X.utils.aoa_to_sheet([[1]]), "Sheet1"); + wb2.Workbook = { WBProps: { date1904: false } }; + assert.assert(!wb2.Workbook?.WBProps?.date1904); + var wb3 = X.read(X.write(wb2, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.assert(!wb3.Workbook?.WBProps?.date1904); + + var wb4 = X.read(fs.readFileSync("./test_files/1904/1904." 
+ ext), {type: TYPE}); + assert.assert(wb4.Workbook?.WBProps?.date1904); + var wb5 = X.read(X.write(wb4, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.assert(wb5.Workbook?.WBProps?.date1904); // xlsb, xml + + var wb6 = X.utils.book_new(); X.utils.book_append_sheet(wb6, X.utils.aoa_to_sheet([[1]]), "Sheet1"); + wb6.Workbook = { WBProps: { date1904: true } }; + assert.assert(wb6.Workbook?.WBProps?.date1904); + var wb7 = X.read(X.write(wb6, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.assert(wb7.Workbook?.WBProps?.date1904); + }); }); + }); //function password_file(x){return x.match(/^password.*\.xls$/); } diff --git a/testnocp.ts b/testnocp.ts index aead2ab..5d207cc 100644 --- a/testnocp.ts +++ b/testnocp.ts @@ -1385,6 +1385,17 @@ Deno.test('parse features', async function(t) { "11111,1934-06-03,1934-06-03,1934-06-03" ].join("\n")); }); }); + + await t.step('bookType metadata', async function(t) { + ([ + // TODO: keep in sync with BookType, support other formats + "xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers" + ] as X.BookType[]).forEach(function(r: X.BookType) { + var ws = X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]); + var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1"); + var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL }); + assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r); + }); }); }); Deno.test('write features', async function(t) { @@ -1753,6 +1764,33 @@ Deno.test('roundtrip features', async function(t) { } }); }); + await t.step('should preserve date system', async function(t) {([ + "biff5", "ods", "slk", "xls", "xlsb", "xlsx", "xml" + ] as X.BookType[]).forEach(function(ext) { + // TODO: check actual date codes and actual date values + var wb0 = X.read(fs.readFileSync("./test_files/1904/1900." 
+ ext), {type: TYPE}); + assert.assert(!wb0.Workbook?.WBProps?.date1904); + var wb1 = X.read(X.write(wb0, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.assert(!wb1.Workbook?.WBProps?.date1904); + + var wb2 = X.utils.book_new(); X.utils.book_append_sheet(wb2, X.utils.aoa_to_sheet([[1]]), "Sheet1"); + wb2.Workbook = { WBProps: { date1904: false } }; + assert.assert(!wb2.Workbook?.WBProps?.date1904); + var wb3 = X.read(X.write(wb2, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.assert(!wb3.Workbook?.WBProps?.date1904); + + var wb4 = X.read(fs.readFileSync("./test_files/1904/1904." + ext), {type: TYPE}); + assert.assert(wb4.Workbook?.WBProps?.date1904); + var wb5 = X.read(X.write(wb4, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.assert(wb5.Workbook?.WBProps?.date1904); // xlsb, xml + + var wb6 = X.utils.book_new(); X.utils.book_append_sheet(wb6, X.utils.aoa_to_sheet([[1]]), "Sheet1"); + wb6.Workbook = { WBProps: { date1904: true } }; + assert.assert(wb6.Workbook?.WBProps?.date1904); + var wb7 = X.read(X.write(wb6, {type: TYPE, bookType: ext}), {type: TYPE}); + assert.assert(wb7.Workbook?.WBProps?.date1904); + }); }); + }); //function password_file(x){return x.match(/^password.*\.xls$/); } diff --git a/tests/core.js b/tests/core.js index e5a0895..4fefe5e 100644 --- a/tests/core.js +++ b/tests/core.js @@ -723,6 +723,7 @@ describe('output formats', function() { ["fods", true, true], ["csv", true, true], ["txt", true, true], + ["rtf", false, true], ["sylk", false, true], ["eth", false, true], ["html", true, true], @@ -1429,6 +1430,17 @@ describe('parse features', function() { "11111,1934-06-03,1934-06-03,1934-06-03" ].join("\n")); }); }); + + it('bookType metadata', function() { + [ + // TODO: keep in sync with BookType, support other formats + "xlsx"/*, "xlsm" */, "xlsb"/* xls / xla / biff# */, "xlml", "ods", "fods"/*, "csv", "txt", */, "sylk", "html", "dif", "rtf"/*, "prn", "eth"*/, "dbf", "numbers" + ].forEach(function(r) { + var ws = 
X.utils.aoa_to_sheet([ ["a", "b", "c"], [1, 2, 3] ]); + var wb = X.utils.book_new(); X.utils.book_append_sheet(wb, ws, "Sheet1"); + var data = X.write(wb, {type: TYPE, bookType: r, WTF: true, numbers:XLSX_ZAHL }); + assert.equal(X.read(data, {type: TYPE, WTF: true}).bookType, r); + }); }); }); describe('write features', function() { @@ -2531,6 +2543,23 @@ describe('js -> file -> js', function() { }); }); +describe('rtf', function() { + it('roundtrip should be idempotent', function() { + var ws = X.utils.aoa_to_sheet([ + [1,2,3], + [true, false, null, "sheetjs"], + ["foo", "bar", fixdate, "0.3"], + ["baz", null, "q\"ux"] + ]); + var wb1 = X.utils.book_new(); + X.utils.book_append_sheet(wb1, ws, "Sheet1"); + var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"}); + var wb2 = X.read(rtf1, {type: "string"}); + var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"}); + assert.equal(rtf1, rtf2); + }); +}); + describe('corner cases', function() { it('output functions', function() { var ws = X.utils.aoa_to_sheet([ diff --git a/types/index.d.ts b/types/index.d.ts index 2d37afb..ebbed34 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -285,6 +285,9 @@ export interface WorkBook { Workbook?: WBProps; vbaraw?: any; + + /** Original file type (when parsed with `read` or `readFile`) */ + bookType?: BookType; } export interface SheetProps { diff --git a/xlsx.flow.js b/xlsx.flow.js index 123ce0e..d8f8ebc 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -7845,7 +7845,11 @@ function dbf_to_sheet(buf, opts)/*:Worksheet*/ { } function dbf_to_workbook(buf, opts)/*:Workbook*/ { - try { return sheet_to_workbook(dbf_to_sheet(buf, opts), opts); } + try { + var o = sheet_to_workbook(dbf_to_sheet(buf, opts), opts); + o.bookType = "dbf"; + return o; + } catch(e) { if(opts && opts.WTF) throw e; } return ({SheetNames:[],Sheets:{}}); } @@ -8156,6 +8160,7 @@ var SYLK = /*#__PURE__*/(function() { keys(ws).forEach(function(k) { o[k] = ws[k]; }); var outwb = sheet_to_workbook(o, opts); 
keys(wb).forEach(function(k) { outwb[k] = wb[k]; }); + outwb.bookType = "sylk"; return outwb; } @@ -8274,7 +8279,11 @@ var DIF = /*#__PURE__*/(function() { } function dif_to_sheet(str/*:string*/, opts)/*:Worksheet*/ { return aoa_to_sheet(dif_to_aoa(str, opts), opts); } - function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { return sheet_to_workbook(dif_to_sheet(str, opts), opts); } + function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { + var o = sheet_to_workbook(dif_to_sheet(str, opts), opts); + o.bookType = "dif"; + return o; + } var sheet_to_dif = /*#__PURE__*/(function() { var push_field = function pf(o/*:Array*/, topic/*:string*/, v/*:number*/, n/*:number*/, s/*:string*/) { @@ -10322,83 +10331,102 @@ function parse_FilePass(blob, length/*:number*/, opts) { } -function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { - switch(opts.type) { - case 'base64': return rtf_to_book_str(Base64_decode(d), opts); - case 'binary': return rtf_to_book_str(d, opts); - case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); - case 'array': return rtf_to_book_str(cc2str(d), opts); - } - throw new Error("Unrecognized type " + opts.type); +function rtf_to_sheet(d, opts) { + switch (opts.type) { + case "base64": + return rtf_to_sheet_str(Base64_decode(d), opts); + case "binary": + return rtf_to_sheet_str(d, opts); + case "buffer": + return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d), opts); + case "array": + return rtf_to_sheet_str(cc2str(d), opts); + } + throw new Error("Unrecognized type " + opts.type); } - -/* TODO: RTF technically can store multiple tables, even if Excel does not */ -function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ { - var o = opts || {}; - var sname = o.sheet || "Sheet1"; - var ws/*:Worksheet*/ = o.dense ? 
([]/*:any*/) : ({}/*:any*/); - var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} }; - wb.Sheets[sname] = ws; - - var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); - if(!rows.length) throw new Error("RTF missing table"); - var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); - rows.forEach(function(rowtf, R) { - if(Array.isArray(ws)) ws[R] = []; - var rtfre = /\\[\w\-]+\b/g; - var last_index = 0; - var res; - var C = -1; - var payload = []; - while((res = rtfre.exec(rowtf))) { - var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); - if(data.charCodeAt(0) == 0x20) data = data.slice(1); - if(data.length) payload.push(data); - switch(res[0]) { - case "\\cell": - ++C; - if(payload.length) { - // TODO: value parsing, including codepage adjustments - var cell = {v: payload.join(""), t:"s"}; - if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; } - else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); } - - if(Array.isArray(ws)) ws[R][C] = cell; - else ws[encode_cell({r:R, c:C})] = cell; - } - payload = []; - break; - case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" - payload.push("\n"); - break; - } - last_index = rtfre.lastIndex; - } - if(C > range.e.c) range.e.c = C; - }); - ws['!ref'] = encode_range(range); - return wb; +function rtf_to_sheet_str(str, opts) { + var o = opts || {}; + var ws = o.dense ? 
[] : {}; + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); + if (!rows) + throw new Error("RTF missing table"); + var range = { s: { c: 0, r: 0 }, e: { c: 0, r: rows.length - 1 } }; + rows.forEach(function(rowtf, R) { + if (Array.isArray(ws)) + ws[R] = []; + var rtfre = /\\[\w\-]+\b/g; + var last_index = 0; + var res; + var C = -1; + var payload = []; + while ((res = rtfre.exec(rowtf)) != null) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if (data.charCodeAt(0) == 32) + data = data.slice(1); + if (data.length) + payload.push(data); + switch (res[0]) { + case "\\cell": + ++C; + if (payload.length) { + var cell = { v: payload.join(""), t: "s" }; + if (cell.v == "TRUE" || cell.v == "FALSE") { + cell.v = cell.v == "TRUE"; + cell.t = "b"; + } else if (!isNaN(fuzzynum(cell.v))) { + cell.t = "n"; + if (o.cellText !== false) + cell.w = cell.v; + cell.v = fuzzynum(cell.v); + } + if (Array.isArray(ws)) + ws[R][C] = cell; + else + ws[encode_cell({ r: R, c: C })] = cell; + } + payload = []; + break; + case "\\par": + payload.push("\n"); + break; + } + last_index = rtfre.lastIndex; + } + if (C > range.e.c) + range.e.c = C; + }); + ws["!ref"] = encode_range(range); + return ws; } - -/* TODO: standardize sheet names as titles for tables */ -function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ { - var o = ["{\\rtf1\\ansi"]; - var r = safe_decode_range(ws['!ref']), cell/*:Cell*/; - var dense = Array.isArray(ws); - for(var R = r.s.r; R <= r.e.r; ++R) { - o.push("\\trowd\\trautofit1"); - for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); - o.push("\\pard\\intbl"); - for(C = r.s.c; C <= r.e.c; ++C) { - var coord = encode_cell({r:R,c:C}); - cell = dense ? 
(ws[R]||[])[C]: ws[coord]; - if(!cell || cell.v == null && (!cell.f || cell.F)) continue; - o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); - o.push("\\cell"); - } - o.push("\\pard\\intbl\\row"); - } - return o.join("") + "}"; +function rtf_to_workbook(d, opts) { + var wb = sheet_to_workbook(rtf_to_sheet(d, opts), opts); + wb.bookType = "rtf"; + return wb; +} +function sheet_to_rtf(ws, opts) { + var o = ["{\\rtf1\\ansi"]; + if (!ws["!ref"]) + return o[0] + "}"; + var r = safe_decode_range(ws["!ref"]), cell; + var dense = Array.isArray(ws); + for (var R = r.s.r; R <= r.e.r; ++R) { + o.push("\\trowd\\trautofit1"); + for (var C = r.s.c; C <= r.e.c; ++C) + o.push("\\cellx" + (C + 1)); + o.push("\\pard\\intbl"); + for (C = r.s.c; C <= r.e.c; ++C) { + var coord = encode_cell({ r: R, c: C }); + cell = dense ? (ws[R] || [])[C] : ws[coord]; + if (!cell || cell.v == null && (!cell.f || cell.F)) { + o.push(" \\cell"); + continue; + } + o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par ")); + o.push("\\cell"); + } + o.push("\\pard\\intbl\\row"); + } + return o.join("") + "}"; } function hex2RGB(h) { var o = h.slice(h[0]==="#"?1:0).slice(0,6); @@ -18728,6 +18756,7 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ { out.SSF = dup(table_fmt); out.Props = Props; out.Custprops = Custprops; + out.bookType = "xlml"; return out; } @@ -21619,9 +21648,14 @@ var HTML_END = ''; function html_to_workbook(str/*:string*/, opts)/*:Workbook*/ { var mtch = str.match(/[\s\S]*?<\/table>/gi); if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find
"); - if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts); + if(mtch.length == 1) { + var w = sheet_to_workbook(html_to_sheet(mtch[0], opts), opts); + w.bookType = "html"; + return w; + } var wb = book_new(); mtch.forEach(function(s, idx) { book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); }); + wb.bookType = "html"; return wb; } @@ -21737,7 +21771,9 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { } function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ { - return sheet_to_workbook(parse_dom_table(table, opts), opts); + var o = sheet_to_workbook(parse_dom_table(table, opts), opts); + //o.bookType = "dom"; // TODO: define a type for this + return o; } function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ { @@ -22528,10 +22564,13 @@ function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/)/*:Workbook*/ { if(!content) throw new Error("Missing content.xml in ODS / UOF file"); var wb = parse_content_xml(utf8read(content), opts, Styles); if(safegetzipfile(zip, 'meta.xml')) wb.Props = parse_core_props(getzipdata(zip, 'meta.xml')); + wb.bookType = "ods"; return wb; } function parse_fods(data/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ { - return parse_content_xml(data, opts); + var wb = parse_content_xml(data, opts); + wb.bookType = "fods"; + return wb; } /* OpenDocument */ @@ -23413,7 +23452,7 @@ function parse_old_storage(buf, sst, rsst, v) { var ret; switch (buf[2]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: ieee }; break; @@ -23473,7 +23512,7 @@ function parse_new_storage(buf, sst, rsst) { var ret; switch (buf[1]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: d128 }; break; @@ -23778,6 +23817,7 @@ function parse_TN_DocumentArchive(M, root) { }); if (out.SheetNames.length == 0) throw new Error("Empty NUMBERS file"); + out.bookType = "numbers"; return out; } function parse_numbers_iwa(cfb) { @@ -23978,6 +24018,8 @@ 
function write_numbers_iwa(wb, opts) { throw new Error("Too many messages"); } var entry = CFB.find(cfb, dependents[1].location); + if (!entry) + throw "Could not find ".concat(dependents[1].location, " in Numbers template"); var x = parse_iwa_file(decompress_iwa_file(entry.content)); var docroot; for (var xi = 0; xi < x.length; ++xi) { @@ -23985,8 +24027,12 @@ function write_numbers_iwa(wb, opts) { if (packet.id == 1) docroot = packet; } + if (docroot == null) + throw "Could not find message ".concat(1, " in Numbers template"); var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -24002,6 +24048,8 @@ function write_numbers_iwa(wb, opts) { entry.size = entry.content.length; sheetrootref = parse_TSP_Reference(sheetref[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -24010,6 +24058,8 @@ function write_numbers_iwa(wb, opts) { } sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -24022,6 +24072,8 @@ function write_numbers_iwa(wb, opts) { pb[7][0].data = write_varint49(range.e.c + 1); var cruidsref = parse_TSP_Reference(pb[46][0].data); var oldbucket = CFB.find(cfb, dependents[cruidsref].location); + if (!oldbucket) + 
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { for (var j = 0; j < _x.length; ++j) { @@ -24064,6 +24116,8 @@ function write_numbers_iwa(wb, opts) { var row_headers = parse_shallow(store[1][0].data); var row_header_ref = parse_TSP_Reference(row_headers[2][0].data); oldbucket = CFB.find(cfb, dependents[row_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != row_header_ref) @@ -24082,6 +24136,8 @@ function write_numbers_iwa(wb, opts) { oldbucket.size = oldbucket.content.length; var col_header_ref = parse_TSP_Reference(store[2][0].data); oldbucket = CFB.find(cfb, dependents[col_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != col_header_ref) @@ -24126,6 +24182,8 @@ function write_numbers_iwa(wb, opts) { var sstref = parse_TSP_Reference(store[4][0].data); (function() { var sentry = CFB.find(cfb, dependents[sstref].location); + if (!sentry) + throw "Could not find ".concat(dependents[sstref].location, " in Numbers template"); var sx = parse_iwa_file(decompress_iwa_file(sentry.content)); var sstroot; for (var sxi = 0; sxi < sx.length; ++sxi) { @@ -24133,6 +24191,8 @@ function write_numbers_iwa(wb, opts) { if (packet2.id == sstref) sstroot = packet2; } + if (sstroot == null) + throw "Could not find message ".concat(sstref, " in Numbers template"); var sstdata = parse_shallow(sstroot.messages[0].data); { sstdata[3] = []; @@ -24158,6 +24218,8 @@ function write_numbers_iwa(wb, opts) { var tileref = parse_TSP_Reference(tl[2][0].data); (function() { var tentry = CFB.find(cfb, dependents[tileref].location); + if (!tentry) + throw "Could not find 
".concat(dependents[tileref].location, " in Numbers template"); var tx = parse_iwa_file(decompress_iwa_file(tentry.content)); var tileroot; for (var sxi = 0; sxi < tx.length; ++sxi) { @@ -24496,6 +24558,8 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ { if(dir.vba.length > 0) out.vbaraw = getzipdata(zip,strip_front_slash(dir.vba[0]),true); else if(dir.defaults && dir.defaults.bin === CT_VBA) out.vbaraw = getzipdata(zip, 'xl/vbaProject.bin',true); } + // TODO: pass back content types metdata for xlsm/xlsx resolution + out.bookType = xlsb ? "xlsb" : "xlsx"; return out; } diff --git a/xlsx.js b/xlsx.js index c530dbf..0815982 100644 --- a/xlsx.js +++ b/xlsx.js @@ -7755,7 +7755,11 @@ function dbf_to_sheet(buf, opts) { } function dbf_to_workbook(buf, opts) { - try { return sheet_to_workbook(dbf_to_sheet(buf, opts), opts); } + try { + var o = sheet_to_workbook(dbf_to_sheet(buf, opts), opts); + o.bookType = "dbf"; + return o; + } catch(e) { if(opts && opts.WTF) throw e; } return ({SheetNames:[],Sheets:{}}); } @@ -8066,6 +8070,7 @@ var SYLK = (function() { keys(ws).forEach(function(k) { o[k] = ws[k]; }); var outwb = sheet_to_workbook(o, opts); keys(wb).forEach(function(k) { outwb[k] = wb[k]; }); + outwb.bookType = "sylk"; return outwb; } @@ -8184,7 +8189,11 @@ var DIF = (function() { } function dif_to_sheet(str, opts) { return aoa_to_sheet(dif_to_aoa(str, opts), opts); } - function dif_to_workbook(str, opts) { return sheet_to_workbook(dif_to_sheet(str, opts), opts); } + function dif_to_workbook(str, opts) { + var o = sheet_to_workbook(dif_to_sheet(str, opts), opts); + o.bookType = "dif"; + return o; + } var sheet_to_dif = (function() { var push_field = function pf(o, topic, v, n, s) { @@ -10231,88 +10240,103 @@ function parse_FilePass(blob, length, opts) { } -var RTF = (function() { - function rtf_to_sheet(d, opts) { - switch(opts.type) { - case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts); - case 'binary': return rtf_to_sheet_str(d, 
opts); - case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); - case 'array': return rtf_to_sheet_str(cc2str(d), opts); - } - throw new Error("Unrecognized type " + opts.type); - } - - /* TODO: this is a stub */ - function rtf_to_sheet_str(str, opts) { - var o = opts || {}; - var ws = o.dense ? ([]) : ({}); - - var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); - if(!rows.length) throw new Error("RTF missing table"); - var range = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}); - rows.forEach(function(rowtf, R) { - if(Array.isArray(ws)) ws[R] = []; - var rtfre = /\\[\w\-]+\b/g; - var last_index = 0; - var res; - var C = -1; - var payload = []; - while((res = rtfre.exec(rowtf))) { - var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); - if(data.charCodeAt(0) == 0x20) data = data.slice(1); - if(data.length) payload.push(data); - switch(res[0]) { - case "\\cell": - ++C; - if(payload.length) { - // TODO: value parsing, including codepage adjustments - var cell = {v: payload.join(""), t:"s"}; - if(Array.isArray(ws)) ws[R][C] = cell; - else ws[encode_cell({r:R, c:C})] = cell; - } - payload = []; - break; - case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" - payload.push("\n"); - break; - } - last_index = rtfre.lastIndex; - } - if(C > range.e.c) range.e.c = C; - }); - ws['!ref'] = encode_range(range); - return ws; - } - - function rtf_to_workbook(d, opts) { return sheet_to_workbook(rtf_to_sheet(d, opts), opts); } - - /* TODO: this is a stub */ - function sheet_to_rtf(ws) { - var o = ["{\\rtf1\\ansi"]; - var r = safe_decode_range(ws['!ref']), cell; - var dense = Array.isArray(ws); - for(var R = r.s.r; R <= r.e.r; ++R) { - o.push("\\trowd\\trautofit1"); - for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); - o.push("\\pard\\intbl"); - for(C = r.s.c; C <= r.e.c; ++C) { - var coord = encode_cell({r:R,c:C}); - cell = dense ? 
(ws[R]||[])[C]: ws[coord]; - if(!cell || cell.v == null && (!cell.f || cell.F)) continue; - o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); - o.push("\\cell"); - } - o.push("\\pard\\intbl\\row"); - } - return o.join("") + "}"; - } - - return { - to_workbook: rtf_to_workbook, - to_sheet: rtf_to_sheet, - from_sheet: sheet_to_rtf - }; -})(); +function rtf_to_sheet(d, opts) { + switch (opts.type) { + case "base64": + return rtf_to_sheet_str(Base64_decode(d), opts); + case "binary": + return rtf_to_sheet_str(d, opts); + case "buffer": + return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString("binary") : a2s(d), opts); + case "array": + return rtf_to_sheet_str(cc2str(d), opts); + } + throw new Error("Unrecognized type " + opts.type); +} +function rtf_to_sheet_str(str, opts) { + var o = opts || {}; + var ws = o.dense ? [] : {}; + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); + if (!rows) + throw new Error("RTF missing table"); + var range = { s: { c: 0, r: 0 }, e: { c: 0, r: rows.length - 1 } }; + rows.forEach(function(rowtf, R) { + if (Array.isArray(ws)) + ws[R] = []; + var rtfre = /\\[\w\-]+\b/g; + var last_index = 0; + var res; + var C = -1; + var payload = []; + while ((res = rtfre.exec(rowtf)) != null) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if (data.charCodeAt(0) == 32) + data = data.slice(1); + if (data.length) + payload.push(data); + switch (res[0]) { + case "\\cell": + ++C; + if (payload.length) { + var cell = { v: payload.join(""), t: "s" }; + if (cell.v == "TRUE" || cell.v == "FALSE") { + cell.v = cell.v == "TRUE"; + cell.t = "b"; + } else if (!isNaN(fuzzynum(cell.v))) { + cell.t = "n"; + if (o.cellText !== false) + cell.w = cell.v; + cell.v = fuzzynum(cell.v); + } + if (Array.isArray(ws)) + ws[R][C] = cell; + else + ws[encode_cell({ r: R, c: C })] = cell; + } + payload = []; + break; + case "\\par": + payload.push("\n"); + break; + } + last_index = rtfre.lastIndex; + } + if (C 
> range.e.c) + range.e.c = C; + }); + ws["!ref"] = encode_range(range); + return ws; +} +function rtf_to_workbook(d, opts) { + var wb = sheet_to_workbook(rtf_to_sheet(d, opts), opts); + wb.bookType = "rtf"; + return wb; +} +function sheet_to_rtf(ws, opts) { + var o = ["{\\rtf1\\ansi"]; + if (!ws["!ref"]) + return o[0] + "}"; + var r = safe_decode_range(ws["!ref"]), cell; + var dense = Array.isArray(ws); + for (var R = r.s.r; R <= r.e.r; ++R) { + o.push("\\trowd\\trautofit1"); + for (var C = r.s.c; C <= r.e.c; ++C) + o.push("\\cellx" + (C + 1)); + o.push("\\pard\\intbl"); + for (C = r.s.c; C <= r.e.c; ++C) { + var coord = encode_cell({ r: R, c: C }); + cell = dense ? (ws[R] || [])[C] : ws[coord]; + if (!cell || cell.v == null && (!cell.f || cell.F)) { + o.push(" \\cell"); + continue; + } + o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par ")); + o.push("\\cell"); + } + o.push("\\pard\\intbl\\row"); + } + return o.join("") + "}"; +} function hex2RGB(h) { var o = h.slice(h[0]==="#"?1:0).slice(0,6); return [parseInt(o.slice(0,2),16),parseInt(o.slice(2,4),16),parseInt(o.slice(4,6),16)]; @@ -18631,6 +18655,7 @@ Workbook.WBProps.date1904 = true; out.SSF = dup(table_fmt); out.Props = Props; out.Custprops = Custprops; + out.bookType = "xlml"; return out; } @@ -21513,9 +21538,14 @@ var HTML_END = ''; function html_to_workbook(str, opts) { var mtch = str.match(/[\s\S]*?<\/table>/gi); if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find
"); - if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts); + if(mtch.length == 1) { + var w = sheet_to_workbook(html_to_sheet(mtch[0], opts), opts); + w.bookType = "html"; + return w; + } var wb = book_new(); mtch.forEach(function(s, idx) { book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); }); + wb.bookType = "html"; return wb; } @@ -21631,7 +21661,9 @@ function parse_dom_table(table, _opts) { } function table_to_book(table, opts) { - return sheet_to_workbook(parse_dom_table(table, opts), opts); + var o = sheet_to_workbook(parse_dom_table(table, opts), opts); + //o.bookType = "dom"; // TODO: define a type for this + return o; } function is_dom_element_hidden(element) { @@ -22422,10 +22454,13 @@ function parse_ods(zip, opts) { if(!content) throw new Error("Missing content.xml in ODS / UOF file"); var wb = parse_content_xml(utf8read(content), opts, Styles); if(safegetzipfile(zip, 'meta.xml')) wb.Props = parse_core_props(getzipdata(zip, 'meta.xml')); + wb.bookType = "ods"; return wb; } function parse_fods(data, opts) { - return parse_content_xml(data, opts); + var wb = parse_content_xml(data, opts); + wb.bookType = "fods"; + return wb; } /* OpenDocument */ @@ -23307,7 +23342,7 @@ function parse_old_storage(buf, sst, rsst, v) { var ret; switch (buf[2]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: ieee }; break; @@ -23367,7 +23402,7 @@ function parse_new_storage(buf, sst, rsst) { var ret; switch (buf[1]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: d128 }; break; @@ -23672,6 +23707,7 @@ function parse_TN_DocumentArchive(M, root) { }); if (out.SheetNames.length == 0) throw new Error("Empty NUMBERS file"); + out.bookType = "numbers"; return out; } function parse_numbers_iwa(cfb) { @@ -23872,6 +23908,8 @@ function write_numbers_iwa(wb, opts) { throw new Error("Too many messages"); } var entry = CFB.find(cfb, dependents[1].location); + if (!entry) + throw "Could not find 
".concat(dependents[1].location, " in Numbers template"); var x = parse_iwa_file(decompress_iwa_file(entry.content)); var docroot; for (var xi = 0; xi < x.length; ++xi) { @@ -23879,8 +23917,12 @@ function write_numbers_iwa(wb, opts) { if (packet.id == 1) docroot = packet; } + if (docroot == null) + throw "Could not find message ".concat(1, " in Numbers template"); var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -23896,6 +23938,8 @@ function write_numbers_iwa(wb, opts) { entry.size = entry.content.length; sheetrootref = parse_TSP_Reference(sheetref[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -23904,6 +23948,8 @@ function write_numbers_iwa(wb, opts) { } sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -23916,6 +23962,8 @@ function write_numbers_iwa(wb, opts) { pb[7][0].data = write_varint49(range.e.c + 1); var cruidsref = parse_TSP_Reference(pb[46][0].data); var oldbucket = CFB.find(cfb, dependents[cruidsref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { for (var j = 
0; j < _x.length; ++j) { @@ -23958,6 +24006,8 @@ function write_numbers_iwa(wb, opts) { var row_headers = parse_shallow(store[1][0].data); var row_header_ref = parse_TSP_Reference(row_headers[2][0].data); oldbucket = CFB.find(cfb, dependents[row_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != row_header_ref) @@ -23976,6 +24026,8 @@ function write_numbers_iwa(wb, opts) { oldbucket.size = oldbucket.content.length; var col_header_ref = parse_TSP_Reference(store[2][0].data); oldbucket = CFB.find(cfb, dependents[col_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != col_header_ref) @@ -24020,6 +24072,8 @@ function write_numbers_iwa(wb, opts) { var sstref = parse_TSP_Reference(store[4][0].data); (function() { var sentry = CFB.find(cfb, dependents[sstref].location); + if (!sentry) + throw "Could not find ".concat(dependents[sstref].location, " in Numbers template"); var sx = parse_iwa_file(decompress_iwa_file(sentry.content)); var sstroot; for (var sxi = 0; sxi < sx.length; ++sxi) { @@ -24027,6 +24081,8 @@ function write_numbers_iwa(wb, opts) { if (packet2.id == sstref) sstroot = packet2; } + if (sstroot == null) + throw "Could not find message ".concat(sstref, " in Numbers template"); var sstdata = parse_shallow(sstroot.messages[0].data); { sstdata[3] = []; @@ -24052,6 +24108,8 @@ function write_numbers_iwa(wb, opts) { var tileref = parse_TSP_Reference(tl[2][0].data); (function() { var tentry = CFB.find(cfb, dependents[tileref].location); + if (!tentry) + throw "Could not find ".concat(dependents[tileref].location, " in Numbers template"); var tx = parse_iwa_file(decompress_iwa_file(tentry.content)); var tileroot; for (var sxi = 0; sxi < tx.length; ++sxi) 
{ @@ -24390,6 +24448,8 @@ function parse_zip(zip, opts) { if(dir.vba.length > 0) out.vbaraw = getzipdata(zip,strip_front_slash(dir.vba[0]),true); else if(dir.defaults && dir.defaults.bin === CT_VBA) out.vbaraw = getzipdata(zip, 'xl/vbaProject.bin',true); } + // TODO: pass back content types metdata for xlsm/xlsx resolution + out.bookType = xlsb ? "xlsb" : "xlsx"; return out; } @@ -24831,7 +24891,7 @@ function readSync(data, opts) { } break; case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o); - case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break; + case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return rtf_to_workbook(d, o); break; case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o); case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break; case 0x08: if(n[1] === 0xE7) throw new Error("Unsupported Multiplan 1.x file!"); break; @@ -24991,7 +25051,7 @@ function writeSync(wb, opts) { case 'dif': return write_string_type(DIF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'dbf': return write_binary_type(DBF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'prn': return write_string_type(PRN.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); - case 'rtf': return write_string_type(RTF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); + case 'rtf': return write_string_type(sheet_to_rtf(wb.Sheets[wb.SheetNames[idx]], o), o); case 'eth': return write_string_type(ETH.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'fods': return write_string_type(write_ods(wb, o), o); case 'wk1': return write_binary_type(WK_.sheet_to_wk1(wb.Sheets[wb.SheetNames[idx]], o), o); diff --git a/xlsx.mjs b/xlsx.mjs index a5bc4c4..ec3efcd 100644 --- a/xlsx.mjs +++ b/xlsx.mjs @@ -7840,7 +7840,11 @@ function dbf_to_sheet(buf, opts)/*:Worksheet*/ { } function dbf_to_workbook(buf, opts)/*:Workbook*/ { - try { return 
sheet_to_workbook(dbf_to_sheet(buf, opts), opts); } + try { + var o = sheet_to_workbook(dbf_to_sheet(buf, opts), opts); + o.bookType = "dbf"; + return o; + } catch(e) { if(opts && opts.WTF) throw e; } return ({SheetNames:[],Sheets:{}}); } @@ -8151,6 +8155,7 @@ var SYLK = /*#__PURE__*/(function() { keys(ws).forEach(function(k) { o[k] = ws[k]; }); var outwb = sheet_to_workbook(o, opts); keys(wb).forEach(function(k) { outwb[k] = wb[k]; }); + outwb.bookType = "sylk"; return outwb; } @@ -8269,7 +8274,11 @@ var DIF = /*#__PURE__*/(function() { } function dif_to_sheet(str/*:string*/, opts)/*:Worksheet*/ { return aoa_to_sheet(dif_to_aoa(str, opts), opts); } - function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { return sheet_to_workbook(dif_to_sheet(str, opts), opts); } + function dif_to_workbook(str/*:string*/, opts)/*:Workbook*/ { + var o = sheet_to_workbook(dif_to_sheet(str, opts), opts); + o.bookType = "dif"; + return o; + } var sheet_to_dif = /*#__PURE__*/(function() { var push_field = function pf(o/*:Array*/, topic/*:string*/, v/*:number*/, n/*:number*/, s/*:string*/) { @@ -10317,83 +10326,102 @@ function parse_FilePass(blob, length/*:number*/, opts) { } -function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { - switch(opts.type) { - case 'base64': return rtf_to_book_str(Base64_decode(d), opts); - case 'binary': return rtf_to_book_str(d, opts); - case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); - case 'array': return rtf_to_book_str(cc2str(d), opts); - } - throw new Error("Unrecognized type " + opts.type); +function rtf_to_sheet(d, opts) { + switch (opts.type) { + case "base64": + return rtf_to_sheet_str(Base64_decode(d), opts); + case "binary": + return rtf_to_sheet_str(d, opts); + case "buffer": + return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? 
d.toString("binary") : a2s(d), opts); + case "array": + return rtf_to_sheet_str(cc2str(d), opts); + } + throw new Error("Unrecognized type " + opts.type); } - -/* TODO: RTF technically can store multiple tables, even if Excel does not */ -function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ { - var o = opts || {}; - var sname = o.sheet || "Sheet1"; - var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/); - var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} }; - wb.Sheets[sname] = ws; - - var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); - if(!rows.length) throw new Error("RTF missing table"); - var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); - rows.forEach(function(rowtf, R) { - if(Array.isArray(ws)) ws[R] = []; - var rtfre = /\\[\w\-]+\b/g; - var last_index = 0; - var res; - var C = -1; - var payload = []; - while((res = rtfre.exec(rowtf))) { - var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); - if(data.charCodeAt(0) == 0x20) data = data.slice(1); - if(data.length) payload.push(data); - switch(res[0]) { - case "\\cell": - ++C; - if(payload.length) { - // TODO: value parsing, including codepage adjustments - var cell = {v: payload.join(""), t:"s"}; - if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; } - else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); } - - if(Array.isArray(ws)) ws[R][C] = cell; - else ws[encode_cell({r:R, c:C})] = cell; - } - payload = []; - break; - case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" - payload.push("\n"); - break; - } - last_index = rtfre.lastIndex; - } - if(C > range.e.c) range.e.c = C; - }); - ws['!ref'] = encode_range(range); - return wb; +function rtf_to_sheet_str(str, opts) { + var o = opts || {}; + var ws = o.dense ? 
[] : {}; + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); + if (!rows) + throw new Error("RTF missing table"); + var range = { s: { c: 0, r: 0 }, e: { c: 0, r: rows.length - 1 } }; + rows.forEach(function(rowtf, R) { + if (Array.isArray(ws)) + ws[R] = []; + var rtfre = /\\[\w\-]+\b/g; + var last_index = 0; + var res; + var C = -1; + var payload = []; + while ((res = rtfre.exec(rowtf)) != null) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if (data.charCodeAt(0) == 32) + data = data.slice(1); + if (data.length) + payload.push(data); + switch (res[0]) { + case "\\cell": + ++C; + if (payload.length) { + var cell = { v: payload.join(""), t: "s" }; + if (cell.v == "TRUE" || cell.v == "FALSE") { + cell.v = cell.v == "TRUE"; + cell.t = "b"; + } else if (!isNaN(fuzzynum(cell.v))) { + cell.t = "n"; + if (o.cellText !== false) + cell.w = cell.v; + cell.v = fuzzynum(cell.v); + } + if (Array.isArray(ws)) + ws[R][C] = cell; + else + ws[encode_cell({ r: R, c: C })] = cell; + } + payload = []; + break; + case "\\par": + payload.push("\n"); + break; + } + last_index = rtfre.lastIndex; + } + if (C > range.e.c) + range.e.c = C; + }); + ws["!ref"] = encode_range(range); + return ws; } - -/* TODO: standardize sheet names as titles for tables */ -function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ { - var o = ["{\\rtf1\\ansi"]; - var r = safe_decode_range(ws['!ref']), cell/*:Cell*/; - var dense = Array.isArray(ws); - for(var R = r.s.r; R <= r.e.r; ++R) { - o.push("\\trowd\\trautofit1"); - for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); - o.push("\\pard\\intbl"); - for(C = r.s.c; C <= r.e.c; ++C) { - var coord = encode_cell({r:R,c:C}); - cell = dense ? 
(ws[R]||[])[C]: ws[coord]; - if(!cell || cell.v == null && (!cell.f || cell.F)) continue; - o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); - o.push("\\cell"); - } - o.push("\\pard\\intbl\\row"); - } - return o.join("") + "}"; +function rtf_to_workbook(d, opts) { + var wb = sheet_to_workbook(rtf_to_sheet(d, opts), opts); + wb.bookType = "rtf"; + return wb; +} +function sheet_to_rtf(ws, opts) { + var o = ["{\\rtf1\\ansi"]; + if (!ws["!ref"]) + return o[0] + "}"; + var r = safe_decode_range(ws["!ref"]), cell; + var dense = Array.isArray(ws); + for (var R = r.s.r; R <= r.e.r; ++R) { + o.push("\\trowd\\trautofit1"); + for (var C = r.s.c; C <= r.e.c; ++C) + o.push("\\cellx" + (C + 1)); + o.push("\\pard\\intbl"); + for (C = r.s.c; C <= r.e.c; ++C) { + var coord = encode_cell({ r: R, c: C }); + cell = dense ? (ws[R] || [])[C] : ws[coord]; + if (!cell || cell.v == null && (!cell.f || cell.F)) { + o.push(" \\cell"); + continue; + } + o.push(" " + (cell.w || (format_cell(cell), cell.w) || "").replace(/[\r\n]/g, "\\par ")); + o.push("\\cell"); + } + o.push("\\pard\\intbl\\row"); + } + return o.join("") + "}"; } function hex2RGB(h) { var o = h.slice(h[0]==="#"?1:0).slice(0,6); @@ -18723,6 +18751,7 @@ function parse_xlml_xml(d, _opts)/*:Workbook*/ { out.SSF = dup(table_fmt); out.Props = Props; out.Custprops = Custprops; + out.bookType = "xlml"; return out; } @@ -21614,9 +21643,14 @@ var HTML_END = ''; function html_to_workbook(str/*:string*/, opts)/*:Workbook*/ { var mtch = str.match(/[\s\S]*?<\/table>/gi); if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find
"); - if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts); + if(mtch.length == 1) { + var w = sheet_to_workbook(html_to_sheet(mtch[0], opts), opts); + w.bookType = "html"; + return w; + } var wb = book_new(); mtch.forEach(function(s, idx) { book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); }); + wb.bookType = "html"; return wb; } @@ -21732,7 +21766,9 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { } function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ { - return sheet_to_workbook(parse_dom_table(table, opts), opts); + var o = sheet_to_workbook(parse_dom_table(table, opts), opts); + //o.bookType = "dom"; // TODO: define a type for this + return o; } function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ { @@ -22523,10 +22559,13 @@ function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/)/*:Workbook*/ { if(!content) throw new Error("Missing content.xml in ODS / UOF file"); var wb = parse_content_xml(utf8read(content), opts, Styles); if(safegetzipfile(zip, 'meta.xml')) wb.Props = parse_core_props(getzipdata(zip, 'meta.xml')); + wb.bookType = "ods"; return wb; } function parse_fods(data/*:string*/, opts/*:?ParseOpts*/)/*:Workbook*/ { - return parse_content_xml(data, opts); + var wb = parse_content_xml(data, opts); + wb.bookType = "fods"; + return wb; } /* OpenDocument */ @@ -23408,7 +23447,7 @@ function parse_old_storage(buf, sst, rsst, v) { var ret; switch (buf[2]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: ieee }; break; @@ -23468,7 +23507,7 @@ function parse_new_storage(buf, sst, rsst) { var ret; switch (buf[1]) { case 0: - break; + return void 0; case 2: ret = { t: "n", v: d128 }; break; @@ -23773,6 +23812,7 @@ function parse_TN_DocumentArchive(M, root) { }); if (out.SheetNames.length == 0) throw new Error("Empty NUMBERS file"); + out.bookType = "numbers"; return out; } function parse_numbers_iwa(cfb) { @@ -23973,6 +24013,8 @@ 
function write_numbers_iwa(wb, opts) { throw new Error("Too many messages"); } var entry = CFB.find(cfb, dependents[1].location); + if (!entry) + throw "Could not find ".concat(dependents[1].location, " in Numbers template"); var x = parse_iwa_file(decompress_iwa_file(entry.content)); var docroot; for (var xi = 0; xi < x.length; ++xi) { @@ -23980,8 +24022,12 @@ function write_numbers_iwa(wb, opts) { if (packet.id == 1) docroot = packet; } + if (docroot == null) + throw "Could not find message ".concat(1, " in Numbers template"); var sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[1][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -23997,6 +24043,8 @@ function write_numbers_iwa(wb, opts) { entry.size = entry.content.length; sheetrootref = parse_TSP_Reference(sheetref[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -24005,6 +24053,8 @@ function write_numbers_iwa(wb, opts) { } sheetrootref = parse_TSP_Reference(parse_shallow(docroot.messages[0].data)[2][0].data); entry = CFB.find(cfb, dependents[sheetrootref].location); + if (!entry) + throw "Could not find ".concat(dependents[sheetrootref].location, " in Numbers template"); x = parse_iwa_file(decompress_iwa_file(entry.content)); for (xi = 0; xi < x.length; ++xi) { packet = x[xi]; @@ -24017,6 +24067,8 @@ function write_numbers_iwa(wb, opts) { pb[7][0].data = write_varint49(range.e.c + 1); var cruidsref = parse_TSP_Reference(pb[46][0].data); var oldbucket = CFB.find(cfb, dependents[cruidsref].location); + if (!oldbucket) + 
throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); var _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { for (var j = 0; j < _x.length; ++j) { @@ -24059,6 +24111,8 @@ function write_numbers_iwa(wb, opts) { var row_headers = parse_shallow(store[1][0].data); var row_header_ref = parse_TSP_Reference(row_headers[2][0].data); oldbucket = CFB.find(cfb, dependents[row_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != row_header_ref) @@ -24077,6 +24131,8 @@ function write_numbers_iwa(wb, opts) { oldbucket.size = oldbucket.content.length; var col_header_ref = parse_TSP_Reference(store[2][0].data); oldbucket = CFB.find(cfb, dependents[col_header_ref].location); + if (!oldbucket) + throw "Could not find ".concat(dependents[cruidsref].location, " in Numbers template"); _x = parse_iwa_file(decompress_iwa_file(oldbucket.content)); { if (_x[0].id != col_header_ref) @@ -24121,6 +24177,8 @@ function write_numbers_iwa(wb, opts) { var sstref = parse_TSP_Reference(store[4][0].data); (function() { var sentry = CFB.find(cfb, dependents[sstref].location); + if (!sentry) + throw "Could not find ".concat(dependents[sstref].location, " in Numbers template"); var sx = parse_iwa_file(decompress_iwa_file(sentry.content)); var sstroot; for (var sxi = 0; sxi < sx.length; ++sxi) { @@ -24128,6 +24186,8 @@ function write_numbers_iwa(wb, opts) { if (packet2.id == sstref) sstroot = packet2; } + if (sstroot == null) + throw "Could not find message ".concat(sstref, " in Numbers template"); var sstdata = parse_shallow(sstroot.messages[0].data); { sstdata[3] = []; @@ -24153,6 +24213,8 @@ function write_numbers_iwa(wb, opts) { var tileref = parse_TSP_Reference(tl[2][0].data); (function() { var tentry = CFB.find(cfb, dependents[tileref].location); + if (!tentry) + throw "Could not find 
".concat(dependents[tileref].location, " in Numbers template"); var tx = parse_iwa_file(decompress_iwa_file(tentry.content)); var tileroot; for (var sxi = 0; sxi < tx.length; ++sxi) { @@ -24491,6 +24553,8 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ { if(dir.vba.length > 0) out.vbaraw = getzipdata(zip,strip_front_slash(dir.vba[0]),true); else if(dir.defaults && dir.defaults.bin === CT_VBA) out.vbaraw = getzipdata(zip, 'xl/vbaProject.bin',true); } + // TODO: pass back content types metdata for xlsm/xlsx resolution + out.bookType = xlsb ? "xlsb" : "xlsx"; return out; }