parse number values from RTF cells

This commit is contained in:
SheetJS 2022-07-07 02:30:44 -04:00
parent efa36be102
commit 61262617ec
22 changed files with 346 additions and 429 deletions

@ -165,10 +165,15 @@ $(TESTESMFMT): test-esm_%:
FMTS=$* make test-esm
TESTDENOFMT=$(patsubst %,test-deno_%,$(FMT))
.PHONY: $(TESTESMFMT)
.PHONY: $(TESTDENOFMT)
$(TESTDENOFMT): test-deno_%:
FMTS=$* make test-deno
TESTDENOCPFMT=$(patsubst %,test-denocp_%,$(FMT))
.PHONY: $(TESTDENOCPFMT)
$(TESTDENOCPFMT): test-denocp_%:
FMTS=$* make test-denocp
.PHONY: travis
travis: ## Run test suite with minimal output
mocha -R dot -t 30000

@ -1,82 +1,78 @@
var RTF = /*#__PURE__*/(function() {
function rtf_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ {
switch(opts.type) {
case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts);
case 'binary': return rtf_to_sheet_str(d, opts);
case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
case 'array': return rtf_to_sheet_str(cc2str(d), opts);
}
throw new Error("Unrecognized type " + opts.type);
/**
 * Read an RTF payload into a workbook.
 *
 * The raw input is turned into a string according to `opts.type` and the
 * result is handed, together with the untouched `opts`, to `rtf_to_book_str`.
 * Throws an Error when `opts.type` is not one of
 * 'base64', 'binary', 'buffer' or 'array'.
 */
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ {
	var kind = opts.type, text;
	if(kind === 'base64') text = Base64_decode(d);
	else if(kind === 'binary') text = d;
	/* node Buffers can be stringified directly; anything else goes through a2s */
	else if(kind === 'buffer') text = (has_buf && Buffer.isBuffer(d)) ? d.toString('binary') : a2s(d);
	else if(kind === 'array') text = cc2str(d);
	else throw new Error("Unrecognized type " + opts.type);
	return rtf_to_book_str(text, opts);
}
/* TODO: this is a stub */
function rtf_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
var o = opts || {};
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
/* TODO: RTF technically can store multiple tables, even if Excel does not */
function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ {
var o = opts || {};
var sname = o.sheet || "Sheet1";
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} };
wb.Sheets[sname] = ws;
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: payload.join(""), t:"s"};
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
/* String#match with a global regex yields null (not an empty array) when nothing matches */
if(!rows || !rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: payload.join(""), t:"s"};
if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; }
else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); }
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return ws;
}
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { return sheet_to_workbook(rtf_to_sheet(d, opts), opts); }
/* TODO: this is a stub */
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
var o = ["{\\rtf1\\ansi"];
var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
var dense = Array.isArray(ws);
for(var R = r.s.r; R <= r.e.r; ++R) {
o.push("\\trowd\\trautofit1");
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
o.push("\\pard\\intbl");
for(C = r.s.c; C <= r.e.c; ++C) {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
last_index = rtfre.lastIndex;
}
return o.join("") + "}";
}
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return wb;
}
return {
to_workbook: rtf_to_workbook,
to_sheet: rtf_to_sheet,
from_sheet: sheet_to_rtf
};
})();
/* TODO: standardize sheet names as titles for tables */
/**
 * Serialize a worksheet as a minimal RTF document holding a single table.
 *
 * Every row of the sheet range becomes one `\trowd ... \row` group with one
 * `\cellx` definition per column. Each column then contributes exactly one
 * `\cell` terminator, including blank cells, so values stay aligned with their
 * columns when the table is read back.
 *
 * @param ws worksheet, either sparse (object keyed by cell address) or dense (array of rows)
 * @returns the RTF source text
 */
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
	var o = ["{\\rtf1\\ansi"];
	var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
	var dense = Array.isArray(ws);
	for(var R = r.s.r; R <= r.e.r; ++R) {
		o.push("\\trowd\\trautofit1");
		for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
		o.push("\\pard\\intbl");
		for(C = r.s.c; C <= r.e.c; ++C) {
			var coord = encode_cell({r:R,c:C});
			cell = dense ? (ws[R]||[])[C]: ws[coord];
			/* a blank cell still needs its terminator: omitting it shifts every later value in the row one column left */
			if(!cell || cell.v == null && (!cell.f || cell.F)) { o.push(" \\cell"); continue; }
			/* format_cell is expected to fill in cell.w; fall back to empty text if it did not (e.g. formula without a cached value) */
			var text = cell.w || (format_cell(cell), cell.w) || "";
			/* NOTE: both "\r" and "\n" are written as "\par " */
			o.push(" " + text.replace(/[\r\n]/g, "\\par "));
			o.push("\\cell");
		}
		o.push("\\pard\\intbl\\row");
	}
	return o.join("") + "}";
}

@ -105,7 +105,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
}
break;
case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o);
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break;
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return rtf_to_workbook(d, o); break;
case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o);
case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break;
case 0x08: if(n[1] === 0xE7) throw new Error("Unsupported Multiplan 1.x file!"); break;

@ -142,7 +142,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) {
case 'dif': return write_string_type(DIF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'dbf': return write_binary_type(DBF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'prn': return write_string_type(PRN.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'rtf': return write_string_type(RTF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'rtf': return write_string_type(sheet_to_rtf(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'eth': return write_string_type(ETH.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'fods': return write_string_type(write_ods(wb, o), o);
case 'wk1': return write_binary_type(WK_.sheet_to_wk1(wb.Sheets[wb.SheetNames[idx]], o), o);

@ -17,37 +17,40 @@ can be installed with Bash on Windows or with `cygwin`.
### Included Demos
**Frameworks and APIs**
**JavaScript APIs**
- [`XMLHttpRequest and fetch`](xhr/)
- [`Clipboard Data`](https://docs.sheetjs.com/docs/getting-started/demos/clipboard)
- [`Typed Arrays and Math`](array/)
**Frameworks**
- [`angularjs`](angular/)
- [`angular and ionic`](angular2/)
- [`knockout`](knockout/)
- [`meteor`](meteor/)
- [`react, react-native, next`](react/)
- [`vue 2.x, weex, nuxt`](vue/)
- [`XMLHttpRequest and fetch`](xhr/)
- [`nodejs server`](server/)
- [`databases and key/value stores`](database/)
- [`typed arrays and math`](array/)
**Front-End UI Components**
- [`canvas-datagrid`](datagrid/)
- [`x-spreadsheet`](xspreadsheet/)
- [`react-data-grid`](react/modify/)
- [`vue3-table-light`](/vue/modify/)
- [`vue3-table-light`](vue/modify/)
**Platforms and Integrations**
- [`deno`](deno/)
- [`NodeJS Server-Side Processing`](server/)
- [`Deno`](deno/)
- [`electron application`](electron/)
- [`nw.js application`](nwjs/)
- [`NW.js`](nwjs/)
- [`Chrome / Chromium extensions`](chrome/)
- [`Google Sheets API`](https://docs.sheetjs.com/docs/getting-started/demos/gsheet)
- [`ExtendScript for Adobe Apps`](https://docs.sheetjs.com/docs/getting-started/demos/extendscript)
- [`NetSuite SuiteScript`](https://docs.sheetjs.com/docs/getting-started/demos/netsuite)
- [`Salesforce Lightning Web Components`](https://docs.sheetjs.com/docs/getting-started/demos/salesforce)
- [`Excel JavaScript API`](https://docs.sheetjs.com/docs/getting-started/demos/excel)
- [`Headless Browsers`](headless/)
- [`Headless Automation`](https://docs.sheetjs.com/docs/getting-started/demos/headless)
- [`Swift JSC and other engines`](altjs/)
- [`"serverless" functions`](function/)
- [`databases and key/value stores`](database/)
- [`internet explorer`](oldie/)
**Bundlers and Tooling**

@ -1,11 +0,0 @@
{
"env": { "node":true },
"parserOptions": {
"ecmaVersion": 8
},
"rules": {
"no-var": 0,
"semi": [ 2, "always" ]
}
}

@ -1 +0,0 @@
*.pdf

@ -1,52 +1,8 @@
# Headless Browsers
The library, eschewing unstable and nascent ECMAScript features, plays nicely
with most headless browsers. This demo shows a few common headless scenarios.
NodeJS does not ship with its own layout engine. For advanced HTML exports, a
headless browser is generally indistinguishable from a browser process.
## Chromium Automation with Puppeteer
[Puppeteer](https://pptr.dev/) enables headless Chromium automation.
[`html.js`](./html.js) shows a dedicated script for converting an HTML file to
XLSB using puppeteer. The first argument is the path to the HTML file. The
script writes to `output.xlsb`:
```bash
# read from test.html and write to output.xlsb
$ node html.js test.html
```
The script pulls up the webpage using headless Chromium and adds a script tag
reference to the standalone browser build. That will make the `XLSX` variable
available to future scripts added in the page! The browser context is not able
to save the file using `writeFile`, so the demo generates the XLSB spreadsheet
bytes with the `base64` type, sends the string back to the main process, and
uses `fs.writeFileSync` to write the file.
## WebKit Automation with PhantomJS
This was tested using [PhantomJS 2.1.1](https://phantomjs.org/download.html)
```bash
$ phantomjs phantomjs.js
```
The flow is similar to the Puppeteer flow (scrape table and generate workbook in
website context, copy string back, write string to file from main process).
The `binary` type generates strings that can be written in PhantomJS using the
`fs.write` method with mode `"wb"`.
## wkhtmltopdf
This was tested in wkhtmltopdf 0.12.4, installed using the official binaries:
```bash
$ wkhtmltopdf --javascript-delay 20000 http://oss.sheetjs.com/sheetjs/tests/ test.pdf
```
[The new demo](https://docs.sheetjs.com/docs/getting-started/demos/headless)
has a more focused table export example as well as a demo script for Chromium
automation with Puppeteer and multi-browser automation with Playwright.
[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/js-xlsx?pixel)](https://github.com/SheetJS/js-xlsx)

@ -1,51 +0,0 @@
#!/usr/bin/env node
/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */
const puppeteer = require("puppeteer");
const path = require("path");
const fs = require("fs");
/* inf is the path to the html file -> url is a file URL */
let inf = process.argv[2] || "test.html";
let htmlpath = path.join(__dirname, inf);
if(!fs.existsSync(htmlpath)) htmlpath = path.join(process.cwd(), inf);
if(!fs.existsSync(htmlpath)) htmlpath = path.resolve(inf);
if(!fs.existsSync(htmlpath)) { console.error(`Could not find a valid file for \`${inf}\``); process.exit(4); }
console.error(`Reading from ${htmlpath}`);
const url = `file://${htmlpath}`;
/* get the standalone build source (e.g. node_modules/xlsx/dist/xlsx.full.min.js) */
// const websrc = fs.readFileSync(require.resolve("xlsx/dist/xlsx.full.min.js"), "utf8");
const get_lib = (jspath) => fs.readFileSync(path.resolve(__dirname, jspath)).toString();
const websrc = get_lib("xlsx.full.min.js");
(async() => {
/* start browser and go to web page */
const browser = await puppeteer.launch();
const page = await browser.newPage();
page.on("console", msg => console.log("PAGE LOG:", msg.text()));
await page.setViewport({width: 1920, height: 1080});
await page.goto(url, {waitUntil: "networkidle2"});
/* inject library */
await page.addScriptTag({content: websrc});
/* this function `s5s` will be called by the script below, receiving the Base64-encoded file */
await page.exposeFunction("s5s", async(b64) => {
fs.writeFileSync("output.xlsb", b64, {encoding: "base64"});
});
/* generate XLSB file in webpage context and send back a Base64-encoded string */
await page.addScriptTag({content: `
/* call table_to_book on first table */
var wb = XLSX.utils.table_to_book(document.getElementsByTagName("TABLE")[0]);
/* generate XLSB file */
var b64 = XLSX.write(wb, {type: "base64", bookType: "xlsb"});
/* call "s5s" hook exposed from the node process */
window.s5s(b64);
`});
/* cleanup */
await browser.close();
})();

@ -1,35 +0,0 @@
/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */
/* eslint-env phantomjs */
/*
 * PhantomJS demo: load a remote page, convert its first table to an XLSB
 * workbook inside the page, print the data as CSV, and write the workbook
 * bytes to `phantomjs.xlsb`.
 */
var XLSX = require('xlsx');
var page = require('webpage').create();
/* forward console output from the page to the PhantomJS console */
page.onConsoleMessage = function(msg) { console.log(msg); };
/* this code will be run in the page */
/* it is assembled as source text because it is passed to evaluateJavaScript as a string */
var code = [ "function(){",
/* call table_to_book on first table */
"var wb = XLSX.utils.table_to_book(document.body.getElementsByTagName('table')[0]);",
/* generate XLSB file and return binary string */
"return XLSX.write(wb, {type: 'binary', bookType: 'xlsb'});",
"}" ].join("");
page.open('https://sheetjs.com/demos/table', function() {
console.log("Page Loaded");
/* Load the browser script from the SheetJS CDN (cdn.sheetjs.com) */
page.includeJs("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js", function() {
/* Verify the page is loaded by logging the version number */
var version = "function(){ console.log('Library Version:' + window.XLSX.version); }";
page.evaluateJavaScript(version);
/* The code will return a binary string */
var bin = page.evaluateJavaScript(code);
/* parse the returned bytes in this process to print the sheet as CSV */
var workbook = XLSX.read(bin, {type: "binary"});
console.log(XLSX.utils.sheet_to_csv(workbook.Sheets[workbook.SheetNames[0]]));
/* XLSX.writeFile will not work here -- have to write manually */
require("fs").write("phantomjs.xlsb", bin, "wb");
phantom.exit();
});
});

@ -1 +0,0 @@
../extendscript/sheetjs.xlsx

@ -1,35 +0,0 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8"/>
<title>SheetJS Table Export</title>
</head>
<body>
<table id="data-table">
<tr>
<td id="data-table-A1"><span contenteditable="true">SheetJS</span></td>
<td id="data-table-B1"><span contenteditable="true">Table</span></td>
<td id="data-table-C1"><span contenteditable="true">Export</span></td>
<td id="data-table-D1"><span contenteditable="true">Test</span></td>
</tr>
<tr>
<td id="data-table-A2"><span contenteditable="true">&#xBB5;&#xBA3;&#xB95;&#xBCD;&#xB95;&#xBAE;&#xBCD;</span></td>
<td id="data-table-B2"><span contenteditable="true">&#xE2A;&#xE27;&#xE31;&#xE2A;&#xE14;&#xE35;</span></td>
<td id="data-table-C2"><span contenteditable="true">&#x4F60;&#x597D;</span></td>
<td id="data-table-D2"><span contenteditable="true">&#xAC00;&#xC9C0;&#xB9C8;</span></td>
</tr>
<tr>
<td id="data-table-A3"><span contenteditable="true">1</span></td>
<td id="data-table-B3"><span contenteditable="true">2</span></td>
<td id="data-table-C3"><span contenteditable="true">3</span></td>
<td id="data-table-D3"><span contenteditable="true">4</span></td>
</tr>
<tr>
<td id="data-table-A4"><span contenteditable="true">Click</span></td>
<td id="data-table-B4"><span contenteditable="true">to</span></td>
<td id="data-table-C4"><span contenteditable="true">edit</span></td>
<td id="data-table-D4"><span contenteditable="true">cells</span></td>
</tr>
</table>
</body>
</html>

@ -1 +0,0 @@
../../dist/xlsx.full.min.js

@ -68,13 +68,13 @@ var export_xlsx = (function() {
var HTMLOUT = document.getElementById('htmlout');
var input = document.createElement('input');
input.style.display = 'none';
input.setAttribute('nwsaveas', 'sheetjs.xlsx');
input.setAttribute('nwsaveas', 'SheetJSNWDemo.xlsx');
input.setAttribute('type', 'file');
document.body.appendChild(input);
input.addEventListener('cancel',function(){ alert("Save was canceled!"); });
input.addEventListener('change',function(e){
var filename=this.value, bookType=(filename.match(/[^\.]*$/)||["xlsx"])[0];
var wb = XLSX.utils.table_to_book(HTMLOUT);
var wb = XLSX.utils.table_to_book(HTMLOUT.getElementsByTagName("TABLE")[0]);
var wbout = XLSX.write(wb, {type:'buffer', bookType:bookType});
fs.writeFile(filename, wbout, function(err) {
if(!err) return alert("Saved to " + filename);

@ -4,7 +4,7 @@
"version": "0.0.0",
"main": "index.html",
"dependencies": {
"nw": "~0.63.0",
"nw": "~0.66.0",
"xlsx": "https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz"
}
}

18
test.js

@ -723,6 +723,7 @@ describe('output formats', function() {
["fods", true, true],
["csv", true, true],
["txt", true, true],
["rtf", false, true],
["sylk", false, true],
["eth", false, true],
["html", true, true],
@ -2531,6 +2532,23 @@ describe('js -> file -> js', function() {
});
});
describe('rtf', function() {
it('roundtrip should be idempotent', function() {
var ws = X.utils.aoa_to_sheet([
[1,2,3],
[true, false, null, "sheetjs"],
["foo", "bar", fixdate, "0.3"],
["baz", null, "q\"ux"]
]);
var wb1 = X.utils.book_new();
X.utils.book_append_sheet(wb1, ws, "Sheet1");
var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"});
var wb2 = X.read(rtf1, {type: "string"});
var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"});
assert.equal(rtf1, rtf2);
});
});
describe('corner cases', function() {
it('output functions', function() {
var ws = X.utils.aoa_to_sheet([

18
test.mjs generated

@ -720,6 +720,7 @@ describe('output formats', function() {
["fods", true, true],
["csv", true, true],
["txt", true, true],
["rtf", false, true],
["sylk", false, true],
["eth", false, true],
["html", true, true],
@ -2517,6 +2518,23 @@ describe('js -> file -> js', function() {
});
});
describe('rtf', function() {
it('roundtrip should be idempotent', function() {
var ws = X.utils.aoa_to_sheet([
[1,2,3],
[true, false, null, "sheetjs"],
["foo", "bar", fixdate, "0.3"],
["baz", null, "q\"ux"]
]);
var wb1 = X.utils.book_new();
X.utils.book_append_sheet(wb1, ws, "Sheet1");
var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"});
var wb2 = X.read(rtf1, {type: "string"});
var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"});
assert.equal(rtf1, rtf2);
});
});
describe('corner cases', function() {
it('output functions', function() {
var ws = X.utils.aoa_to_sheet([

@ -737,6 +737,7 @@ describe('output formats', function() {
["fods", true, true],
["csv", true, true],
["txt", true, true],
["rtf", false, true],
["sylk", false, true],
["eth", false, true],
["html", true, true],
@ -2226,6 +2227,11 @@ describe('numbers', function() {
assert.equal(get_cell(ws2, "A1").v, 1);
assert.equal(get_cell(ws2, "ALL2").v, 2);
});
it('should support icloud.com files', function() {
var wb = X.read(fs.readFileSync(dir + 'Attendance.numbers'), {type:TYPE, WTF:true});
var ws = wb.Sheets["Attendance"];
assert.equal(get_cell(ws, "A1").v, "Date");
});
});
describe('dbf', function() {
@ -2415,6 +2421,23 @@ describe('js -> file -> js', function() {
});
});
describe('rtf', function() {
it('roundtrip should be idempotent', function() {
var ws = X.utils.aoa_to_sheet([
[1,2,3],
[true, false, null, "sheetjs"],
["foo", "bar", fixdate, "0.3"],
["baz", null, "q\"ux"]
]);
var wb1 = X.utils.book_new();
X.utils.book_append_sheet(wb1, ws, "Sheet1");
var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"});
var wb2 = X.read(rtf1, {type: "string"});
var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"});
assert.equal(rtf1, rtf2);
});
});
describe('corner cases', function() {
it('output functions', function() {
var ws = X.utils.aoa_to_sheet([

18
test.ts

@ -737,6 +737,7 @@ Deno.test('output formats', async function(t) {
["fods", true, true],
["csv", true, true],
["txt", true, true],
["rtf", false, true],
["sylk", false, true],
["eth", false, true],
["html", true, true],
@ -2420,6 +2421,23 @@ Deno.test('js -> file -> js', async function(t) {
});
});
Deno.test('rtf', async function(t) {
await t.step('roundtrip should be idempotent', async function(t) {
var ws = X.utils.aoa_to_sheet([
[1,2,3],
[true, false, null, "sheetjs"],
["foo", "bar", fixdate, "0.3"],
["baz", null, "q\"ux"]
]);
var wb1 = X.utils.book_new();
X.utils.book_append_sheet(wb1, ws, "Sheet1");
var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"});
var wb2 = X.read(rtf1, {type: "string"});
var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"});
assert.equal(rtf1, rtf2);
});
});
Deno.test('corner cases', async function(t) {
await t.step('output functions', async function(t) {
var ws = X.utils.aoa_to_sheet([

@ -736,6 +736,7 @@ Deno.test('output formats', async function(t) {
["fods", true, true],
["csv", true, true],
["txt", true, true],
["rtf", false, true],
["sylk", false, true],
["eth", false, true],
["html", true, true],
@ -2225,6 +2226,11 @@ Deno.test('numbers', async function(t) {
assert.equal(get_cell(ws2, "A1").v, 1);
assert.equal(get_cell(ws2, "ALL2").v, 2);
});
await t.step('should support icloud.com files', async function(t) {
var wb = X.read(fs.readFileSync(dir + 'Attendance.numbers'), {type:TYPE, WTF:true});
var ws = wb.Sheets["Attendance"];
assert.equal(get_cell(ws, "A1").v, "Date");
});
});
Deno.test('dbf', async function(t) {
@ -2414,6 +2420,23 @@ Deno.test('js -> file -> js', async function(t) {
});
});
Deno.test('rtf', async function(t) {
await t.step('roundtrip should be idempotent', async function(t) {
var ws = X.utils.aoa_to_sheet([
[1,2,3],
[true, false, null, "sheetjs"],
["foo", "bar", fixdate, "0.3"],
["baz", null, "q\"ux"]
]);
var wb1 = X.utils.book_new();
X.utils.book_append_sheet(wb1, ws, "Sheet1");
var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"});
var wb2 = X.read(rtf1, {type: "string"});
var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"});
assert.equal(rtf1, rtf2);
});
});
Deno.test('corner cases', async function(t) {
await t.step('output functions', async function(t) {
var ws = X.utils.aoa_to_sheet([

@ -10322,88 +10322,84 @@ function parse_FilePass(blob, length/*:number*/, opts) {
}
var RTF = /*#__PURE__*/(function() {
function rtf_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ {
switch(opts.type) {
case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts);
case 'binary': return rtf_to_sheet_str(d, opts);
case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
case 'array': return rtf_to_sheet_str(cc2str(d), opts);
}
throw new Error("Unrecognized type " + opts.type);
/**
 * Read an RTF payload into a workbook.
 *
 * The raw input is turned into a string according to `opts.type` and the
 * result is handed, together with the untouched `opts`, to `rtf_to_book_str`.
 * Throws an Error when `opts.type` is not one of
 * 'base64', 'binary', 'buffer' or 'array'.
 */
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ {
	var kind = opts.type, text;
	if(kind === 'base64') text = Base64_decode(d);
	else if(kind === 'binary') text = d;
	/* node Buffers can be stringified directly; anything else goes through a2s */
	else if(kind === 'buffer') text = (has_buf && Buffer.isBuffer(d)) ? d.toString('binary') : a2s(d);
	else if(kind === 'array') text = cc2str(d);
	else throw new Error("Unrecognized type " + opts.type);
	return rtf_to_book_str(text, opts);
}
/* TODO: this is a stub */
function rtf_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
var o = opts || {};
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
/* TODO: RTF technically can store multiple tables, even if Excel does not */
function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ {
var o = opts || {};
var sname = o.sheet || "Sheet1";
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} };
wb.Sheets[sname] = ws;
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: payload.join(""), t:"s"};
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
/* String#match with a global regex yields null (not an empty array) when nothing matches */
if(!rows || !rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: payload.join(""), t:"s"};
if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; }
else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); }
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return ws;
}
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { return sheet_to_workbook(rtf_to_sheet(d, opts), opts); }
/* TODO: this is a stub */
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
var o = ["{\\rtf1\\ansi"];
var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
var dense = Array.isArray(ws);
for(var R = r.s.r; R <= r.e.r; ++R) {
o.push("\\trowd\\trautofit1");
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
o.push("\\pard\\intbl");
for(C = r.s.c; C <= r.e.c; ++C) {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
last_index = rtfre.lastIndex;
}
return o.join("") + "}";
}
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return wb;
}
return {
to_workbook: rtf_to_workbook,
to_sheet: rtf_to_sheet,
from_sheet: sheet_to_rtf
};
})();
/* TODO: standardize sheet names as titles for tables */
/**
 * Serialize a worksheet as a minimal RTF document holding a single table.
 *
 * Every row of the sheet range becomes one `\trowd ... \row` group with one
 * `\cellx` definition per column. Each column then contributes exactly one
 * `\cell` terminator, including blank cells, so values stay aligned with their
 * columns when the table is read back.
 *
 * @param ws worksheet, either sparse (object keyed by cell address) or dense (array of rows)
 * @returns the RTF source text
 */
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
	var o = ["{\\rtf1\\ansi"];
	var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
	var dense = Array.isArray(ws);
	for(var R = r.s.r; R <= r.e.r; ++R) {
		o.push("\\trowd\\trautofit1");
		for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
		o.push("\\pard\\intbl");
		for(C = r.s.c; C <= r.e.c; ++C) {
			var coord = encode_cell({r:R,c:C});
			cell = dense ? (ws[R]||[])[C]: ws[coord];
			/* a blank cell still needs its terminator: omitting it shifts every later value in the row one column left */
			if(!cell || cell.v == null && (!cell.f || cell.F)) { o.push(" \\cell"); continue; }
			/* format_cell is expected to fill in cell.w; fall back to empty text if it did not (e.g. formula without a cached value) */
			var text = cell.w || (format_cell(cell), cell.w) || "";
			/* NOTE: both "\r" and "\n" are written as "\par " */
			o.push(" " + text.replace(/[\r\n]/g, "\\par "));
			o.push("\\cell");
		}
		o.push("\\pard\\intbl\\row");
	}
	return o.join("") + "}";
}
function hex2RGB(h) {
var o = h.slice(h[0]==="#"?1:0).slice(0,6);
return [parseInt(o.slice(0,2),16),parseInt(o.slice(2,4),16),parseInt(o.slice(4,6),16)];
@ -24945,7 +24941,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
}
break;
case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o);
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break;
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return rtf_to_workbook(d, o); break;
case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o);
case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break;
case 0x08: if(n[1] === 0xE7) throw new Error("Unsupported Multiplan 1.x file!"); break;
@ -25106,7 +25102,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) {
case 'dif': return write_string_type(DIF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'dbf': return write_binary_type(DBF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'prn': return write_string_type(PRN.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'rtf': return write_string_type(RTF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'rtf': return write_string_type(sheet_to_rtf(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'eth': return write_string_type(ETH.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'fods': return write_string_type(write_ods(wb, o), o);
case 'wk1': return write_binary_type(WK_.sheet_to_wk1(wb.Sheets[wb.SheetNames[idx]], o), o);

152
xlsx.mjs generated

@ -10317,88 +10317,84 @@ function parse_FilePass(blob, length/*:number*/, opts) {
}
var RTF = /*#__PURE__*/(function() {
function rtf_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ {
switch(opts.type) {
case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts);
case 'binary': return rtf_to_sheet_str(d, opts);
case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts);
case 'array': return rtf_to_sheet_str(cc2str(d), opts);
}
throw new Error("Unrecognized type " + opts.type);
/**
 * Read an RTF payload into a workbook.
 *
 * The raw input is turned into a string according to `opts.type` and the
 * result is handed, together with the untouched `opts`, to `rtf_to_book_str`.
 * Throws an Error when `opts.type` is not one of
 * 'base64', 'binary', 'buffer' or 'array'.
 */
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ {
	var kind = opts.type, text;
	if(kind === 'base64') text = Base64_decode(d);
	else if(kind === 'binary') text = d;
	/* node Buffers can be stringified directly; anything else goes through a2s */
	else if(kind === 'buffer') text = (has_buf && Buffer.isBuffer(d)) ? d.toString('binary') : a2s(d);
	else if(kind === 'array') text = cc2str(d);
	else throw new Error("Unrecognized type " + opts.type);
	return rtf_to_book_str(text, opts);
}
/* TODO: this is a stub */
function rtf_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
var o = opts || {};
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
/* TODO: RTF technically can store multiple tables, even if Excel does not */
function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ {
var o = opts || {};
var sname = o.sheet || "Sheet1";
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} };
wb.Sheets[sname] = ws;
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
if(!rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: payload.join(""), t:"s"};
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
last_index = rtfre.lastIndex;
var rows = str.match(/\\trowd[\s\S]*?\\row\b/g);
/* String#match with a global regex yields null (not an empty array) when nothing matches */
if(!rows || !rows.length) throw new Error("RTF missing table");
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/);
rows.forEach(function(rowtf, R) {
if(Array.isArray(ws)) ws[R] = [];
var rtfre = /\\[\w\-]+\b/g;
var last_index = 0;
var res;
var C = -1;
var payload = [];
while((res = rtfre.exec(rowtf))) {
var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length);
if(data.charCodeAt(0) == 0x20) data = data.slice(1);
if(data.length) payload.push(data);
switch(res[0]) {
case "\\cell":
++C;
if(payload.length) {
// TODO: value parsing, including codepage adjustments
var cell = {v: payload.join(""), t:"s"};
if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; }
else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); }
if(Array.isArray(ws)) ws[R][C] = cell;
else ws[encode_cell({r:R, c:C})] = cell;
}
payload = [];
break;
case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par"
payload.push("\n");
break;
}
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return ws;
}
/* Parse the RTF data into a worksheet, then wrap that worksheet with sheet_to_workbook (same opts for both steps) */
function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ {
	var sheet = rtf_to_sheet(d, opts);
	return sheet_to_workbook(sheet, opts);
}
/* TODO: this is a stub */
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
var o = ["{\\rtf1\\ansi"];
var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
var dense = Array.isArray(ws);
for(var R = r.s.r; R <= r.e.r; ++R) {
o.push("\\trowd\\trautofit1");
for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
o.push("\\pard\\intbl");
for(C = r.s.c; C <= r.e.c; ++C) {
var coord = encode_cell({r:R,c:C});
cell = dense ? (ws[R]||[])[C]: ws[coord];
if(!cell || cell.v == null && (!cell.f || cell.F)) continue;
o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par "));
o.push("\\cell");
}
o.push("\\pard\\intbl\\row");
last_index = rtfre.lastIndex;
}
return o.join("") + "}";
}
if(C > range.e.c) range.e.c = C;
});
ws['!ref'] = encode_range(range);
return wb;
}
return {
to_workbook: rtf_to_workbook,
to_sheet: rtf_to_sheet,
from_sheet: sheet_to_rtf
};
})();
/* TODO: standardize sheet names as titles for tables */
/*
 * Serialize a worksheet as an RTF document containing one table.
 *
 * ws - worksheet, either sparse (cells keyed by A1-style address) or dense
 *      (array of row arrays); the range to write is taken from ws['!ref']
 *
 * Returns the RTF text. Every worksheet row becomes one \trowd ... \row group
 * and every column in the range becomes exactly one \cell, so values stay in
 * their columns even when a row has gaps.
 *
 * A worksheet without '!ref' yields an empty RTF document.
 */
function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ {
	var o = ["{\\rtf1\\ansi"];
	/* nothing to serialize without a range */
	if(!ws['!ref']) return o[0] + "}";
	var r = safe_decode_range(ws['!ref']), cell/*:Cell*/;
	var dense = Array.isArray(ws);
	for(var R = r.s.r; R <= r.e.r; ++R) {
		o.push("\\trowd\\trautofit1");
		/* one \cellx boundary per column in the range */
		for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1));
		o.push("\\pard\\intbl");
		for(C = r.s.c; C <= r.e.c; ++C) {
			cell = dense ? (ws[R]||[])[C] : ws[encode_cell({r:R,c:C})];
			if(!cell || cell.v == null && (!cell.f || cell.F)) {
				/* still terminate the empty cell, otherwise later cells shift left */
				o.push(" \\cell");
				continue;
			}
			/* prefer cached text; format_cell may leave cell.w unset, so fall back to "" */
			var text = cell.w || (format_cell(cell), cell.w) || "";
			/* line breaks inside a cell are written as \par */
			o.push(" " + text.replace(/[\r\n]/g, "\\par "));
			o.push("\\cell");
		}
		o.push("\\pard\\intbl\\row");
	}
	return o.join("") + "}";
}
function hex2RGB(h) {
var o = h.slice(h[0]==="#"?1:0).slice(0,6);
return [parseInt(o.slice(0,2),16),parseInt(o.slice(2,4),16),parseInt(o.slice(4,6),16)];
@ -24940,7 +24936,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
}
break;
case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o);
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break;
case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return rtf_to_workbook(d, o); break;
case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o);
case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break;
case 0x08: if(n[1] === 0xE7) throw new Error("Unsupported Multiplan 1.x file!"); break;
@ -25101,7 +25097,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) {
case 'dif': return write_string_type(DIF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'dbf': return write_binary_type(DBF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'prn': return write_string_type(PRN.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'rtf': return write_string_type(RTF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'rtf': return write_string_type(sheet_to_rtf(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'eth': return write_string_type(ETH.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o);
case 'fods': return write_string_type(write_ods(wb, o), o);
case 'wk1': return write_binary_type(WK_.sheet_to_wk1(wb.Sheets[wb.SheetNames[idx]], o), o);