diff --git a/Makefile b/Makefile index 41c6079..68aeb1b 100644 --- a/Makefile +++ b/Makefile @@ -165,10 +165,15 @@ $(TESTESMFMT): test-esm_%: FMTS=$* make test-esm TESTDENOFMT=$(patsubst %,test-deno_%,$(FMT)) -.PHONY: $(TESTESMFMT) +.PHONY: $(TESTDENOFMT) $(TESTDENOFMT): test-deno_%: FMTS=$* make test-deno +TESTDENOCPFMT=$(patsubst %,test-denocp_%,$(FMT)) +.PHONY: $(TESTDENOCPFMT) +$(TESTDENOCPFMT): test-denocp_%: + FMTS=$* make test-denocp + .PHONY: travis travis: ## Run test suite with minimal output mocha -R dot -t 30000 diff --git a/bits/45_rtf.js b/bits/45_rtf.js index b63dac7..3a1b7ee 100644 --- a/bits/45_rtf.js +++ b/bits/45_rtf.js @@ -1,82 +1,78 @@ -var RTF = /*#__PURE__*/(function() { - function rtf_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ { - switch(opts.type) { - case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts); - case 'binary': return rtf_to_sheet_str(d, opts); - case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); - case 'array': return rtf_to_sheet_str(cc2str(d), opts); - } - throw new Error("Unrecognized type " + opts.type); +function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { + switch(opts.type) { + case 'base64': return rtf_to_book_str(Base64_decode(d), opts); + case 'binary': return rtf_to_book_str(d, opts); + case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); + case 'array': return rtf_to_book_str(cc2str(d), opts); } + throw new Error("Unrecognized type " + opts.type); +} - /* TODO: this is a stub */ - function rtf_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ { - var o = opts || {}; - var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/); +/* TODO: RTF technically can store multiple tables, even if Excel does not */ +function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ { + var o = opts || {}; + var sname = o.sheet || "Sheet1"; + var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/); + var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} }; + wb.Sheets[sname] = ws; - var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); - if(!rows.length) throw new Error("RTF missing table"); - var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); - rows.forEach(function(rowtf, R) { - if(Array.isArray(ws)) ws[R] = []; - var rtfre = /\\[\w\-]+\b/g; - var last_index = 0; - var res; - var C = -1; - var payload = []; - while((res = rtfre.exec(rowtf))) { - var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); - if(data.charCodeAt(0) == 0x20) data = data.slice(1); - if(data.length) payload.push(data); - switch(res[0]) { - case "\\cell": - ++C; - if(payload.length) { - // TODO: value parsing, including codepage adjustments - var cell = {v: payload.join(""), t:"s"}; - if(Array.isArray(ws)) ws[R][C] = cell; - else ws[encode_cell({r:R, c:C})] = cell; - } - payload = []; - break; - case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" - payload.push("\n"); - break; - } - last_index = rtfre.lastIndex; + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); + if(!rows.length) throw new Error("RTF missing table"); + var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); + rows.forEach(function(rowtf, R) { + if(Array.isArray(ws)) ws[R] = []; + var rtfre = /\\[\w\-]+\b/g; + var last_index = 0; + var res; + var C = -1; + var payload = []; + while((res = rtfre.exec(rowtf))) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if(data.charCodeAt(0) == 0x20) data = data.slice(1); + if(data.length) payload.push(data); + switch(res[0]) { + case "\\cell": + ++C; + if(payload.length) { + // TODO: value parsing, including codepage adjustments + var cell = {v: payload.join(""), t:"s"}; + if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; } + else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); } + + if(Array.isArray(ws)) ws[R][C] = cell; + else ws[encode_cell({r:R, c:C})] = cell; + } + payload = []; + break; + case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" + payload.push("\n"); + break; } - if(C > range.e.c) range.e.c = C; - }); - ws['!ref'] = encode_range(range); - return ws; - } - - function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { return sheet_to_workbook(rtf_to_sheet(d, opts), opts); } - - /* TODO: this is a stub */ - function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ { - var o = ["{\\rtf1\\ansi"]; - var r = safe_decode_range(ws['!ref']), cell/*:Cell*/; - var dense = Array.isArray(ws); - for(var R = r.s.r; R <= r.e.r; ++R) { - o.push("\\trowd\\trautofit1"); - for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); - o.push("\\pard\\intbl"); - for(C = r.s.c; C <= r.e.c; ++C) { - var coord = encode_cell({r:R,c:C}); - cell = dense ? (ws[R]||[])[C]: ws[coord]; - if(!cell || cell.v == null && (!cell.f || cell.F)) continue; - o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); - o.push("\\cell"); - } - o.push("\\pard\\intbl\\row"); + last_index = rtfre.lastIndex; } - return o.join("") + "}"; - } + if(C > range.e.c) range.e.c = C; + }); + ws['!ref'] = encode_range(range); + return wb; +} - return { - to_workbook: rtf_to_workbook, - to_sheet: rtf_to_sheet, - from_sheet: sheet_to_rtf - }; -})(); +/* TODO: standardize sheet names as titles for tables */ +function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ { + var o = ["{\\rtf1\\ansi"]; + var r = safe_decode_range(ws['!ref']), cell/*:Cell*/; + var dense = Array.isArray(ws); + for(var R = r.s.r; R <= r.e.r; ++R) { + o.push("\\trowd\\trautofit1"); + for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); + o.push("\\pard\\intbl"); + for(C = r.s.c; C <= r.e.c; ++C) { + var coord = encode_cell({r:R,c:C}); + cell = dense ? (ws[R]||[])[C]: ws[coord]; + if(!cell || cell.v == null && (!cell.f || cell.F)) continue; + o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); + o.push("\\cell"); + } + o.push("\\pard\\intbl\\row"); + } + return o.join("") + "}"; +} diff --git a/bits/87_read.js b/bits/87_read.js index 19809f0..0908895 100644 --- a/bits/87_read.js +++ b/bits/87_read.js @@ -105,7 +105,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { } break; case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o); - case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break; + case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return rtf_to_workbook(d, o); break; case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o); case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break; case 0x08: if(n[1] === 0xE7) throw new Error("Unsupported Multiplan 1.x file!"); break; diff --git a/bits/88_write.js b/bits/88_write.js index c1a8b77..7ca6a42 100644 --- a/bits/88_write.js +++ b/bits/88_write.js @@ -142,7 +142,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) { case 'dif': return write_string_type(DIF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'dbf': return write_binary_type(DBF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'prn': return write_string_type(PRN.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); - case 'rtf': return write_string_type(RTF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); + case 'rtf': return write_string_type(sheet_to_rtf(wb.Sheets[wb.SheetNames[idx]], o), o); case 'eth': return write_string_type(ETH.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'fods': return write_string_type(write_ods(wb, o), o); case 'wk1': return write_binary_type(WK_.sheet_to_wk1(wb.Sheets[wb.SheetNames[idx]], o), o); diff --git a/demos/README.md b/demos/README.md index 73c75e3..96c9d86 100644 --- a/demos/README.md +++ b/demos/README.md @@ -17,37 +17,40 @@ can be installed with Bash on Windows or with `cygwin`. ### Included Demos -**Frameworks and APIs** +**JavaScript APIs** +- [`XMLHttpRequest and fetch`](xhr/) +- [`Clipboard Data`](https://docs.sheetjs.com/docs/getting-started/demos/clipboard) +- [`Typed Arrays and Math`](array/) + +**Frameworks** - [`angularjs`](angular/) - [`angular and ionic`](angular2/) - [`knockout`](knockout/) - [`meteor`](meteor/) - [`react, react-native, next`](react/) - [`vue 2.x, weex, nuxt`](vue/) -- [`XMLHttpRequest and fetch`](xhr/) -- [`nodejs server`](server/) -- [`databases and key/value stores`](database/) -- [`typed arrays and math`](array/) **Front-End UI Components** - [`canvas-datagrid`](datagrid/) - [`x-spreadsheet`](xspreadsheet/) - [`react-data-grid`](react/modify/) -- [`vue3-table-light`](/vue/modify/) +- [`vue3-table-light`](vue/modify/) **Platforms and Integrations** -- [`deno`](deno/) +- [`NodeJS Server-Side Processing`](server/) +- [`Deno`](deno/) - [`electron application`](electron/) -- [`nw.js application`](nwjs/) +- [`NW.js`](nwjs/) - [`Chrome / Chromium extensions`](chrome/) - [`Google Sheets API`](https://docs.sheetjs.com/docs/getting-started/demos/gsheet) - [`ExtendScript for Adobe Apps`](https://docs.sheetjs.com/docs/getting-started/demos/extendscript) - [`NetSuite SuiteScript`](https://docs.sheetjs.com/docs/getting-started/demos/netsuite) - [`SalesForce Lightning Web Components`](https://docs.sheetjs.com/docs/getting-started/demos/salesforce) - [`Excel JavaScript API`](https://docs.sheetjs.com/docs/getting-started/demos/excel) -- [`Headless Browsers`](headless/) +- [`Headless Automation`](https://docs.sheetjs.com/docs/getting-started/demos/headless) - [`Swift JSC and other engines`](altjs/) - [`"serverless" functions`](function/) +- [`databases and key/value stores`](database/) - [`internet explorer`](oldie/) **Bundlers and Tooling** diff --git a/demos/headless/.eslintrc b/demos/headless/.eslintrc deleted file mode 100644 index 4d91234..0000000 --- a/demos/headless/.eslintrc +++ /dev/null @@ -1,11 +0,0 @@ -{ - - "env": { "node":true }, - "parserOptions": { - "ecmaVersion": 8 - }, - "rules": { - "no-var": 0, - "semi": [ 2, "always" ] - } -} diff --git a/demos/headless/.gitignore b/demos/headless/.gitignore deleted file mode 100644 index a136337..0000000 --- a/demos/headless/.gitignore +++ /dev/null @@ -1 +0,0 @@ -*.pdf diff --git a/demos/headless/README.md b/demos/headless/README.md index bb9e196..43fb249 100644 --- a/demos/headless/README.md +++ b/demos/headless/README.md @@ -1,52 +1,8 @@ # Headless Browsers -The library, eschewing unstable and nascent ECMAScript features, plays nicely -with most headless browsers. This demo shows a few common headless scenarios. - -NodeJS does not ship with its own layout engine. For advanced HTML exports, a -headless browser is generally indistinguishable from a browser process. - -## Chromium Automation with Puppeteer - -[Puppeteer](https://pptr.dev/) enables headless Chromium automation. - -[`html.js`](./html.js) shows a dedicated script for converting an HTML file to -XLSB using puppeteer. The first argument is the path to the HTML file. The -script writes to `output.xlsb`: - -```bash -# read from test.html and write to output.xlsb -$ node html.js test.html -``` - -The script pulls up the webpage using headless Chromium and adds a script tag -reference to the standalone browser build. That will make the `XLSX` variable -available to future scripts added in the page! The browser context is not able -to save the file using `writeFile`, so the demo generates the XLSB spreadsheet -bytes with the `base64` type, sends the string back to the main process, and -uses `fs.writeFileSync` to write the file. - -## WebKit Automation with PhantomJS - -This was tested using [PhantomJS 2.1.1](https://phantomjs.org/download.html) - -```bash -$ phantomjs phantomjs.js -``` - -The flow is similar to the Puppeteer flow (scrape table and generate workbook in -website context, copy string back, write string to file from main process). - -The `binary` type generates strings that can be written in PhantomJS using the -`fs.write` method with mode `"wb"`. - -## wkhtmltopdf - -This was tested in wkhtmltopdf 0.12.4, installed using the official binaries: - -```bash -$ wkhtmltopdf --javascript-delay 20000 http://oss.sheetjs.com/sheetjs/tests/ test.pdf -``` +[The new demo](https://docs.sheetjs.com/docs/getting-started/demos/headless) +has a more focused table export example as well as a demo script for Chromium +automation with Puppeteer and multi-browser automation with Playwright. [![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/js-xlsx?pixel)](https://github.com/SheetJS/js-xlsx) diff --git a/demos/headless/html.js b/demos/headless/html.js deleted file mode 100755 index 4abec89..0000000 --- a/demos/headless/html.js +++ /dev/null @@ -1,51 +0,0 @@ -#!/usr/bin/env node -/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */ -const puppeteer = require("puppeteer"); -const path = require("path"); -const fs = require("fs"); - -/* inf is the path to the html file -> url is a file URL */ -let inf = process.argv[2] || "test.html"; -let htmlpath = path.join(__dirname, inf); -if(!fs.existsSync(htmlpath)) htmlpath = path.join(process.cwd(), inf); -if(!fs.existsSync(htmlpath)) htmlpath = path.resolve(inf); -if(!fs.existsSync(htmlpath)) { console.error(`Could not find a valid file for \`${inf}\``); process.exit(4); } -console.error(`Reading from ${htmlpath}`); -const url = `file://${htmlpath}`; - -/* get the standalone build source (e.g. node_modules/xlsx/dist/xlsx.full.min.js) */ -// const websrc = fs.readFileSync(require.resolve("xlsx/dist/xlsx.full.min.js"), "utf8"); -const get_lib = (jspath) => fs.readFileSync(path.resolve(__dirname, jspath)).toString(); -const websrc = get_lib("xlsx.full.min.js"); - -(async() => { - /* start browser and go to web page */ - const browser = await puppeteer.launch(); - const page = await browser.newPage(); - page.on("console", msg => console.log("PAGE LOG:", msg.text())); - await page.setViewport({width: 1920, height: 1080}); - await page.goto(url, {waitUntil: "networkidle2"}); - - /* inject library */ - await page.addScriptTag({content: websrc}); - - /* this function `s5s` will be called by the script below, receiving the Base64-encoded file */ - await page.exposeFunction("s5s", async(b64) => { - fs.writeFileSync("output.xlsb", b64, {encoding: "base64"}); - }); - - /* generate XLSB file in webpage context and send back a Base64-encoded string */ - await page.addScriptTag({content: ` - /* call table_to_book on first table */ - var wb = XLSX.utils.table_to_book(document.getElementsByTagName("TABLE")[0]); - - /* generate XLSB file */ - var b64 = XLSX.write(wb, {type: "base64", bookType: "xlsb"}); - - /* call "s5s" hook exposed from the node process */ - window.s5s(b64); - `}); - - /* cleanup */ - await browser.close(); -})(); diff --git a/demos/headless/phantomjs.js b/demos/headless/phantomjs.js deleted file mode 100644 index 27f6af7..0000000 --- a/demos/headless/phantomjs.js +++ /dev/null @@ -1,35 +0,0 @@ -/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */ -/* eslint-env phantomjs */ -var XLSX = require('xlsx'); - -var page = require('webpage').create(); -page.onConsoleMessage = function(msg) { console.log(msg); }; - -/* this code will be run in the page */ -var code = [ "function(){", - /* call table_to_book on first table */ - "var wb = XLSX.utils.table_to_book(document.body.getElementsByTagName('table')[0]);", - - /* generate XLSB file and return binary string */ - "return XLSX.write(wb, {type: 'binary', bookType: 'xlsb'});", -"}" ].join(""); - -page.open('https://sheetjs.com/demos/table', function() { - console.log("Page Loaded"); - /* Load the browser script from the UNPKG CDN */ - page.includeJs("https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js", function() { - /* Verify the page is loaded by logging the version number */ - var version = "function(){ console.log('Library Version:' + window.XLSX.version); }"; - page.evaluateJavaScript(version); - - /* The code will return a binary string */ - var bin = page.evaluateJavaScript(code); - var workbook = XLSX.read(bin, {type: "binary"}); - console.log(XLSX.utils.sheet_to_csv(workbook.Sheets[workbook.SheetNames[0]])); - - /* XLSX.writeFile will not work here -- have to write manually */ - require("fs").write("phantomjs.xlsb", bin, "wb"); - phantom.exit(); - }); -}); - diff --git a/demos/headless/sheetjs.xlsx b/demos/headless/sheetjs.xlsx deleted file mode 120000 index 1386e36..0000000 --- a/demos/headless/sheetjs.xlsx +++ /dev/null @@ -1 +0,0 @@ -../extendscript/sheetjs.xlsx \ No newline at end of file diff --git a/demos/headless/test.html b/demos/headless/test.html deleted file mode 100644 index 2a6f24c..0000000 --- a/demos/headless/test.html +++ /dev/null @@ -1,35 +0,0 @@ - - - - - SheetJS Table Export - - - - - - - - - - - - - - - - - - - - - - - - - - - -
SheetJSTableExportTest
வணக்கம்สวัสดี你好가지마
1234
Clicktoeditcells
- - diff --git a/demos/headless/xlsx.full.min.js b/demos/headless/xlsx.full.min.js deleted file mode 120000 index dbca48d..0000000 --- a/demos/headless/xlsx.full.min.js +++ /dev/null @@ -1 +0,0 @@ -../../dist/xlsx.full.min.js \ No newline at end of file diff --git a/demos/nwjs/index.js b/demos/nwjs/index.js index f7b152b..fc95ca3 100644 --- a/demos/nwjs/index.js +++ b/demos/nwjs/index.js @@ -68,13 +68,13 @@ var export_xlsx = (function() { var HTMLOUT = document.getElementById('htmlout'); var input = document.createElement('input'); input.style.display = 'none'; - input.setAttribute('nwsaveas', 'sheetjs.xlsx'); + input.setAttribute('nwsaveas', 'SheetJSNWDemo.xlsx'); input.setAttribute('type', 'file'); document.body.appendChild(input); input.addEventListener('cancel',function(){ alert("Save was canceled!"); }); input.addEventListener('change',function(e){ var filename=this.value, bookType=(filename.match(/[^\.]*$/)||["xlsx"])[0]; - var wb = XLSX.utils.table_to_book(HTMLOUT); + var wb = XLSX.utils.table_to_book(HTMLOUT.getElementsByTagName("TABLE")[0]); var wbout = XLSX.write(wb, {type:'buffer', bookType:bookType}); fs.writeFile(filename, wbout, function(err) { if(!err) return alert("Saved to " + filename); diff --git a/demos/nwjs/package.json b/demos/nwjs/package.json index e716739..a36ec40 100644 --- a/demos/nwjs/package.json +++ b/demos/nwjs/package.json @@ -4,7 +4,7 @@ "version": "0.0.0", "main": "index.html", "dependencies": { - "nw": "~0.63.0", + "nw": "~0.66.0", "xlsx": "https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz" } } diff --git a/test.js b/test.js index d6c3d3e..7b23b2a 100644 --- a/test.js +++ b/test.js @@ -723,6 +723,7 @@ describe('output formats', function() { ["fods", true, true], ["csv", true, true], ["txt", true, true], + ["rtf", false, true], ["sylk", false, true], ["eth", false, true], ["html", true, true], @@ -2531,6 +2532,23 @@ describe('js -> file -> js', function() { }); }); +describe('rtf', function() { + it('roundtrip should be idempotent', function() { + var ws = X.utils.aoa_to_sheet([ + [1,2,3], + [true, false, null, "sheetjs"], + ["foo", "bar", fixdate, "0.3"], + ["baz", null, "q\"ux"] + ]); + var wb1 = X.utils.book_new(); + X.utils.book_append_sheet(wb1, ws, "Sheet1"); + var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"}); + var wb2 = X.read(rtf1, {type: "string"}); + var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"}); + assert.equal(rtf1, rtf2); + }); +}); + describe('corner cases', function() { it('output functions', function() { var ws = X.utils.aoa_to_sheet([ diff --git a/test.mjs b/test.mjs index fdb7c0c..862b3fa 100644 --- a/test.mjs +++ b/test.mjs @@ -720,6 +720,7 @@ describe('output formats', function() { ["fods", true, true], ["csv", true, true], ["txt", true, true], + ["rtf", false, true], ["sylk", false, true], ["eth", false, true], ["html", true, true], @@ -2517,6 +2518,23 @@ describe('js -> file -> js', function() { }); }); +describe('rtf', function() { + it('roundtrip should be idempotent', function() { + var ws = X.utils.aoa_to_sheet([ + [1,2,3], + [true, false, null, "sheetjs"], + ["foo", "bar", fixdate, "0.3"], + ["baz", null, "q\"ux"] + ]); + var wb1 = X.utils.book_new(); + X.utils.book_append_sheet(wb1, ws, "Sheet1"); + var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"}); + var wb2 = X.read(rtf1, {type: "string"}); + var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"}); + assert.equal(rtf1, rtf2); + }); +}); + describe('corner cases', function() { it('output functions', function() { var ws = X.utils.aoa_to_sheet([ diff --git a/test.mts b/test.mts index d9dacac..e9228ae 100644 --- a/test.mts +++ b/test.mts @@ -737,6 +737,7 @@ describe('output formats', function() { ["fods", true, true], ["csv", true, true], ["txt", true, true], + ["rtf", false, true], ["sylk", false, true], ["eth", false, true], ["html", true, true], @@ -2226,6 +2227,11 @@ describe('numbers', function() { assert.equal(get_cell(ws2, "A1").v, 1); assert.equal(get_cell(ws2, "ALL2").v, 2); }); + it('should support icloud.com files', function() { + var wb = X.read(fs.readFileSync(dir + 'Attendance.numbers'), {type:TYPE, WTF:true}); + var ws = wb.Sheets["Attendance"]; + assert.equal(get_cell(ws, "A1").v, "Date"); + }); }); describe('dbf', function() { @@ -2415,6 +2421,23 @@ describe('js -> file -> js', function() { }); }); +describe('rtf', function() { + it('roundtrip should be idempotent', function() { + var ws = X.utils.aoa_to_sheet([ + [1,2,3], + [true, false, null, "sheetjs"], + ["foo", "bar", fixdate, "0.3"], + ["baz", null, "q\"ux"] + ]); + var wb1 = X.utils.book_new(); + X.utils.book_append_sheet(wb1, ws, "Sheet1"); + var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"}); + var wb2 = X.read(rtf1, {type: "string"}); + var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"}); + assert.equal(rtf1, rtf2); + }); +}); + describe('corner cases', function() { it('output functions', function() { var ws = X.utils.aoa_to_sheet([ diff --git a/test.ts b/test.ts index b974673..9d2c8d8 100644 --- a/test.ts +++ b/test.ts @@ -737,6 +737,7 @@ Deno.test('output formats', async function(t) { ["fods", true, true], ["csv", true, true], ["txt", true, true], + ["rtf", false, true], ["sylk", false, true], ["eth", false, true], ["html", true, true], @@ -2420,6 +2421,23 @@ Deno.test('js -> file -> js', async function(t) { }); }); +Deno.test('rtf', async function(t) { + await t.step('roundtrip should be idempotent', async function(t) { + var ws = X.utils.aoa_to_sheet([ + [1,2,3], + [true, false, null, "sheetjs"], + ["foo", "bar", fixdate, "0.3"], + ["baz", null, "q\"ux"] + ]); + var wb1 = X.utils.book_new(); + X.utils.book_append_sheet(wb1, ws, "Sheet1"); + var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"}); + var wb2 = X.read(rtf1, {type: "string"}); + var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"}); + assert.equal(rtf1, rtf2); + }); +}); + Deno.test('corner cases', async function(t) { await t.step('output functions', async function(t) { var ws = X.utils.aoa_to_sheet([ diff --git a/testnocp.ts b/testnocp.ts index 16ff674..aead2ab 100644 --- a/testnocp.ts +++ b/testnocp.ts @@ -736,6 +736,7 @@ Deno.test('output formats', async function(t) { ["fods", true, true], ["csv", true, true], ["txt", true, true], + ["rtf", false, true], ["sylk", false, true], ["eth", false, true], ["html", true, true], @@ -2225,6 +2226,11 @@ Deno.test('numbers', async function(t) { assert.equal(get_cell(ws2, "A1").v, 1); assert.equal(get_cell(ws2, "ALL2").v, 2); }); + await t.step('should support icloud.com files', async function(t) { + var wb = X.read(fs.readFileSync(dir + 'Attendance.numbers'), {type:TYPE, WTF:true}); + var ws = wb.Sheets["Attendance"]; + assert.equal(get_cell(ws, "A1").v, "Date"); + }); }); Deno.test('dbf', async function(t) { @@ -2414,6 +2420,23 @@ Deno.test('js -> file -> js', async function(t) { }); }); +Deno.test('rtf', async function(t) { + await t.step('roundtrip should be idempotent', async function(t) { + var ws = X.utils.aoa_to_sheet([ + [1,2,3], + [true, false, null, "sheetjs"], + ["foo", "bar", fixdate, "0.3"], + ["baz", null, "q\"ux"] + ]); + var wb1 = X.utils.book_new(); + X.utils.book_append_sheet(wb1, ws, "Sheet1"); + var rtf1 = X.write(wb1, {bookType: "rtf", type: "string"}); + var wb2 = X.read(rtf1, {type: "string"}); + var rtf2 = X.write(wb2, {bookType: "rtf", type: "string"}); + assert.equal(rtf1, rtf2); + }); +}); + Deno.test('corner cases', async function(t) { await t.step('output functions', async function(t) { var ws = X.utils.aoa_to_sheet([ diff --git a/xlsx.flow.js b/xlsx.flow.js index 18d2a37..123ce0e 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -10322,88 +10322,84 @@ function parse_FilePass(blob, length/*:number*/, opts) { } -var RTF = /*#__PURE__*/(function() { - function rtf_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ { - switch(opts.type) { - case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts); - case 'binary': return rtf_to_sheet_str(d, opts); - case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); - case 'array': return rtf_to_sheet_str(cc2str(d), opts); - } - throw new Error("Unrecognized type " + opts.type); +function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { + switch(opts.type) { + case 'base64': return rtf_to_book_str(Base64_decode(d), opts); + case 'binary': return rtf_to_book_str(d, opts); + case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); + case 'array': return rtf_to_book_str(cc2str(d), opts); } + throw new Error("Unrecognized type " + opts.type); +} - /* TODO: this is a stub */ - function rtf_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ { - var o = opts || {}; - var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/); +/* TODO: RTF technically can store multiple tables, even if Excel does not */ +function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ { + var o = opts || {}; + var sname = o.sheet || "Sheet1"; + var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/); + var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} }; + wb.Sheets[sname] = ws; - var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); - if(!rows.length) throw new Error("RTF missing table"); - var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); - rows.forEach(function(rowtf, R) { - if(Array.isArray(ws)) ws[R] = []; - var rtfre = /\\[\w\-]+\b/g; - var last_index = 0; - var res; - var C = -1; - var payload = []; - while((res = rtfre.exec(rowtf))) { - var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); - if(data.charCodeAt(0) == 0x20) data = data.slice(1); - if(data.length) payload.push(data); - switch(res[0]) { - case "\\cell": - ++C; - if(payload.length) { - // TODO: value parsing, including codepage adjustments - var cell = {v: payload.join(""), t:"s"}; - if(Array.isArray(ws)) ws[R][C] = cell; - else ws[encode_cell({r:R, c:C})] = cell; - } - payload = []; - break; - case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" - payload.push("\n"); - break; - } - last_index = rtfre.lastIndex; + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); + if(!rows.length) throw new Error("RTF missing table"); + var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); + rows.forEach(function(rowtf, R) { + if(Array.isArray(ws)) ws[R] = []; + var rtfre = /\\[\w\-]+\b/g; + var last_index = 0; + var res; + var C = -1; + var payload = []; + while((res = rtfre.exec(rowtf))) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if(data.charCodeAt(0) == 0x20) data = data.slice(1); + if(data.length) payload.push(data); + switch(res[0]) { + case "\\cell": + ++C; + if(payload.length) { + // TODO: value parsing, including codepage adjustments + var cell = {v: payload.join(""), t:"s"}; + if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; } + else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); } + + if(Array.isArray(ws)) ws[R][C] = cell; + else ws[encode_cell({r:R, c:C})] = cell; + } + payload = []; + break; + case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" + payload.push("\n"); + break; } - if(C > range.e.c) range.e.c = C; - }); - ws['!ref'] = encode_range(range); - return ws; - } - - function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { return sheet_to_workbook(rtf_to_sheet(d, opts), opts); } - - /* TODO: this is a stub */ - function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ { - var o = ["{\\rtf1\\ansi"]; - var r = safe_decode_range(ws['!ref']), cell/*:Cell*/; - var dense = Array.isArray(ws); - for(var R = r.s.r; R <= r.e.r; ++R) { - o.push("\\trowd\\trautofit1"); - for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); - o.push("\\pard\\intbl"); - for(C = r.s.c; C <= r.e.c; ++C) { - var coord = encode_cell({r:R,c:C}); - cell = dense ? (ws[R]||[])[C]: ws[coord]; - if(!cell || cell.v == null && (!cell.f || cell.F)) continue; - o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); - o.push("\\cell"); - } - o.push("\\pard\\intbl\\row"); + last_index = rtfre.lastIndex; } - return o.join("") + "}"; - } + if(C > range.e.c) range.e.c = C; + }); + ws['!ref'] = encode_range(range); + return wb; +} - return { - to_workbook: rtf_to_workbook, - to_sheet: rtf_to_sheet, - from_sheet: sheet_to_rtf - }; -})(); +/* TODO: standardize sheet names as titles for tables */ +function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ { + var o = ["{\\rtf1\\ansi"]; + var r = safe_decode_range(ws['!ref']), cell/*:Cell*/; + var dense = Array.isArray(ws); + for(var R = r.s.r; R <= r.e.r; ++R) { + o.push("\\trowd\\trautofit1"); + for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); + o.push("\\pard\\intbl"); + for(C = r.s.c; C <= r.e.c; ++C) { + var coord = encode_cell({r:R,c:C}); + cell = dense ? (ws[R]||[])[C]: ws[coord]; + if(!cell || cell.v == null && (!cell.f || cell.F)) continue; + o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); + o.push("\\cell"); + } + o.push("\\pard\\intbl\\row"); + } + return o.join("") + "}"; +} function hex2RGB(h) { var o = h.slice(h[0]==="#"?1:0).slice(0,6); return [parseInt(o.slice(0,2),16),parseInt(o.slice(2,4),16),parseInt(o.slice(4,6),16)]; @@ -24945,7 +24941,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { } break; case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o); - case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break; + case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return rtf_to_workbook(d, o); break; case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o); case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break; case 0x08: if(n[1] === 0xE7) throw new Error("Unsupported Multiplan 1.x file!"); break; @@ -25106,7 +25102,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) { case 'dif': return write_string_type(DIF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'dbf': return write_binary_type(DBF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'prn': return write_string_type(PRN.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); - case 'rtf': return write_string_type(RTF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); + case 'rtf': return write_string_type(sheet_to_rtf(wb.Sheets[wb.SheetNames[idx]], o), o); case 'eth': return write_string_type(ETH.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'fods': return write_string_type(write_ods(wb, o), o); case 'wk1': return write_binary_type(WK_.sheet_to_wk1(wb.Sheets[wb.SheetNames[idx]], o), o); diff --git a/xlsx.mjs b/xlsx.mjs index 77a0053..a5bc4c4 100644 --- a/xlsx.mjs +++ b/xlsx.mjs @@ -10317,88 +10317,84 @@ function parse_FilePass(blob, length/*:number*/, opts) { } -var RTF = /*#__PURE__*/(function() { - function rtf_to_sheet(d/*:RawData*/, opts)/*:Worksheet*/ { - switch(opts.type) { - case 'base64': return rtf_to_sheet_str(Base64_decode(d), opts); - case 'binary': return rtf_to_sheet_str(d, opts); - case 'buffer': return rtf_to_sheet_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); - case 'array': return rtf_to_sheet_str(cc2str(d), opts); - } - throw new Error("Unrecognized type " + opts.type); +function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { + switch(opts.type) { + case 'base64': return rtf_to_book_str(Base64_decode(d), opts); + case 'binary': return rtf_to_book_str(d, opts); + case 'buffer': return rtf_to_book_str(has_buf && Buffer.isBuffer(d) ? d.toString('binary') : a2s(d), opts); + case 'array': return rtf_to_book_str(cc2str(d), opts); } + throw new Error("Unrecognized type " + opts.type); +} - /* TODO: this is a stub */ - function rtf_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ { - var o = opts || {}; - var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/); +/* TODO: RTF technically can store multiple tables, even if Excel does not */ +function rtf_to_book_str(str/*:string*/, opts)/*:Workbook*/ { + var o = opts || {}; + var sname = o.sheet || "Sheet1"; + var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/); + var wb/*:Workbook*/ = { SheetNames: [ sname ], Sheets: {} }; + wb.Sheets[sname] = ws; - var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); - if(!rows.length) throw new Error("RTF missing table"); - var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); - rows.forEach(function(rowtf, R) { - if(Array.isArray(ws)) ws[R] = []; - var rtfre = /\\[\w\-]+\b/g; - var last_index = 0; - var res; - var C = -1; - var payload = []; - while((res = rtfre.exec(rowtf))) { - var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); - if(data.charCodeAt(0) == 0x20) data = data.slice(1); - if(data.length) payload.push(data); - switch(res[0]) { - case "\\cell": - ++C; - if(payload.length) { - // TODO: value parsing, including codepage adjustments - var cell = {v: payload.join(""), t:"s"}; - if(Array.isArray(ws)) ws[R][C] = cell; - else ws[encode_cell({r:R, c:C})] = cell; - } - payload = []; - break; - case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" - payload.push("\n"); - break; - } - last_index = rtfre.lastIndex; + var rows = str.match(/\\trowd[\s\S]*?\\row\b/g); + if(!rows.length) throw new Error("RTF missing table"); + var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:rows.length - 1}}/*:any*/); + rows.forEach(function(rowtf, R) { + if(Array.isArray(ws)) ws[R] = []; + var rtfre = /\\[\w\-]+\b/g; + var last_index = 0; + var res; + var C = -1; + var payload = []; + while((res = rtfre.exec(rowtf))) { + var data = rowtf.slice(last_index, rtfre.lastIndex - res[0].length); + if(data.charCodeAt(0) == 0x20) data = data.slice(1); + if(data.length) payload.push(data); + switch(res[0]) { + case "\\cell": + ++C; + if(payload.length) { + // TODO: value parsing, including codepage adjustments + var cell = {v: payload.join(""), t:"s"}; + if(cell.v == "TRUE" || cell.v == "FALSE") { cell.v = cell.v == "TRUE"; cell.t = "b"; } + else if(!isNaN(fuzzynum(cell.v))) { cell.t = 'n'; if(o.cellText !== false) cell.w = cell.v; cell.v = fuzzynum(cell.v); } + + if(Array.isArray(ws)) ws[R][C] = cell; + else ws[encode_cell({r:R, c:C})] = cell; + } + payload = []; + break; + case "\\par": // NOTE: Excel serializes both "\r" and "\n" as "\\par" + payload.push("\n"); + break; } - if(C > range.e.c) range.e.c = C; - }); - ws['!ref'] = encode_range(range); - return ws; - } - - function rtf_to_workbook(d/*:RawData*/, opts)/*:Workbook*/ { return sheet_to_workbook(rtf_to_sheet(d, opts), opts); } - - /* TODO: this is a stub */ - function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ { - var o = ["{\\rtf1\\ansi"]; - var r = safe_decode_range(ws['!ref']), cell/*:Cell*/; - var dense = Array.isArray(ws); - for(var R = r.s.r; R <= r.e.r; ++R) { - o.push("\\trowd\\trautofit1"); - for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); - o.push("\\pard\\intbl"); - for(C = r.s.c; C <= r.e.c; ++C) { - var coord = encode_cell({r:R,c:C}); - cell = dense ? (ws[R]||[])[C]: ws[coord]; - if(!cell || cell.v == null && (!cell.f || cell.F)) continue; - o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); - o.push("\\cell"); - } - o.push("\\pard\\intbl\\row"); + last_index = rtfre.lastIndex; } - return o.join("") + "}"; - } + if(C > range.e.c) range.e.c = C; + }); + ws['!ref'] = encode_range(range); + return wb; +} - return { - to_workbook: rtf_to_workbook, - to_sheet: rtf_to_sheet, - from_sheet: sheet_to_rtf - }; -})(); +/* TODO: standardize sheet names as titles for tables */ +function sheet_to_rtf(ws/*:Worksheet*//*::, opts*/)/*:string*/ { + var o = ["{\\rtf1\\ansi"]; + var r = safe_decode_range(ws['!ref']), cell/*:Cell*/; + var dense = Array.isArray(ws); + for(var R = r.s.r; R <= r.e.r; ++R) { + o.push("\\trowd\\trautofit1"); + for(var C = r.s.c; C <= r.e.c; ++C) o.push("\\cellx" + (C+1)); + o.push("\\pard\\intbl"); + for(C = r.s.c; C <= r.e.c; ++C) { + var coord = encode_cell({r:R,c:C}); + cell = dense ? (ws[R]||[])[C]: ws[coord]; + if(!cell || cell.v == null && (!cell.f || cell.F)) continue; + o.push(" " + (cell.w || (format_cell(cell), cell.w)).replace(/[\r\n]/g, "\\par ")); + o.push("\\cell"); + } + o.push("\\pard\\intbl\\row"); + } + return o.join("") + "}"; +} function hex2RGB(h) { var o = h.slice(h[0]==="#"?1:0).slice(0,6); return [parseInt(o.slice(0,2),16),parseInt(o.slice(2,4),16),parseInt(o.slice(4,6),16)]; @@ -24940,7 +24936,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { } break; case 0x03: case 0x83: case 0x8B: case 0x8C: return DBF.to_workbook(d, o); - case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return RTF.to_workbook(d, o); break; + case 0x7B: if(n[1] === 0x5C && n[2] === 0x72 && n[3] === 0x74) return rtf_to_workbook(d, o); break; case 0x0A: case 0x0D: case 0x20: return read_plaintext_raw(d, o); case 0x89: if(n[1] === 0x50 && n[2] === 0x4E && n[3] === 0x47) throw new Error("PNG Image File is not a spreadsheet"); break; case 0x08: if(n[1] === 0xE7) throw new Error("Unsupported Multiplan 1.x file!"); break; @@ -25101,7 +25097,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) { case 'dif': return write_string_type(DIF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'dbf': return write_binary_type(DBF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'prn': return write_string_type(PRN.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); - case 'rtf': return write_string_type(RTF.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); + case 'rtf': return write_string_type(sheet_to_rtf(wb.Sheets[wb.SheetNames[idx]], o), o); case 'eth': return write_string_type(ETH.from_sheet(wb.Sheets[wb.SheetNames[idx]], o), o); case 'fods': return write_string_type(write_ods(wb, o), o); case 'wk1': return write_binary_type(WK_.sheet_to_wk1(wb.Sheets[wb.SheetNames[idx]], o), o);