diff --git a/docz/docs/03-demos/11-bigdata/01-stream.md b/docz/docs/03-demos/11-bigdata/01-stream.md index c0ff670..cb1fea0 100644 --- a/docz/docs/03-demos/11-bigdata/01-stream.md +++ b/docz/docs/03-demos/11-bigdata/01-stream.md @@ -16,8 +16,8 @@ using platform-specific APIs. ## Dense Mode -The `dense` option (supported in `read`, `readFile` and `aoa_to_sheet`) creates -worksheet objects that use arrays of arrays under the hood: +`read`, `readFile` and `aoa_to_sheet` accept the `dense` option. When enabled, +the methods create worksheet objects that store cells in arrays of arrays: ```js var dense_wb = XLSX.read(ab, {dense: true}); @@ -55,7 +55,9 @@ take the same arguments as the normal write functions:
Historical Note (click to show) -NodeJS push streams were introduced in 2012. +NodeJS push streams were introduced in 2012. The text streaming methods `to_csv` +and `to_html` are supported in NodeJS v0.10 and later while the object streaming +method `to_json` is supported in NodeJS v0.12 and later. The first streaming write function, `to_csv`, was introduced in April 2017. It used and still uses the same NodeJS streaming API. @@ -68,14 +70,14 @@ For maximal compatibility, the library uses NodeJS push streams. ### NodeJS -:::note - In a CommonJS context, NodeJS Streams and `fs` immediately work with SheetJS: ```js const XLSX = require("xlsx"); // "just works" ``` +:::warning ECMAScript Module Machinations + In NodeJS ESM, the dependency must be loaded manually: ```js @@ -98,44 +100,214 @@ XLSX.set_fs(fs); // manually load fs helpers ::: +**`XLSX.stream.to_csv`** + This example reads a worksheet passed as an argument to the script, pulls the -first worksheet, converts to CSV and writes to `out.csv`: +first worksheet, converts to CSV and writes to `SheetJSNodeJStream.csv`: ```js -var XLSX = require("xlsx"); -var workbook = XLSX.readFile(process.argv[2]); -var worksheet = workbook.Sheets[workbook.SheetNames[0]]; -// highlight-next-line -var stream = XLSX.stream.to_csv(worksheet); +var XLSX = require("xlsx"), fs = require("fs"); + +var wb = XLSX.readFile(process.argv[2]); +var ws = wb.Sheets[wb.SheetNames[0]]; +var ostream = fs.createWriteStream("SheetJSNodeJStream.csv"); -var output_file_name = "out.csv"; // highlight-next-line -stream.pipe(fs.createWriteStream(output_file_name)); +XLSX.stream.to_csv(ws).pipe(ostream); ``` +**`XLSX.stream.to_json`** + `stream.to_json` uses Object-mode streams. 
A `Transform` stream can be used to generate a normal stream for streaming to a file or the screen: ```js -var XLSX = require("xlsx"); -var workbook = XLSX.readFile(process.argv[2], {dense: true}); -var worksheet = workbook.Sheets[workbook.SheetNames[0]]; -/* to_json returns an object-mode stream */ -// highlight-next-line -var stream = XLSX.stream.to_json(worksheet, {raw:true}); +var XLSX = require("xlsx"), Transform = require("stream").Transform; +var wb = XLSX.readFile(process.argv[2], {dense: true}); +var ws = wb.Sheets[wb.SheetNames[0]]; -/* this Transform stream converts JS objects to text and prints to screen */ +/* this Transform stream converts JS objects to text */ var conv = new Transform({writableObjectMode:true}); conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); }; -conv.pipe(process.stdout); +/* pipe `to_json` -> transformer -> standard output */ // highlight-next-line -stream.pipe(conv); +XLSX.stream.to_json(ws, {raw: true}).pipe(conv).pipe(process.stdout); ``` +**Demo** + +:::note + +This demo was last tested in the following deployments: + +| Node Version | Date | Node Status when tested | +|:-------------|:-----------|:------------------------| +| `0.12.18` | 2023-05-30 | End-of-Life | +| `4.9.1` | 2023-05-30 | End-of-Life | +| `6.17.1` | 2023-05-30 | End-of-Life | +| `8.17.0` | 2023-05-30 | End-of-Life | +| `10.24.1` | 2023-05-30 | End-of-Life | +| `12.22.12` | 2023-05-30 | End-of-Life | +| `14.21.3` | 2023-05-30 | End-of-Life | +| `16.20.0` | 2023-05-30 | Maintenance LTS | +| `18.16.0` | 2023-05-30 | Active LTS | +| `20.2.0` | 2023-05-30 | Current | + +While streaming methods work in End-of-Life versions of NodeJS, production +deployments should upgrade to a Current or LTS version of NodeJS. 
+ +::: + +1) Install the [NodeJS module](/docs/getting-started/installation/nodejs) + +{`\ +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz`} + + +2) Download [`SheetJSNodeJStream.js`](pathname:///stream/SheetJSNodeJStream.js): + +```bash +curl -LO https://docs.sheetjs.com/stream/SheetJSNodeJStream.js +``` + +3) Download [the test file](https://sheetjs.com/pres.xlsx): + +```bash +curl -LO https://sheetjs.com/pres.xlsx +``` + +4) Run the script: + +```bash +node SheetJSNodeJStream.js pres.xlsx +``` + +
Expected Output (click to show) + +The console will display a list of objects: + +```json +{"Name":"Bill Clinton","Index":42} +{"Name":"GeorgeW Bush","Index":43} +{"Name":"Barack Obama","Index":44} +{"Name":"Donald Trump","Index":45} +{"Name":"Joseph Biden","Index":46} +``` + +The script will also generate `SheetJSNodeJStream.csv`: + +```csv +Name,Index +Bill Clinton,42 +GeorgeW Bush,43 +Barack Obama,44 +Donald Trump,45 +Joseph Biden,46 +``` + +
+ ### Browser -
Live Demo (click to show) +:::note + +The live demo was last tested on 2023 May 30 in Chromium 113. + +::: + +NodeJS streaming APIs are not available in the browser. The following function +supplies a pseudo stream object compatible with the `to_csv` function: + +```js +function sheet_to_csv_cb(ws, cb, opts, batch = 1000) { + XLSX.stream.set_readable(() => ({ + __done: false, + // this function will be assigned by the SheetJS stream methods + _read: function() { this.__done = true; }, + // this function is called by the stream methods + push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; }, + resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); } + })); + return XLSX.stream.to_csv(ws, opts); +} + +// assuming `workbook` is a workbook, stream the first sheet +const ws = workbook.Sheets[workbook.SheetNames[0]]; +const strm = sheet_to_csv_cb(ws, (csv)=>{ if(csv != null) console.log(csv); }); +strm.resume(); +``` + +#### Web Workers + +For processing large files in the browser, it is strongly encouraged to use Web +Workers. The [Worker demo](/docs/demos/bigdata/worker#streaming-write) includes +examples using the File System Access API. + +
Web Worker Details (click to show) + +Typically, the file and stream processing occurs in the Web Worker. CSV rows +can be sent back to the main thread in the callback: + +{`\ +/* load standalone script from CDN */ +importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js"); +\n\ +function sheet_to_csv_cb(ws, cb, opts, batch = 1000) { + XLSX.stream.set_readable(() => ({ + __done: false, + // this function will be assigned by the SheetJS stream methods + _read: function() { this.__done = true; }, + // this function is called by the stream methods + push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; }, + resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); } + })); + return XLSX.stream.to_csv(ws, opts); +} +\n\ +/* this callback will run once the main context sends a message */ +self.addEventListener('message', async(e) => { + try { + postMessage({state: "fetching " + e.data.url}); + /* Fetch file */ + const res = await fetch(e.data.url); + const ab = await res.arrayBuffer(); +\n\ + /* Parse file */ + postMessage({state: "parsing"}); + const wb = XLSX.read(ab, {dense: true}); + const ws = wb.Sheets[wb.SheetNames[0]]; +\n\ + /* Generate CSV rows */ + postMessage({state: "csv"}); + const strm = sheet_to_csv_cb(ws, (csv) => { + if(csv != null) postMessage({csv}); + else postMessage({state: "done"}); + }); + strm.resume(); + } catch(e) { + /* Pass the error message back */ + postMessage({error: String(e.message || e) }); + } +}, false);`} + + +The main thread will receive messages with CSV rows for further processing: + +```js title="main.js" +worker.onmessage = function(e) { + if(e.data.error) { console.error(e.data.error); /* show an error message */ } + else if(e.data.state) { console.info(e.data.state); /* current state */ } + else { + /* e.data.csv is the row generated by the stream */ + console.log(e.data.csv); + } +}; +``` + +
+ +### Live Demo The following live demo fetches and parses a file in a Web Worker. The `to_csv` streaming function is used to generate CSV rows and pass back to the main thread @@ -231,117 +403,13 @@ self.addEventListener('message', async(e) => { }`} -
- -NodeJS streaming APIs are not available in the browser. The following function -supplies a pseudo stream object compatible with the `to_csv` function: - -```js -function sheet_to_csv_cb(ws, cb, opts, batch = 1000) { - XLSX.stream.set_readable(() => ({ - __done: false, - // this function will be assigned by the SheetJS stream methods - _read: function() { this.__done = true; }, - // this function is called by the stream methods - push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; }, - resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); } - })); - return XLSX.stream.to_csv(ws, opts); -} - -// assuming `workbook` is a workbook, stream the first sheet -const ws = workbook.Sheets[workbook.SheetNames[0]]; -const strm = sheet_to_csv_cb(ws, (csv)=>{ if(csv != null) console.log(csv); }); -strm.resume(); -``` - -#### Web Workers - -For processing large files in the browser, it is strongly encouraged to use Web -Workers. The [Worker demo](/docs/demos/bigdata/worker#streaming-write) includes -examples using the File System Access API. - -Typically, the file and stream processing occurs in the Web Worker. 
CSV rows -can be sent back to the main thread in the callback: - -{`\ -/* load standalone script from CDN */ -importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js"); -\n\ -function sheet_to_csv_cb(ws, cb, opts, batch = 1000) { - XLSX.stream.set_readable(() => ({ - __done: false, - // this function will be assigned by the SheetJS stream methods - _read: function() { this.__done = true; }, - // this function is called by the stream methods - push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; }, - resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); } - })); - return XLSX.stream.to_csv(ws, opts); -} -\n\ -/* this callback will run once the main context sends a message */ -self.addEventListener('message', async(e) => { - try { - postMessage({state: "fetching " + e.data.url}); - /* Fetch file */ - const res = await fetch(e.data.url); - const ab = await res.arrayBuffer(); -\n\ - /* Parse file */ - postMessage({state: "parsing"}); - const wb = XLSX.read(ab, {dense: true}); - const ws = wb.Sheets[wb.SheetNames[0]]; -\n\ - /* Generate CSV rows */ - postMessage({state: "csv"}); - const strm = sheet_to_csv_cb(ws, (csv) => { - if(csv != null) postMessage({csv}); - else postMessage({state: "done"}); - }); - strm.resume(); - } catch(e) { - /* Pass the error message back */ - postMessage({error: String(e.message || e) }); - } -}, false);`} - - -The main thread will receive messages with CSV rows for further processing: - -```js -worker.onmessage = function(e) { - if(e.data.error) { console.error(e.data.error); /* show an error message */ } - else if(e.data.state) { console.info(e.data.state); /* current state */ } - else { - /* e.data.csv is the row generated by the stream */ - console.log(e.data.csv); - } -}; -``` - ### Deno -Deno does not support NodeJS streams in normal execution, so a wrapper is used. 
-This example fetches and prints CSV rows: +Deno does not support NodeJS streams in normal execution, so a wrapper is used: -{`\ +{`\ // @deno-types="https://cdn.sheetjs.com/xlsx-${current}/package/types/index.d.ts" -import { stream, Sheet2CSVOpts, WorkSheet } from 'https://cdn.sheetjs.com/xlsx-${current}/package/xlsx.mjs'; -\n\ -interface Resumable { resume:()=>void; }; -/* Generate row strings from a worksheet */ -function sheet_to_csv_cb(ws: WorkSheet, cb:(d:string|null)=>void, opts: Sheet2CSVOpts = {}, batch = 1000): Resumable { - stream.set_readable(() => ({ - __done: false, - // this function will be assigned by the SheetJS stream methods - _read: function() { this.__done = true; }, - // this function is called by the stream methods - push: function(d: any) { if(!this.__done) cb(d); if(d == null) this.__done = true; }, - resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); } - })); - return stream.to_csv(ws, opts) as Resumable; -} +import { stream } from 'https://cdn.sheetjs.com/xlsx-${current}/package/xlsx.mjs'; \n\ /* Callback invoked on each row (string) and at the end (null) */ const csv_cb = (d:string|null) => { @@ -350,12 +418,37 @@ const csv_cb = (d:string|null) => { Deno.stdout.write(new TextEncoder().encode(d)); }; \n\ -/* Fetch https://sheetjs.com/pres.numbers, parse, and get first worksheet */ -import { read } from 'https://cdn.sheetjs.com/xlsx-${current}/package/xlsx.mjs'; -const ab = await (await fetch("https://sheetjs.com/pres.numbers")).arrayBuffer(); -const wb = read(ab, { dense: true }); -const ws = wb.Sheets[wb.SheetNames[0]]; +/* Prepare \`Readable\` function */ +const Readable = () => ({ + __done: false, + // this function will be assigned by the SheetJS stream methods + _read: function() { this.__done = true; }, + // this function is called by the stream methods + push: function(d: any) { + if(!this.__done) csv_cb(d); + if(d == null) this.__done = true; + 
}, + resume: function pump() { + for(var i = 0; i < 1000 && !this.__done; ++i) this._read(); + if(!this.__done) setTimeout(pump.bind(this), 0); + } +}) +/* Wire up */ +stream.set_readable(Readable); \n\ -/* Create and start CSV stream */ -sheet_to_csv_cb(ws, csv_cb).resume();`} +/* assuming \`workbook\` is a workbook, stream the first sheet */ +const ws = workbook.Sheets[workbook.SheetNames[0]]; +stream.to_csv(ws).resume();`} + +:::note + +This demo was last tested on 2023 May 30. + +::: + +[`SheetJSDenoStream.ts`](pathname:///stream/SheetJSDenoStream.ts) is a small +example script that downloads and prints +CSV row objects. + +1) Run `deno run -A https://docs.sheetjs.com/stream/SheetJSDenoStream.ts` diff --git a/docz/docs/03-demos/12-engines/09_hermes.md b/docz/docs/03-demos/12-engines/09_hermes.md new file mode 100644 index 0000000..9a7eb45 --- /dev/null +++ b/docz/docs/03-demos/12-engines/09_hermes.md @@ -0,0 +1,283 @@ +--- +title: C++ + Hermes +pagination_prev: demos/bigdata/index +pagination_next: solutions/input +--- + +import current from '/version.js'; +import CodeBlock from '@theme/CodeBlock'; + +Hermes is an embeddable JS engine written in C++. With some light shims, it can +run the standalone browser scripts. + +The [Standalone scripts](/docs/getting-started/installation/standalone) can be +parsed and evaluated in a Hermes context. + +:::caution Here be Dragons + +The main target for Hermes is React Native. At the time of writing, there was +no official documentation for embedding the Hermes engine in C++ programs. 
+ +::: + +## Integration Details + +_Initialize Hermes_ + +The runtime can be initialized in one line: + +```cpp +std::unique_ptr rt(facebook::hermes::makeHermesRuntime()); +``` + +Hermes does not expose a `console` or `global` variable, but those can be +synthesized from JS code in the runtime: + +```cpp +auto src = std::make_shared( + /* create global object */ + "var global = (function(){ return this; }).call(null);" + /* create a fake `console` from the hermes `print` builtin */ + "var console = { log: function(x) { print(x); } };" +); +auto js = rt->prepareJavaScript(src, std::string("")); +rt->evaluatePreparedJavaScript(js); +``` + +_Load SheetJS Scripts_ + +The main library can be loaded by reading the script from the file system and +evaluating in the Hermes context: + +```cpp +static char *read_file(const char *filename, size_t *sz) { + FILE *f = fopen(filename, "rb"); + if(!f) return NULL; + long fsize; { fseek(f, 0, SEEK_END); fsize = ftell(f); fseek(f, 0, SEEK_SET); } + char *buf = (char *)malloc(fsize * sizeof(char)); + *sz = fread((void *) buf, 1, fsize, f); + fclose(f); + return buf; +} + +/* Unfortunately the library provides no C-friendly Buffer classes */ +class CBuffer : public facebook::jsi::Buffer { + public: + CBuffer(const uint8_t *data, size_t size) : buf(data), sz(size) {} + size_t size() const override { return sz; } + const uint8_t *data() const override { return buf; } + + private: + const uint8_t *buf; + size_t sz; +}; + +// ... 
+ /* load SheetJS library */ + size_t sz; char *xlsx_full_min_js = read_file("xlsx.full.min.js", &sz); + auto src = std::make_shared(CBuffer((uint8_t *)xlsx_full_min_js, sz)); + auto js = rt->prepareJavaScript(src, std::string("xlsx.full.min.js")); + rt->evaluatePreparedJavaScript(js); +``` + +To confirm the library is loaded, `XLSX.version` can be printed to the console: + +```cpp +auto src = std::make_shared( + "console.log('SheetJS Library Version: ' + XLSX.version)" +); +auto js = rt->prepareJavaScript(src, std::string("")); +rt->evaluatePreparedJavaScript(js); +``` + +### Reading Files + +Hermes supports `ArrayBuffer` but has no simple helper to read raw memory. +Libraries are expected to implement `MutableBuffer`: + +```cpp +/* ArrayBuffer constructor expects MutableBuffer*/ +class CMutableBuffer : public facebook::jsi::MutableBuffer { + public: + CMutableBuffer(uint8_t *data, size_t size) : buf(data), sz(size) {} + size_t size() const override { return sz; } + uint8_t *data() override { return buf; } + + private: + uint8_t *buf; + size_t sz; +}; +// ... + /* load payload as ArrayBuffer */ + size_t sz; char *data = read_file(argv[1], &sz); + auto payload = std::make_shared(CMutableBuffer((uint8_t *)data, sz)); + auto ab = facebook::jsi::ArrayBuffer(*rt, payload); +``` + +It is strongly recommended to create a stub function to perform the entire +workflow in JS code and pass the final result back to C++. 
+ +> _JS Stub function_ +> +```js +function(buf) { + /* `buf` will be an ArrayBuffer */ + var wb = XLSX.read(buf); + return XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]]); +} +``` + +_C++ integration code_ + +```cpp + /* define stub function to read and convert first sheet to CSV */ + auto src = std::make_shared( + "(function(buf) {" + "var wb = XLSX.read(buf);" + "return XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]]);" + "})" + ); + auto js = rt->prepareJavaScript(src, std::string("")); + auto func = rt->evaluatePreparedJavaScript(js); + + /* call stub function and capture result */ + auto csv = func.asObject(*rt).asFunction(*rt).call(*rt, ab); + + /* interpret as utf8 and print to stdout */ + std::string str = csv.getString(*rt).utf8(*rt); +``` + +## Complete Example + +The "Integration Example" covers a traditional integration in a C++ application, +while the "CLI Test" demonstrates other concepts using the `hermes` CLI tool. + +### Integration Example + +:::note + +This demo was last tested on 2023 May 30 against Hermes commit `869312f` on +an Intel Mac. 
`llvm-g++ -v` printed: + +``` +Apple clang version 14.0.0 (clang-1400.0.29.202) +Target: x86_64-apple-darwin21.6.0 +``` + +::: + +0) Make a project directory: + +```bash +mkdir sheetjs-hermes +cd sheetjs-hermes +``` + +1) Download the [`Makefile`](pathname:///hermes/Makefile): + +```bash +curl -LO https://docs.sheetjs.com/hermes/Makefile +``` + +2) Download [`sheetjs-hermes.cpp`](pathname:///hermes/sheetjs-hermes.cpp): + +```bash +curl -LO https://docs.sheetjs.com/hermes/sheetjs-hermes.cpp +``` + +3) Build the library (this is the `init` target): + +```bash +make init +``` + +4) Build the application: + +```bash +make sheetjs-hermes +``` + +5) Download the standalone script and test file: + + + +{`\ +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js +curl -LO https://sheetjs.com/pres.numbers`} + + +6) Run the application: + +```bash +./sheetjs-hermes pres.numbers +``` + +If successful, the program will print the library version number and the +contents of the first sheet as CSV rows. + +### CLI Test + +:::note + +This demo was last tested on 2023 May 30 against Hermes version `0.11.0`. + +::: + +Due to limitations of the standalone binary, this demo will encode a test file +as a Base64 string and directly add it to an amalgamated script. 
+ +0) Install the `hermes` command line tool + +1) Download the standalone script and test file: + + + +{`\ +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js +curl -LO https://sheetjs.com/pres.numbers`} + + +2) Bundle the test file and create `payload.js`: + +```bash +node -e "fs.writeFileSync('payload.js', 'var payload = \"' + fs.readFileSync('pres.numbers').toString('base64') + '\";')" +``` + +3) Create support scripts: + +- `global.js` creates a `global` variable and defines a fake `console`: + +```js title="global.js" +var global = (function(){ return this; }).call(null); +var console = { log: function(x) { print(x); } }; +``` + +- `hermes.js` will call `XLSX.read` and `XLSX.utils.sheet_to_csv`: + +```js title="hermes.js" +var wb = XLSX.read(payload, {type:'base64'}); +console.log(XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]])); +``` + +4) Create the amalgamation `xlsx.hermes.js`: + +```bash +cat global.js xlsx.full.min.js payload.js hermes.js > xlsx.hermes.js +``` + +The final script defines `global` before loading the standalone library. Once +ready, it will read the bundled test data and print the contents as CSV. + +5) Run the script using the Hermes standalone binary: + +```bash +hermes xlsx.hermes.js +``` + +If successful, the script will print CSV data from the test file \ No newline at end of file diff --git a/docz/docs/03-demos/12-engines/index.md b/docz/docs/03-demos/12-engines/index.md index 0236b0f..c3695ab 100644 --- a/docz/docs/03-demos/12-engines/index.md +++ b/docz/docs/03-demos/12-engines/index.md @@ -104,75 +104,9 @@ This demo has been moved [to a dedicated page](/docs/demos/engines/goja). ### Hermes -Hermes is an embeddable JS engine for React Native. The library and binary -distributions include a command-line tool `hermes` for running JS scripts. - -The simplest way to interact with the engine is to pass Base64 strings. The make -target builds a very simple payload with the data. 
- -:::note - -The official release includes the `hermes` standalone tool. While applications -should link against the official libraries, the standalone tool is useful for -verifying functionality. - -::: - -
Complete Example (click to show) - -Due to limitations of the standalone binary, this demo will encode a test file -as a Base64 string and directly add it to an amalgamated script. - -0) Install the `hermes` command line tool - -1) Download the standalone script, shim, and test file: - - - -2) Bundle the test file and create `payload.js`: - -```bash -node -e "fs.writeFileSync('payload.js', 'var payload = \"' + fs.readFileSync('pres.numbers').toString('base64') + '\";')" -``` - -3) Create support scripts: - -- `global.js` creates a `global` variable and defines a fake `console`: - -```js title="global.js" -var global = (function(){ return this; }).call(null); -var console = { log: function(x) { print(x); } }; -``` - -- `hermes.js` will call `XLSX.read` and `XLSX.utils.sheet_to_csv`: - -```js title="hermes.js" -/* sheetjs (C) 2013-present SheetJS -- https://sheetjs.com */ -var wb = XLSX.read(payload, {type:'base64'}); -console.log(XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]])); -``` - -4) Create the amalgamation `xlsx.hermes.js`: - -```bash -cat global.js xlsx.full.min.js payload.js hermes.js > xlsx.hermes.js -``` - -The final script defines `global` before loading the standalone library. Once -ready, it will read the bundled test data and print the contents as CSV. - -5) Run the script using the Hermes standalone binary: - -```bash -hermes xlsx.hermes.js -``` - -
+Hermes is an embeddable JS engine written in C++. +This demo has been moved [to a dedicated page](/docs/demos/engines/hermes). ### JavaScriptCore diff --git a/docz/docs/08-api/index.md b/docz/docs/08-api/index.md index 6ff55ea..1aa775b 100644 --- a/docz/docs/08-api/index.md +++ b/docz/docs/08-api/index.md @@ -111,6 +111,8 @@ _Miscellaneous_ - `to_html(sheet, opts)` streams an HTML table incrementally - `to_json(sheet, opts)` streams JS objects (object-mode stream) +Streaming write functions are described in the [Streaming Write demo](/docs/demos/bigdata/stream#streaming-write). + ### ESM Helpers Due to broad inconsistencies in ESM implementations, the `mjs` build does not diff --git a/docz/docusaurus.config.js b/docz/docusaurus.config.js index 4ec9308..b3e9df2 100644 --- a/docz/docusaurus.config.js +++ b/docz/docusaurus.config.js @@ -34,7 +34,7 @@ const config = { ({ docs: { sidebarPath: require.resolve('./sidebars.js'), - // editUrl: 'https://git.sheetjs.com/sheetjs/docs.sheetjs.com/src/branch/master/docz', + editUrl: 'https://git.sheetjs.com/sheetjs/docs.sheetjs.com/src/branch/master/docz', }, //blog: { // showReadingTime: true, diff --git a/docz/static/hermes/Makefile b/docz/static/hermes/Makefile new file mode 100644 index 0000000..32c899a --- /dev/null +++ b/docz/static/hermes/Makefile @@ -0,0 +1,57 @@ +# Note: The official Hermes documentation includes zero guidance on embedding. +# Tested against commit 869312f185b73a7d7678a28f5f3216052c667e90 + +.PHONY: doit +doit: sheetjs-hermes + curl -LO https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js + curl -LO https://sheetjs.com/pres.numbers + ./sheetjs-hermes pres.numbers + +.PHONY: clean +clean: + rm -f sheetjs-hermes + +.PHONY: clean-all +clean-all: clean + rm -rf build_release hermes xlsx.full.min.js pres.numbers + +# This sequence was cobbled together by linking against every artifact. 
+# Some of these lines are likely extraneous +sheetjs-hermes: sheetjs-hermes.cpp init + llvm-g++ $< -o $@ -std=gnu++17 \ + -Ihermes/include/ -Ihermes/API/ -Ihermes/API/jsi -Ihermes/public \ + -Lbuild_release/API/hermes/ -lhermesapi -lcompileJS -lsynthTrace -lsynthTraceParser -ltimerStats -ltraceInterpreter \ + -Lbuild_release/external/dtoa/ -ldtoa \ + -Lbuild_release/external/llvh/lib/Demangle/ -lLLVHDemangle \ + -Lbuild_release/external/llvh/lib/Support/ -lLLVHSupport \ + -Lbuild_release/jsi/ -ljsi \ + -Lbuild_release/lib/ -lhermesFrontend \ + -Lbuild_release/lib/ -lhermesOptimizer \ + -Lbuild_release/lib/ADT -lhermesADT \ + -Lbuild_release/lib/AST/ -lhermesAST \ + -Lbuild_release/lib/AST2JS/ -lhermesAST2JS \ + -Lbuild_release/lib/BCGen/ -lhermesBackend \ + -Lbuild_release/lib/BCGen/HBC/ -lhermesHBCBackend \ + -Lbuild_release/lib/CompilerDriver/ -lhermesCompilerDriver \ + -Lbuild_release/lib/ConsoleHost/ -lhermesConsoleHost \ + -Lbuild_release/lib/DependencyExtractor/ -lhermesDependencyExtractor \ + -Lbuild_release/lib/FlowParser/ -lhermesFlowParser \ + -Lbuild_release/lib/FrontEndDefs/ -lhermesFrontEndDefs \ + -Lbuild_release/lib/Inst/ -lhermesInst \ + -Lbuild_release/lib/InternalBytecode/ -lhermesInternalBytecode \ + -Lbuild_release/lib/Parser/ -lhermesParser \ + -Lbuild_release/lib/Platform/ -lhermesPlatform \ + -Lbuild_release/lib/Platform/Intl/ -lhermesBCP47Parser \ + -Lbuild_release/lib/Platform/Unicode/ -lhermesPlatformUnicode \ + -Lbuild_release/lib/Regex/ -lhermesRegex \ + -Lbuild_release/lib/SourceMap/ -lhermesSourceMap \ + -Lbuild_release/lib/Support/ -lhermesSupport \ + -Lbuild_release/lib/VM/ -lhermesVMRuntime \ + -Lbuild_release/public/hermes/Public -lhermesPublic \ + -Lhermes/external/flowparser/ -lflowparser-mac \ + -framework CoreFoundation + +.PHONY: init +init: + if [ ! -e hermes ]; then git clone https://github.com/facebook/hermes.git; cd hermes; git checkout 869312f185b73a7d7678a28f5f3216052c667e90; cd ..; fi + if [ ! 
-e build_release ]; then cmake -S hermes -B build_release -G Ninja -DCMAKE_BUILD_TYPE=Release; cmake --build ./build_release; fi diff --git a/docz/static/hermes/sheetjs-hermes.cpp b/docz/static/hermes/sheetjs-hermes.cpp new file mode 100644 index 0000000..1ae943b --- /dev/null +++ b/docz/static/hermes/sheetjs-hermes.cpp @@ -0,0 +1,105 @@ +/* sheetjs-hermes.cpp Copyright (c) SheetJS LLC. */ +#include +#include "hermes/hermes.h" + +static char *read_file(const char *filename, size_t *sz) { + FILE *f = fopen(filename, "rb"); + if(!f) return NULL; + long fsize; { fseek(f, 0, SEEK_END); fsize = ftell(f); fseek(f, 0, SEEK_SET); } + char *buf = (char *)malloc(fsize * sizeof(char)); + *sz = fread((void *) buf, 1, fsize, f); + fclose(f); + return buf; +} + +/* Unfortunately the library provides no C-friendly Buffer classes */ +class CBuffer : public facebook::jsi::Buffer { + public: + CBuffer(const uint8_t *data, size_t size) : buf(data), sz(size) {} + size_t size() const override { return sz; } + const uint8_t *data() const override { return buf; } + + private: + const uint8_t *buf; + size_t sz; +}; +/* ArrayBuffer constructor expects MutableBuffer*/ +class CMutableBuffer : public facebook::jsi::MutableBuffer { + public: + CMutableBuffer(uint8_t *data, size_t size) : buf(data), sz(size) {} + size_t size() const override { return sz; } + uint8_t *data() override { return buf; } + + private: + uint8_t *buf; + size_t sz; +}; + +int main(int argc, char **argv) { + std::unique_ptr rt(facebook::hermes::makeHermesRuntime()); + + /* setup */ + try { + auto src = std::make_shared( + "var global = (function(){ return this; }).call(null);" + "var console = { log: function(x) { print(x); } };" + ); + auto js = rt->prepareJavaScript(src, std::string("")); + rt->evaluatePreparedJavaScript(js); + } catch (const facebook::jsi::JSIException &e) { + std::cerr << "JavaScript terminated via uncaught exception: " << e.what() << '\n'; + return 1; + } + + /* load SheetJS library */ + try { + 
size_t sz; char *xlsx_full_min_js = read_file("xlsx.full.min.js", &sz); + auto src = std::make_shared(CBuffer((uint8_t *)xlsx_full_min_js, sz)); + auto js = rt->prepareJavaScript(src, std::string("xlsx.full.min.js")); + rt->evaluatePreparedJavaScript(js); + } catch (const facebook::jsi::JSIException &e) { + std::cerr << "JavaScript terminated via uncaught exception: " << e.what() << '\n'; + return 1; + } + + /* print library version */ + try { + auto src = std::make_shared( + "console.log('SheetJS Library Version: ' + XLSX.version)" + ); + auto js = rt->prepareJavaScript(src, std::string("")); + rt->evaluatePreparedJavaScript(js); + } catch (const facebook::jsi::JSIException &e) { + std::cerr << "JavaScript terminated via uncaught exception: " << e.what() << '\n'; + return 1; + } + + try { + /* load payload as ArrayBuffer */ + size_t sz; char *data = read_file(argv[1], &sz); + auto payload = std::make_shared(CMutableBuffer((uint8_t *)data, sz)); + auto ab = facebook::jsi::ArrayBuffer(*rt, payload); + + /* define stub function to read and convert first sheet to CSV */ + auto src = std::make_shared( + "(function(buf) {" + "var wb = XLSX.read(buf);" + "return XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]]);" + "})" + ); + auto js = rt->prepareJavaScript(src, std::string("")); + auto func = rt->evaluatePreparedJavaScript(js); + + /* call stub function and capture result */ + auto csv = func.asObject(*rt).asFunction(*rt).call(*rt, ab); + + /* interpret as utf8 and print to stdout */ + std::string str = csv.getString(*rt).utf8(*rt); + std::cout << str << std::endl; + } catch (const facebook::jsi::JSIException &e) { + std::cerr << "JavaScript terminated via uncaught exception: " << e.what() << std::endl; + return 1; + } + + return 0; +} \ No newline at end of file diff --git a/docz/static/stream/SheetJSDenoStream.ts b/docz/static/stream/SheetJSDenoStream.ts new file mode 100644 index 0000000..7403911 --- /dev/null +++ b/docz/static/stream/SheetJSDenoStream.ts @@ -0,0 
+1,38 @@ +#!/usr/bin/env -S deno run --allow-net +// @deno-types="https://cdn.sheetjs.com/xlsx-latest/package/types/index.d.ts" +import { read, stream, Sheet2CSVOpts, WorkSheet } from 'https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs'; + +interface Resumable { resume:()=>void; }; +/* Generate row strings from a worksheet */ +function sheet_to_csv_cb(ws: WorkSheet, cb:(d:string|null)=>void, opts: Sheet2CSVOpts = {}, batch = 1000): Resumable { + stream.set_readable(() => ({ + __done: false, + // this function will be assigned by the SheetJS stream methods + _read: function() { this.__done = true; }, + // this function is called by the stream methods + push: function(d: string|null) { + if(!this.__done) cb(d); + if(d == null) this.__done = true; + }, + resume: function pump() { + for(var i = 0; i < batch && !this.__done; ++i) this._read(); + if(!this.__done) setTimeout(pump.bind(this), 0); + } + })); + return stream.to_csv(ws, opts) as Resumable; +} + +/* Callback invoked on each row (string) and at the end (null) */ +const csv_cb = (d:string|null) => { + if(d == null) return; + /* The strings include line endings, so raw write ops should be used */ + Deno.stdout.write(new TextEncoder().encode(d)); +}; + +/* Fetch https://sheetjs.com/pres.numbers, parse, and get first worksheet */ +const ab = await (await fetch("https://sheetjs.com/pres.numbers")).arrayBuffer(); +const wb = read(ab, { dense: true }); +const ws = wb.Sheets[wb.SheetNames[0]]; + +/* Create and start CSV stream */ +sheet_to_csv_cb(ws, csv_cb).resume(); diff --git a/docz/static/stream/SheetJSNodeJStream.js b/docz/static/stream/SheetJSNodeJStream.js new file mode 100644 index 0000000..a065d04 --- /dev/null +++ b/docz/static/stream/SheetJSNodeJStream.js @@ -0,0 +1,16 @@ +/* this script works in Node 0.12 (which predated ES6) so no modern syntax */ +var XLSX = require("xlsx"), fs = require("fs"), stream = require("stream"); + +var wb = XLSX.readFile(process.argv[2]); +var ws = wb.Sheets[wb.SheetNames[0]]; 
+ +/* this Transform stream converts JS objects to text */ +var conv = new stream.Transform({writableObjectMode:true}); +conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); }; + +/* to_json -> transformer -> standard output */ +XLSX.stream.to_json(ws, {raw: true}).pipe(conv).pipe(process.stdout); + +/* to_csv -> SheetJSNodeJStream.csv */ +var ostream = fs.createWriteStream("SheetJSNodeJStream.csv"); +XLSX.stream.to_csv(ws).pipe(ostream);