This commit is contained in:
SheetJS 2023-05-30 02:41:09 -04:00
parent 820cbdfada
commit 686b5c55af
9 changed files with 734 additions and 206 deletions

@ -16,8 +16,8 @@ using platform-specific APIs.
## Dense Mode
The `dense` option (supported in `read`, `readFile` and `aoa_to_sheet`) creates
worksheet objects that use arrays of arrays under the hood:
`read`, `readFile` and `aoa_to_sheet` accept the `dense` option. When enabled,
the methods create worksheet objects that store cells in arrays of arrays:
```js
var dense_wb = XLSX.read(ab, {dense: true});
@ -55,7 +55,9 @@ take the same arguments as the normal write functions:
<details><summary><b>Historical Note</b> (click to show)</summary>
NodeJS push streams were introduced in 2012.
NodeJS push streams were introduced in 2012. The text streaming methods `to_csv`
and `to_html` are supported in NodeJS v0.10 and later while the object streaming
method `to_json` is supported in NodeJS v0.12 and later.
The first streaming write function, `to_csv`, was introduced in April 2017. It
used and still uses the same NodeJS streaming API.
@ -68,14 +70,14 @@ For maximal compatibility, the library uses NodeJS push streams.
### NodeJS
:::note
In a CommonJS context, NodeJS Streams and `fs` immediately work with SheetJS:
```js
const XLSX = require("xlsx"); // "just works"
```
:::warning ECMAScript Module Machinations
In NodeJS ESM, the dependency must be loaded manually:
```js
@ -98,44 +100,214 @@ XLSX.set_fs(fs); // manually load fs helpers
:::
**`XLSX.stream.to_csv`**
This example reads the file passed as an argument to the script, pulls the
first worksheet, converts to CSV and writes to `out.csv`:
first worksheet, converts to CSV and writes to `SheetJSNodeJStream.csv`:
```js
var XLSX = require("xlsx");
var workbook = XLSX.readFile(process.argv[2]);
var worksheet = workbook.Sheets[workbook.SheetNames[0]];
// highlight-next-line
var stream = XLSX.stream.to_csv(worksheet);
var XLSX = require("xlsx"), fs = require("fs");
var wb = XLSX.readFile(process.argv[2]);
var ws = wb.Sheets[wb.SheetNames[0]];
var ostream = fs.createWriteStream("SheetJSNodeJStream.csv");
var output_file_name = "out.csv";
// highlight-next-line
stream.pipe(fs.createWriteStream(output_file_name));
XLSX.stream.to_csv(ws).pipe(ostream);
```
**`XLSX.stream.to_json`**
`stream.to_json` uses Object-mode streams. A `Transform` stream can be used to
generate a normal stream for streaming to a file or the screen:
```js
var XLSX = require("xlsx");
var workbook = XLSX.readFile(process.argv[2], {dense: true});
var worksheet = workbook.Sheets[workbook.SheetNames[0]];
/* to_json returns an object-mode stream */
// highlight-next-line
var stream = XLSX.stream.to_json(worksheet, {raw:true});
var XLSX = require("xlsx"), Transform = require("stream").Transform;
var wb = XLSX.readFile(process.argv[2], {dense: true});
var ws = wb.Sheets[wb.SheetNames[0]];
/* this Transform stream converts JS objects to text and prints to screen */
/* this Transform stream converts JS objects to text */
var conv = new Transform({writableObjectMode:true});
conv._transform = function(obj, e, cb){ cb(null, JSON.stringify(obj) + "\n"); };
conv.pipe(process.stdout);
/* pipe `to_json` -> transformer -> standard output */
// highlight-next-line
stream.pipe(conv);
XLSX.stream.to_json(ws, {raw: true}).pipe(conv).pipe(process.stdout);
```
**Demo**
:::note
This demo was last tested in the following deployments:
| Node Version | Date | Node Status when tested |
|:-------------|:-----------|:------------------------|
| `0.12.18` | 2023-05-30 | End-of-Life |
| `4.9.1` | 2023-05-30 | End-of-Life |
| `6.17.1` | 2023-05-30 | End-of-Life |
| `8.17.0` | 2023-05-30 | End-of-Life |
| `10.24.1` | 2023-05-30 | End-of-Life |
| `12.22.12` | 2023-05-30 | End-of-Life |
| `14.21.3` | 2023-05-30 | End-of-Life |
| `16.20.0` | 2023-05-30 | Maintenance LTS |
| `18.16.0` | 2023-05-30 | Active LTS |
| `20.2.0` | 2023-05-30 | Current |
While streaming methods work in End-of-Life versions of NodeJS, production
deployments should upgrade to a Current or LTS version of NodeJS.
:::
1) Install the [NodeJS module](/docs/getting-started/installation/nodejs)
<CodeBlock language="bash">{`\
npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz`}
</CodeBlock>
2) Download [`SheetJSNodeJStream.js`](pathname:///stream/SheetJSNodeJStream.js):
```bash
curl -LO https://docs.sheetjs.com/stream/SheetJSNodeJStream.js
```
3) Download [the test file](https://sheetjs.com/pres.xlsx):
```bash
curl -LO https://sheetjs.com/pres.xlsx
```
4) Run the script:
```bash
node SheetJSNodeJStream.js pres.xlsx
```
<details><summary><b>Expected Output</b> (click to show)</summary>
The console will display a list of objects:
```json
{"Name":"Bill Clinton","Index":42}
{"Name":"GeorgeW Bush","Index":43}
{"Name":"Barack Obama","Index":44}
{"Name":"Donald Trump","Index":45}
{"Name":"Joseph Biden","Index":46}
```
The script will also generate `SheetJSNodeJStream.csv`:
```csv
Name,Index
Bill Clinton,42
GeorgeW Bush,43
Barack Obama,44
Donald Trump,45
Joseph Biden,46
```
</details>
### Browser
<details><summary><b>Live Demo</b> (click to show)</summary>
:::note
The live demo was last tested on 2023 May 30 in Chromium 113.
:::
NodeJS streaming APIs are not available in the browser. The following function
supplies a pseudo stream object compatible with the `to_csv` function:
```js
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
XLSX.stream.set_readable(() => ({
__done: false,
// this function will be assigned by the SheetJS stream methods
_read: function() { this.__done = true; },
// this function is called by the stream methods
push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
}));
return XLSX.stream.to_csv(ws, opts);
}
// assuming `workbook` is a workbook, stream the first sheet
const ws = workbook.Sheets[workbook.SheetNames[0]];
const strm = sheet_to_csv_cb(ws, (csv)=>{ if(csv != null) console.log(csv); });
strm.resume();
```
#### Web Workers
For processing large files in the browser, it is strongly encouraged to use Web
Workers. The [Worker demo](/docs/demos/bigdata/worker#streaming-write) includes
examples using the File System Access API.
<details><summary><b>Web Worker Details</b> (click to show)</summary>
Typically, the file and stream processing occurs in the Web Worker. CSV rows
can be sent back to the main thread in the callback:
<CodeBlock language="js" title="worker.js">{`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js");
\n\
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
XLSX.stream.set_readable(() => ({
__done: false,
// this function will be assigned by the SheetJS stream methods
_read: function() { this.__done = true; },
// this function is called by the stream methods
push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
}));
return XLSX.stream.to_csv(ws, opts);
}
\n\
/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
try {
postMessage({state: "fetching " + e.data.url});
/* Fetch file */
const res = await fetch(e.data.url);
const ab = await res.arrayBuffer();
\n\
/* Parse file */
postMessage({state: "parsing"});
const wb = XLSX.read(ab, {dense: true});
const ws = wb.Sheets[wb.SheetNames[0]];
\n\
/* Generate CSV rows */
postMessage({state: "csv"});
const strm = sheet_to_csv_cb(ws, (csv) => {
if(csv != null) postMessage({csv});
else postMessage({state: "done"});
});
strm.resume();
} catch(e) {
/* Pass the error message back */
postMessage({error: String(e.message || e) });
}
}, false);`}
</CodeBlock>
The main thread will receive messages with CSV rows for further processing:
```js title="main.js"
worker.onmessage = function(e) {
if(e.data.error) { console.error(e.data.error); /* show an error message */ }
else if(e.data.state) { console.info(e.data.state); /* current state */ }
else {
/* e.data.csv is the row generated by the stream */
console.log(e.data.csv);
}
};
```
</details>
### Live Demo
The following live demo fetches and parses a file in a Web Worker. The `to_csv`
streaming function is used to generate CSV rows and pass back to the main thread
@ -231,117 +403,13 @@ self.addEventListener('message', async(e) => {
}`}
</CodeBlock>
</details>
NodeJS streaming APIs are not available in the browser. The following function
supplies a pseudo stream object compatible with the `to_csv` function:
```js
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
XLSX.stream.set_readable(() => ({
__done: false,
// this function will be assigned by the SheetJS stream methods
_read: function() { this.__done = true; },
// this function is called by the stream methods
push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
}));
return XLSX.stream.to_csv(ws, opts);
}
// assuming `workbook` is a workbook, stream the first sheet
const ws = workbook.Sheets[workbook.SheetNames[0]];
const strm = sheet_to_csv_cb(ws, (csv)=>{ if(csv != null) console.log(csv); });
strm.resume();
```
#### Web Workers
For processing large files in the browser, it is strongly encouraged to use Web
Workers. The [Worker demo](/docs/demos/bigdata/worker#streaming-write) includes
examples using the File System Access API.
Typically, the file and stream processing occurs in the Web Worker. CSV rows
can be sent back to the main thread in the callback:
<CodeBlock language="js" title="worker.js">{`\
/* load standalone script from CDN */
importScripts("https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js");
\n\
function sheet_to_csv_cb(ws, cb, opts, batch = 1000) {
XLSX.stream.set_readable(() => ({
__done: false,
// this function will be assigned by the SheetJS stream methods
_read: function() { this.__done = true; },
// this function is called by the stream methods
push: function(d) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
}));
return XLSX.stream.to_csv(ws, opts);
}
\n\
/* this callback will run once the main context sends a message */
self.addEventListener('message', async(e) => {
try {
postMessage({state: "fetching " + e.data.url});
/* Fetch file */
const res = await fetch(e.data.url);
const ab = await res.arrayBuffer();
\n\
/* Parse file */
postMessage({state: "parsing"});
const wb = XLSX.read(ab, {dense: true});
const ws = wb.Sheets[wb.SheetNames[0]];
\n\
/* Generate CSV rows */
postMessage({state: "csv"});
const strm = sheet_to_csv_cb(ws, (csv) => {
if(csv != null) postMessage({csv});
else postMessage({state: "done"});
});
strm.resume();
} catch(e) {
/* Pass the error message back */
postMessage({error: String(e.message || e) });
}
}, false);`}
</CodeBlock>
The main thread will receive messages with CSV rows for further processing:
```js
worker.onmessage = function(e) {
if(e.data.error) { console.error(e.data.error); /* show an error message */ }
else if(e.data.state) { console.info(e.data.state); /* current state */ }
else {
/* e.data.csv is the row generated by the stream */
console.log(e.data.csv);
}
};
```
### Deno
Deno does not support NodeJS streams in normal execution, so a wrapper is used.
This example fetches <https://sheetjs.com/pres.numbers> and prints CSV rows:
Deno does not support NodeJS streams in normal execution, so a wrapper is used:
<CodeBlock language="ts" title="sheet2csv.ts">{`\
<CodeBlock language="ts">{`\
// @deno-types="https://cdn.sheetjs.com/xlsx-${current}/package/types/index.d.ts"
import { stream, Sheet2CSVOpts, WorkSheet } from 'https://cdn.sheetjs.com/xlsx-${current}/package/xlsx.mjs';
\n\
interface Resumable { resume:()=>void; };
/* Generate row strings from a worksheet */
function sheet_to_csv_cb(ws: WorkSheet, cb:(d:string|null)=>void, opts: Sheet2CSVOpts = {}, batch = 1000): Resumable {
stream.set_readable(() => ({
__done: false,
// this function will be assigned by the SheetJS stream methods
_read: function() { this.__done = true; },
// this function is called by the stream methods
push: function(d: any) { if(!this.__done) cb(d); if(d == null) this.__done = true; },
resume: function pump() { for(var i = 0; i < batch && !this.__done; ++i) this._read(); if(!this.__done) setTimeout(pump.bind(this), 0); }
}));
return stream.to_csv(ws, opts) as Resumable;
}
import { stream } from 'https://cdn.sheetjs.com/xlsx-${current}/package/xlsx.mjs';
\n\
/* Callback invoked on each row (string) and at the end (null) */
const csv_cb = (d:string|null) => {
@ -350,12 +418,37 @@ const csv_cb = (d:string|null) => {
Deno.stdout.write(new TextEncoder().encode(d));
};
\n\
/* Fetch https://sheetjs.com/pres.numbers, parse, and get first worksheet */
import { read } from 'https://cdn.sheetjs.com/xlsx-${current}/package/xlsx.mjs';
const ab = await (await fetch("https://sheetjs.com/pres.numbers")).arrayBuffer();
const wb = read(ab, { dense: true });
const ws = wb.Sheets[wb.SheetNames[0]];
/* Prepare \`Readable\` function */
const Readable = () => ({
__done: false,
// this function will be assigned by the SheetJS stream methods
_read: function() { this.__done = true; },
// this function is called by the stream methods
push: function(d: any) {
if(!this.__done) csv_cb(d);
if(d == null) this.__done = true;
},
resume: function pump() {
for(var i = 0; i < 1000 && !this.__done; ++i) this._read();
if(!this.__done) setTimeout(pump.bind(this), 0);
}
})
/* Wire up */
stream.set_readable(Readable);
\n\
/* Create and start CSV stream */
sheet_to_csv_cb(ws, csv_cb).resume();`}
/* assuming \`workbook\` is a workbook, stream the first sheet */
const ws = workbook.Sheets[workbook.SheetNames[0]];
stream.to_csv(ws).resume();`}
</CodeBlock>
:::note
This demo was last tested on 2023 May 30.
:::
[`SheetJSDenoStream.ts`](pathname:///stream/SheetJSDenoStream.ts) is a small
example script that downloads <https://sheetjs.com/pres.numbers> and prints
CSV rows.
1) Run `deno run -A https://docs.sheetjs.com/stream/SheetJSDenoStream.ts`

@ -0,0 +1,283 @@
---
title: C++ + Hermes
pagination_prev: demos/bigdata/index
pagination_next: solutions/input
---
import current from '/version.js';
import CodeBlock from '@theme/CodeBlock';
Hermes is an embeddable JS engine written in C++. With some light shims, it can
run the standalone browser scripts.
The [Standalone scripts](/docs/getting-started/installation/standalone) can be
parsed and evaluated in a Hermes context.
:::caution Here be Dragons
The main target for Hermes is React Native. At the time of writing, there was
no official documentation for embedding the Hermes engine in C++ programs.
:::
## Integration Details
_Initialize Hermes_
The runtime can be initialized in one line:
```cpp
std::unique_ptr<facebook::jsi::Runtime> rt(facebook::hermes::makeHermesRuntime());
```
Hermes does not expose a `console` or `global` variable, but those can be
synthesized from JS code in the runtime:
```cpp
auto src = std::make_shared<facebook::jsi::StringBuffer>(
/* create global object */
"var global = (function(){ return this; }).call(null);"
/* create a fake `console` from the hermes `print` builtin */
"var console = { log: function(x) { print(x); } };"
);
auto js = rt->prepareJavaScript(src, std::string("<eval>"));
rt->evaluatePreparedJavaScript(js);
```
_Load SheetJS Scripts_
The main library can be loaded by reading the script from the file system and
evaluating in the Hermes context:
```cpp
static char *read_file(const char *filename, size_t *sz) {
FILE *f = fopen(filename, "rb");
if(!f) return NULL;
long fsize; { fseek(f, 0, SEEK_END); fsize = ftell(f); fseek(f, 0, SEEK_SET); }
char *buf = (char *)malloc(fsize * sizeof(char));
*sz = fread((void *) buf, 1, fsize, f);
fclose(f);
return buf;
}
/* Unfortunately the library provides no C-friendly Buffer classes */
class CBuffer : public facebook::jsi::Buffer {
public:
CBuffer(const uint8_t *data, size_t size) : buf(data), sz(size) {}
size_t size() const override { return sz; }
const uint8_t *data() const override { return buf; }
private:
const uint8_t *buf;
size_t sz;
};
// ...
/* load SheetJS library */
size_t sz; char *xlsx_full_min_js = read_file("xlsx.full.min.js", &sz);
auto src = std::make_shared<CBuffer>(CBuffer((uint8_t *)xlsx_full_min_js, sz));
auto js = rt->prepareJavaScript(src, std::string("xlsx.full.min.js"));
rt->evaluatePreparedJavaScript(js);
```
To confirm the library is loaded, `XLSX.version` can be printed to the console:
```cpp
auto src = std::make_shared<facebook::jsi::StringBuffer>(
"console.log('SheetJS Library Version: ' + XLSX.version)"
);
auto js = rt->prepareJavaScript(src, std::string("<eval>"));
rt->evaluatePreparedJavaScript(js);
```
### Reading Files
Hermes supports `ArrayBuffer` but has no simple helper to read raw memory.
Libraries are expected to implement `MutableBuffer`:
```cpp
/* ArrayBuffer constructor expects MutableBuffer*/
class CMutableBuffer : public facebook::jsi::MutableBuffer {
public:
CMutableBuffer(uint8_t *data, size_t size) : buf(data), sz(size) {}
size_t size() const override { return sz; }
uint8_t *data() override { return buf; }
private:
uint8_t *buf;
size_t sz;
};
// ...
/* load payload as ArrayBuffer */
size_t sz; char *data = read_file(argv[1], &sz);
auto payload = std::make_shared<CMutableBuffer>(CMutableBuffer((uint8_t *)data, sz));
auto ab = facebook::jsi::ArrayBuffer(*rt, payload);
```
It is strongly recommended to create a stub function to perform the entire
workflow in JS code and pass the final result back to C++.
> _JS Stub function_
>
```js
function(buf) {
/* `buf` will be an ArrayBuffer */
var wb = XLSX.read(buf);
return XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]]);
}
```
_C++ integration code_
```cpp
/* define stub function to read and convert first sheet to CSV */
auto src = std::make_shared<facebook::jsi::StringBuffer>(
"(function(buf) {"
"var wb = XLSX.read(buf);"
"return XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]]);"
"})"
);
auto js = rt->prepareJavaScript(src, std::string("<eval>"));
auto func = rt->evaluatePreparedJavaScript(js);
/* call stub function and capture result */
auto csv = func.asObject(*rt).asFunction(*rt).call(*rt, ab);
/* interpret as utf8 and print to stdout */
std::string str = csv.getString(*rt).utf8(*rt);
```
## Complete Example
The "Integration Example" covers a traditional integration in a C++ application,
while the "CLI Test" demonstrates other concepts using the `hermes` CLI tool.
### Integration Example
:::note
This demo was last tested on 2023 May 30 against Hermes commit `869312f` on
an Intel Mac. `llvm-g++ -v` printed:
```
Apple clang version 14.0.0 (clang-1400.0.29.202)
Target: x86_64-apple-darwin21.6.0
```
:::
0) Make a project directory:
```bash
mkdir sheetjs-hermes
cd sheetjs-hermes
```
1) Download the [`Makefile`](pathname:///hermes/Makefile):
```bash
curl -LO https://docs.sheetjs.com/hermes/Makefile
```
2) Download [`sheetjs-hermes.cpp`](pathname:///hermes/sheetjs-hermes.cpp):
```bash
curl -LO https://docs.sheetjs.com/hermes/sheetjs-hermes.cpp
```
3) Build the library (this is the `init` target):
```bash
make init
```
4) Build the application:
```bash
make sheetjs-hermes
```
5) Download the standalone script and test file:
<ul>
<li><a href={`https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`}>xlsx.full.min.js</a></li>
<li><a href="https://sheetjs.com/pres.numbers">pres.numbers</a></li>
</ul>
<CodeBlock language="bash">{`\
curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js
curl -LO https://sheetjs.com/pres.numbers`}
</CodeBlock>
6) Run the application:
```bash
./sheetjs-hermes pres.numbers
```
If successful, the program will print the library version number and the
contents of the first sheet as CSV rows.
### CLI Test
:::note
This demo was last tested on 2023 May 30 against Hermes version `0.11.0`.
:::
Due to limitations of the standalone binary, this demo will encode a test file
as a Base64 string and directly add it to an amalgamated script.
0) Install the `hermes` command line tool
1) Download the standalone script and test file:
<ul>
<li><a href={`https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`}>xlsx.full.min.js</a></li>
<li><a href="https://sheetjs.com/pres.numbers">pres.numbers</a></li>
</ul>
<CodeBlock language="bash">{`\
curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js
curl -LO https://sheetjs.com/pres.numbers`}
</CodeBlock>
2) Bundle the test file and create `payload.js`:
```bash
node -e "fs.writeFileSync('payload.js', 'var payload = \"' + fs.readFileSync('pres.numbers').toString('base64') + '\";')"
```
3) Create support scripts:
- `global.js` creates a `global` variable and defines a fake `console`:
```js title="global.js"
var global = (function(){ return this; }).call(null);
var console = { log: function(x) { print(x); } };
```
- `hermes.js` will call `XLSX.read` and `XLSX.utils.sheet_to_csv`:
```js title="hermes.js"
var wb = XLSX.read(payload, {type:'base64'});
console.log(XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]]));
```
4) Create the amalgamation `xlsx.hermes.js`:
```bash
cat global.js xlsx.full.min.js payload.js hermes.js > xlsx.hermes.js
```
The final script defines `global` before loading the standalone library. Once
ready, it will read the bundled test data and print the contents as CSV.
5) Run the script using the Hermes standalone binary:
```bash
hermes xlsx.hermes.js
```
If successful, the script will print CSV data from the test file.

@ -104,75 +104,9 @@ This demo has been moved [to a dedicated page](/docs/demos/engines/goja).
### Hermes
Hermes is an embeddable JS engine for React Native. The library and binary
distributions include a command-line tool `hermes` for running JS scripts.
The simplest way to interact with the engine is to pass Base64 strings. The make
target builds a very simple payload with the data.
:::note
The official release includes the `hermes` standalone tool. While applications
should link against the official libraries, the standalone tool is useful for
verifying functionality.
:::
<details><summary><b>Complete Example</b> (click to show)</summary>
Due to limitations of the standalone binary, this demo will encode a test file
as a Base64 string and directly add it to an amalgamated script.
0) Install the `hermes` command line tool
1) Download the standalone script, shim, and test file:
<ul>
<li><a href={`https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`}>xlsx.full.min.js</a></li>
<li><a href={`https://cdn.sheetjs.com/xlsx-${current}/package/dist/shim.min.js`}>shim.min.js</a></li>
<li><a href="https://sheetjs.com/pres.numbers">pres.numbers</a></li>
</ul>
2) Bundle the test file and create `payload.js`:
```bash
node -e "fs.writeFileSync('payload.js', 'var payload = \"' + fs.readFileSync('pres.numbers').toString('base64') + '\";')"
```
3) Create support scripts:
- `global.js` creates a `global` variable and defines a fake `console`:
```js title="global.js"
var global = (function(){ return this; }).call(null);
var console = { log: function(x) { print(x); } };
```
- `hermes.js` will call `XLSX.read` and `XLSX.utils.sheet_to_csv`:
```js title="hermes.js"
/* sheetjs (C) 2013-present SheetJS -- https://sheetjs.com */
var wb = XLSX.read(payload, {type:'base64'});
console.log(XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]]));
```
4) Create the amalgamation `xlsx.hermes.js`:
```bash
cat global.js xlsx.full.min.js payload.js hermes.js > xlsx.hermes.js
```
The final script defines `global` before loading the standalone library. Once
ready, it will read the bundled test data and print the contents as CSV.
5) Run the script using the Hermes standalone binary:
```bash
hermes xlsx.hermes.js
```
</details>
Hermes is an embeddable JS engine written in C++.
This demo has been moved [to a dedicated page](/docs/demos/engines/hermes).
### JavaScriptCore

@ -111,6 +111,8 @@ _Miscellaneous_
- `to_html(sheet, opts)` streams an HTML table incrementally
- `to_json(sheet, opts)` streams JS objects (object-mode stream)
Streaming write functions are described in the [Streaming Write demo](/docs/demos/bigdata/stream#streaming-write).
### ESM Helpers
Due to broad inconsistencies in ESM implementations, the `mjs` build does not

@ -34,7 +34,7 @@ const config = {
({
docs: {
sidebarPath: require.resolve('./sidebars.js'),
// editUrl: 'https://git.sheetjs.com/sheetjs/docs.sheetjs.com/src/branch/master/docz',
editUrl: 'https://git.sheetjs.com/sheetjs/docs.sheetjs.com/src/branch/master/docz',
},
//blog: {
// showReadingTime: true,

@ -0,0 +1,57 @@
# Note: The official Hermes documentation includes zero guidance on embedding.
# Tested against commit 869312f185b73a7d7678a28f5f3216052c667e90
# First target in the file, so a bare `make` runs it.
# Builds the demo binary (via the sheetjs-hermes prerequisite), downloads the
# SheetJS standalone script and the sample workbook into the current
# directory, then runs the demo against the sample workbook.
.PHONY: doit
doit: sheetjs-hermes
curl -LO https://cdn.sheetjs.com/xlsx-latest/package/dist/xlsx.full.min.js
curl -LO https://sheetjs.com/pres.numbers
./sheetjs-hermes pres.numbers
# `clean` removes only the demo binary.
.PHONY: clean
clean:
rm -f sheetjs-hermes
# `clean-all` runs `clean` first, then also removes the Hermes checkout, the
# Hermes build directory and the two downloaded files.
.PHONY: clean-all
clean-all: clean
rm -rf build_release hermes xlsx.full.min.js pres.numbers
# This sequence was cobbled together by linking against every artifact.
# Some of these lines are likely extraneous
#
# Compiles and links the demo in a single compiler invocation:
#   $<  expands to the first prerequisite (sheetjs-hermes.cpp)
#   $@  expands to the target name (sheetjs-hermes)
# `init` is listed as a prerequisite so the Hermes checkout and build exist
# before linking. Headers come from the `hermes` checkout (-I flags) and the
# libraries from `build_release` (-L/-l pairs).
# NOTE(review): `-lflowparser-mac` and `-framework CoreFoundation` look
# macOS-specific -- confirm before using this rule on another platform.
sheetjs-hermes: sheetjs-hermes.cpp init
llvm-g++ $< -o $@ -std=gnu++17 \
-Ihermes/include/ -Ihermes/API/ -Ihermes/API/jsi -Ihermes/public \
-Lbuild_release/API/hermes/ -lhermesapi -lcompileJS -lsynthTrace -lsynthTraceParser -ltimerStats -ltraceInterpreter \
-Lbuild_release/external/dtoa/ -ldtoa \
-Lbuild_release/external/llvh/lib/Demangle/ -lLLVHDemangle \
-Lbuild_release/external/llvh/lib/Support/ -lLLVHSupport \
-Lbuild_release/jsi/ -ljsi \
-Lbuild_release/lib/ -lhermesFrontend \
-Lbuild_release/lib/ -lhermesOptimizer \
-Lbuild_release/lib/ADT -lhermesADT \
-Lbuild_release/lib/AST/ -lhermesAST \
-Lbuild_release/lib/AST2JS/ -lhermesAST2JS \
-Lbuild_release/lib/BCGen/ -lhermesBackend \
-Lbuild_release/lib/BCGen/HBC/ -lhermesHBCBackend \
-Lbuild_release/lib/CompilerDriver/ -lhermesCompilerDriver \
-Lbuild_release/lib/ConsoleHost/ -lhermesConsoleHost \
-Lbuild_release/lib/DependencyExtractor/ -lhermesDependencyExtractor \
-Lbuild_release/lib/FlowParser/ -lhermesFlowParser \
-Lbuild_release/lib/FrontEndDefs/ -lhermesFrontEndDefs \
-Lbuild_release/lib/Inst/ -lhermesInst \
-Lbuild_release/lib/InternalBytecode/ -lhermesInternalBytecode \
-Lbuild_release/lib/Parser/ -lhermesParser \
-Lbuild_release/lib/Platform/ -lhermesPlatform \
-Lbuild_release/lib/Platform/Intl/ -lhermesBCP47Parser \
-Lbuild_release/lib/Platform/Unicode/ -lhermesPlatformUnicode \
-Lbuild_release/lib/Regex/ -lhermesRegex \
-Lbuild_release/lib/SourceMap/ -lhermesSourceMap \
-Lbuild_release/lib/Support/ -lhermesSupport \
-Lbuild_release/lib/VM/ -lhermesVMRuntime \
-Lbuild_release/public/hermes/Public -lhermesPublic \
-Lhermes/external/flowparser/ -lflowparser-mac \
-framework CoreFoundation
# Fetch Hermes at the pinned commit and build it in Release mode.
# Each step is skipped when its output directory (`hermes`, `build_release`)
# already exists.
# The commands inside each step are chained with `&&` so that a failed clone,
# checkout or cmake configure stops the recipe with a non-zero status instead
# of carrying on with a missing or wrong source tree.
.PHONY: init
init:
	if [ ! -e hermes ]; then git clone https://github.com/facebook/hermes.git && cd hermes && git checkout 869312f185b73a7d7678a28f5f3216052c667e90 && cd ..; fi
	if [ ! -e build_release ]; then cmake -S hermes -B build_release -G Ninja -DCMAKE_BUILD_TYPE=Release && cmake --build ./build_release; fi

@ -0,0 +1,105 @@
/* sheetjs-hermes.cpp Copyright (c) SheetJS LLC. */
#include <iostream>
#include "hermes/hermes.h"
/*
 * Read an entire file into a freshly malloc'd buffer.
 *
 * filename: path of the file to open (binary mode)
 * sz:       out-parameter; receives the number of bytes read, or 0 on failure
 *
 * Returns the buffer (the caller releases it with free) or NULL when the file
 * cannot be opened, its size cannot be determined, or the allocation fails.
 */
static char *read_file(const char *filename, size_t *sz) {
  *sz = 0;
  FILE *f = fopen(filename, "rb");
  if(!f) return NULL;

  /* seek to the end to learn the size; ftell reports -1 on failure */
  long fsize = -1;
  if(fseek(f, 0, SEEK_END) == 0) fsize = ftell(f);
  if(fsize < 0 || fseek(f, 0, SEEK_SET) != 0) { fclose(f); return NULL; }

  /* request at least one byte: malloc(0) may return NULL, which would be
     indistinguishable from an allocation failure for an empty file */
  char *buf = (char *)malloc(fsize > 0 ? (size_t)fsize : 1);
  if(!buf) { fclose(f); return NULL; }

  *sz = fread((void *) buf, 1, (size_t)fsize, f);
  fclose(f);
  return buf;
}
/* Unfortunately the library provides no C-friendly Buffer classes */
class CBuffer : public facebook::jsi::Buffer {
public:
CBuffer(const uint8_t *data, size_t size) : buf(data), sz(size) {}
size_t size() const override { return sz; }
const uint8_t *data() const override { return buf; }
private:
const uint8_t *buf;
size_t sz;
};
/* ArrayBuffer constructor expects MutableBuffer*/
class CMutableBuffer : public facebook::jsi::MutableBuffer {
public:
CMutableBuffer(uint8_t *data, size_t size) : buf(data), sz(size) {}
size_t size() const override { return sz; }
uint8_t *data() override { return buf; }
private:
uint8_t *buf;
size_t sz;
};
/*
 * Entry point: load the SheetJS standalone script into a Hermes runtime,
 * parse the file named by argv[1] and print its first worksheet as CSV.
 *
 * Expects `xlsx.full.min.js` in the current working directory.
 *
 * Returns 0 on success, 1 on a usage error, an unreadable file, or an
 * uncaught JavaScript exception.
 */
int main(int argc, char **argv) {
  /* the spreadsheet path is mandatory; argv[1] must not be read without it */
  if(argc < 2) {
    std::cerr << "usage: " << (argc > 0 ? argv[0] : "sheetjs-hermes") << " <spreadsheet file>" << '\n';
    return 1;
  }

  std::unique_ptr<facebook::jsi::Runtime> rt(facebook::hermes::makeHermesRuntime());

  /* setup */
  try {
    auto src = std::make_shared<facebook::jsi::StringBuffer>(
      /* create global object */
      "var global = (function(){ return this; }).call(null);"
      /* create a fake `console` from the hermes `print` builtin */
      "var console = { log: function(x) { print(x); } };"
    );
    auto js = rt->prepareJavaScript(src, std::string("<eval>"));
    rt->evaluatePreparedJavaScript(js);
  } catch (const facebook::jsi::JSIException &e) {
    std::cerr << "JavaScript terminated via uncaught exception: " << e.what() << '\n';
    return 1;
  }

  /* load SheetJS library */
  try {
    size_t sz; char *xlsx_full_min_js = read_file("xlsx.full.min.js", &sz);
    /* read_file returns NULL when the script is missing or unreadable */
    if(!xlsx_full_min_js) {
      std::cerr << "Unable to read xlsx.full.min.js" << '\n';
      return 1;
    }
    /* CBuffer only stores the pointer, so the script text is not freed here */
    auto src = std::make_shared<CBuffer>(CBuffer((uint8_t *)xlsx_full_min_js, sz));
    auto js = rt->prepareJavaScript(src, std::string("xlsx.full.min.js"));
    rt->evaluatePreparedJavaScript(js);
  } catch (const facebook::jsi::JSIException &e) {
    std::cerr << "JavaScript terminated via uncaught exception: " << e.what() << '\n';
    return 1;
  }

  /* print library version */
  try {
    auto src = std::make_shared<facebook::jsi::StringBuffer>(
      "console.log('SheetJS Library Version: ' + XLSX.version)"
    );
    auto js = rt->prepareJavaScript(src, std::string("<eval>"));
    rt->evaluatePreparedJavaScript(js);
  } catch (const facebook::jsi::JSIException &e) {
    std::cerr << "JavaScript terminated via uncaught exception: " << e.what() << '\n';
    return 1;
  }

  try {
    /* load payload as ArrayBuffer */
    size_t sz; char *data = read_file(argv[1], &sz);
    /* read_file returns NULL when the input file is missing or unreadable */
    if(!data) {
      std::cerr << "Unable to read " << argv[1] << '\n';
      return 1;
    }
    /* CMutableBuffer only stores the pointer, so the payload is not freed here */
    auto payload = std::make_shared<CMutableBuffer>(CMutableBuffer((uint8_t *)data, sz));
    auto ab = facebook::jsi::ArrayBuffer(*rt, payload);

    /* define stub function to read and convert first sheet to CSV */
    auto src = std::make_shared<facebook::jsi::StringBuffer>(
      "(function(buf) {"
        "var wb = XLSX.read(buf);"
        "return XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]]);"
      "})"
    );
    auto js = rt->prepareJavaScript(src, std::string("<eval>"));
    auto func = rt->evaluatePreparedJavaScript(js);

    /* call stub function and capture result */
    auto csv = func.asObject(*rt).asFunction(*rt).call(*rt, ab);

    /* interpret as utf8 and print to stdout */
    std::string str = csv.getString(*rt).utf8(*rt);
    std::cout << str << std::endl;
  } catch (const facebook::jsi::JSIException &e) {
    std::cerr << "JavaScript terminated via uncaught exception: " << e.what() << std::endl;
    return 1;
  }
  return 0;
}

@ -0,0 +1,38 @@
#!/usr/bin/env -S deno run --allow-net
// @deno-types="https://cdn.sheetjs.com/xlsx-latest/package/types/index.d.ts"
import { read, stream, Sheet2CSVOpts, WorkSheet } from 'https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs';
interface Resumable { resume:()=>void; };
/*
 * Stream a worksheet as CSV row strings.
 *
 * Installs a pseudo-stream factory with `stream.set_readable`, then returns
 * the object created by `stream.to_csv`. Nothing is emitted until the caller
 * invokes `resume()` on the returned object.
 *
 *   ws    worksheet to convert
 *   cb    invoked with each value pushed by the stream methods, including
 *         the terminating null
 *   opts  options forwarded to `stream.to_csv`
 *   batch maximum number of `_read` calls per timer tick
 */
function sheet_to_csv_cb(ws: WorkSheet, cb:(d:string|null)=>void, opts: Sheet2CSVOpts = {}, batch = 1000): Resumable {
  const make_readable = () => ({
    __done: false,

    // placeholder: the SheetJS stream methods assign the real reader
    _read: function() { this.__done = true; },

    // called by the stream methods with each chunk and with null at the end
    push: function(chunk: string|null) {
      if(!this.__done) cb(chunk);
      if(chunk == null) this.__done = true;
    },

    // pull up to `batch` chunks, then yield to the event loop and continue
    resume: function pump() {
      let calls = 0;
      while(calls < batch && !this.__done) {
        this._read();
        ++calls;
      }
      if(this.__done) return;
      setTimeout(pump.bind(this), 0);
    }
  });

  stream.set_readable(make_readable);
  return stream.to_csv(ws, opts) as Resumable;
}
/*
 * Row sink: receives each CSV row (string) and a final null.
 * Rows already carry their line endings, so the encoded bytes are written to
 * standard output as-is; the null terminator is ignored.
 */
const csv_cb = (d:string|null) => {
  if(d != null) {
    const bytes = new TextEncoder().encode(d);
    Deno.stdout.write(bytes);
  }
};
/* Download https://sheetjs.com/pres.numbers and parse it in dense mode */
const res = await fetch("https://sheetjs.com/pres.numbers");
const ab = await res.arrayBuffer();
const wb = read(ab, { dense: true });

/* Select the first worksheet by name */
const first_sheet_name = wb.SheetNames[0];
const ws = wb.Sheets[first_sheet_name];

/* Build the CSV pseudo-stream and start pumping rows into csv_cb */
const csv_stream = sheet_to_csv_cb(ws, csv_cb);
csv_stream.resume();

@ -0,0 +1,16 @@
/* this script works in Node 0.12 (which predated ES6) so no modern syntax */
var XLSX = require("xlsx");
var fs = require("fs");
var stream = require("stream");

/* parse the file named on the command line and select the first worksheet */
var wb = XLSX.readFile(process.argv[2]);
var first_sheet_name = wb.SheetNames[0];
var ws = wb.Sheets[first_sheet_name];

/* object-mode input, text output: each JS object becomes one line of JSON */
function object_to_line(obj, e, cb) {
  cb(null, JSON.stringify(obj) + "\n");
}
var conv = new stream.Transform({writableObjectMode:true});
conv._transform = object_to_line;

/* to_json -> transformer -> standard output */
var json_rows = XLSX.stream.to_json(ws, {raw: true});
json_rows.pipe(conv).pipe(process.stdout);

/* to_csv -> SheetJSNodeJStream.csv */
var ostream = fs.createWriteStream("SheetJSNodeJStream.csv");
var csv_rows = XLSX.stream.to_csv(ws);
csv_rows.pipe(ostream);