rusty-sheetjs

This commit is contained in:
SheetJS 2023-05-22 04:06:09 -04:00
parent 173ba31e0a
commit 58f45f9574
11 changed files with 394 additions and 7 deletions

@ -260,3 +260,72 @@ g++ -I. -Iinclude sheetjs.v8.cc -o sheetjs.v8 -fno-rtti -lv8_monolith \
If the program succeeded, the CSV contents will be printed to console and the
file `sheetjsw.xlsb` will be created. That file can be opened with Excel.
## Bindings
V8 is easily embeddable. Bindings exist for many languages.
### Rust
The `v8` crate provides binary builds and straightforward bindings. The Rust
code is similar to the C++ code.
Pulling data from an `ArrayBuffer` back into Rust involves an unsafe operation:
```rust
/* assuming JS code returns an ArrayBuffer, copy result to a Vec<u8> */
fn eval_code_ab(scope: &mut v8::HandleScope, code: &str) -> Vec<u8> {
    let source = v8::String::new(scope, code).unwrap();
    let script = v8::Script::compile(scope, source, None).unwrap();
    let result: v8::Local<v8::ArrayBuffer> = script.run(scope).unwrap().try_into().unwrap();
    let len = result.byte_length();
    /* In C++, `Data` returns a pointer. Reading through that pointer is unsafe */
    match result.data() {
        Some(ptr) if len > 0 => {
            // SAFETY: `ptr` and `len` come from the same ArrayBuffer, which is kept
            // alive by the `result` handle. The bytes are only read (shared slice)
            // and are copied out before any other JS code can run.
            unsafe { std::slice::from_raw_parts(ptr.cast::<u8>().as_ptr(), len) }.to_vec()
        }
        /* no backing pointer or zero length: nothing to copy */
        _ => Vec::new(),
    }
}
```
:::note
This demo was last tested on 2023 May 22 against `v8` crate version `0.71.2`.
:::
1) Create a new project:
```bash
cargo new sheetjs-rustyv8
cd sheetjs-rustyv8
cargo run
```
2) Add the `v8` crate:
```bash
cargo add v8
cargo run
```
3) Download the [Standalone build](/docs/getting-started/installation/standalone):
<CodeBlock language="bash">{`\
curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`}
</CodeBlock>
4) Download [`main.rs`](pathname:///v8/main.rs) and replace `src/main.rs`:
```bash
curl -L -o src/main.rs https://docs.sheetjs.com/v8/main.rs
```
5) Download [the test file](https://sheetjs.com/pres.numbers) and run:
```bash
curl -LO https://sheetjs.com/pres.numbers
cargo run pres.numbers
```
If the program succeeded, the CSV contents will be printed to console and the
file `sheetjsw.xlsb` will be created. That file can be opened with Excel.

@ -78,7 +78,7 @@ engine.put("bytes", Files.readAllBytes(Paths.get(args[0])));
engine.eval(
"function b2a(b) {" +
"var out = typeof Uint8Array == 'function' ? new Uint8Array(b.length) : new Array(b.length);" +
"for(var i = 0; i < out.length; i++) out[i] = (b[i] + 256) & 0xFF;" +
"for(var i = 0; i < out.length; i++) out[i] = b[i] & 0xFF;" +
"return out;" +
"}" +
"var u8a = b2a(bytes)"
@ -92,10 +92,16 @@ engine.eval("var wb = XLSX.read(u8a, {type: 'array'})");
:::note
This demo was last tested on 2023 March 27 using:
This demo was tested in the following deployments:
- OpenJDK 19.0.1 + Nashorn 15.4 standalone
- OpenJDK 11.0.18 + Official Nashorn
| OpenJDK | Nashorn | Date |
|:--------|:----------------|:-----------|
| 20.0.1 | 15.4 standalone | 2023-05-21 |
| 19.0.2 | 15.4 standalone | 2023-05-21 |
| 17.0.6 | 15.4 standalone | 2023-05-21 |
| 15.0.10 | 15.4 standalone | 2023-05-21 |
| 11.0.19 | Built-in | 2023-05-21 |
| 1.8.0 | Built-in | 2023-05-21 |
:::

@ -0,0 +1,151 @@
---
title: Rust + Boa
pagination_prev: demos/bigdata/index
pagination_next: solutions/input
---
import current from '/version.js';
import CodeBlock from '@theme/CodeBlock';
:::warning
In a production application, it is strongly recommended to use a binding for a
more performant engine like [`v8`](/docs/demos/engines/v8#rust)
:::
Boa is a pure-Rust JavaScript engine.
The [Standalone scripts](/docs/getting-started/installation/standalone) can be
parsed and evaluated in a Boa context.
## Integration Details
_Initialize Engine_
A JS context can be constructed in one line:
```rust
use boa_engine::Context;
/* initialize */
let context = &mut Context::default();
```
The following helper function evaluates strings as JS code:
```rust
use std::string::String;
use boa_engine::{Context, Source, JsError};
/* simple wrapper to evaluate code snippets */
fn eval_code(c: &mut Context, code: &str) -> Result<String, JsError> {
    let src = Source::from_bytes(code);
    /* errors thrown while running the script are returned to the caller */
    let res = c.eval_script(src)?;
    /* converting the result to a string can also throw (for example, when a
       JS `toString` method throws); report that as Err instead of panicking */
    Ok(res.to_string(c)?.to_std_string_escaped())
}
```
_Load SheetJS Scripts_
Boa provides a special helper to read source code from a path:
```rust
use std::path::Path;
use std::string::String;
use boa_engine::{Context, Source, JsError};
/* simple wrapper to evaluate an entire script file */
fn eval_file(c: &mut Context, path: &str) -> Result<String, JsError> {
    /* panics if the script at `path` cannot be opened */
    let src = Source::from_filepath(Path::new(path)).unwrap();
    /* errors thrown while running the script are returned to the caller */
    let res = c.eval_script(src)?;
    /* converting the result to a string can also throw (for example, when a
       JS `toString` method throws); report that as Err instead of panicking */
    Ok(res.to_string(c)?.to_std_string_escaped())
}
// ...
/* load library */
match eval_file(context, "./xlsx.full.min.js") {
Ok(_res) => {}
Err(e) => { return eprintln!("Uncaught {e}"); }
}
```
To confirm the library is loaded, `XLSX.version` can be inspected:
```rust
/* get version string */
match eval_code(context, "XLSX.version") {
Ok(res) => { println!( "SheetJS library version {}", res); }
Err(e) => { return eprintln!("Uncaught {e}"); }
}
```
### Reading Files
Boa supports `ArrayBuffer` natively. This snippet reads data from a file into a
`Vec<u8>` and stores the data as an `ArrayBuffer` in global scope:
```rust
/* read file (the entire file is loaded into memory; `unwrap` panics if it cannot be read) */
let data: Vec<u8> = fs::read("pres.xlsx").unwrap();
/* wrap the bytes in a JS ArrayBuffer; the Vec is moved into the call */
let array: JsArrayBuffer = JsArrayBuffer::from_byte_block(data, context).unwrap();
let attrs = Attribute::WRITABLE | Attribute::ENUMERABLE | Attribute::CONFIGURABLE;
/* expose the ArrayBuffer to scripts as the global property `buf` */
context.register_global_property("buf", array, attrs);
/* parse with SheetJS and store the workbook in the global `wb`. */
/* the `void` operator makes the script evaluate to `undefined`, so the */
/* Ok value carries nothing useful and only the Err case is handled */
match eval_code(context, "void (globalThis.wb = XLSX.read(buf))") {
Ok(_res) => { }
Err(e) => { return eprintln!("Uncaught {e}"); }
}
```
`wb` will be a variable in the JS environment that can be inspected using the
various SheetJS API functions.
## Complete Example
:::note
This demo was tested on 2023 May 22.
:::
1) Create a new project:
```bash
cargo new sheetjs-rs
cd sheetjs-rs
cargo run
```
2) Add the `boa_engine` crate from the Git repository:
```bash
cargo add --git https://github.com/boa-dev/boa boa_engine
```
3) Download the [Standalone build](/docs/getting-started/installation/standalone):
<CodeBlock language="bash">{`\
curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`}
</CodeBlock>
4) Download [`main.rs`](pathname:///boa/main.rs) and replace `src/main.rs`:
```bash
curl -L -o src/main.rs https://docs.sheetjs.com/boa/main.rs
```
5) Download [the test file](https://sheetjs.com/pres.xlsx) and run:
```bash
curl -LO https://sheetjs.com/pres.xlsx
cargo run
```
After a short wait, the contents will be displayed in CSV form.

@ -10,7 +10,7 @@ import CodeBlock from '@theme/CodeBlock';
:::warning
In a production application, it is strongly recommended to use a binding for a
C engine like [`JavaScript::Duktape`](/docs/demos/engines/duktape)
C engine like [`JavaScript::Duktape`](/docs/demos/engines/duktape#perl)
:::

@ -52,11 +52,36 @@ other exports. APIs that accept pointers without length should be avoided.
Base64 strings are safe for passing between JS and native code, but they should
only be used when there is no safe way to pass `ArrayBuffer` or `Uint8Array`.
**Byte Conventions**
Java has no native concept of unsigned bytes. Values in a `byte[]` are signed and
limited to the range `-128 .. 127`, so bytes above `0x7F` appear as negative
numbers. They need to be fixed within the JS engine.
Some engines support typed arrays. The `Uint8Array` constructor will fix values:
```js
var signed_data = [-48, -49, 17, -32, /* ... */]; // 0xD0 0xCF 0x11 0xE0 ...
var fixed_data = new Uint8Array(signed_data);
```
When `Uint8Array` is not supported, values can be fixed with bitwise operations:
```js
var signed_data = [-48, -49, 17, -32, /* ... */]; // 0xD0 0xCF 0x11 0xE0 ...
var fixed_data = new Array(signed_data.length);
for(var i = 0; i < signed_data.length; ++i) fixed_data[i] = signed_data[i] & 0xFF;
```
## Engines
This list is sorted in alphabetical order.
### Boa
Boa is an embeddable JS engine written in Rust.
This demo has been moved [to a dedicated page](/docs/demos/engines/boa).
### ChakraCore
ChakraCore is an embeddable JS engine written in C++.
@ -267,3 +292,12 @@ This demo has been moved [to a dedicated page](/docs/demos/engines/quickjs).
Rhino is an ES3+ engine in Java.
This demo has been moved [to a dedicated page](/docs/demos/engines/rhino).
### V8
V8 is an embeddable JS engine written in C++. It powers Chromium and Chrome,
NodeJS and Deno, Adobe UXP and other platforms.
This demo has been moved [to a dedicated page](/docs/demos/engines/v8).
The demo includes examples in C++ and Rust.

@ -142,7 +142,7 @@ const config = {
prism: {
theme: lightCodeTheme,
darkTheme: darkCodeTheme,
additionalLanguages: [ "swift", "java", "csharp", "perl", "ruby", "cpp", "applescript", "liquid" ],
additionalLanguages: [ "swift", "java", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust" ],
},
liveCodeBlock: {
playgroundPosition: 'top'

58
docz/static/boa/main.rs Normal file

@ -0,0 +1,58 @@
/*! sheetjs (C) SheetJS -- https://sheetjs.com */
use std::path::Path;
use std::string::String;
use std::fs;
use boa_engine::{Context, Source, JsError};
use boa_engine::object::builtins::JsArrayBuffer;
use boa_engine::property::Attribute;
/* evaluate an entire script file and return the result as a Rust String */
fn eval_file(c: &mut Context, path: &str) -> Result<String, JsError> {
    /* panics if the script at `path` cannot be opened */
    let src = Source::from_filepath(Path::new(path)).unwrap();
    /* errors thrown while running the script are returned to the caller */
    let res = c.eval_script(src)?;
    /* converting the result to a string can also throw (for example, when a
       JS `toString` method throws); report that as Err instead of panicking */
    Ok(res.to_string(c)?.to_std_string_escaped())
}
/* evaluate a code snippet and return the result as a Rust String */
fn eval_code(c: &mut Context, code: &str) -> Result<String, JsError> {
    let src = Source::from_bytes(code);
    /* errors thrown while running the script are returned to the caller */
    let res = c.eval_script(src)?;
    /* converting the result to a string can also throw (for example, when a
       JS `toString` method throws); report that as Err instead of panicking */
    Ok(res.to_string(c)?.to_std_string_escaped())
}
/* load SheetJS, parse `pres.xlsx` and print the first worksheet as CSV */
fn main() {
    let mut engine = Context::default();

    /* load library */
    if let Err(e) = eval_file(&mut engine, "./xlsx.full.min.js") {
        eprintln!("Uncaught {e}");
        return;
    }

    /* get version string */
    match eval_code(&mut engine, "XLSX.version") {
        Ok(version) => println!("SheetJS library version {}", version),
        Err(e) => {
            eprintln!("Uncaught {e}");
            return;
        }
    }

    /* read file and expose the bytes to JS as the global `buf` */
    let bytes: Vec<u8> = fs::read("pres.xlsx").unwrap();
    let buffer: JsArrayBuffer = JsArrayBuffer::from_byte_block(bytes, &mut engine).unwrap();
    let flags = Attribute::WRITABLE | Attribute::ENUMERABLE | Attribute::CONFIGURABLE;
    engine.register_global_property("buf", buffer, flags);

    /* parse workbook and assign to global `wb` property */
    if let Err(e) = eval_code(&mut engine, "void (globalThis.wb = XLSX.read(buf))") {
        eprintln!("Uncaught {e}");
        return;
    }

    /* print CSV of first worksheet */
    match eval_code(&mut engine, "XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]])") {
        Ok(csv) => println!("{}", csv),
        Err(e) => eprintln!("Uncaught {e}"),
    }
}

@ -21,7 +21,7 @@ public class SheetJSNashorn {
/* convert signed byte array to JS Uint8Array or unsigned byte array */
engine.eval("function b2a(b) {" +
"var out = typeof Uint8Array == 'function' ? new Uint8Array(b.length) : new Array(b.length);" +
"for(var i = 0; i < out.length; i++) out[i] = (b[i] + 256) & 0xFF;" +
"for(var i = 0; i < out.length; i++) out[i] = b[i] & 0xFF;" +
"return out;" +
"}" +
"var u8a = b2a(bytes)");

69
docz/static/v8/main.rs Normal file

@ -0,0 +1,69 @@
/*! sheetjs (C) SheetJS -- https://sheetjs.com */
/* compile and run a JS snippet, then stringify the result into a Rust String */
/* (panics if the snippet fails to compile, run or stringify) */
fn eval_code(scope: &mut v8::HandleScope, code: &str) -> std::string::String {
    let js_source = v8::String::new(scope, code).unwrap();
    let compiled = v8::Script::compile(scope, js_source, None).unwrap();
    let value = compiled.run(scope).unwrap();
    let text = value.to_string(scope).unwrap();
    text.to_rust_string_lossy(scope)
}
/* assuming JS code returns an ArrayBuffer, copy result to a Vec<u8> */
/* (panics if the snippet fails to compile or run, or the result is not an ArrayBuffer) */
fn eval_code_ab(scope: &mut v8::HandleScope, code: &str) -> Vec<u8> {
    let source = v8::String::new(scope, code).unwrap();
    let script = v8::Script::compile(scope, source, None).unwrap();
    let result: v8::Local<v8::ArrayBuffer> = script.run(scope).unwrap().try_into().unwrap();
    let len = result.byte_length();
    match result.data() {
        Some(ptr) if len > 0 => {
            // SAFETY: `ptr` and `len` come from the same ArrayBuffer, which is kept
            // alive by the `result` handle. The bytes are only read (shared slice)
            // and are copied out before any other JS code can run.
            unsafe { std::slice::from_raw_parts(ptr.cast::<u8>().as_ptr(), len) }.to_vec()
        }
        /* no backing pointer or zero length: nothing to copy */
        _ => Vec::new(),
    }
}
/* load SheetJS in V8, parse the file named by the first argument, print the
   first worksheet as CSV and write the workbook to sheetjsw.xlsb */
fn main() {
    /* validate the command line before starting the engine */
    let path: String = match std::env::args().nth(1) {
        Some(p) => p,
        None => {
            eprintln!("usage: cargo run <path to spreadsheet>");
            std::process::exit(1);
        }
    };

    /* initialize */
    let platform = v8::new_default_platform(0, false).make_shared();
    v8::V8::initialize_platform(platform);
    v8::V8::initialize();

    let isolate = &mut v8::Isolate::new(Default::default());
    let handle_scope = &mut v8::HandleScope::new(isolate);
    let context = v8::Context::new(handle_scope);
    let context_scope = &mut v8::ContextScope::new(handle_scope, context);

    /* load library */
    {
        let script = std::fs::read_to_string("./xlsx.full.min.js").expect("Error reading xlsx.full.min.js");
        let _result = eval_code(context_scope, &script);
    }

    /* get version string */
    {
        let result = eval_code(context_scope, "XLSX.version");
        println!("SheetJS library version {}", result);
    }

    /* read file and expose the bytes to JS as the global `buf` */
    {
        let data: Vec<u8> = std::fs::read(&path).unwrap();
        let back: v8::UniqueRef<v8::BackingStore> = v8::ArrayBuffer::new_backing_store_from_vec(data);
        let shared = back.make_shared();
        let ab: v8::Local<v8::ArrayBuffer> = v8::ArrayBuffer::with_backing_store(context_scope, &shared);
        let key = v8::String::new(context_scope, "buf").unwrap();
        context.global(context_scope).set(context_scope, key.into(), ab.into());
        println!("Loaded file {}", path);
    }

    /* parse workbook and assign to global `wb` property */
    {
        let _result = eval_code(context_scope, "void (globalThis.wb = XLSX.read(buf))");
    }

    /* print CSV of first worksheet */
    {
        let result = eval_code(context_scope, "XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]])");
        println!("{}", result);
    }

    /* write sheetjsw.xlsb */
    {
        let result = eval_code_ab(context_scope, "XLSX.write(wb, {type:'array', bookType:'xlsb'})");
        std::fs::write("sheetjsw.xlsb", result).unwrap();
    }
}