From 58f45f957459c5e89cdd5a2b310342e0de898186 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Mon, 22 May 2023 04:06:09 -0400 Subject: [PATCH] rusty-sheetjs --- docz/docs/03-demos/12-engines/02_v8.md | 69 ++++++++ docz/docs/03-demos/12-engines/07_nashorn.md | 14 +- .../12-engines/{09_rb.md => 15_rb.md} | 0 .../12-engines/{10_chakra.md => 20_chakra.md} | 0 docz/docs/03-demos/12-engines/21_boa.md | 151 ++++++++++++++++++ .../12-engines/{11_perl.md => 22_perl.md} | 2 +- docz/docs/03-demos/12-engines/index.md | 34 ++++ docz/docusaurus.config.js | 2 +- docz/static/boa/main.rs | 58 +++++++ docz/static/nashorn/SheetJSNashorn.java | 2 +- docz/static/v8/main.rs | 69 ++++++++ 11 files changed, 394 insertions(+), 7 deletions(-) rename docz/docs/03-demos/12-engines/{09_rb.md => 15_rb.md} (100%) rename docz/docs/03-demos/12-engines/{10_chakra.md => 20_chakra.md} (100%) create mode 100644 docz/docs/03-demos/12-engines/21_boa.md rename docz/docs/03-demos/12-engines/{11_perl.md => 22_perl.md} (97%) create mode 100644 docz/static/boa/main.rs create mode 100644 docz/static/v8/main.rs diff --git a/docz/docs/03-demos/12-engines/02_v8.md b/docz/docs/03-demos/12-engines/02_v8.md index 2d3cfc4..c489c28 100644 --- a/docz/docs/03-demos/12-engines/02_v8.md +++ b/docz/docs/03-demos/12-engines/02_v8.md @@ -260,3 +260,72 @@ g++ -I. -Iinclude sheetjs.v8.cc -o sheetjs.v8 -fno-rtti -lv8_monolith \ If the program succeeded, the CSV contents will be printed to console and the file `sheetjsw.xlsb` will be created. That file can be opened with Excel. + +## Bindings + +V8 is easily embeddable. Bindings exist for many languages. + +### Rust + +The `v8` crate provides binary builds and straightforward bindings. The Rust +code is similar to the C++ code. 
+ +Pulling data from an `ArrayBuffer` back into Rust involves an unsafe operation: + +```rust +/* assuming JS code returns an ArrayBuffer, copy result to a Vec<u8> */ +fn eval_code_ab(scope: &mut v8::HandleScope, code: &str) -> Vec<u8> { + let source = v8::String::new(scope, &code).unwrap(); + let script = v8::Script::compile(scope, source, None).unwrap(); + let result: v8::Local<v8::ArrayBuffer> = script.run(scope).unwrap().try_into().unwrap(); + /* In C++, `Data` returns a pointer. Collecting data into Vec<u8> is unsafe */ + unsafe { return std::slice::from_raw_parts_mut( + result.data().unwrap().cast::<u8>().as_ptr(), + result.byte_length() + ).to_vec(); } +} + +``` + +:::note + +This demo was last tested on 2023 May 22 against `v8` crate version `0.71.2` + +::: + +1) Create a new project: + +```bash +cargo new sheetjs-rustyv8 +cd sheetjs-rustyv8 +cargo run +``` + +2) Add the `v8` crate: + +```bash +cargo add v8 +cargo run +``` + +3) Download the [Standalone build](/docs/getting-started/installation/standalone): + +<CodeBlock language="bash">{`\ +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`} +</CodeBlock> + +4) Download [`main.rs`](pathname:///v8/main.rs) and replace `src/main.rs`: + +```bash +curl -L -o src/main.rs https://docs.sheetjs.com/v8/main.rs +``` + +5) Download [the test file](https://sheetjs.com/pres.numbers) and run: + +```bash +curl -LO https://sheetjs.com/pres.numbers +cargo run pres.numbers +``` + +If the program succeeded, the CSV contents will be printed to console and the +file `sheetjsw.xlsb` will be created. That file can be opened with Excel. diff --git a/docz/docs/03-demos/12-engines/07_nashorn.md b/docz/docs/03-demos/12-engines/07_nashorn.md index b0f7380..0fd59fc 100644 --- a/docz/docs/03-demos/12-engines/07_nashorn.md +++ b/docz/docs/03-demos/12-engines/07_nashorn.md @@ -78,7 +78,7 @@ engine.put("bytes", Files.readAllBytes(Paths.get(args[0]))); engine.eval( "function b2a(b) {" + "var out = typeof Uint8Array == 'function' ? 
new Uint8Array(b.length) : new Array(b.length);" + - "for(var i = 0; i < out.length; i++) out[i] = (b[i] + 256) & 0xFF;" + + "for(var i = 0; i < out.length; i++) out[i] = b[i] & 0xFF;" + "return out;" + "}" + "var u8a = b2a(bytes)" @@ -92,10 +92,16 @@ engine.eval("var wb = XLSX.read(u8a, {type: 'array'})"); :::note -This demo was last tested on 2023 March 27 using: +This demo was tested in the following deployments: -- OpenJDK 19.0.1 + Nashorn 15.4 standalone -- OpenJDK 11.0.18 + Official Nashorn +| OpenJDK | Nashorn | Date | +|:--------|:----------------|:-----------| +| 20.0.1 | 15.4 standalone | 2023-05-21 | +| 19.0.2 | 15.4 standalone | 2023-05-21 | +| 17.0.6 | 15.4 standalone | 2023-05-21 | +| 15.0.10 | 15.4 standalone | 2023-05-21 | +| 11.0.19 | Built-in | 2023-05-21 | +| 1.8.0 | Built-in | 2023-05-21 | ::: diff --git a/docz/docs/03-demos/12-engines/09_rb.md b/docz/docs/03-demos/12-engines/15_rb.md similarity index 100% rename from docz/docs/03-demos/12-engines/09_rb.md rename to docz/docs/03-demos/12-engines/15_rb.md diff --git a/docz/docs/03-demos/12-engines/10_chakra.md b/docz/docs/03-demos/12-engines/20_chakra.md similarity index 100% rename from docz/docs/03-demos/12-engines/10_chakra.md rename to docz/docs/03-demos/12-engines/20_chakra.md diff --git a/docz/docs/03-demos/12-engines/21_boa.md b/docz/docs/03-demos/12-engines/21_boa.md new file mode 100644 index 0000000..e258a9f --- /dev/null +++ b/docz/docs/03-demos/12-engines/21_boa.md @@ -0,0 +1,151 @@ +--- +title: Rust + Boa +pagination_prev: demos/bigdata/index +pagination_next: solutions/input +--- + +import current from '/version.js'; +import CodeBlock from '@theme/CodeBlock'; + +:::warning + +In a production application, it is strongly recommended to use a binding for a +more performant engine like [`v8`](/docs/demos/engines/v8#rust) + +::: + +Boa is a pure-Rust JavaScript engine. 
+ +The [Standalone scripts](/docs/getting-started/installation/standalone) can be +parsed and evaluated in a Boa context. + + +## Integration Details + +_Initialize Engine_ + +A JS context can be constructed in one line: + +```rust +use boa_engine::Context; + +/* initialize */ +let context = &mut Context::default(); +``` + +The following helper function evaluates strings as JS code: + +```rust +use std::string::String; +use boa_engine::{Context, Source, JsError}; + +/* simple wrapper to evaluate code snippets */ +fn eval_code(c: &mut Context, code: &str) -> Result<String, JsError> { + let src = Source::from_bytes(code); + match c.eval_script(src) { + Ok(res) => { return Ok(res.to_string(c).unwrap().to_std_string_escaped()); } + Err(e) => { return Err(e); } + }; +} +``` + +_Load SheetJS Scripts_ + +Boa provides a special helper to read source code from a path: + +```rust +use std::path::Path; +use std::string::String; +use boa_engine::{Context, Source, JsError}; + +/* simple wrapper to evaluate an entire script file */ +fn eval_file(c: &mut Context, path: &str) -> Result<String, JsError> { + let src = Source::from_filepath(Path::new(path)).unwrap(); + match c.eval_script(src) { + Ok(res) => { return Ok(res.to_string(c).unwrap().to_std_string_escaped()); } + Err(e) => { return Err(e); } + }; +} + +// ... + /* load library */ + match eval_file(context, "./xlsx.full.min.js") { + Ok(_res) => {} + Err(e) => { return eprintln!("Uncaught {e}"); } + } +``` + +To confirm the library is loaded, `XLSX.version` can be inspected: + +```rust + /* get version string */ + match eval_code(context, "XLSX.version") { + Ok(res) => { println!( "SheetJS library version {}", res); } + Err(e) => { return eprintln!("Uncaught {e}"); } + } +``` + +### Reading Files + +Boa supports `ArrayBuffer` natively. 
This snippet reads data from a file into +`Vec<u8>` and stores the data as an `ArrayBuffer` in global scope: + +```rust + /* read file */ + let data: Vec<u8> = fs::read("pres.xlsx").unwrap(); + let array: JsArrayBuffer = JsArrayBuffer::from_byte_block(data, context).unwrap(); + let attrs = Attribute::WRITABLE | Attribute::ENUMERABLE | Attribute::CONFIGURABLE; + context.register_global_property("buf", array, attrs); + + /* parse with SheetJS */ + match eval_code(context, "void (globalThis.wb = XLSX.read(buf))") { + Ok(_res) => { } + Err(e) => { return eprintln!("Uncaught {e}"); } + } +``` + +`wb` will be a variable in the JS environment that can be inspected using the +various SheetJS API functions. + +## Complete Example + +:::note + +This demo was tested on 2023 May 22 + +::: + +1) Create a new project: + +```bash +cargo new sheetjs-rs +cd sheetjs-rs +cargo run +``` + +2) Add the `boa_engine` crate from the Git repository: + +```bash +cargo add --git https://github.com/boa-dev/boa boa_engine +``` + +3) Download the [Standalone build](/docs/getting-started/installation/standalone): + +<CodeBlock language="bash">{`\ +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`} +</CodeBlock> + +4) Download [`main.rs`](pathname:///boa/main.rs) and replace `src/main.rs`: + +```bash +curl -L -o src/main.rs https://docs.sheetjs.com/boa/main.rs +``` + +5) Download [the test file](https://sheetjs.com/pres.xlsx) and run: + +```bash +curl -LO https://sheetjs.com/pres.xlsx +cargo run +``` + +After a short wait, the contents will be displayed in CSV form. 
diff --git a/docz/docs/03-demos/12-engines/11_perl.md b/docz/docs/03-demos/12-engines/22_perl.md similarity index 97% rename from docz/docs/03-demos/12-engines/11_perl.md rename to docz/docs/03-demos/12-engines/22_perl.md index b95b855..0bbd365 100644 --- a/docz/docs/03-demos/12-engines/11_perl.md +++ b/docz/docs/03-demos/12-engines/22_perl.md @@ -10,7 +10,7 @@ import CodeBlock from '@theme/CodeBlock'; :::warning In a production application, it is strongly recommended to use a binding for a -C engine like [`JavaScript::Duktape`](/docs/demos/engines/duktape) +C engine like [`JavaScript::Duktape`](/docs/demos/engines/duktape#perl) ::: diff --git a/docz/docs/03-demos/12-engines/index.md b/docz/docs/03-demos/12-engines/index.md index 3601ba2..0236b0f 100644 --- a/docz/docs/03-demos/12-engines/index.md +++ b/docz/docs/03-demos/12-engines/index.md @@ -52,11 +52,36 @@ other exports. APIs that accept pointers without length should be avoided. Base64 strings are safe for passing between JS and native code, but they should only be used when there is no safe way to pass `ArrayBuffer` or `Uint8Array`. +**Byte Conventions** + +Java has no native concept of unsigned bytes. Values in a `byte[]` are clamped +to the range `-128 .. 127`. They need to be fixed within the JS engine. + +Some engines support typed arrays. The `Uint8Array` constructor will fix values: + +```js +var signed_data = [-48, -49, 17, -32, /* ... */]; // 0xD0 0xCF 0x11 0xE0 ... +var fixed_data = new Uint8Array(signed_data); +``` + +When `Uint8Array` is not supported, values can be fixed with bitwise operations: + +```js +var signed_data = [-48, -49, 17, -32, /* ... */]; // 0xD0 0xCF 0x11 0xE0 ... +var fixed_data = new Array(signed_data.length); +for(var i = 0; i < signed_data.length; ++i) fixed_data[i] = signed_data[i] & 0xFF; +``` ## Engines This list is sorted in alphabetical order. +### Boa + +Boa is an embeddable JS engine written in Rust. 
+ +This demo has been moved [to a dedicated page](/docs/demos/engines/boa). + ### ChakraCore ChakraCore is an embeddable JS engine written in C++. @@ -267,3 +292,12 @@ This demo has been moved [to a dedicated page](/docs/demos/engines/quickjs). Rhino is an ES3+ engine in Java. This demo has been moved [to a dedicated page](/docs/demos/engines/rhino). + + +### V8 + +V8 is an embeddable JS engine written in C++. It powers Chromium and Chrome, +NodeJS and Deno, Adobe UXP and other platforms. + +This demo has been moved [to a dedicated page](/docs/demos/engines/v8). +The demo includes examples in C++ and Rust. diff --git a/docz/docusaurus.config.js b/docz/docusaurus.config.js index 951decc..cf559c7 100644 --- a/docz/docusaurus.config.js +++ b/docz/docusaurus.config.js @@ -142,7 +142,7 @@ const config = { prism: { theme: lightCodeTheme, darkTheme: darkCodeTheme, - additionalLanguages: [ "swift", "java", "csharp", "perl", "ruby", "cpp", "applescript", "liquid" ], + additionalLanguages: [ "swift", "java", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust" ], }, liveCodeBlock: { playgroundPosition: 'top' diff --git a/docz/static/boa/main.rs b/docz/static/boa/main.rs new file mode 100644 index 0000000..e9ef60f --- /dev/null +++ b/docz/static/boa/main.rs @@ -0,0 +1,58 @@ +/*! 
sheetjs (C) SheetJS -- https://sheetjs.com */ +use std::path::Path; +use std::string::String; +use std::fs; + +use boa_engine::{Context, Source, JsError}; +use boa_engine::object::builtins::JsArrayBuffer; +use boa_engine::property::Attribute; +/* evaluate the script file at `path` and stringify the result; panics if the file cannot be loaded or the result cannot be converted */ +fn eval_file(c: &mut Context, path: &str) -> Result<String, JsError> { + let src = Source::from_filepath(Path::new(path)).unwrap(); + match c.eval_script(src) { + Ok(res) => { return Ok(res.to_string(c).unwrap().to_std_string_escaped()); } + Err(e) => { return Err(e); } + }; +} +/* evaluate a JS code snippet and stringify the result; JS errors are returned as Err */ +fn eval_code(c: &mut Context, code: &str) -> Result<String, JsError> { + let src = Source::from_bytes(code); + match c.eval_script(src) { + Ok(res) => { return Ok(res.to_string(c).unwrap().to_std_string_escaped()); } + Err(e) => { return Err(e); } + }; +} + +fn main() { + let context = &mut Context::default(); + + /* load library */ + match eval_file(context, "./xlsx.full.min.js") { + Ok(_res) => {} + Err(e) => { return eprintln!("Uncaught {e}"); } + } + + /* get version string */ + match eval_code(context, "XLSX.version") { + Ok(res) => { println!( "SheetJS library version {}", res); } + Err(e) => { return eprintln!("Uncaught {e}"); } + } + + /* read file */ + let data: Vec<u8> = fs::read("pres.xlsx").unwrap(); + let array: JsArrayBuffer = JsArrayBuffer::from_byte_block(data, context).unwrap(); + let attrs = Attribute::WRITABLE | Attribute::ENUMERABLE | Attribute::CONFIGURABLE; + context.register_global_property("buf", array, attrs); + + /* parse workbook and assign to global `wb` property */ + match eval_code(context, "void (globalThis.wb = XLSX.read(buf))") { + Ok(_res) => { } + Err(e) => { return eprintln!("Uncaught {e}"); } + } + + /* print CSV of first worksheet */ + match eval_code(context, "XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]])") { + Ok(res) => { println!( "{}", res); } + Err(e) => { return eprintln!("Uncaught {e}"); } + } +} \ No newline at end of file diff --git a/docz/static/nashorn/SheetJSNashorn.java b/docz/static/nashorn/SheetJSNashorn.java index 
d9a9340..e0e1753 100644 --- a/docz/static/nashorn/SheetJSNashorn.java +++ b/docz/static/nashorn/SheetJSNashorn.java @@ -21,7 +21,7 @@ public class SheetJSNashorn { /* convert signed byte array to JS Uint8Array or unsigned byte array */ engine.eval("function b2a(b) {" + "var out = typeof Uint8Array == 'function' ? new Uint8Array(b.length) : new Array(b.length);" + - "for(var i = 0; i < out.length; i++) out[i] = (b[i] + 256) & 0xFF;" + + "for(var i = 0; i < out.length; i++) out[i] = b[i] & 0xFF;" + "return out;" + "}" + "var u8a = b2a(bytes)"); diff --git a/docz/static/v8/main.rs b/docz/static/v8/main.rs new file mode 100644 index 0000000..bd7ac47 --- /dev/null +++ b/docz/static/v8/main.rs @@ -0,0 +1,69 @@ +/*! sheetjs (C) SheetJS -- https://sheetjs.com */ +/* run code, get result as a Rust String */ +fn eval_code(scope: &mut v8::HandleScope, code: &str) -> std::string::String { + let source = v8::String::new(scope, &code).unwrap(); + let script = v8::Script::compile(scope, source, None).unwrap(); + let result = script.run(scope).unwrap(); + return result.to_string(scope).unwrap().to_rust_string_lossy(scope); +} + +/* assuming JS code returns an ArrayBuffer, copy result to a Vec<u8> */ +fn eval_code_ab(scope: &mut v8::HandleScope, code: &str) -> Vec<u8> { + let source = v8::String::new(scope, &code).unwrap(); + let script = v8::Script::compile(scope, source, None).unwrap(); + let result: v8::Local<v8::ArrayBuffer> = script.run(scope).unwrap().try_into().unwrap(); + unsafe { return std::slice::from_raw_parts_mut(result.data().unwrap().cast::<u8>().as_ptr(), result.byte_length()).to_vec(); } /* SAFETY: pointer and length come from the same ArrayBuffer handle; the bytes are copied out immediately */ +} + +fn main() { + /* initialize */ + let platform = v8::new_default_platform(0, false).make_shared(); + v8::V8::initialize_platform(platform); + v8::V8::initialize(); + + let isolate = &mut v8::Isolate::new(Default::default()); + let handle_scope = &mut v8::HandleScope::new(isolate); + let context = v8::Context::new(handle_scope); + let context_scope = &mut v8::ContextScope::new(handle_scope, context); + + 
/* load library */ + { + let script = std::fs::read_to_string("./xlsx.full.min.js").expect("Error reading xlsx.full.min.js"); + let _result = eval_code(context_scope, &script); + } + + /* get version string */ + { + let result = eval_code(context_scope, "XLSX.version"); + println!("SheetJS library version {}", result); + } + + /* read file */ + { + let path: String = std::env::args().nth(1).expect("missing input file argument"); + let data: Vec<u8> = std::fs::read(&path).unwrap(); + let back: v8::UniqueRef<v8::BackingStore> = v8::ArrayBuffer::new_backing_store_from_vec(data); + let shared = back.make_shared(); + let ab: v8::Local<v8::ArrayBuffer> = v8::ArrayBuffer::with_backing_store(context_scope, &shared); + let key = v8::String::new(context_scope, "buf").unwrap(); + context.global(context_scope).set(context_scope, key.into(), ab.into()); + println!("Loaded file {}", path); + } + + /* parse workbook and assign to global `wb` property */ + { + let _result = eval_code(context_scope, "void (globalThis.wb = XLSX.read(buf))"); + } + + /* print CSV of first worksheet */ + { + let result = eval_code(context_scope, "XLSX.utils.sheet_to_csv(wb.Sheets[wb.SheetNames[0]])"); + println!("{}", result); + } + + /* write sheetjsw.xlsb */ + { + let result = eval_code_ab(context_scope, "XLSX.write(wb, {type:'array', bookType:'xlsb'})"); + std::fs::write("sheetjsw.xlsb", result).unwrap(); + } +} \ No newline at end of file