MuJS C demo

This commit is contained in:
SheetJS 2024-04-22 15:38:55 -04:00
parent b699cfaf9a
commit 01481c65cf
7 changed files with 596 additions and 77 deletions

@ -37,7 +37,7 @@
</Style>
</Styles>
<Worksheet ss:Name="Engines">
<Table ss:ExpandedColumnCount="8" ss:ExpandedRowCount="15" x:FullColumns="1"
<Table ss:ExpandedColumnCount="8" ss:ExpandedRowCount="16" x:FullColumns="1"
x:FullRows="1" ss:DefaultColumnWidth="65" ss:DefaultRowHeight="16">
<Column ss:Index="3" ss:Width="24"/>
<Column ss:Width="31"/>
@ -191,6 +191,16 @@
<Cell ss:StyleID="s16"><Data ss:Type="String">✔</Data></Cell>
<Cell ss:StyleID="s16"/>
</Row>
<Row>
<Cell><Data ss:Type="String">MuJS</Data></Cell>
<Cell><Data ss:Type="String">C</Data></Cell>
<Cell ss:StyleID="s16"><Data ss:Type="String">✔</Data></Cell>
<Cell ss:StyleID="s16"/>
<Cell ss:StyleID="s16"/>
<Cell ss:StyleID="s16"/>
<Cell ss:StyleID="s16"><Data ss:Type="String">✔</Data></Cell>
<Cell ss:StyleID="s16"/>
</Row>
</Table>
<WorksheetOptions xmlns="urn:schemas-microsoft-com:office:excel">
<PageSetup>

@ -1145,13 +1145,13 @@ The highlighted lines should be added to the iOS project `Info.plist` just
before the last `</dict>` tag:
```xml title="ios/SheetJSPres/Info.plist"
<key>UIViewControllerBasedStatusBarAppearance</key>
<false/>
<key>UIViewControllerBasedStatusBarAppearance</key>
<false/>
<!-- highlight-start -->
<key>UIFileSharingEnabled</key>
<true/>
<key>LSSupportsOpeningDocumentsInPlace</key>
<true/>
<key>UIFileSharingEnabled</key>
<true/>
<key>LSSupportsOpeningDocumentsInPlace</key>
<true/>
<!-- highlight-end -->
</dict>
</plist>

@ -37,19 +37,19 @@ string and return an object that represents `document`. An API method such as
```mermaid
flowchart LR
subgraph Synthetic DOM Operations
html(HTML\nstring)
subgraph Synthetic DOM Operations
html(HTML\nstring)
doc{{`document`\nDOM Object}}
end
subgraph SheetJS Operations
table{{DOM\nTable}}
wb(((SheetJS\nWorkbook)))
file(workbook\nfile)
end
end
subgraph SheetJS Operations
table{{DOM\nTable}}
wb(((SheetJS\nWorkbook)))
file(workbook\nfile)
end
html --> |Library\n\n| doc
doc --> |DOM\nAPI| table
table --> |`table_to_book`\n\n| wb
wb --> |`writeFile`\n\n| file
wb --> |`writeFile`\n\n| file
```
SheetJS methods use features that may be missing from some DOM implementations.
@ -196,9 +196,9 @@ tested version (`0.8.10`), the following patches were needed:
```js
Object.defineProperty(tbl.__proto__, "innerHTML", { get: function() {
var outerHTML = new XMLSerializer().serializeToString(this);
if(outerHTML.match(/</g).length == 1) return "";
return outerHTML.slice(0, outerHTML.lastIndexOf("</")).replace(/<[^"'>]*(("[^"]*"|'[^']*')[^"'>]*)*>/, "");
var outerHTML = new XMLSerializer().serializeToString(this);
if(outerHTML.match(/</g).length == 1) return "";
return outerHTML.slice(0, outerHTML.lastIndexOf("</")).replace(/<[^"'>]*(("[^"]*"|'[^']*')[^"'>]*)*>/, "");
}});
```

@ -241,48 +241,48 @@ document.addEventListener('DOMContentLoaded', () => {
```ts title="src/background/index.ts"
chrome.runtime.onInstalled.addListener(function() {
chrome.contextMenus.create({
type: "normal",
id: "sjsexport",
title: "Export Table to XLSX",
contexts: ["page", "selection"]
});
chrome.contextMenus.create({
type: "normal",
id: "sj5export",
title: "Export All Tables in Page",
contexts: ["page", "selection"]
});
chrome.contextMenus.onClicked.addListener(function(info/*, tab*/) {
var mode = "";
switch(info.menuItemId) {
case 'sjsexport': mode = "JS"; break;
case 'sj5export': mode = "J5"; break;
default: return;
}
chrome.tabs.query({active: true, currentWindow: true}, function(tabs){
chrome.tabs.sendMessage(tabs[0].id, {Sheet:mode}, sjsexport_cb);
});
});
chrome.contextMenus.create({
type: "normal",
id: "sjsexport",
title: "Export Table to XLSX",
contexts: ["page", "selection"]
});
chrome.contextMenus.create({
type: "normal",
id: "sj5export",
title: "Export All Tables in Page",
contexts: ["page", "selection"]
});
chrome.contextMenus.onClicked.addListener(function(info/*, tab*/) {
var mode = "";
switch(info.menuItemId) {
case 'sjsexport': mode = "JS"; break;
case 'sj5export': mode = "J5"; break;
default: return;
}
chrome.tabs.query({active: true, currentWindow: true}, function(tabs){
chrome.tabs.sendMessage(tabs[0].id, {Sheet:mode}, sjsexport_cb);
});
});
chrome.contextMenus.create({
id: "sjsabout",
title: "About",
contexts: ["browser_action"]
});
chrome.contextMenus.onClicked.addListener(function(info/*, tab*/) {
if(info.menuItemId !== "sjsabout") return;
chrome.tabs.create({url: "https://sheetjs.com/"});
});
chrome.contextMenus.create({
id: "sjsabout",
title: "About",
contexts: ["browser_action"]
});
chrome.contextMenus.onClicked.addListener(function(info/*, tab*/) {
if(info.menuItemId !== "sjsabout") return;
chrome.tabs.create({url: "https://sheetjs.com/"});
});
});
function sjsexport_cb(wb) {
if(!wb || !wb.SheetNames || !wb.Sheets) { return alert("Error in exporting table"); }
const b64 = XLSX.write(wb, {bookType: "xlsx", type: "base64"});
chrome.downloads.download({
url: `data:application/octet-stream;base64,${b64}`,
filename: `SheetJSTables.xlsx`
})
if(!wb || !wb.SheetNames || !wb.Sheets) { return alert("Error in exporting table"); }
const b64 = XLSX.write(wb, {bookType: "xlsx", type: "base64"});
chrome.downloads.download({
url: `data:application/octet-stream;base64,${b64}`,
filename: `SheetJSTables.xlsx`
})
}
```
@ -292,27 +292,27 @@ function sjsexport_cb(wb) {
import { utils } from 'xlsx';
var coords = [0,0];
document.addEventListener('mousedown', function(mouse) {
if(mouse && mouse.button == 2) coords = [mouse.clientX, mouse.clientY];
if(mouse && mouse.button == 2) coords = [mouse.clientX, mouse.clientY];
});
chrome.runtime.onMessage.addListener(function(msg, sender, cb) {
if(!msg || !msg['Sheet']) return;
if(msg.Sheet == "JS") {
var elt = document.elementFromPoint(coords[0], coords[1]);
while(elt != null) {
if(elt.tagName.toLowerCase() == "table") return cb(utils.table_to_book(elt));
elt = elt.parentElement;
}
} else if(msg.Sheet == "J5") {
var tables = document.getElementsByTagName("table");
var wb = utils.book_new();
for(var i = 0; i < tables.length; ++i) {
var ws = utils.table_to_sheet(tables[i]);
utils.book_append_sheet(wb, ws, "Table" + i);
}
return cb(wb);
}
cb(coords);
if(!msg || !msg['Sheet']) return;
if(msg.Sheet == "JS") {
var elt = document.elementFromPoint(coords[0], coords[1]);
while(elt != null) {
if(elt.tagName.toLowerCase() == "table") return cb(utils.table_to_book(elt));
elt = elt.parentElement;
}
} else if(msg.Sheet == "J5") {
var tables = document.getElementsByTagName("table");
var wb = utils.book_new();
for(var i = 0; i < tables.length; ++i) {
var ws = utils.table_to_sheet(tables[i]);
utils.book_append_sheet(wb, ws, "Table" + i);
}
return cb(wb);
}
cb(coords);
});
```

@ -359,7 +359,7 @@ For example, `duk_create_heap_default` is defined as follows:
```c
#define duk_create_heap_default() \
duk_create_heap(NULL, NULL, NULL, NULL, NULL)
duk_create_heap(NULL, NULL, NULL, NULL, NULL)
```
The `duk_create_heap_default` macro will not be defined in the shared library.
@ -714,7 +714,7 @@ The Zig translator does not properly handle macro `void` casts. For example,
```c title="duk_eval_string_noresult macro"
#define duk_eval_string_noresult(ctx,src) \
((void) duk_eval_raw((ctx), (src), 0, 0 /*args*/ | DUK_COMPILE_EVAL | DUK_COMPILE_NOSOURCE | DUK_COMPILE_STRLEN | DUK_COMPILE_NORESULT | DUK_COMPILE_NOFILENAME))
((void) duk_eval_raw((ctx), (src), 0, 0 /*args*/ | DUK_COMPILE_EVAL | DUK_COMPILE_NOSOURCE | DUK_COMPILE_STRLEN | DUK_COMPILE_NORESULT | DUK_COMPILE_NOFILENAME))
```
The compiler will throw an error involving `anyopaque` (C `void`):

@ -0,0 +1,391 @@
---
title: Munging Data in MuJS
sidebar_label: C + MuJS
pagination_prev: demos/bigdata/index
pagination_next: solutions/input
---
import current from '/version.js';
import Tabs from '@theme/Tabs';
import TabItem from '@theme/TabItem';
import CodeBlock from '@theme/CodeBlock';
[MuJS](https://mujs.com/) is a C89-compatible embeddable JS engine.
[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing
data from spreadsheets.
This demo uses MuJS and SheetJS to pull data from a spreadsheet and print CSV
rows. We'll explore how to load SheetJS in a MuJS context and process
spreadsheets from a C program.
The ["Integration Example"](#integration-example) section includes a complete
command-line tool for reading data from files.
:::danger pass
The MuJS engine has a number of bugs that affect parsing in XLSX, XLML and other
XML and plaintext file formats. If software does not need to support legacy
systems or architecture, it is strongly recommended to use a modern engine such
as [Duktape](/docs/demos/engines/duktape).
:::
## Integration Details
:::info pass
Many MuJS functions are not documented. The explanation was verified against
version `1.3.4`.
:::
### Initialize MuJS
A MuJS engine instance is created with `js_newstate`:
```c
js_State *J = js_newstate(NULL, NULL, 0);
```
#### Error Messages
A special `report` callback should be used to display error messages. This
report function is used in official examples:
```c
static void report(js_State *J, const char *msg) { fprintf(stderr, "REPORT MSG: %s\n", msg); }
```
The `js_setreport` function attaches the reporter to the engine:
```c
js_setreport(J, report);
```
#### Global
MuJS does not expose a `global` variable. It can be obtained from a reference
to `this` in an unbound function. The following snippet will be evaluated:
```js
/* create global object */
var global = (function() { return this; })(null);
```
In MuJS, `js_dostring` evaluates code stored in C strings:
```c
/* create `global` variable */
js_dostring(J, "var global = (function() { return this; })(null);");
```
#### Console
MuJS has no built-in method to print data. The official examples define the
following `print` method:
```c
/* `print` builtin: write every JS argument to stdout, separated by single
 * spaces and followed by exactly one newline, then return `undefined`.
 * Iteration starts at stack index 1 (in MuJS, index 0 holds `this`).
 */
static void jsB_print(js_State *J) {
  int i, top = js_gettop(J);
  for (i = 1; i < top; ++i) {
    const char *s = js_tostring(J, i);
    if (i > 1) putchar(' ');
    /* fputs, unlike puts, does not append a newline after each argument */
    fputs(s, stdout);
  }
  putchar('\n');
  js_pushundefined(J);
}
```
This function can be exposed in the JS engine by using `js_newcfunction` to add
the function to the engine and `js_setglobal` to bind to a name:
```c
js_newcfunction(J, jsB_print, "print", 0);
js_setglobal(J, "print");
```
After adding `print` to the engine, the following JS snippet will create a
`console` object with a `log` method:
```js
/* create a fake `console` whose `log` method is the `print` function */
var console = { log: print };
```
In MuJS, `js_dostring` evaluates code stored in C strings:
```C
js_dostring(J, "var console = { log: print };");
```
### Load SheetJS Scripts
[SheetJS Standalone scripts](/docs/getting-started/installation/standalone) can
be parsed and evaluated in a C context.
The shim and main library can be loaded with the MuJS `js_dofile` method. It
reads scripts from the filesystem and evaluates in the MuJS context:
```c
/* load scripts */
js_dofile(J, "shim.min.js");
js_dofile(J, "xlsx.full.min.js");
```
### Reading Files
MuJS does not expose a method to pass raw byte arrays into the engine. Instead,
the raw data should be encoded in Base64.
#### Reading File Bytes
File bytes can be read using standard C library methods. The example defines a
method `read_file` with the following signature:
```c
/* Read data from filesystem
* `filename` - path to the file
* `sz` - pointer to size_t
* return value is a pointer to the start of the file data
* the length of the data will be written to `sz`
*/
char *read_file(const char *filename, size_t *sz);
```
<details>
<summary><b>File Reader Implementation</b> (click to show)</summary>
This function uses standard C API methods.
```c
/* -------------------- */
/* read file from filesystem */
/* Read the whole file at `filename` into a freshly allocated buffer.
 *
 * On success the buffer is returned and the number of bytes actually read is
 * stored in `*sz`; the caller owns the buffer and must free() it.
 * On any failure (NULL name, open/seek/tell/read error, out of memory) NULL is
 * returned and `*sz` is set to 0.
 */
static char *read_file(const char *filename, size_t *sz) {
  FILE *f;
  long fsize;
  char *buf;

  *sz = 0;
  if(!filename) return NULL;
  f = fopen(filename, "rb");
  if(!f) return NULL;

  /* measure the file; ftell reports -1 on failure */
  if(fseek(f, 0, SEEK_END) != 0 || (fsize = ftell(f)) < 0 || fseek(f, 0, SEEK_SET) != 0) {
    fclose(f);
    return NULL;
  }

  /* always request at least one byte so an empty file still yields a valid pointer */
  buf = malloc(fsize > 0 ? (size_t) fsize : 1);
  if(!buf) { fclose(f); return NULL; }

  *sz = fread(buf, 1, (size_t) fsize, f);
  if(ferror(f)) {
    free(buf);
    buf = NULL;
    *sz = 0;
  }
  fclose(f);
  return buf;
}
/* -------------------- */
```
</details>
The example program will accept an argument and read the specified file:
```c
/* read file */
size_t dlen; char *dbuf = read_file(argv[1], &dlen);
```
#### Base64 String
The example defines a method `Base64_encode` with the following signature:
```c
/* Encode data with Base64
* `dst` - start of output buffer
* `src` - start of input data
* `len` - number of bytes to encode
* return value is the number of bytes written to `dst`, including the trailing NUL
*/
int Base64_encode(char *dst, const char *src, int len);
```
<details>
<summary><b>Base64 Encoder Implementation</b> (click to show)</summary>
The method mirrors [the TypeScript implementation](https://git.sheetjs.com/sheetjs/sheetjs/src/branch/master/modules/04_base64.ts):
```c
/* -------------------- */
/* base64 encoder */
const char Base64_map[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Encode `len` bytes starting at `src` as padded Base64 text in `dst`.
 *
 * `dst` must have room for ((len + 2) / 3) * 4 + 1 bytes: four characters for
 * every (possibly partial) group of three input bytes plus a NUL terminator.
 * A non-positive `len` produces an empty string.
 *
 * Returns the number of bytes written to `dst`, INCLUDING the NUL terminator.
 */
static int Base64_encode(char *dst, const char *src, int len) {
  /* read through unsigned char so bytes >= 0x80 are never sign-extended */
  const unsigned char *in = (const unsigned char *) src;
  unsigned char c1 = 0, c2 = 0, c3 = 0;
  char *p = dst;
  int i = 0;

  /* complete 3-byte groups only: a short final group must not read past `src` */
  while(len - i >= 3) {
    c1 = in[i++];
    c2 = in[i++];
    c3 = in[i++];
    *p++ = Base64_map[(c1 >> 2)];
    *p++ = Base64_map[((c1 & 3) << 4) | (c2 >> 4)];
    *p++ = Base64_map[((c2 & 15) << 2) | (c3 >> 6)];
    *p++ = Base64_map[c3 & 0x3F];
  }

  /* one or two bytes left over: emit them and pad the group with '=' */
  if(i < len) {
    c1 = in[i++];
    *p++ = Base64_map[(c1 >> 2)];
    if(i == len) {
      *p++ = Base64_map[(c1 & 3) << 4];
      *p++ = '=';
    } else {
      c2 = in[i++];
      *p++ = Base64_map[((c1 & 3) << 4) | (c2 >> 4)];
      *p++ = Base64_map[(c2 & 15) << 2];
    }
    *p++ = '=';
  }

  *p++ = '\0';
  return (int) (p - dst);
}
/* -------------------- */
```
</details>
Typically C code will read files and encode to Base64 strings. The intermediate
string length is approximately 33% larger than the original length (3 raw bytes
are mapped to 4 Base64 characters).
```c
/* base64 encode the file */
int sz = ((dlen + 2) / 3) * 4 + 1;
char *b64 = malloc(sz+1);
sz = Base64_encode(b64, dbuf, dlen);
```
#### Passing Strings
The Base64 string can be added to the engine using `js_pushlstring`. After
adding to the engine, `js_setglobal` can bind the variable to the name `buf`:
```c
/* create `buf` global from the data */
js_pushlstring(J, b64, sz);
js_setglobal(J, "buf");
```
#### SheetJS Operations
In this example, the goal is to pull the first worksheet and generate CSV rows.
`XLSX.read`[^1] parses the Base64 string and returns a SheetJS workbook object:
```c
/* parse file */
js_dostring(J, "var wb = XLSX.read(buf, {type: 'base64'});");
```
The `SheetNames` property[^2] is an array of the sheet names in the workbook.
The first sheet name can be obtained with the following JS snippet:
```js
var first_sheet_name = wb.SheetNames[0];
```
The `Sheets` property[^3] is an object whose keys are sheet names and whose
corresponding values are worksheet objects.
```js
var first_sheet = wb.Sheets[first_sheet_name];
```
The `sheet_to_csv` utility function[^4] generates a CSV string from the sheet:
```js
var csv = XLSX.utils.sheet_to_csv(first_sheet);
```
_C integration code_
In this example, the `console.log` method will print the generated CSV:
```c
/* print CSV from first worksheet */
js_dostring(J, "var ws = wb.Sheets[wb.SheetNames[0]]");
js_dostring(J, "console.log(XLSX.utils.sheet_to_csv(ws));");
```
## Integration Example
:::note Tested Deployments
This demo was tested in the following deployments:
| Architecture | Version | Date |
|:-------------|:--------|:-----------|
| `darwin-x64` | `1.3.4` | 2024-04-21 |
| `linux-x64` | `1.3.4` | 2024-04-21 |
:::
1) Make a project directory:
```bash
mkdir sheetjs-mu
cd sheetjs-mu
```
2) Build the MuJS static library from source:
```bash
curl -LO https://mujs.com/downloads/mujs-1.3.4.zip
unzip mujs-1.3.4.zip
cd mujs-1.3.4
make release
cd ..
```
3) Copy the `mujs.h` header file and `libmujs.a` library to the project folder:
```bash
cp mujs-1.3.4/build/release/libmujs.a mujs-1.3.4/mujs.h .
```
4) Download [`SheetJSMu.c`](pathname:///mujs/SheetJSMu.c):
```bash
curl -LO https://docs.sheetjs.com/mujs/SheetJSMu.c
```
5) Build the application:
```bash
gcc -o SheetJSMu SheetJSMu.c -L. -lmujs -lm -lc -std=c89 -Wall
```
6) Download the SheetJS Standalone script, shim script and test file. Move all
three files to the project directory:
<ul>
<li><a href={`https://cdn.sheetjs.com/xlsx-${current}/package/dist/shim.min.js`}>shim.min.js</a></li>
<li><a href={`https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js`}>xlsx.full.min.js</a></li>
<li><a href="https://sheetjs.com/pres.xlsb">pres.xlsb</a></li>
</ul>
<CodeBlock language="bash">{`\
curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/shim.min.js
curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js
curl -LO https://sheetjs.com/pres.xlsb`}
</CodeBlock>
7) Run the application:
```bash
./SheetJSMu pres.xlsb
```
If successful, the app will print the contents of the first sheet as CSV rows.
[^1]: See [`read` in "Reading Files"](/docs/api/parse-options)
[^2]: See ["Workbook Object"](/docs/csf/book)
[^3]: See ["Workbook Object"](/docs/csf/book)
[^4]: See [`sheet_to_csv` in "Utilities"](/docs/api/utilities/csv#csv-output)

@ -0,0 +1,118 @@
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include "mujs.h"
/* -------------------- */
/* these helper functions are from the official repl example */
/* `print` builtin: write every JS argument to stdout, separated by single
 * spaces and followed by exactly one newline, then return `undefined`.
 * Iteration starts at stack index 1 (in MuJS, index 0 holds `this`).
 */
static void jsB_print(js_State *J) {
  int i, top = js_gettop(J);
  for (i = 1; i < top; ++i) {
    const char *s = js_tostring(J, i);
    if (i > 1) putchar(' ');
    /* fputs, unlike puts, does not append a newline after each argument */
    fputs(s, stdout);
  }
  putchar('\n');
  js_pushundefined(J);
}
/* Error reporter callback: prints the engine's message on stderr with a
 * "REPORT MSG: " prefix. The engine state argument is not needed here.
 */
static void report(js_State *J, const char *msg)
{
  (void) J;
  fprintf(stderr, "REPORT MSG: %s\n", msg);
}
/* -------------------- */
/* base64 encoder */
const char Base64_map[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

/* Encode `len` bytes starting at `src` as padded Base64 text in `dst`.
 *
 * `dst` must have room for ((len + 2) / 3) * 4 + 1 bytes: four characters for
 * every (possibly partial) group of three input bytes plus a NUL terminator.
 * A non-positive `len` produces an empty string.
 *
 * Returns the number of bytes written to `dst`, INCLUDING the NUL terminator.
 */
static int Base64_encode(char *dst, const char *src, int len) {
  /* read through unsigned char so bytes >= 0x80 are never sign-extended */
  const unsigned char *in = (const unsigned char *) src;
  unsigned char c1 = 0, c2 = 0, c3 = 0;
  char *p = dst;
  int i = 0;

  /* complete 3-byte groups only: a short final group must not read past `src` */
  while(len - i >= 3) {
    c1 = in[i++];
    c2 = in[i++];
    c3 = in[i++];
    *p++ = Base64_map[(c1 >> 2)];
    *p++ = Base64_map[((c1 & 3) << 4) | (c2 >> 4)];
    *p++ = Base64_map[((c2 & 15) << 2) | (c3 >> 6)];
    *p++ = Base64_map[c3 & 0x3F];
  }

  /* one or two bytes left over: emit them and pad the group with '=' */
  if(i < len) {
    c1 = in[i++];
    *p++ = Base64_map[(c1 >> 2)];
    if(i == len) {
      *p++ = Base64_map[(c1 & 3) << 4];
      *p++ = '=';
    } else {
      c2 = in[i++];
      *p++ = Base64_map[((c1 & 3) << 4) | (c2 >> 4)];
      *p++ = Base64_map[(c2 & 15) << 2];
    }
    *p++ = '=';
  }

  *p++ = '\0';
  return (int) (p - dst);
}
/* -------------------- */
/* read file from filesystem */
/* Read the whole file at `filename` into a freshly allocated buffer.
 *
 * On success the buffer is returned and the number of bytes actually read is
 * stored in `*sz`; the caller owns the buffer and must free() it.
 * On any failure (NULL name, open/seek/tell/read error, out of memory) NULL is
 * returned and `*sz` is set to 0.
 */
static char *read_file(const char *filename, size_t *sz) {
  FILE *f;
  long fsize;
  char *buf;

  *sz = 0;
  if(!filename) return NULL;
  f = fopen(filename, "rb");
  if(!f) return NULL;

  /* measure the file; ftell reports -1 on failure */
  if(fseek(f, 0, SEEK_END) != 0 || (fsize = ftell(f)) < 0 || fseek(f, 0, SEEK_SET) != 0) {
    fclose(f);
    return NULL;
  }

  /* always request at least one byte so an empty file still yields a valid pointer */
  buf = malloc(fsize > 0 ? (size_t) fsize : 1);
  if(!buf) { fclose(f); return NULL; }

  *sz = fread(buf, 1, (size_t) fsize, f);
  if(ferror(f)) {
    free(buf);
    buf = NULL;
    *sz = 0;
  }
  fclose(f);
  return buf;
}
/* -------------------- */
/* Command-line entry point: `SheetJSMu <spreadsheet>`.
 *
 * Creates a MuJS engine, installs `print` / `console.log` / `global`, loads
 * the SheetJS shim and standalone script from the current directory, passes
 * the Base64-encoded input file to `XLSX.read` and prints the first worksheet
 * as CSV.
 *
 * Returns 0 on success and 1 if the arguments are missing, the file cannot be
 * read, memory runs out, or any script reported an error.
 */
int main(int argc, char **argv) {
  js_State *J = NULL;
  char *dbuf = NULL, *b64 = NULL;
  size_t dlen = 0;
  int sz = 0;
  int js_err = 0;  /* accumulates the non-zero error returns of js_dostring / js_dofile */
  int status = 1;  /* failure until the whole pipeline has run */

  if(argc < 2) {
    fprintf(stderr, "usage: %s <spreadsheet file>\n", argc > 0 ? argv[0] : "SheetJSMu");
    return 1;
  }

  /* initialize mujs */
  J = js_newstate(NULL, NULL, 0);
  if(!J) {
    fprintf(stderr, "could not create MuJS state\n");
    return 1;
  }
  js_setreport(J, report);

  /* create `console.log` */
  js_newcfunction(J, jsB_print, "print", 0);
  js_setglobal(J, "print");
  js_err |= js_dostring(J, "var console = { log: print };");

  /* create `global` variable */
  js_err |= js_dostring(J, "var global = (function() { return this; })(null);");

  /* load scripts */
  js_err |= js_dofile(J, "shim.min.js");
  js_err |= js_dofile(J, "xlsx.full.min.js");

  /* read file */
  dbuf = read_file(argv[1], &dlen);
  if(!dbuf) {
    fprintf(stderr, "could not read %s\n", argv[1]);
    goto cleanup;
  }

  /* the encoder and js_pushlstring take `int` lengths: reject inputs whose
   * encoded size (including the terminator) would not fit */
  if(dlen > ((size_t) INT_MAX / 4 - 1) * 3) {
    fprintf(stderr, "%s is too large\n", argv[1]);
    goto cleanup;
  }

  /* base64 encode the file: 4 characters per 3-byte group plus the terminator */
  b64 = malloc(((dlen + 2) / 3) * 4 + 1);
  if(!b64) {
    fprintf(stderr, "out of memory\n");
    goto cleanup;
  }
  sz = Base64_encode(b64, dbuf, (int) dlen);
  free(dbuf);
  dbuf = NULL;

  /* create `buf` global from the data; Base64_encode counts the NUL
   * terminator in its return value, which is not part of the string */
  js_pushlstring(J, b64, sz - 1);
  js_setglobal(J, "buf");

  /* parse file */
  js_err |= js_dostring(J, "var wb = XLSX.read(buf, {type: 'base64'});");

  /* print CSV from first worksheet */
  js_err |= js_dostring(J, "var ws = wb.Sheets[wb.SheetNames[0]]");
  js_err |= js_dostring(J, "console.log(XLSX.utils.sheet_to_csv(ws));");

  status = js_err ? 1 : 0;

cleanup:
  free(b64);
  free(dbuf);
  js_freestate(J);
  return status;
}