diff --git a/docz/docs/03-demos/06-desktop/09-cli.md b/docz/docs/03-demos/06-desktop/09-cli.md index b982f2c..8db5c89 100644 --- a/docz/docs/03-demos/06-desktop/09-cli.md +++ b/docz/docs/03-demos/06-desktop/09-cli.md @@ -44,7 +44,7 @@ This demo was tested in the following deployments: |:-------------|:--------|:------------|:-----------| | `darwin-x64` | `5.8.1` | `18.5.0` | 2023-05-08 | | `darwin-arm` | `5.8.1` | `18.5.0` | 2023-09-25 | -| `win10-x64` | `5.8.1` | `18.5.0` | 2023-05-08 | +| `win10-x64` | `5.8.1` | `18.5.0` | 2023-10-09 | | `win11-arm` | `5.8.1` | `18.5.0` | 2023-09-25 | | `linux-x64` | `5.8.1` | `18.5.0` | 2023-05-08 | | `linux-arm` | `5.8.1` | `18.5.0` | 2023-09-25 | @@ -55,7 +55,7 @@ This demo was tested in the following deployments: |:-------------|:-------------|:------------|:-----------| | `darwin-x64` | `4.0.0-rc.2` | `14.15.3` | 2023-05-08 | | `darwin-arm` | `4.0.0-rc.2` | `20.7.0` | 2023-09-25 | -| `win10-x64` | `4.0.0-rc.2` | `14.15.3` | 2023-05-08 | +| `win10-x64` | `4.0.0-rc.2` | `14.15.3` | 2023-10-09 | | `win11-arm` | `4.0.0-rc.2` | `18.17.1` | 2023-09-25 | | `linux-x64` | `4.0.0-rc.2` | `14.15.3` | 2023-05-08 | | `linux-arm` | `4.0.0-rc.2` | `20.7.0` | 2023-09-25 | @@ -66,7 +66,7 @@ This demo was tested in the following deployments: |:-------------|:--------|:------------|:-----------| | `darwin-x64` | `2.0.1` | `20.1.0` | 2023-05-08 | | `darwin-arm` | `2.1.1` | `20.7.0` | 2023-09-25 | -| `win10-x64` | `2.1.1` | `16.20.2` | 2023-08-27 | +| `win10-x64` | `2.1.2` | `16.20.2` | 2023-10-09 | | `linux-x64` | `2.0.1` | `20.1.0` | 2023-05-08 | | `linux-arm` | `2.1.1` | `20.7.0` | 2023-09-25 | @@ -161,7 +161,7 @@ npx boxednode@2.1.1 -s xlsx-cli.js -t xlsx-cli ```bash -npx boxednode@2.1.1 -s xlsx-cli.js -t xlsx-cli.exe -n 16.20.2 +npx boxednode@2.1.2 -s xlsx-cli.js -t xlsx-cli.exe -n 16.20.2 ``` :::info pass @@ -169,6 +169,8 @@ npx boxednode@2.1.1 -s xlsx-cli.js -t xlsx-cli.exe -n 16.20.2 The Windows 10 build requires Visual Studio with 
"Desktop development with C++" workload, Python 3, and NASM[^1]. +The build command should be run in "x64 Native Tools Command Prompt" + ::: :::caution pass @@ -186,11 +188,11 @@ Studio installer. :::caution pass -In the most recent Windows test against NodeJS `20.5.1`, the build failed due +In the most recent Windows test against NodeJS `20.8.0`, the build failed due to an issue in the OpenSSL dependency: ``` -...\node-v20.5.1\deps\openssl\openssl\crypto\info.c(176,16): error C2153: integer literals must have at least one digit [...\node-v20.5.1\deps\openssl\openssl.vcxproj] +...\node-v20.8.0\deps\openssl\openssl\crypto\cversion.c(75,33): error C2153: integer literals must have at least one digit [...\node-v20.8.0\deps\openssl\openssl.vcxproj] ``` SheetJS libraries are compatible with NodeJS versions dating back to `v0.8`. The @@ -202,8 +204,6 @@ was chosen since NodeJS `v18` upgraded the OpenSSL dependency. - - @@ -229,14 +229,14 @@ V8 engine. This demo uses the Rust integration to generate a command line tool. 
This demo was last tested in the following deployments: -| Architecture | V8 Version | Crate | Date | -|:-------------|:-------------|:---------|:-----------| -| `darwin-x64` | `11.4.183.2` | `0.71.2` | 2023-05-22 | -| `darwin-arm` | `11.4.183.2` | `0.71.2` | 2023-05-22 | -| `win10-x64` | `11.4.183.2` | `0.71.2` | 2023-05-23 | -| `win11-x64` | `11.7.439.6` | `0.75.1` | 2023-08-31 | -| `linux-x64` | `11.4.183.2` | `0.71.2` | 2023-05-23 | -| `linux-arm` | `11.7.439.6` | `0.75.1` | 2023-08-30 | +| Architecture | V8 Version | Crate | Date | +|:-------------|:--------------|:---------|:-----------| +| `darwin-x64` | `11.4.183.2` | `0.71.2` | 2023-05-22 | +| `darwin-arm` | `11.4.183.2` | `0.71.2` | 2023-05-22 | +| `win10-x64` | `11.8.172.13` | `0.79.2` | 2023-10-09 | +| `win11-x64` | `11.7.439.6` | `0.75.1` | 2023-08-31 | +| `linux-x64` | `11.4.183.2` | `0.71.2` | 2023-05-23 | +| `linux-arm` | `11.7.439.6` | `0.75.1` | 2023-08-30 | :::caution pass @@ -344,7 +344,7 @@ This demo was last tested in the following deployments: |:-------------|:---------|:-----------| | `darwin-x64` | `1.33.2` | 2023-05-08 | | `darwin-arm` | `1.34.1` | 2023-06-05 | -| `win10-x64` | `1.33.2` | 2023-05-08 | +| `win10-x64` | `1.37.1` | 2023-10-09 | | `win11-arm` | `1.37.0` | 2023-09-26 | | `linux-x64` | `1.33.2` | 2023-05-08 | | `linux-arm` | `1.36.3` | 2023-08-30 | diff --git a/docz/docs/03-demos/32-extensions/10-stata.md b/docz/docs/03-demos/32-extensions/10-stata.md new file mode 100644 index 0000000..6b7ea1c --- /dev/null +++ b/docz/docs/03-demos/32-extensions/10-stata.md @@ -0,0 +1,306 @@ +--- +title: Modern Spreadsheets in Stata +sidebar_label: Stata +pagination_prev: demos/cloud/index +pagination_next: demos/bigdata/index +--- + +import current from '/version.js'; +import CodeBlock from '@theme/CodeBlock'; + +export const b = {style: {color:"blue"}}; + +[Stata](https://www.stata.com/) is a statistical software package. It offers a +robust C-based extension system. 
+ +[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing +data from spreadsheets. + +This demo uses SheetJS to pull data from a spreadsheet for further analysis +within Stata. We'll create a Stata native extension that loads the +[Duktape](/docs/demos/engines/duktape) JavaScript engine and uses the SheetJS +library to read data from spreadsheets and convert it to a Stata-friendly format. + +```mermaid +flowchart LR + ofile[(workbook\nXLSB file)] + nfile[(clean file\nXLSX)] + data[[Stata\nVariables]] + ofile --> |Stata Extension\nSheetJS + Duktape| nfile + nfile --> |Stata command\nimport excel|data +``` + +The demo will read [a Numbers workbook](https://sheetjs.com/pres.numbers) and +generate variables for each column. A sample Stata session is shown below: + +![Stata commands](pathname:///stata/commands.png) + +:::note + +This demo was last tested by SheetJS users on 2023 October 09. + +::: + +:::info pass + +Stata has limited support for processing spreadsheets through the `import excel` +command[^1]. At the time of writing, it lacked support for XLSB, NUMBERS, and +other common spreadsheet formats. + +SheetJS libraries help fill the gap by normalizing spreadsheets to a form that +Stata can understand. + +::: + +## Integration Details + +The current recommendation involves a native plugin that reads arbitrary files +and generates clean XLSX files that Stata can import. + +The extension function ultimately pairs the SheetJS `read`[^2] and `write`[^3] +methods to read data from the old file and write a new file: + +```js +var wb = XLSX.read(original_file_data, {type: "buffer"}); +var new_file_data = XLSX.write(wb, {type: "array", bookType: "xlsx"}); +``` + +The extension function `cleanfile` will take one or two arguments: + +`plugin call cleanfile, "pres.numbers"` will generate `sheetjs.tmp.xlsx` from +the first argument (`"pres.numbers"`) and print instructions to load the file. 
+ +`plugin call cleanfile, "pres.numbers" verbose` will additionally print CSV +contents of each worksheet in the workbook. + +```mermaid +flowchart LR + ofile{{File\nName}} + subgraph JS Operations + ojbuf[(Buffer\nFile Bytes)] + wb(((SheetJS\nWorkbook))) + njbuf[(Buffer\nXLSX bytes)] + end + obuf[(File\nbytes)] + nbuf[(New file\nbytes)] + nfile[(XLSX\nFile)] + ofile --> |C\nRead File| obuf + obuf --> |Duktape\nBuffer Ops| ojbuf + ojbuf --> |SheetJS\n`read`| wb + wb --> |SheetJS\n`write`| njbuf + njbuf --> |Duktape\nBuffer Ops| nbuf + nbuf --> |C\nWrite File| nfile +``` + +### C Extensions + +Stata C extensions are shared libraries or DLLs that use special Stata methods +for parsing arguments and returning values. + +Arguments are passed to the `stata_call` function in the DLL. + +`SF_display` and `SF_error` display text and error messages respectively. + +### Duktape JS Engine + +This demo uses the [Duktape JavaScript engine](/docs/demos/engines/duktape). The +SheetJS + Duktape demo covers engine integration in more detail. + +The [SheetJS Standalone scripts](/docs/getting-started/installation/standalone) +can be loaded in Duktape by reading the source from the filesystem. + +## Complete Demo + +:::info pass + +This demo was tested on Windows x64. The path names and build commands will +differ on other platforms and operating systems. + +::: + +The [`cleanfile.c`](pathname:///stata/cleanfile.c) extension defines one plugin +function. It can be chained with `import excel`: + +```stata +program cleanfile, plugin +plugin call cleanfile, "pres.numbers" verbose +program drop cleanfile +import excel "sheetjs.tmp.xlsx", firstrow +``` + +### Create Plugin + +0) Ensure "Windows Subsystem for Linux" (WSL) and Visual Studio are installed. 
+ +1) Open a new "x64 Native Tools Command Prompt" window and create a project +folder `c:\sheetjs-stata`: + +```powershell +cd c:\ +mkdir sheetjs-stata +cd sheetjs-stata +``` + +2) Enter WSL: + +```powershell +bash +``` + +3) Download [`stplugin.c`](https://www.stata.com/plugins/stplugin.c) and +[`stplugin.h`](https://www.stata.com/plugins/stplugin.h) from the Stata website: + +```bash +curl -LO https://www.stata.com/plugins/stplugin.c +curl -LO https://www.stata.com/plugins/stplugin.h +``` + +4) Still within WSL, install Duktape: + +```bash +curl -LO https://duktape.org/duktape-2.7.0.tar.xz +tar -xJf duktape-2.7.0.tar.xz +mv duktape-2.7.0/src/*.{c,h} . +``` + +5) Still within WSL, download the demo source +[`cleanfile.c`](https://docs.sheetjs.com/stata/cleanfile.c): + +```bash +curl -LO https://docs.sheetjs.com/stata/cleanfile.c +``` + +6) Exit WSL: + +```bash +exit +``` + +The window will return to the command prompt. + +7) Build the DLL: + +```powershell +cl /LD cleanfile.c stplugin.c duktape.c +``` + +### Install Plugin + +8) Copy the DLL to `cleanfile.plugin` in the Stata data directory. For example, +with a shared data directory `c:\data`: + +```powershell +mkdir c:\data +copy cleanfile.dll c:\data\cleanfile.plugin +``` + +### Download SheetJS Scripts + +9) Move to the `c:\data` directory + +```powershell +cd c:\data +``` + +10) Enter WSL + +```powershell +bash +``` + +11) Within WSL, download SheetJS scripts and the test file. + +{`\ +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/shim.min.js +curl -LO https://cdn.sheetjs.com/xlsx-${current}/package/dist/xlsx.full.min.js +curl -LO https://sheetjs.com/pres.numbers`} + + +12) Exit WSL: + +```bash +exit +``` + +The window will return to the command prompt. 
+ +### Stata Test + +:::note pass + +The screenshot in the introduction shows the result of steps 13 - 19 + +::: + +13) Open Stata + +14) Move to the `c:\data` directory in Stata: + +```stata +cd c:\data +``` + +15) Load the `cleanfile` plugin: + +```stata +program cleanfile, plugin +``` + +16) Read the `pres.numbers` test file: + +```stata +plugin call cleanfile, "pres.numbers" verbose +``` + +The result will show the data from `pres.numbers`: + +
+. plugin call cleanfile, "pres.numbers" verbose{'\n'}
+Worksheet 0 Name: Sheet1{'\n'}
+Name,Index{'\n'}
+Bill Clinton,42{'\n'}
+GeorgeW Bush,43{'\n'}
+Barack Obama,44{'\n'}
+Donald Trump,45{'\n'}
+Joseph Biden,46{'\n'}
+{'\n'}
+Saved to `sheetjs.tmp.xlsx`{'\n'}
+import excel "sheetjs.tmp.xlsx", firstrow will read the first sheet and use headers{'\n'}
+for more help, see import excel
+
+ +17) Close the plugin: + +```stata +program drop cleanfile +``` + +18) Clear the current session: + +```stata +clear +``` + +

19) In the result of Step 16, click the link on import +excel "sheetjs.tmp.xlsx", firstrow

+ +Alternatively, manually type the command: + +```stata +import excel "sheetjs.tmp.xlsx", firstrow +``` + +The output will show the import result: + +
+. import excel "sheetjs.tmp.xlsx", firstrow{'\n'}
+(2 vars, 5 obs)
+
+ +20) Open the Data Editor (in Browse or Edit mode) and compare to the screenshot: + +![Data Editor showing data from the file](pathname:///stata/data-editor.png) + +[^1]: Run `help import excel` in Stata or see ["import excel"](https://www.stata.com/manuals/dimportexcel.pdf) in the Stata documentation. +[^2]: See [`read` in "Reading Files"](/docs/api/parse-options) +[^3]: See [`write` in "Writing Files"](/docs/api/write-options) \ No newline at end of file diff --git a/docz/docs/03-demos/37-bigdata/02-worker.md b/docz/docs/03-demos/37-bigdata/02-worker.md index 25b808e..d9fdef9 100644 --- a/docz/docs/03-demos/37-bigdata/02-worker.md +++ b/docz/docs/03-demos/37-bigdata/02-worker.md @@ -194,7 +194,7 @@ Each browser demo was tested in the following environments: | Edge 116 | 2023-09-02 | | | Safari 16.6 | 2023-09-02 | File System Access API is not supported | | Brave 1.57 | 2023-09-02 | File System Access API is not supported | -| Firefox 113 | 2023-05-22 | File System Access API is not supported | +| Firefox 118 | 2023-10-09 | File System Access API is not supported | ::: diff --git a/docz/docs/03-demos/42-engines/02-v8.md b/docz/docs/03-demos/42-engines/02-v8.md index 1260798..682557f 100644 --- a/docz/docs/03-demos/42-engines/02-v8.md +++ b/docz/docs/03-demos/42-engines/02-v8.md @@ -787,7 +787,7 @@ This demo was last tested in the following deployments: |:-------------|:---------|:-----------| | `darwin-x64` | `0.75.1` | 2023-08-26 | | `darwin-arm` | `0.73.0` | 2023-06-05 | -| `win10-x64` | `0.71.2` | 2023-05-23 | +| `win10-x64` | `0.79.2` | 2023-10-09 | | `linux-x64` | `0.71.2` | 2023-05-23 | | `linux-arm` | `0.75.1` | 2023-08-30 | diff --git a/docz/docs/03-demos/42-engines/08-quickjs.md b/docz/docs/03-demos/42-engines/08-quickjs.md index 439dc5e..8cbc6f3 100644 --- a/docz/docs/03-demos/42-engines/08-quickjs.md +++ b/docz/docs/03-demos/42-engines/08-quickjs.md @@ -264,7 +264,7 @@ This demo was tested in the following deployments: 
|:-------------|:-----------|:-----------| | `darwin-x64` | `2788d71` | 2023-07-24 | | `darwin-arm` | `2788d71` | 2023-06-05 | -| `win10-x64` | `2788d71` | 2023-07-24 | +| `win10-x64` | `2788d71` | 2023-10-09 | | `win11-arm` | `2788d71` | 2023-09-25 | | `linux-x64` | `2788d71` | 2023-06-02 | | `linux-arm` | `2788d71` | 2023-08-29 | diff --git a/docz/docs/03-demos/index.md b/docz/docs/03-demos/index.md index 7925768..af80d75 100644 --- a/docz/docs/03-demos/index.md +++ b/docz/docs/03-demos/index.md @@ -82,6 +82,10 @@ in the [issue tracker](https://git.sheetjs.com/sheetjs/docs.sheetjs.com/issues) - [`Chrome and Chromium Extensions`](/docs/demos/extensions/chromium) - [`Google Sheets + Apps Script`](/docs/demos/extensions/gsheet) - [`AppleScript and OSA`](/docs/demos/extensions/osa) +- [`Mathematica`](/docs/demos/extensions/mathematica) +- [`MATLAB`](/docs/demos/extensions/matlab) +- [`Stata`](/docs/demos/extensions/stata) +- [`Maple`](/docs/demos/extensions/maple) ### Cloud Platforms diff --git a/docz/docs/09-miscellany/02-errors.md b/docz/docs/09-miscellany/02-errors.md index c0d2cf2..f6c6141 100644 --- a/docz/docs/09-miscellany/02-errors.md +++ b/docz/docs/09-miscellany/02-errors.md @@ -443,6 +443,16 @@ function add_to_sheet(sheet, cell) { +#### Some decimal values are rounded + +Excel appears to round values in certain cases. It is suspected that the XLSX +parser handles 15 decimal digits of precision. This results in inaccuracies such +as `7581185.559999999` rounding to `7581185.56` and `7581185.5599999903` +rounding to `7581185.55999999`. + +See [Issue 3003](https://git.sheetjs.com/sheetjs/sheetjs/issues/3003) in the +main SheetJS CE repo for details. + #### Corrupt files Third-party build tools and frameworks may post-process SheetJS scripts. 
The diff --git a/docz/static/cli/Cargo.toml b/docz/static/cli/Cargo.toml index 4a269ed..b7649ff 100644 --- a/docz/static/cli/Cargo.toml +++ b/docz/static/cli/Cargo.toml @@ -4,7 +4,7 @@ version = "0.1.0" edition = "2021" [dependencies] -v8 = "0.75.1" +v8 = "0.79.2" [[bin]] name = "sheet2csv" diff --git a/docz/static/stata/cleanfile.c b/docz/static/stata/cleanfile.c new file mode 100644 index 0000000..517c97c --- /dev/null +++ b/docz/static/stata/cleanfile.c @@ -0,0 +1,131 @@ +#include "stplugin.h" +#include "duktape.h" + +#define DOIT(cmd) duk_eval_string_noresult(ctx, cmd); + +#define FAIL_DUK(cmd) { \ + const char *errmsg = duk_safe_to_string(ctx, -1); \ + duk_destroy_heap(ctx); \ + snprintf(failbuf, 255, "error in %s: %s", cmd, errmsg); \ + SF_error(failbuf); \ + return NULL; \ +} + +#define FAIL_LOAD { \ + duk_push_undefined(ctx); \ + SF_error("Error in load_file"); \ + return 1; \ +} + +static char *read_file(const char *filename, size_t *sz) { + FILE *f = fopen(filename, "rb"); + if(!f) return NULL; + long fsize; { fseek(f, 0, SEEK_END); fsize = ftell(f); fseek(f, 0, SEEK_SET); } + char *buf = (char *)malloc(fsize * sizeof(char)); + *sz = fread((void *) buf, 1, fsize, f); + fclose(f); + return buf; +} + +static duk_int_t eval_file(duk_context *ctx, const char *filename) { + size_t len; char *buf = read_file(filename, &len); + if(!buf) FAIL_LOAD + + duk_push_lstring(ctx, (const char *)buf, (duk_size_t)len); + duk_int_t retval = duk_peval(ctx); + duk_pop(ctx); + return retval; +} + +static duk_int_t load_file(duk_context *ctx, const char *filename, const char *var) { + size_t len; char *buf = read_file(filename, &len); + if(!buf) FAIL_LOAD + + duk_push_external_buffer(ctx); + duk_config_buffer(ctx, -1, buf, len); + duk_put_global_string(ctx, var); + return 0; +} + +static duk_int_t save_file(duk_context *ctx, const char *filename, const char *var) { + duk_get_global_string(ctx, var); + duk_size_t sz; + char *buf = (char *)duk_get_buffer_data(ctx, -1, &sz); + + 
if(!buf) return 1; + FILE *f = fopen(filename, "wb"); fwrite(buf, 1, sz, f); fclose(f); + return 0; +} + +STDLL stata_call(int argc, char *argv[]) +{ + duk_int_t res = 0; + char failbuf[255]; + + /* initialize duktape */ + duk_context *ctx = duk_create_heap_default(); + /* duktape does not expose a standard "global" by default */ + DOIT("var global = (function(){ return this; }).call(null);") + + /* load SheetJS library */ + res = eval_file(ctx, "shim.min.js"); + if(res != 0) FAIL_DUK("shim load") + res = eval_file(ctx, "xlsx.full.min.js"); + if(res != 0) FAIL_DUK("library load") + + /* print SheetJS version number */ + //duk_eval_string(ctx, "XLSX.version"); + //char verbuf[255]; + //snprintf(verbuf, 255, "SheetJS library version: %s\n", duk_get_string(ctx, -1)); + //SF_display(verbuf); + + /* read file */ + res = load_file(ctx, argv[0], "buf"); + if(res != 0) FAIL_DUK("file load") + + /* parse workbook */ + DOIT("wb = XLSX.read(buf.slice(0, buf.length), {type:'buffer'});"); + + /* print CSV */ + duk_eval_string(ctx, "wb.SheetNames.length"); + duk_uint_t wscnt = duk_get_uint(ctx, -1); + duk_pop(ctx); + + /* if argument 2 is "verbose", print CSV contents */ + if(argc>1 && !strncmp(argv[1], "verbose", 7)) for(uint32_t wsidx = 0; wsidx < wscnt; ++wsidx) { + /* select n-th worksheet */ + char wsbuf[80]; + snprintf(wsbuf, 80, "ws = wb.Sheets[wsname = wb.SheetNames[%d]]", wsidx); \ + DOIT(wsbuf); + + duk_eval_string(ctx, "wsname"); + char namebuf[60]; + snprintf(namebuf, 60, "Worksheet %d Name: %s\n", wsidx, duk_get_string(ctx, -1)); + duk_pop(ctx); + SF_display(namebuf); + + /* convert to CSV */ + duk_eval_string(ctx, "XLSX.utils.sheet_to_csv(ws)"); + const char *csv = duk_get_string(ctx, -1); + + /* print each row in a separate line */ + char *tok = strtok(csv, "\n"); + while(tok != NULL) { + SF_display(tok); + SF_display("\n"); + tok = strtok(NULL, "\n"); + } + duk_pop(ctx); + } + + /* write to sheetjs.tmp.xlsx */ + DOIT("newbuf = (XLSX.write(wb, {type:'array', 
bookType:'xlsx'}));");\ + res = save_file(ctx, "sheetjs.tmp.xlsx", "newbuf");\ + if(res != 0) FAIL_DUK("save sheetjsw.xlsx") + + SF_display("\n"); + SF_display("Saved to `sheetjs.tmp.xlsx`\n"); + SF_display("{stata import excel \"sheetjs.tmp.xlsx\", firstrow} will read the first sheet and use headers\n"); + SF_display("for more help, see {help import excel}\n"); + return(0) ; +} diff --git a/docz/static/stata/commands.png b/docz/static/stata/commands.png new file mode 100644 index 0000000..311ce41 Binary files /dev/null and b/docz/static/stata/commands.png differ diff --git a/docz/static/stata/data-editor.png b/docz/static/stata/data-editor.png new file mode 100644 index 0000000..454bf7e Binary files /dev/null and b/docz/static/stata/data-editor.png differ