From 90300cd6b7fe6942be9c8c24b5e3cc1f9d0f7e00 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Thu, 14 Sep 2023 04:19:13 -0400 Subject: [PATCH] math --- docz/docs/03-demos/03-net/08-headless.md | 77 ++++- docz/docs/03-demos/04-static/02-gatsbyjs.md | 18 +- docz/docs/03-demos/08-local/01-file.md | 117 ++++++-- .../03-demos/32-extensions/09-mathematica.md | 160 +++++++++-- docz/docs/03-demos/32-extensions/11-matlab.md | 269 ++++++++++++++++++ docz/docs/07-csf/07-features/06-nf.md | 176 +++++++++--- docz/docusaurus.config.js | 2 +- 7 files changed, 709 insertions(+), 110 deletions(-) create mode 100644 docz/docs/03-demos/32-extensions/11-matlab.md diff --git a/docz/docs/03-demos/03-net/08-headless.md b/docz/docs/03-demos/03-net/08-headless.md index feac0aa..3541cd5 100644 --- a/docz/docs/03-demos/03-net/08-headless.md +++ b/docz/docs/03-demos/03-net/08-headless.md @@ -11,10 +11,10 @@ Headless automation involves controlling "headless browsers" to access websites and submit or download data. It is also possible to automate browsers using custom browser extensions. -The [SheetJS standalone script](/docs/getting-started/installation/standalone) can be added to -any website by inserting a `SCRIPT` tag. Headless browsers usually provide -utility functions for running custom snippets in the browser and passing data -back to the automation script. +The [SheetJS standalone script](/docs/getting-started/installation/standalone) +can be added to any website by inserting a `SCRIPT` tag. Headless browsers +usually provide utility functions for running custom snippets in the browser and +passing data back to the automation script. ## Use Case @@ -128,19 +128,23 @@ const puppeteer = require('puppeteer'); :::note -This demo was last tested on 2023 April 29 against Puppeteer 19.11.1. +This demo was last tested on 2023 September 14 against Puppeteer 21.2.1. 
::: 1) Install SheetJS and Puppeteer: {`\ -npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz puppeteer@19.11.1`} +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz puppeteer@21.2.1`} 2) Save the `SheetJSPuppeteer.js` code snippet to `SheetJSPuppeteer.js`. -3) Run `node SheetJSPuppeteer.js`. +3) Run the script: + +```bash +node SheetJSPuppeteer.js +``` When the script finishes, the file `SheetJSPuppeteer.xlsb` will be created. This file can be opened with Excel. @@ -199,7 +203,7 @@ await browser.close();`} :::note -This demo was last tested on 2023 April 29 against deno-puppeteer 16.2.0. +This demo was last tested on 2023 September 14 against deno-puppeteer 16.2.0. ::: @@ -209,9 +213,24 @@ This demo was last tested on 2023 April 29 against deno-puppeteer 16.2.0. env PUPPETEER_PRODUCT=chrome deno run -A --unstable https://deno.land/x/puppeteer@16.2.0/install.ts ``` +:::note pass + +In PowerShell, the environment variable should be set separately: + +```powershell +[Environment]::SetEnvironmentVariable('PUPPETEER_PRODUCT', 'chrome') +deno run -A --unstable https://deno.land/x/puppeteer@16.2.0/install.ts +``` + +::: + 2) Save the `SheetJSPuppeteer.ts` code snippet to `SheetJSPuppeteer.ts`. -3) Run `deno run -A --unstable SheetJSPuppeteer.ts`. +3) Run the script: + +```bash +deno run -A --unstable SheetJSPuppeteer.ts +``` When the script finishes, the file `SheetJSPuppeteer.xlsb` will be created. This file can be opened with Excel. @@ -272,7 +291,7 @@ const { webkit } = require('playwright'); // import desired browser :::note -This demo was last tested on 2023 April 29 against Playwright 1.33.0. +This demo was last tested on 2023 September 14 against Playwright 1.38.0. ::: @@ -284,11 +303,40 @@ npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz playwri 2) Save the `SheetJSPlaywright.js` code snippet to `SheetJSPlaywright.js`. -3) Run `node SheetJSPlaywright.js`. 
+3) Run the script: + +```bash +node SheetJSPlaywright.js +``` When the script finishes, the file `SheetJSPlaywright.xlsb` will be created. This file can be opened with Excel. +:::caution pass + +In the latest Windows 10 test, the command failed with a clear error message: + +``` +╔═════════════════════════════════════════════════════════════════════════╗ +║ Looks like Playwright Test or Playwright was just installed or updated. ║ +║ Please run the following command to download new browsers: ║ +║ ║ +║ npx playwright install ║ +║ ║ +║ <3 Playwright Team ║ +╚═════════════════════════════════════════════════════════════════════════╝ +``` + +As recommended, the command + +```bash +npx playwright install +``` + +will download and install the browsers. + +::: + ## PhantomJS PhantomJS is a headless web browser powered by WebKit. @@ -351,7 +399,7 @@ strongly recommended to add verbose logging and to lint scripts before use. :::note -This demo was last tested on 2023 August 16 against PhantomJS 2.1.1 +This demo was last tested on 2023 September 14 against PhantomJS 2.1.1 ::: @@ -359,9 +407,10 @@ This demo was last tested on 2023 August 16 against PhantomJS 2.1.1 2) Save the `SheetJSPhantom.js` code snippet to `SheetJSPhantom.js`. -3) Run the command. +3) Run the `phantomjs` program and pass the script as the first argument. -In macOS: +For example, if the macOS Archive Utility unzipped the `2.1.1` release, binaries +will be placed in `phantomjs-2.1.1-macosx/bin/` and the command will be: ```bash ./phantomjs-2.1.1-macosx/bin/phantomjs SheetJSPhantom.js diff --git a/docz/docs/03-demos/04-static/02-gatsbyjs.md b/docz/docs/03-demos/04-static/02-gatsbyjs.md index 4e0fea3..d9f030e 100644 --- a/docz/docs/03-demos/04-static/02-gatsbyjs.md +++ b/docz/docs/03-demos/04-static/02-gatsbyjs.md @@ -9,8 +9,8 @@ sidebar_custom_props: import current from '/version.js'; import CodeBlock from '@theme/CodeBlock'; -Gatsby is a framework for creating websites. 
It uses React components for page -templates and GraphQL for loading data. +[GatsbyJS](https://www.gatsbyjs.com/) is a framework for creating websites. It +uses React components for page templates and GraphQL for loading data. [`gatsby-transformer-excel`](https://www.gatsbyjs.com/plugins/gatsby-transformer-excel/) is a transformer that generates GraphQL nodes for each row of each worksheet. @@ -82,8 +82,8 @@ The following query pulls the `Name` and `Index` fields from each row: :::note -This demo was tested on 2023 April 06 against `create-gatsby@3.8.0`. The -generated project used `gatsby@5.8.1` and `react@18.2.0`. +This demo was tested on 2023 September 13 against `create-gatsby@3.12.0`. The +generated project used `gatsby@5.12.4` and `react@18.2.0`. ::: @@ -270,7 +270,7 @@ Save the file and notice that the table has refreshed with the new data: ### Static site 11) Stop the development server and run `npm run build`. Once the build is -finished, the display will confirm that the `/pres` route is static: +finished, the output will confirm that the `/pres` route is static: ``` Pages @@ -293,9 +293,11 @@ Pages ╰────────────────────────────────────────────────────────────────╯ ``` -The built page will be placed in `public/pres/index.html`. Open the page with a -text editor and search for "SheetJS" to verify raw HTML was generated: +The generated page will be placed in `public/pres/index.html`. -```html +12) Open `public/pres/index.html` with a text editor and search for "SheetJS". +There will be a HTML row: + +```html title="public/pres/index.html" SheetJS Dev47 ``` diff --git a/docz/docs/03-demos/08-local/01-file.md b/docz/docs/03-demos/08-local/01-file.md index 8ff5cb5..b2e4250 100644 --- a/docz/docs/03-demos/08-local/01-file.md +++ b/docz/docs/03-demos/08-local/01-file.md @@ -36,30 +36,52 @@ Other demos cover APIs for local file access on special platforms: JavaScript engines represent binary data in a number of structures. 
-### `Uint8Array` +The `type` option for SheetJS `read` function[^1] controls how the data should +be interpreted. This parameter distinguishes [binary strings](#binary-strings) +from [Base64 strings](#base64-strings). + +The `type` option for SheetJS `write` function[^2] controls the output storage. + +### `Uint8Array` and `Buffer` A `Uint8Array` is a Typed Array where each value is a 8-bit unsigned integer. Server-side platforms including NodeJS typically use `Uint8Array`, or a subclass -such as `Buffer`, to represent data from files. +such as `Buffer`[^3], to represent data from files. + +The SheetJS `read` method can read data from `Uint8Array` without any options: + +```js +const wb = XLSX.read(u8); +``` + +The SheetJS `write` method can generate workbooks stored in +`Uint8Array` structures with the option `type: "buffer"`: + +```js +const u8 = XLSX.write(wb, {bookType: "xlsx", type: "buffer"}); +``` + +:::note pass + +In NodeJS, the `write` method will generate a `Buffer` instance. + +::: -The SheetJS `read` method can read data from `Uint8Array` without special -options. The SheetJS `write` method can generate workbooks stored in -`Uint8Array` structures with the option `bookType: "buffer"` ### `ArrayBuffer` -An `ArrayBuffer` represents an array of bytes. Unlike `Uint8Array`, the bytes -are not immediately available. Typically the underlying data is pulled using -the `Uint8Array` constructor: +An `ArrayBuffer` represents an array of bytes. The `Uint8Array` constructor can +synchronously create a view without copying the underlying data: ```js +/* create a Uint8Array "view" */ const u8 = new Uint8Array(array_buffer); ``` The SheetJS `read` method can read data from `ArrayBuffer` without special options, as it performs the aforementioned conversion. 
The SheetJS `write` method can generate workbooks stored in `ArrayBuffer` structures with the -option `bookType: "array"` +option `type: "array"` ### `Blob` and `File` @@ -81,7 +103,6 @@ async function blob_to_wb(blob) { } ``` - B) For broader browser support, the `FileReader` API can pull `ArrayBuffer` data using the `readAsArrayBuffer` method: @@ -123,23 +144,31 @@ The SheetJS `write` method can generate a `Uint8Array` which can be passed to the `Blob` constructor: ```js -/* write workbook to Uint8Array */ -const u8 = XLSX.write(wb, { bookType: "xlsx", type: "buffer" }); -/* create array of parts */ -const parts = [ u8 ]; // `Blob` constructor expects this -/* create Blob */ -const blob = new Blob(parts, { type: "application/vnd.ms-excel" }); +function wb_to_blob(wb, bookType) { + /* write workbook to Uint8Array */ + const u8 = XLSX.write(wb, { bookType: bookType || "xlsx", type: "buffer" }); + /* create array of parts */ + const parts = [ u8 ]; // `Blob` constructor expects this + /* create Blob */ + const blob = new Blob(parts, { type: "application/vnd.ms-excel" }); + return blob; +} ``` The `File` constructor accepts an additional `name` argument: ```js -/* write workbook to Uint8Array */ -const u8 = XLSX.write(wb, { bookType: "xlsx", type: "buffer" }); -/* create array of parts */ -const parts = [ u8 ]; // `Blob` constructor expects this -/* create Blob */ -const blob = new File(parts, "SheetJSFileExport.xlsx", { type: "application/vnd.ms-excel" }); +function wb_to_file(wb, filename) { + /* impute bookType from file extension */ + const ext = filename.slice(filename.lastIndexOf(".") + 1); + /* write workbook to Uint8Array */ + const u8 = XLSX.write(wb, { bookType: ext, type: "buffer" }); + /* create array of parts */ + const parts = [ u8 ]; // `File` constructor expects this + /* create File */ + const file = new File(parts, filename, { type: "application/vnd.ms-excel" }); + return file; +} ``` ### Binary Strings @@ -197,6 +226,46 @@ The SheetJS `write` 
method can generate Base64 strings using `type: "base64"`: const b64 = XLSX.write(wb, { bookType: "xlsx", type: "base64" }); ``` +### Arrays of Numbers + +Some platforms represent binary data as arrays of numbers, where each number +represents one byte in the file. + +The SheetJS `read` method supports arrays of unsigned bytes (where each value +is between `0` and `255`) with `type: "array"`. + +:::caution Java and Signed Bytes + +[Google Sheets](/docs/demos/extensions/gsheet) follows Java signed data type +conventions. Byte arrays include values from `-128` to `127`. + +
How to Fix Signed Arrays (click to show) + +The unsigned value for a negative byte can be calculated with a bitwise AND +(`&`) operation against `0xFF`: + +```js +const unsigned_byte = signed_byte & 0xFF; +``` + +For legacy platforms including [NetSuite](/docs/demos/cloud/netsuite) 2.0, the +bitwise AND assignment operator (`&=`) can rectify an array in place: + +```js +/* convert a signed byte array to an unsigned byte array in place */ +for(var i = 0; i < array.length; ++i) array[i] &= 0xFF; +``` + +For modern platforms, the `Uint8Array` constructor understands signed bytes: + +```js +/* copy data into a new Uint8Array */ +const u8 = new Uint8Array(array); +``` + +
+ +::: ## Web Browsers @@ -684,3 +753,7 @@ Desktop and mobile apps have their own specific APIs covered in separate demos: - [Electron and other desktop apps](/docs/demos/desktop) - [React Native and other mobile apps](/docs/demos/mobile) + +[^1]: See ["Input Type" in "Reading Files"](/docs/api/parse-options#input-type) +[^2]: See ["Supported Output Formats" type in "Writing Files"](/docs/api/write-options#supported-output-formats) +[^3]: See ["Buffers and TypedArrays"](https://nodejs.org/api/buffer.html#buffers-and-typedarrays) in the NodeJS documentation. \ No newline at end of file diff --git a/docz/docs/03-demos/32-extensions/09-mathematica.md b/docz/docs/03-demos/32-extensions/09-mathematica.md index f8abfe3..ce559aa 100644 --- a/docz/docs/03-demos/32-extensions/09-mathematica.md +++ b/docz/docs/03-demos/32-extensions/09-mathematica.md @@ -31,16 +31,119 @@ The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be loaded in NodeJS scripts, including scripts invoked using the `"NodeJS"` mode of the `ExternalEvaluate`[^1] Mathematica function. -:::caution pass +However, the current cross-platform recommendation involves a dedicated command +line tool that leverages SheetJS libraries to perform spreadsheet processing. -In local testing, there were incompatibilities with recent NodeJS versions. +### External Engines -**This is a Mathematica bug.** +The following diagram depicts the workbook waltz: + +```mermaid +flowchart LR + subgraph `ExternalEvaluate` + file[(workbook\nfile)] + csvstr(CSV\nString) + end + data[(Dataset)] + file --> |NodeJS\nSheetJS Ops| csvstr + csvstr --> |ImportString\nMathematica| data +``` + +_Mathematica_ + +NodeJS can be activated from Mathematica using `RegisterExternalEvaluator`[^2]. +Once activated, JavaScript code can be run using `ExternalEvaluate`[^3]. If the +NodeJS code returns CSV data, `ImportString`[^4] can generate a `Dataset`[^5]. 
+ +_SheetJS_ + +For a file residing on the filesystem, the SheetJS `readFile` function[^6] can +generate a workbook object. The exact location can be determined by printing +`require("process").cwd()`[^7] in `ExternalEvaluate`: + +```mathematica +In[1]:= ExternalEvaluate["NodeJS", "require('process').cwd()"] +Out[1]= "C:\Users\Me\Documents" +``` + +After pulling the first worksheet[^8], the SheetJS `sheet_to_csv` function[^9] +generates a CSV string. + +_Complete Function_ + +The following function reads a file, parses the first worksheet and returns a +Dataset object assuming one header row. + +```mathematica title="Complete Function" +(* Import file stored in the Documents folder (e.g. C:\Users\Me\Documents) *) +SheetJSImportFileEE[filename_]:=Module[{csv}, ( + (* This was required in local testing *) + RegisterExternalEvaluator["NodeJS","C:\\Program Files\\nodejs\\node.exe"]; + + (* Generate CSV from first sheet *) + csv:=ExternalEvaluate["NodeJS", StringJoin[ + (* module installed in home directory *) + "var XLSX = require('xlsx');", + (* read specified filename *) + "var wb = XLSX.readFile('",filename,"');", + (* grab first worksheet *) + "var ws = wb.Sheets[wb.SheetNames[0]];", + (* convert to CSV *) + "XLSX.utils.sheet_to_csv(ws)" + ]]; + + (* Parse CSV into a dataset *) + ImportString[csv, "Dataset", "HeaderLines"->1]; +)] +``` + +
How to run the example (click to hide) + +:::note + +This example was last tested on 2023 September 13 with Mathematica 13.3. ::: -The current recommendation involves a dedicated command-line tool that leverages -SheetJS libraries to to perform spreadsheet processing. +0) Install NodeJS. When the demo was tested, version `18.14.1` was installed. + +1) Install dependencies in the Home folder (`~` or `$HOME` or `%HOMEPATH%`): + +{`\ +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz zeromq@6.0.0-beta.17`} + + +2) Open a new Mathematica Notebook and register NodeJS. When the example was +tested in Windows, the commands were: + +```mathematica +RegisterExternalEvaluator["NodeJS","C:\\Program Files\\nodejs\\node.exe"] +FindExternalEvaluators["NodeJS"] +``` + +The second argument to `RegisterExternalEvaluator` should be the path to the +`node` or `node.exe` binary. + +If NodeJS is registered, the value in the "Registered" column will be "True". + +3) To determine the base folder, run `require("process").cwd()` from NodeJS: + +```mathematica +ExternalEvaluate["NodeJS", "require('process').cwd()"] +``` + +4) Download [`pres.numbers`](https://sheetjs.com/pres.numbers) and move the file +to the base folder as shown in the previous step. + +5) Copy and evaluate the "Complete Function" in the previous codeblock. + +6) Run the function and confirm the result is a proper Dataset: + +```mathematica +SheetJSImportFileEE["pres.numbers"] +``` + +
### Command-Line Tools @@ -48,8 +151,8 @@ The ["Command-Line Tools" demo](/docs/demos/desktop/cli) creates `xlsx-cli`, a command-line tool that reads a spreadsheet file and generates CSV rows from the first worksheet. -`ExternalEvaluate`[^2] can run command-line tools and capture standard output. -The following snippet processes `~/Downloads.pres.numbers` and pulls CSV data +`ExternalEvaluate`[^10] can run command-line tools and capture standard output. +The following snippet processes `~/Downloads/pres.numbers` and pulls CSV data into a variable in Mathematica: ```mathematica @@ -57,8 +160,8 @@ cmd = "/usr/local/bin/xlsx-cli ~/Downloads/pres.numbers" csvdata = ExternalEvaluate["Shell" -> "StandardOutput", cmd]; ``` -`ImportString`[^3] can interpret the CSV data as a `Dataset`[^4]. Typically the -first row of the CSV output is the header row. The `HeaderLines`[^5] option +`ImportString`[^11] can interpret the CSV data as a `Dataset`[^12]. Typically the +first row of the CSV output is the header row. The `HeaderLines`[^13] option controls how Mathematica parses the data: ```mathematica @@ -69,14 +172,12 @@ The following diagram depicts the workbook waltz: ```mermaid flowchart LR - subgraph SheetJS operations + subgraph `ExternalEvaluate` file[(workbook\nfile)] - csv(CSV) + csvstr(CSV\nString) end - csvstr(CSV\nString) data[(Dataset)] - file --> |`xlsx-cli`\nSheetJS Ops| csv - csv --> |ExternalEvaluate\nMathematica| csvstr + file --> |`xlsx-cli`\nSheetJS Ops| csvstr csvstr --> |ImportString\nMathematica| data ``` @@ -88,7 +189,7 @@ This demo was tested in macOS. The path names will differ in other platforms. ::: -1) Create the standalone `xlsx-cli` binary[^6]: +1) Create the standalone `xlsx-cli` binary[^14]: {`\ cd /tmp @@ -115,7 +216,12 @@ SheetJSImportFile[x_] := ImportString[Block[{Print}, ExternalEvaluate[ ]], "Dataset", "HeaderLines" -> 1] ``` -4) Download and save to Downloads folder. 
+4) Download and save to Downloads folder: + +```bash +cd ~/Downloads/ +curl -LO https://sheetjs.com/pres.numbers +``` 5) In the Mathematica notebook, run the new function. If the file was saved to the Downloads folder, the path will be `"~/Downloads/pres.numbers"` in macOS: @@ -128,7 +234,7 @@ The result should be displayed in a concise table. ### Reading from a URL -`FetchURL`[^7] downloads a file from a specified URL and returns a path to the +`FetchURL`[^15] downloads a file from a specified URL and returns a path to the file. This function will be wrapped in a new function called `SheetJSImportURL`. 6) In the same notebook, run the following: @@ -148,9 +254,17 @@ data = SheetJSImportURL["https://sheetjs.com/pres.numbers"] ``` [^1]: See [the `ExternalEvaluate` Node.js example](https://reference.wolfram.com/language/ref/ExternalEvaluate.html#:~:text=Evaluate%20a%20basic%20math%20function%20in%20JavaScript%20using%20Node.js%3A) in the Mathematica documentation. -[^2]: See [`ExternalEvaluate`](https://reference.wolfram.com/language/ref/ExternalEvaluate.html) in the Mathematica documentation. -[^3]: See [`ImportString`](https://reference.wolfram.com/language/ref/ImportString.html) in the Mathematica documentation. -[^4]: A [`Dataset`](https://reference.wolfram.com/language/ref/Dataset.html) will be created when using the [`"Dataset"` element in `ImportString`](https://reference.wolfram.com/language/ref/format/CSV.html) -[^5]: See [`HeaderLines`](https://reference.wolfram.com/language/ref/HeaderLines.html) in the Mathematica documentation. -[^6]: See ["Command-line Tools"](/docs/demos/desktop/cli) for more details. -[^7]: Mathematica 11 introduced new methods including [`URLRead`](https://reference.wolfram.com/language/ref/URLRead.html). \ No newline at end of file +[^2]: See [`RegisterExternalEvaluator`](https://reference.wolfram.com/language/ref/RegisterExternalEvaluator.html) in the Mathematica documentation. 
+[^3]: See [`ExternalEvaluate`](https://reference.wolfram.com/language/ref/ExternalEvaluate.html) in the Mathematica documentation. +[^4]: See [`ImportString`](https://reference.wolfram.com/language/ref/ImportString.html) in the Mathematica documentation. +[^5]: A [`Dataset`](https://reference.wolfram.com/language/ref/Dataset.html) will be created when using the [`"Dataset"` element in `ImportString`](https://reference.wolfram.com/language/ref/format/CSV.html) +[^6]: See [`readFile` in "Reading Files"](/docs/api/parse-options) +[^7]: See [`process.cwd()`](https://nodejs.org/api/process.html#processcwd) in the NodeJS documentation. +[^8]: The `Sheets` and `SheetNames` properties of workbook objects are described in ["Workbook Object"](/docs/csf/book) +[^9]: See [`sheet_to_csv` in "CSV and Text"](/docs/api/utilities/csv#delimiter-separated-output) +[^10]: See [`ExternalEvaluate`](https://reference.wolfram.com/language/ref/ExternalEvaluate.html) in the Mathematica documentation. +[^11]: See [`ImportString`](https://reference.wolfram.com/language/ref/ImportString.html) in the Mathematica documentation. +[^12]: A [`Dataset`](https://reference.wolfram.com/language/ref/Dataset.html) will be created when using the [`"Dataset"` element in `ImportString`](https://reference.wolfram.com/language/ref/format/CSV.html) +[^13]: See [`HeaderLines`](https://reference.wolfram.com/language/ref/HeaderLines.html) in the Mathematica documentation. +[^14]: See ["Command-line Tools"](/docs/demos/desktop/cli) for more details. +[^15]: Mathematica 11 introduced new methods including [`URLRead`](https://reference.wolfram.com/language/ref/URLRead.html). 
\ No newline at end of file diff --git a/docz/docs/03-demos/32-extensions/11-matlab.md b/docz/docs/03-demos/32-extensions/11-matlab.md new file mode 100644 index 0000000..e730739 --- /dev/null +++ b/docz/docs/03-demos/32-extensions/11-matlab.md @@ -0,0 +1,269 @@ +--- +title: Modern Spreadsheets in MATLAB +sidebar_label: MATLAB +description: Build complex data pipelines in MATLAB M-Files. Seamlessly create MATLAB tables with SheetJS. Leverage the MATLAB toolbox ecosystem to analyze data from Excel workbooks. +pagination_prev: demos/cloud/index +pagination_next: demos/bigdata/index +--- + +import current from '/version.js'; +import CodeBlock from '@theme/CodeBlock'; + +[MATLAB](https://www.mathworks.com/products/matlab.html) is a numeric computing +platform. It has a native `table` type with limited support for spreadsheets. + +[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing +data from spreadsheets. + +This demo uses SheetJS to pull data from a spreadsheet for further analysis +within MATLAB. We'll explore how to run an external tool to convert complex +spreadsheets into simple XLSX files for MATLAB. + +:::note + +This demo was last tested on 2023 September 12 in MATLAB R2023a. + +::: + +:::info pass + +MATLAB has limited support for processing spreadsheets through `readtable`[^1] +and `writetable`[^2]. At the time of writing, it lacked support for XLSB, +NUMBERS, and other common spreadsheet formats. + +SheetJS libraries help fill the gap by normalizing spreadsheets to a form that +MATLAB can understand. + +::: + +## Integration Details + +:::note pass + +MATLAB does not currently provide a way to parse a CSV string or a character +array representing file data. `readtable`, `writetable`, `csvread`, and +`csvwrite` work with the file system directly. `strread` and `textscan` are +designed specifically for reading numbers. 
+ +::: + +The current recommendation involves a dedicated command-line tool that leverages +SheetJS libraries to perform spreadsheet processing. + +The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be +loaded in NodeJS scripts and bundled in standalone command-line tools. + +### Command-Line Tools + +The ["Command-Line Tools" demo](/docs/demos/desktop/cli) creates `xlsx-cli`, a +command-line tool that reads a spreadsheet file and generates output. The +examples in the "NodeJS" section are able to generate XLSX spreadsheets using +the `--xlsx` command line flag: + +```bash +$ xlsx-cli --xlsx ./pres.numbers ## generates pres.numbers.xlsx +``` + +:::note pass + +The command-line tool supports a number of formats including XLSB (`--xlsb`). + +::: + +The tools pair the SheetJS `readFile`[^3] and `writeFile`[^4] methods to read +data from arbitrary spreadsheet files and convert to XLSX: + +```js +const XLSX = require("xlsx"); // load the SheetJS library +const wb = XLSX.readFile("input.xlsb"); // read input.xlsb +XLSX.writeFile(wb, "output.xlsx"); // export to output.xlsx +``` + +### MATLAB commands + +The MATLAB `system` command[^5] can run command-line tools in M-files. For +example, if the `xlsx-cli` tool is placed in the workspace folder and the +test file `pres.numbers` is in the Downloads folder, the following command +generates the XLSX file `pres.numbers.xlsx`: + +```matlab +% generate ~/Downloads/pres.numbers.xlsx from ~/Downloads/pres.numbers +system("./xlsx-cli --xlsx ~/Downloads/pres.numbers"); +``` + +:::note pass + +In an interactive session, the exclamation point operator `!`[^6] can be used: + +```matlab +% generate ~/Downloads/pres.numbers.xlsx from ~/Downloads/pres.numbers +!./xlsx-cli --xlsx ~/Downloads/pres.numbers +``` + +::: + +### Reading Files + +Starting from an arbitrary spreadsheet, `xlsx-cli` can generate an XLSX workbook. 
+Once the workbook is written, the XLSX file can be parsed with `readtable`: + +```matlab +% `filename` points to the file to be parsed +filename = "~/Downloads/pres.numbers"; +% generate filename+".xlsx" +system("./xlsx-cli --xlsx " + filename) +% read using `readtable` +tbl = readtable(filename + ".xlsx"); +``` + +The following diagram depicts the workbook waltz: + +```mermaid +flowchart LR + subgraph MATLAB `system` invocation + file[(workbook\nunknown type)] + xlsx(XLSX\nNormalized Data) + end + data[(table)] + file --> |`xlsx-cli`\nSheetJS| xlsx + xlsx --> |`readtable`\nMATLAB| data +``` + +### Write Files + +Starting from a MATLAB table, `writetable` can generate an XLSX workbook. Once +the workbook is written, `xlsx-cli` can translate to NUMBERS or other formats: + +```matlab +% tbl is the table +tbl = table({"Sheet";"JS"}, [72;62], 'VariableNames', ["Name", "Index"]) +% `filename` points to the file to be written +filename = "~/Downloads/sorted.xlsx"; +% write using `writetable` +writetable(tbl, filename); +% generate filename+".xlsb" +system("./xlsx-cli --xlsb " + filename); +``` + +The following diagram depicts the workbook waltz: + +```mermaid +flowchart LR + subgraph MATLAB `system` invocation + file[(XLSB\nworkbook)] + xlsx(XLSX\nNormalized Data) + end + data[(table)] + data --> |`writetable`\nMATLAB| xlsx + xlsx --> |`xlsx-cli`\nSheetJS| file +``` + +## Complete Demo + +:::info pass + +This demo was tested in macOS. The path names will differ in other platforms. + +::: + +This demo uses the [`pres.numbers` test file](https://sheetjs.com/pres.numbers). +There are 3 parts to the demo: + +A) "Import": SheetJS tooling will read the test file and generate a clean XLSX +file. MATLAB will read the file using `readtable`. + +B) "Process": Using `sortrows`, MATLAB will reverse the table order. + +C) "Export": The modified table will be exported to XLSX using `writetable`. +SheetJS tooling will convert the file to XLSB. 
+ +```mermaid +flowchart LR + ifile[(NUMBERS)] + ixlsx(XLSX) + ofile[(XLSB)] + oxlsx(XLSX) + data[(table)] + ifile --> |`xlsx-cli`\nSheetJS| ixlsx + ixlsx --> |`readtable`\nMATLAB| data + data -.-> |Data Processing| data + data --> |`writetable`\nMATLAB| oxlsx + oxlsx --> |`xlsx-cli`\nSheetJS| ofile + +``` + +1) Create the standalone `xlsx-cli` binary[^7]: + +{`\ +cd /tmp +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz exit-on-epipe commander@2 +curl -LO https://docs.sheetjs.com/cli/xlsx-cli.js +npx nexe -t 14.15.3 xlsx-cli.js`} + + +2) Move the generated `xlsx-cli` to the MATLAB workspace folder. On macOS, this +folder is typically `~/Documents/MATLAB/`: + +```bash +mkdir -p ~/Documents/MATLAB/ +mv xlsx-cli ~/Documents/MATLAB/ +``` + +3) Download and save to Downloads folder: + +```bash +cd ~/Downloads/ +curl -LO https://sheetjs.com/pres.numbers +``` + +4) Save the following to `SheetJSMATLAB.m` in the workspace folder: + +```matlab title="SheetJSMATLAB.m" +% Import data from NUMBERS file +system("./xlsx-cli --xlsx ~/Downloads/pres.numbers"); +tbl = readtable("~/Downloads/pres.numbers.xlsx"); +% Process data (reverse sort) +sorted = sortrows(tbl,"Index", "descend"); +% Export data to XLSB workbook +writetable(sorted,"~/Downloads/sorted.xlsx"); +system("./xlsx-cli --xlsb ~/Downloads/sorted.xlsx"); +``` + +5) In a MATLAB desktop session, run the `SheetJSMATLAB` command: + +```matlab +>> SheetJSMATLAB +``` + +It will create the file `sorted.xlsx.xlsb` in the `~/Downloads` folder. 
Open the +file and confirm that the table is sorted by Index in descending order: + +``` +Name Index +Joseph Biden 46 +Donald Trump 45 +Barack Obama 44 +GeorgeW Bush 43 +Bill Clinton 42 +``` + +:::tip pass + +If the `matlab` command is available on the system `PATH`, the "headless" +version of the command is: + +```bash +cd ~/Documents/MATLAB +matlab -batch SheetJSMATLAB +``` + +::: + +[^1]: See [`readtable`](https://www.mathworks.com/help/matlab/ref/readtable.html) in the MATLAB documentation. +[^2]: See [`writetable`](https://www.mathworks.com/help/matlab/ref/writetable.html) in the MATLAB documentation. +[^3]: See [`readFile` in "Reading Files"](/docs/api/parse-options) +[^4]: See [`writeFile` in "Writing Files"](/docs/api/write-options) +[^5]: See [`system`](https://www.mathworks.com/help/matlab/ref/system.html) in the MATLAB documentation. +[^6]: See ["MATLAB Operators and Special Characters"](https://www.mathworks.com/help/matlab/matlab_prog/matlab-operators-and-special-characters.html) in the MATLAB documentation. +[^7]: See ["Command-line Tools"](/docs/demos/desktop/cli) for more details. + diff --git a/docz/docs/07-csf/07-features/06-nf.md b/docz/docs/07-csf/07-features/06-nf.md index d1d6748..220d447 100644 --- a/docz/docs/07-csf/07-features/06-nf.md +++ b/docz/docs/07-csf/07-features/06-nf.md @@ -57,13 +57,45 @@ The letter R (R) marks features parsed but not written in the format. -This example generates a worksheet with common number formats. `sheet_to_html` -uses the number formats in generating the HTML table. The "Export" button -generates workbooks with number formatting. +Typically spreadsheets will include formatted text such as currencies (`$3.50`) +or large numbers with thousands separators (`7,262`) or percentages (`2.19%`). + +To simplify editing, the applications will store the underlying values and the +number formats separately. 
For example, `$3.50` will be represented as the value +`3.5` with a number format that mandates a `$` sigil and 2 decimal places. + +Number format metadata can be attached to each cell object in the `z` property: + +```js +/* set the format of cell B2 to "0.00%" */ +worksheet["B2"].z = "0.00%"; +``` + +When requested, the cell formatted text will be stored in the `w` property. + +## Live Demo + +This example generates a worksheet with common number formats. +The number formats are explicitly assigned: + +```js +/* assign number formats */ +ws["B2"].z = '"$"#,##0.00_);\\("$"#,##0.00\\)'; // Currency format +ws["B3"].z = '#,##0'; // Number with thousands separator +ws["B4"].z = "0.00%"; // Percentage with up to 2 decimal places +``` + +`sheet_to_html` uses the number formats and values to compute the formatted text +when generating the HTML table. + +The "Export" button will write a workbook with number formats. The file can be +opened in Excel or another spreadsheet editor. The values in column B will be +proper numbers with the assigned number formats. ```jsx live function SheetJSSimpleNF(props) { const [ws, setWS] = React.useState(); + const [__html, setHTML] = React.useState(""); const fmt = React.useRef(null); /* when the page is loaded, create worksheet and show table */ @@ -81,7 +113,10 @@ function SheetJSSimpleNF(props) { ws["B3"].z = '#,##0'; ws["B4"].z = "0.00%"; + /* save worksheet object for the export */ setWS(ws); + /* generate the HTML table */ + setHTML(XLSX.utils.sheet_to_html(ws)); }, []); const xport = (fmt) => { @@ -93,46 +128,14 @@ function SheetJSSimpleNF(props) { const fmts = ["xlsx", "xls", "csv", "xlsb", "html", "ods"]; return ( <> - - -
+ File format: + +
+
 ); } ``` -## Values and Formatting - -Typically spreadsheets will include formatted text such as currencies (`$3.50`) -or large numbers with thousands separators (`7,262`) or percentages (`2.19%`). - -To simplify editing, the applications will store the underlying values and the -number formats separately. For example, `$3.50` will be represented as the value -`3.5` with a number format that mandates a `$` sigil and 2 decimal places. - -CSV and other formats only support the formatted text. Applications reading CSV -files are expected to interpret the values as numbers or dates. - -### Dates and Times - -Many spreadsheet formats store dates and times using a number that represents -the number of seconds or days after some epoch. Dates are covered in more detail -[in the dedicated section](/docs/csf/features/dates). - -### Percentages - -Percentage formats automatically scale values by 100. Multiple percent symbols -repeat the effect. For example, a cell with value `2.19%` is typically stored as -a numeric cell with value `0.0219` and number format `0.00%` - -The following table uses the `en-US` locale (`.` as the decimal point symbol): - -| Number | Format | `en-US` Text | -|:---------|---------:|-------------:| -| `0.0219` | `0.00%` | `2.19%` | -| `2.19` | `0.00%` | `219%` | -| `0.0219` | `0.00%%` | `219%%` | -| `2.19` | `0.00%%` | `21900%%` | - ## SheetJS Representation Number formats and values are attached to cells. The following keys are used: @@ -152,8 +155,7 @@ instructs `XLSX.read` or `XLSX.readFile` to save the formats. ### Number Format Strings The `z` format string follows the Excel persistence rules as described in -ECMA-376 18.8.31 (Number Formats). For more info, see the Excel documentation -article `Create or delete a custom number format` +ECMA-376 18.8.31 (Number Formats)[^1]. The rules are slightly different from how Excel displays custom number formats. 
In particular, literal characters must be wrapped in double quotes or preceded @@ -195,6 +197,94 @@ function SheetJSExtractNF(props) { } ``` +## Values and Formatting + +### Dates and Times + +In XLS and other file formats that extended the Lotus 1-2-3 worksheet file +format, dates and times are stored as numeric codes. The application uses the +number format to determine whether the value should be interpreted as a date. + +:::note pass + +Interpretation of date codes is covered in ["Dates and Times"](/docs/csf/features/dates). + +::: + +The following repeatable tokens force a date interpretation: + +| Tokens | Description | +|:-----------------|:-------------------------------------------------------| +| `Y` | Year | +| `M` | Month or Minute (contextual) | +| `D` | Day | +| `H` | Hours (0-23 normally, but 1-12 if meridiem is present) | +| `S` | Seconds | +| `A/P` or `AM/PM` | Meridiem | +| `[h]` or `[hh]` | Absolute hours (duration) | +| `[m]` or `[mm]` | Absolute minutes (duration) | +| `[s]` or `[ss]` | Absolute seconds (duration) | +| `B1` or `B2` | Use Gregorian Calendar (`B1`) or Hijri Calendar (`B2`) | +| `E` | "Era Year" or standard year depending on locale | +| `G` | "Era" modifier or empty string depending on locale | + +If a format is detected to be a date, the decimal tokens `.0`, `.00` and `.000` +represent the sub-second portion of the time. + +### Percentages + +Percentage formats automatically scale values by 100. Multiple percent symbols +repeat the effect. For example, a cell with value `2.19%` is typically stored as +a numeric cell with value `0.0219` and number format `0.00%`. + +The following table uses the `en-US` locale (`.` as the decimal point symbol). +Formatted text is rendered using the embedded SheetJS `SSF` formatting library. 
+ +```jsx live +function SheetJSPCT() { + const data = [ + { n: 0.0219, z: "0.00%"}, + { n: 2.19, z: "0.00%"}, + { n: 0.0219, z: "0.00%%"}, + { n: 2.19, z: "0.00%%"}, + ]; + return ( + {data.map(r => ( + + + + ))} +
NumberFormatText
{r.n}{r.z}{XLSX.SSF.format(r.z, r.n)}
); +} +``` + +### Fractions + +Some applications support displaying numbers in fractional form. + +Fractions with a fixed denominator are calculated by scaling and rounding the +fractional part of the number. + +Fractions with a variable denominator are typically specified by the number of +digits in the denominator (for example, "Up to one digit"). + +:::info pass + +The optimal solution from a mathematical perspective is the "Mediant" method. +This algorithm can be very slow in the worst case, so spreadsheet applications +tend to use a continued fraction approach. + +The common algorithm produces unexpected results for "Up to one digit": + +| Value | Mediant | Excel 2019 | +|:------|--------:|-----------:| +| `0.3` | `2/7` | `2/7` | +| `1.3` | `1 2/7` | `1 1/3` | +| `2.3` | `2 2/7` | `2 2/7` | +| `3.3` | `3 2/7` | `3 2/7` | + +::: + ## Miscellany The default formats are listed in ECMA-376 18.8.30: @@ -270,4 +360,6 @@ desired format and testing with [the Number Format Strings demo](#number-format- ### HTML Override -[**This feature is discussed in the HTML utilities section**](/docs/api/utilities/html#value-override) \ No newline at end of file +[**This feature is discussed in the HTML utilities section**](/docs/api/utilities/html#value-override) + +[^1]: On 2023 September 14, [the "Review guidelines for customizing a number format" page](https://support.microsoft.com/en-us/office/review-guidelines-for-customizing-a-number-format-c0a1d1fa-d3f4-4018-96b7-9c9354dd99f5) in the Excel documentation covered custom number format minutiae. 
\ No newline at end of file diff --git a/docz/docusaurus.config.js b/docz/docusaurus.config.js index 54da310..dd54aa6 100644 --- a/docz/docusaurus.config.js +++ b/docz/docusaurus.config.js @@ -141,7 +141,7 @@ const config = { prism: { theme: lightCodeTheme, darkTheme: darkCodeTheme, - additionalLanguages: [ "visual-basic", "swift", "java", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram" ], + additionalLanguages: [ "visual-basic", "swift", "java", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram", "matlab" ], }, liveCodeBlock: { playgroundPosition: 'top'