From d54a1e4ee07e6259a9fa2722569bd73c0f2c1510 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Fri, 24 Feb 2023 02:46:48 -0500 Subject: [PATCH] alasql --- docz/docs/03-demos/05-grid.md | 1 + docz/docs/03-demos/06-data/01-websql.md | 247 ++++++ docz/docs/03-demos/06-data/02-storageapi.md | 73 ++ docz/docs/03-demos/06-data/03-indexeddb.md | 31 + docz/docs/03-demos/06-data/09-alasql.md | 161 ++++ docz/docs/03-demos/06-data/10-sql.md | 307 +++++++ docz/docs/03-demos/06-data/25-mongodb.md | 127 +++ docz/docs/03-demos/06-data/26-redis.md | 211 +++++ docz/docs/03-demos/06-data/29-pouchdb.md | 105 +++ docz/docs/03-demos/06-data/_category_.json | 5 + docz/docs/03-demos/06-data/index.md | 222 +++++ docz/docs/03-demos/06-database.md | 859 -------------------- docz/docs/03-demos/07-worker.md | 1 + docz/docs/03-demos/11-static/10-astro.md | 8 + docz/docs/03-demos/41-nosql.md | 313 ------- docz/docs/03-demos/index.md | 10 +- docz/docs/06-solutions/01-input.md | 2 +- docz/docs/06-solutions/05-output.md | 2 +- docz/docusaurus.config.js | 3 + 19 files changed, 1509 insertions(+), 1179 deletions(-) create mode 100644 docz/docs/03-demos/06-data/01-websql.md create mode 100644 docz/docs/03-demos/06-data/02-storageapi.md create mode 100644 docz/docs/03-demos/06-data/03-indexeddb.md create mode 100644 docz/docs/03-demos/06-data/09-alasql.md create mode 100644 docz/docs/03-demos/06-data/10-sql.md create mode 100644 docz/docs/03-demos/06-data/25-mongodb.md create mode 100644 docz/docs/03-demos/06-data/26-redis.md create mode 100644 docz/docs/03-demos/06-data/29-pouchdb.md create mode 100644 docz/docs/03-demos/06-data/_category_.json create mode 100644 docz/docs/03-demos/06-data/index.md delete mode 100644 docz/docs/03-demos/06-database.md delete mode 100644 docz/docs/03-demos/41-nosql.md diff --git a/docz/docs/03-demos/05-grid.md b/docz/docs/03-demos/05-grid.md index cec0615..1a44f4e 100644 --- a/docz/docs/03-demos/05-grid.md +++ b/docz/docs/03-demos/05-grid.md @@ -1,6 +1,7 @@ --- title: Data 
Grids and Tables pagination_prev: demos/cloud/index +pagination_next: demos/data/index --- Various JavaScript UI components provide a more interactive editing experience. diff --git a/docz/docs/03-demos/06-data/01-websql.md b/docz/docs/03-demos/06-data/01-websql.md new file mode 100644 index 0000000..250e2eb --- /dev/null +++ b/docz/docs/03-demos/06-data/01-websql.md @@ -0,0 +1,247 @@ +--- +title: WebSQL and SQLite +pagination_prev: demos/grid +pagination_next: demos/worker +sidebar_custom_props: + type: web + sql: true +--- + +WebSQL is a popular SQL-based in-browser database available on Chrome. In +practice, it is powered by SQLite, and most simple SQLite-compatible queries +work as-is in WebSQL. + +The public demo generates a database from workbook. + +## WebSQL Details + +Importing data from spreadsheets is straightforward using the `generate_sql` +helper function from ["Generating Tables"](/docs/demos/data/sql#generating-tables): + +```js +const db = openDatabase('sheetql', '1.0', 'SheetJS WebSQL Test', 2097152); +const stmts = generate_sql(ws, wsname); +// NOTE: tx.executeSql and db.transaction use callbacks. This wraps in Promises +for(var i = 0; i < stmts.length; ++i) await new Promise((res, rej) => { + db.transaction(tx => + tx.executeSql(stmts[i], [], + (tx, data) => res(data), // if the query is successful, return the data + (tx, err) => rej(err) // if the query fails, reject with the error + )); +}); +``` + +The result of a SQL SELECT statement is a `SQLResultSet`. The `rows` property +is a `SQLResultSetRowList`. It is an "array-like" structure that has `length` +and properties like `0`, `1`, etc. However, this is not a real Array object. 
+A real Array can be created using `Array.from`: + +```js +const db = openDatabase('sheetql', '1.0', 'SheetJS WebSQL Test', 2097152); +db.readTransaction(tx => + tx.executeSql("SELECT * FROM DatabaseTable", [], (tx, data) => { + // data.rows is "array-like", so `Array.from` can make it a real array + const aoo = Array.from(data.rows); + const ws = XLSX.utils.json_to_sheet(aoo); + // ... it is recommended to perform an export here OR wrap in a Promise + }) +); +``` + +### Live Demo + +The following demo generates a database with 5 fixed SQL statements. Queries +can be changed in the Live Editor. The WebSQL database can be inspected in the +"WebSQL" section of the "Application" Tab of Developer Tools: + +![WebSQL view in Developer Tools](pathname:///files/websql.png) + +```jsx live +function SheetQL() { + const [out, setOut] = React.useState(""); + const queries = [ + 'DROP TABLE IF EXISTS Presidents', + 'CREATE TABLE Presidents (Name TEXT, Idx REAL)', + 'INSERT INTO Presidents (Name, Idx) VALUES ("Barack Obama", 44)', + 'INSERT INTO Presidents (Name, Idx) VALUES ("Donald Trump", 45)', + 'INSERT INTO Presidents (Name, Idx) VALUES ("Joseph Biden", 46)' + ]; + const xport = React.useCallback(async() => { + // prep database + const db = openDatabase('sheetql', '1.0', 'SheetJS WebSQL Test', 2097152); + + for(var i = 0; i < queries.length; ++i) await new Promise((res, rej) => { + db.transaction((tx) => { + tx.executeSql(queries[i], [], (tx, data) => res(data), (tx, err) => rej(err)); + }); + }); + + // pull data and generate rows + db.readTransaction(tx => { + tx.executeSql("SELECT * FROM Presidents", [], (tx, data) => { + const aoo = Array.from(data.rows); + setOut("QUERY RESULT:\n" + aoo.map(r => JSON.stringify(r)).join("\n") + "\n") + const ws = XLSX.utils.json_to_sheet(aoo); + const wb = XLSX.utils.book_new(); + XLSX.utils.book_append_sheet(wb, ws, "Presidents"); + XLSX.writeFile(wb, "SheetQL.xlsx"); + }); + }); + }); + return (
{out}
); +} +``` + +## Server-Side SQLite + +Most platforms offer a simple way to query SQLite database files. + +The following example shows how to query for each table in an SQLite database, +query for the data for each table, add each non-empty table to a workbook, and +export as XLSX. + +[The Northwind database is available in SQLite form](https://raw.githubusercontent.com/jpwhite3/northwind-SQLite3/master/dist/northwind.db). + +### NodeJS + +The **`better-sqlite3`** module provides a very simple API for working with +SQLite databases. `Statement#all` runs a prepared statement and returns an array +of JS objects. + +1) Install the dependencies: + +```bash +npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz better-sqlite3 +``` + +2) Save the following to `node.mjs`: + +```js title="node.mjs" +/* Load SQLite3 connector library */ +import Database from "better-sqlite3"; + +/* Load SheetJS library */ +import * as XLSX from 'xlsx'; +import * as fs from 'fs'; +XLSX.set_fs(fs); + +/* Initialize database */ +var db = Database("northwind.db"); + +/* Create new workbook */ +var wb = XLSX.utils.book_new(); + +/* Get list of table names */ +var sql = db.prepare("SELECT name FROM sqlite_master WHERE type='table'"); +var result = sql.all(); + +/* Loop across each name */ +result.forEach(function(row) { + /* Get first 100K rows */ + var aoo = db.prepare("SELECT * FROM '" + row.name + "' LIMIT 100000").all(); + if(aoo.length > 0) { + /* Create Worksheet from the row objects */ + var ws = XLSX.utils.json_to_sheet(aoo, {dense: true}); + /* Add to Workbook */ + XLSX.utils.book_append_sheet(wb, ws, row.name); + } +}); + +/* Write File */ +XLSX.writeFile(wb, "node.xlsx"); +``` + +3) Run `node node.mjs` and open `node.xlsx` + +### Bun + +Bun ships with a built-in high-performance module `bun:sqlite`. 
+ +1) Install the dependencies: + +```bash +npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz +``` + +2) Save the following to `bun.mjs`: + +```js title="bun.mjs" +/* Load SQLite3 connector library */ +import { Database } from "bun:sqlite"; + +/* Load SheetJS library */ +import * as XLSX from 'xlsx'; +import * as fs from 'fs'; +XLSX.set_fs(fs); + +/* Initialize database */ +var db = Database.open("northwind.db"); + +/* Create new workbook */ +var wb = XLSX.utils.book_new(); + +/* Get list of table names */ +var sql = db.prepare("SELECT name FROM sqlite_master WHERE type='table'"); +var result = sql.all(); + +/* Loop across each name */ +result.forEach(function(row) { + /* Get first 100K rows */ + var aoo = db.prepare("SELECT * FROM '" + row.name + "' LIMIT 100000").all(); + if(aoo.length > 0) { + /* Create Worksheet from the row objects */ + var ws = XLSX.utils.json_to_sheet(aoo, {dense: true}); + /* Add to Workbook */ + XLSX.utils.book_append_sheet(wb, ws, row.name); + } +}); + +/* Write File */ +XLSX.writeFile(wb, "bun.xlsx"); +``` + +3) Run `bun bun.mjs` and open `bun.xlsx` + +### Deno + +Deno `sqlite` library returns raw arrays of arrays. 
+ +1) Save the following to `deno.ts`: + +```ts title="deno.ts" +/* Load SQLite3 connector library */ +import { DB } from "https://deno.land/x/sqlite/mod.ts"; + +/* Load SheetJS library */ +// @deno-types="https://cdn.sheetjs.com/xlsx-latest/package/types/index.d.ts" +import * as XLSX from 'https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs'; + +/* Initialize database */ +var db = new DB("northwind.db"); + +/* Create new workbook */ +var wb = XLSX.utils.book_new(); + +/* Get list of table names */ +var sql = db.prepareQuery("SELECT name FROM sqlite_master WHERE type='table'"); +var result = sql.all(); +/* Loop across each name */ +result.forEach(function(row) { + /* Get first 100K rows */ + var query = db.prepareQuery("SELECT * FROM '" + row[0] + "' LIMIT 100000") + var aoa = query.all(); + if(aoa.length > 0) { + /* Create array of arrays */ + var data = [query.columns().map(x => x.name)].concat(aoa); + /* Create Worksheet from the aoa */ + var ws = XLSX.utils.aoa_to_sheet(data, {dense: true}); + /* Add to Workbook */ + XLSX.utils.book_append_sheet(wb, ws, row[0]); + } +}); + +/* Write File */ +XLSX.writeFile(wb, "deno.xlsx"); +``` + +2) Run `deno run --allow-read --allow-write deno.ts` and open `deno.xlsx` diff --git a/docz/docs/03-demos/06-data/02-storageapi.md b/docz/docs/03-demos/06-data/02-storageapi.md new file mode 100644 index 0000000..cb691d5 --- /dev/null +++ b/docz/docs/03-demos/06-data/02-storageapi.md @@ -0,0 +1,73 @@ +--- +title: Local Storage API +pagination_prev: demos/grid +pagination_next: demos/worker +sidebar_custom_props: + type: web +--- + +The Storage API, encompassing `localStorage` and `sessionStorage`, describes +simple key-value stores that only support string values and keys.
+ +Arrays of objects can be stored using `JSON.stringify` using row index as key: + +```js +const aoo = XLSX.utils.sheet_to_json(ws); +for(var i = 0; i < aoo.length; ++i) localStorage.setItem(i, JSON.stringify(aoo[i])); +``` + +Recovering the array of objects is possible by using `JSON.parse`: + +```js +const aoo = []; +for(var i = 0; i < localStorage.length; ++i) aoo.push(JSON.parse(localStorage.getItem(i))); +const ws = XLSX.utils.json_to_sheet(aoo); +``` + +This example will fetch , fill `localStorage` with +rows, then generate a worksheet from the rows and write to a new file. + +:::caution + +This example is for illustration purposes. If array of objects is available, it +is strongly recommended to convert that array to a worksheet directly. + +::: + +```jsx live +function SheetJStorage() { + const [url, setUrl] = React.useState("https://sheetjs.com/data/cd.xls"); + const set_url = React.useCallback((evt) => setUrl(evt.target.value)); + const [out, setOut] = React.useState(""); + const xport = React.useCallback(async() => { + // get first worksheet data as array of objects + const ab = await (await fetch(url)).arrayBuffer(); + const wb = XLSX.read(ab), wsname = wb.SheetNames[0]; + const aoo = XLSX.utils.sheet_to_json(wb.Sheets[wsname]); + + // reset and populate localStorage + localStorage.clear(); + for(var i = 0; i < aoo.length; ++i) localStorage.setItem(i, JSON.stringify(aoo[i])); + + // create new array of objects from localStorage + const new_aoo = []; + for(var i = 0; i < localStorage.length; ++i) { + const row = JSON.parse(localStorage.getItem(i)); + new_aoo.push(row); + } + + setOut(`Number of rows in LocalStorage: ${localStorage.length}`); + + // create and export workbook + const new_ws = XLSX.utils.json_to_sheet(new_aoo); + const new_wb = XLSX.utils.book_new(); + XLSX.utils.book_append_sheet(new_wb, new_ws, "Sheet1"); + XLSX.writeFile(new_wb, "SheetJStorage.xlsx"); + }); + + return ( <> {out && (<>{url}
{out}
)} + URL: +
+ ); +} +``` diff --git a/docz/docs/03-demos/06-data/03-indexeddb.md b/docz/docs/03-demos/06-data/03-indexeddb.md new file mode 100644 index 0000000..35f7569 --- /dev/null +++ b/docz/docs/03-demos/06-data/03-indexeddb.md @@ -0,0 +1,31 @@ +--- +title: IndexedDB API +pagination_prev: demos/grid +pagination_next: demos/worker +sidebar_custom_props: + type: web +--- + +:::warning + +IndexedDB is a very low-level API. It is strongly recommended to use a wrapper +library or [WebSQL](/docs/demos/data/websql) in production applications. + +::: + +`localForage` is an IndexedDB wrapper that presents an async Storage interface. + +Arrays of objects can be stored using `JSON.stringify` using row index as key: + +```js +const aoo = XLSX.utils.sheet_to_json(ws); +for(var i = 0; i < aoo.length; ++i) await localForage.setItem(i, JSON.stringify(aoo[i])); +``` + +Recovering the array of objects is possible by using `JSON.parse`: + +```js +const aoo = []; +for(var i = 0; i < await localForage.length(); ++i) aoo.push(JSON.parse(await localForage.getItem(i))); +const ws = XLSX.utils.json_to_sheet(aoo); +``` diff --git a/docz/docs/03-demos/06-data/09-alasql.md b/docz/docs/03-demos/06-data/09-alasql.md new file mode 100644 index 0000000..b16fa21 --- /dev/null +++ b/docz/docs/03-demos/06-data/09-alasql.md @@ -0,0 +1,161 @@ +--- +title: AlaSQL +pagination_prev: demos/grid +pagination_next: demos/worker +sidebar_custom_props: + sql: true +--- + +import current from '/version.js'; + +AlaSQL is a pure JavaScript in-memory SQL database. It has built-in support for +SheetJS through the `XLSX` target operator. + +This demo covers basic concepts pertaining to data import and export. The +official documentation includes advanced examples and deployment tips as well as +strategies for general data processing in AlaSQL expressions. + +## NodeJS Usage + +:::caution + +`alasql` uses an older version of the SheetJS library. It can be overridden through a +`package.json` override in the latest versions of NodeJS: + +
{`\
+{
+  "overrides": {
+    "xlsx": "https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz"
+  }
+}`}
+
+ +::: + +#### Reading Files + +By default, the `XLSX` "from" target automatically adds a `.xlsx` extension. To +read files with an arbitrary filename, the `autoExt: false` option should be +passed as the second argument: + +```sql +SELECT `Name`, `Index` FROM XLSX( + "pres.numbers" --<< filename is "pres.numbers" +// highlight-start + , { --<< options are supplied as the second argument to XLSX operator + autoExt: false --<< do not automatically add ".xlsx" extension! + } +// highlight-end +) WHERE `Index` < 45 +``` + +By default the workbook is parsed and `sheet_to_json` is used to pull data: + +```js +const { promise: alasql } = require("alasql"); + +(async() => { + const aoo = await alasql(`SELECT * from XLSX("pres.xlsb", {autoExt: false})`); + console.log(aoo); // [ { Name: "Bill Clinton", Index: 42 }, ...] +})(); +``` + +#### Writing Files + +The `XLSX` "into" target calls `XLSX.writeFile` under the hood: + +```js +const { promise: alasql } = require("alasql"); + +(async() => { + const data = [ + { Name: "Bill Clinton", Index: 42 }, + { Name: "Someone Else", Index: 47 } + ]; + await alasql(`SELECT * INTO XLSX("PresMod5.xlsb") FROM ?`, [data]); + /* PresMod5.xlsb will be created */ +})(); +``` + +### NodeJS Example + +:::note + +This demo was tested on 2023 February 23 against AlaSQL 3.1.0 + +::: + +1) Create an empty folder for the project: + +```bash +mkdir alasql +cd alasql +``` + +2) In the folder, create a stub `package.json` with the `xlsx` override: + +```json title="package.json" +{ + "overrides": { + "xlsx": "https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz" + } +} +``` + +3) Install SheetJS and AlaSQL: + +```bash +npm i --save alasql@3.1.0 https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz +``` + +4) Download the test file : + +```bash +curl -LO https://sheetjs.com/pres.numbers +``` + +5) Save the following test script to `SheetJSAlaSQL.js`: + +```js title="SheetJSAlaSQL.js" +const { promise: alasql } = require("alasql"); + +(async() => { + /* 
read data from spreadsheet to JS */ + const data = await alasql(` + SELECT \`Name\`, \`Index\` + FROM XLSX("pres.numbers", {autoExt:false}) + WHERE \`Index\` < 45 + `); + console.log(data); + + /* write data from JS to spreadsheet */ + data.push({Name: "Someone Else", Index: 47}); + await alasql(`SELECT * INTO XLSX("SheetJSAlaSQL1.xlsx") FROM ?`, [data]); +})(); +``` + +6) Run the test script + +```bash +node SheetJSAlaSQL.js +``` + +The output should display: + +``` +[ + { Name: 'Bill Clinton', Index: 42 }, + { Name: 'GeorgeW Bush', Index: 43 }, + { Name: 'Barack Obama', Index: 44 } +] +``` + +The script should generate `SheetJSAlaSQL1.xlsx` with the additional row: + +```csv +Name,Index +Bill Clinton,42 +GeorgeW Bush,43 +Barack Obama,44 +Someone Else,47 +``` diff --git a/docz/docs/03-demos/06-data/10-sql.md b/docz/docs/03-demos/06-data/10-sql.md new file mode 100644 index 0000000..94e1a46 --- /dev/null +++ b/docz/docs/03-demos/06-data/10-sql.md @@ -0,0 +1,307 @@ +--- +title: SQL Connectors +pagination_prev: demos/grid +pagination_next: demos/worker +sidebar_custom_props: + sql: true +--- + +import Tabs from '@theme/Tabs'; +import TabItem from '@theme/TabItem'; + +### Generating Tables + +This example will fetch , scan the columns of the +first worksheet to determine data types, and generate 6 PostgreSQL statements. + +
Explanation (click to show) + +The relevant `generate_sql` function takes a worksheet name and a table name: + +```js +// define mapping between determined types and PostgreSQL types +const PG = { "n": "float8", "s": "text", "b": "boolean" }; + +function generate_sql(ws, wsname) { + + // generate an array of objects from the data + const aoo = XLSX.utils.sheet_to_json(ws); + + // types will map column headers to types, while hdr holds headers in order + const types = {}, hdr = []; + + // loop across each row object + aoo.forEach(row => + // Object.entries returns a row of [key, value] pairs. Loop across those + Object.entries(row).forEach(([k,v]) => { + + // If this is first time seeing key, mark unknown and append header array + if(!types[k]) { types[k] = "?"; hdr.push(k); } + + // skip null and undefined + if(v == null) return; + + // check and resolve type + switch(typeof v) { + case "string": // strings are the broadest type + types[k] = "s"; break; + case "number": // if column is not string, number is the broadest type + if(types[k] != "s") types[k] = "n"; break; + case "boolean": // only mark boolean if column is unknown or boolean + if("?b".includes(types[k])) types[k] = "b"; break; + default: types[k] = "s"; break; // default to string type + } + }) + ); + + // The final array consists of the CREATE TABLE query and a series of INSERTs + return [ + // generate CREATE TABLE query and return batch + `CREATE TABLE \`${wsname}\` (${hdr.map(h => + // column name must be wrapped in backticks + `\`${h}\` ${PG[types[h]]}` + ).join(", ")});` + ].concat(aoo.map(row => { // generate INSERT query for each row + // entries will be an array of [key, value] pairs for the data in the row + const entries = Object.entries(row); + // fields will hold the column names and values will hold the values + const fields = [], values = []; + // check each key/value pair in the row + entries.forEach(([k,v]) => { + // skip null / undefined + if(v == null) return; + // column name must 
be wrapped in backticks + fields.push(`\`${k}\``); + // when the field type is numeric, `true` -> 1 and `false` -> 0 + if(types[k] == "n") values.push(typeof v == "boolean" ? (v ? 1 : 0) : v); + // otherwise, + else values.push(`'${v.toString().replaceAll("'", "''")}'`); + }) + if(fields.length) return `INSERT INTO \`${wsname}\` (${fields.join(", ")}) VALUES (${values.join(", ")})`; + })).filter(x => x); // filter out skipped rows +} +``` + +
+ +```jsx live +function SheetJSQLWriter() { + // define mapping between determined types and PostgreSQL types + const PG = { "n": "float8", "s": "text", "b": "boolean" }; + function generate_sql(ws, wsname) { + const aoo = XLSX.utils.sheet_to_json(ws); + const types = {}, hdr = []; + // loop across each key in each column + aoo.forEach(row => Object.entries(row).forEach(([k,v]) => { + // set up type if header hasn't been seen + if(!types[k]) { types[k] = "?"; hdr.push(k); } + // check and resolve type + switch(typeof v) { + case "string": types[k] = "s"; break; + case "number": if(types[k] != "s") types[k] = "n"; break; + case "boolean": if("?b".includes(types[k])) types[k] = "b"; break; + default: types[k] = "s"; break; + } + })); + return [ + // generate CREATE TABLE query and return batch + `CREATE TABLE \`${wsname}\` (${hdr.map(h => `\`${h}\` ${PG[types[h]]}`).join(", ")});` + ].concat(aoo.map(row => { + const entries = Object.entries(row); + const fields = [], values = []; + entries.forEach(([k,v]) => { + if(v == null) return; + fields.push(`\`${k}\``); + if(types[k] == "n") values.push(typeof v == "boolean" ? (v ? 1 : 0) : v); + else values.push(`'${v.toString().replaceAll("'", "''")}'`); + }) + if(fields.length) return `INSERT INTO \`${wsname}\` (${fields.join(", ")}) VALUES (${values.join(", ")})`; + })).filter(x => x).slice(0, 6); + } + const [url, setUrl] = React.useState("https://sheetjs.com/data/cd.xls"); + const set_url = React.useCallback((evt) => setUrl(evt.target.value)); + const [out, setOut] = React.useState(""); + const xport = React.useCallback(async() => { + const ab = await (await fetch(url)).arrayBuffer(); + const wb = XLSX.read(ab), wsname = wb.SheetNames[0]; + setOut(generate_sql(wb.Sheets[wsname], wsname).join("\n")); + }); + + return ( <> {out && (<>{url}
{out}
)} + URL: +
+ ); +} +``` + +## Databases + +### Query Builders + +Query builders are designed to simplify query generation and normalize field +types and other database minutiae. + +**Knex** + +The result of a `SELECT` statement is an array of objects: + +```js +const aoo = await connection.select("*").from("DataTable"); +const worksheet = XLSX.utils.json_to_sheet(aoo); +``` + +Knex wraps primitive types when creating a table. `generate_sql` takes a `knex` +connection object and uses the API: + +
Generating a Table (click to show) + +```js +// define mapping between determined types and Knex types +const PG = { "n": "float", "s": "text", "b": "boolean" }; + +async function generate_sql(knex, ws, wsname) { + + // generate an array of objects from the data + const aoo = XLSX.utils.sheet_to_json(ws); + + // types will map column headers to types, while hdr holds headers in order + const types = {}, hdr = []; + + // loop across each row object + aoo.forEach(row => + // Object.entries returns a row of [key, value] pairs. Loop across those + Object.entries(row).forEach(([k,v]) => { + + // If this is first time seeing key, mark unknown and append header array + if(!types[k]) { types[k] = "?"; hdr.push(k); } + + // skip null and undefined + if(v == null) return; + + // check and resolve type + switch(typeof v) { + case "string": // strings are the broadest type + types[k] = "s"; break; + case "number": // if column is not string, number is the broadest type + if(types[k] != "s") types[k] = "n"; break; + case "boolean": // only mark boolean if column is unknown or boolean + if("?b".includes(types[k])) types[k] = "b"; break; + default: types[k] = "s"; break; // default to string type + } + }) + ); + + await knex.schema.dropTableIfExists(wsname); + await knex.schema.createTable(wsname, (table) => { hdr.forEach(h => { table[PG[types[h]] || "text"](h); }); }); + for(let i = 0; i < aoo.length; ++i) { + if(!aoo[i] || !Object.keys(aoo[i]).length) continue; + try { await knex.insert(aoo[i]).into(wsname); } catch(e) {} + } + return knex; +} +``` + +
+ + +### Other SQL Databases + +The `generate_sql` function from ["Generating Tables"](#generating-tables) +can be adapted to generate SQL statements for a variety of databases, including: + +**PostgreSQL** + +The `pg` connector library was tested against the `generate_sql` output as-is. + +The `rows` property of a query result is an array of objects that plays nice +with `json_to_sheet`: + +```js +const aoa = (await connection.query(`SELECT * FROM DataTable`)).rows; +const worksheet = XLSX.utils.json_to_sheet(aoa); +``` + +**MySQL / MariaDB** + +The `mysql2` connector library was tested. The differences are shown below, +primarily stemming from the different quoting requirements and field types. + +
Differences (click to show) + +```js +// highlight-start +// define mapping between determined types and MySQL types +const PG = { "n": "REAL", "s": "TEXT", "b": "TINYINT" }; +// highlight-end + +function generate_sql(ws, wsname) { + + // generate an array of objects from the data + const aoo = XLSX.utils.sheet_to_json(ws); + + // types will map column headers to types, while hdr holds headers in order + const types = {}, hdr = []; + + // loop across each row object + aoo.forEach(row => + // Object.entries returns a row of [key, value] pairs. Loop across those + Object.entries(row).forEach(([k,v]) => { + + // If this is first time seeing key, mark unknown and append header array + if(!types[k]) { types[k] = "?"; hdr.push(k); } + + // skip null and undefined + if(v == null) return; + + // check and resolve type + switch(typeof v) { + case "string": // strings are the broadest type + types[k] = "s"; break; + case "number": // if column is not string, number is the broadest type + if(types[k] != "s") types[k] = "n"; break; + case "boolean": // only mark boolean if column is unknown or boolean + if("?b".includes(types[k])) types[k] = "b"; break; + default: types[k] = "s"; break; // default to string type + } + }) + ); + + // The final array consists of the CREATE TABLE query and a series of INSERTs + return [ + // generate CREATE TABLE query and return batch + // highlight-next-line + `CREATE TABLE ${wsname} (${hdr.map(h => + // highlight-next-line + `${h} ${PG[types[h]]}` + ).join(", ")});` + ].concat(aoo.map(row => { // generate INSERT query for each row + // entries will be an array of [key, value] pairs for the data in the row + const entries = Object.entries(row); + // fields will hold the column names and values will hold the values + const fields = [], values = []; + // check each key/value pair in the row + entries.forEach(([k,v]) => { + // skip null / undefined + if(v == null) return; + // highlight-next-line + fields.push(`${k}`); + // when the field type is 
numeric, `true` -> 1 and `false` -> 0 + if(types[k] == "n") values.push(typeof v == "boolean" ? (v ? 1 : 0) : v); + // otherwise, + // highlight-next-line + else values.push(`"${v.toString().replaceAll('"', '""')}"`); + }) + if(fields.length) return `INSERT INTO \`${wsname}\` (${fields.join(", ")}) VALUES (${values.join(", ")})`; + })).filter(x => x); // filter out skipped rows +} +``` + +
+ +The first property of a query result is an array of objects that plays nice +with `json_to_sheet`: + +```js +const aoa = (await connection.query(`SELECT * FROM DataTable`))[0]; +const worksheet = XLSX.utils.json_to_sheet(aoa); +``` diff --git a/docz/docs/03-demos/06-data/25-mongodb.md b/docz/docs/03-demos/06-data/25-mongodb.md new file mode 100644 index 0000000..e3b2450 --- /dev/null +++ b/docz/docs/03-demos/06-data/25-mongodb.md @@ -0,0 +1,127 @@ +--- +title: MongoDB +pagination_prev: demos/grid +pagination_next: demos/worker +sidebar_custom_props: + type: document +--- + +MongoDB is a popular document-oriented database engine. + +It is straightforward to treat collections as worksheets. Each object maps to +a row in the table. + +## Integration Details + +The official NodeJS connector is `mongodb`. + +#### Importing Data + +Data stored in an array of objects can be added to MongoDB Collections using +`Collection#insertMany`. `sheet_to_json` can generate data from worksheets: + +```js +/* import data from a worksheet to a collection */ +const aoo = XLSX.utils.sheet_to_json(ws); +await collection.insertMany(aoo, {ordered: true}); +``` +#### Exporting Data + +`Collection#find` can pull an array of objects from a Mongo Collection. + +Normally the method adds a `_id` field to each object. The recommended way to +remove the field is to use a `projection` to suppress the ID: + +```js +/* generate an array of objects from a collection */ +const aoo = await collection.find({}, {projection:{_id:0}}).toArray(); +``` + +Worksheets can be generated from the result using `json_to_sheet`: + +```js +/* generate a worksheet from a collection */ +const aoo = await collection.find({}, {projection:{_id:0}}).toArray(); +const ws = XLSX.utils.json_to_sheet(aoo); +``` + +## Complete Example + +:::note + +This demo was last tested on 2023 February 23 with MongoDB CE 6.0.4, MongoDB +connector module 5.1.0 and NodeJS 18.14.2. + +::: + +0) Install MongoDB 6.0 Community Edition.
The macOS steps required `brew`: + +```bash +brew tap mongodb/brew +brew update +brew install mongodb-community@6.0 +``` + +1) Start a MongoDB server on `localhost` (follow official instructions). To run +in the foreground on Intel MacOS: + +```bash +/usr/local/opt/mongodb-community/bin/mongod --config /usr/local/etc/mongod.conf +``` + +2) Create base project and install the dependencies: + +```bash +mkdir sheetjs-mongo +cd sheetjs-mongo +npm init -y +npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz mongodb@5.1.0 +``` + +3) Save the following to `SheetJSMongoCRUD.mjs` (the key step is highlighted): + +```js title="SheetJSMongoCRUD.mjs" +import { writeFile, set_fs, utils } from 'xlsx'; +import * as fs from 'fs'; set_fs(fs); +import { MongoClient } from 'mongodb'; + +const url = 'mongodb://localhost:27017/sheetjs'; +const db_name = 'sheetjs'; + +/* Connect to mongodb server */ +const client = await MongoClient.connect(url, { useUnifiedTopology: true }); + +/* Sample data table */ +const db = client.db(db_name); +try { await db.collection('pres').drop(); } catch(e) {} +const pres = db.collection('pres'); +await pres.insertMany([ + { name: "Barack Obama", idx: 44 }, + { name: "Donald Trump", idx: 45 }, + { name: "Joseph Biden", idx: 46 } +], {ordered: true}); + +// highlight-start +/* Create worksheet from collection */ +const aoo = await pres.find({}, {projection:{_id:0}}).toArray(); +const ws = utils.json_to_sheet(aoo); +// highlight-end + +/* Export to XLSX */ +const wb = utils.book_new(); +utils.book_append_sheet(wb, ws, "Presidents"); +writeFile(wb, "SheetJSMongoCRUD.xlsx"); + +/* Close connection */ +client.close(); +``` + +This script: + +- connects to the local MongoDB server using database `sheetjs` +- removes the `pres` collection if it already exists +- creates a new collection `pres` with sample data +- creates a SheetJS worksheet from the collection (highlighted in the snippet) +- creates a SheetJS workbook, adds the worksheet, and exports 
to XLSX + +4) Run `node SheetJSMongoCRUD.mjs` and open `SheetJSMongoCRUD.xlsx` diff --git a/docz/docs/03-demos/06-data/26-redis.md b/docz/docs/03-demos/06-data/26-redis.md new file mode 100644 index 0000000..41b4ea5 --- /dev/null +++ b/docz/docs/03-demos/06-data/26-redis.md @@ -0,0 +1,211 @@ +--- +title: Redis +pagination_prev: demos/grid +pagination_next: demos/worker +sidebar_custom_props: + type: nosql +--- + +Redis has 5 core data types: "String", "List", "Set", "Sorted Set", and "Hash". +Since the keys and values are limited to simple strings (and numbers), it is +possible to store complete databases in a single worksheet. + +![SheetJSRedis.xlsx](pathname:///nosql/sheetjsredis.png) + +## Integration Details + +:::note + +[`SheetJSRedis.mjs`](pathname:///nosql/SheetJSRedis.mjs) exports the methods: +- `redis_to_ws` creates a SheetJS worksheet by querying a redis client +- `ws_to_redis` creates an array of query objects from the SheetJS worksheet + +::: + +The first row holds the data type and the second row holds the property name. + +The "Exporting Data" snippets generate arrays of arrays that can be added to a +worksheet using `sheet_add_aoa`. Since the data is column-oriented, the goal is +to add the data starting in the first row of the column after the data: + +```js +function add_aoa_to_next_column(worksheet, aoa) { + /* get range of worksheet */ + const range = XLSX.utils.decode_range(worksheet["!ref"]) + /* the origin to write new data will start in the column after the range */ + const origin = XLSX.utils.encode_cell({ + r: 0, // start on first row + c: range.e.c + 1 // column after end + }); + /* add data */ + XLSX.utils.sheet_add_aoa(worksheet, aoa, { origin }); +} +``` + +The "Importing Data" snippets generate redis queries.
The `ws_to_redis` function +first generates an array of arrays with `sheet_to_json`: + +```js +const aoa = XLSX.utils.sheet_to_json(worksheet, { header: 1 }); +``` + +#### Strings + +Strings can be stored in a unified String table. The first column holds keys +and the second column holds values: + +``` +XXX| A | B | +---+---------+-------+ + 1 | Strings | | + 2 | | | + 3 | Hello | World | + 4 | Sheet | JS | +``` + +The SheetJS array-of-arrays representation of the string table is an array of +key/value pairs: + +```js +const aoa = [ ["Strings"] ]; aoa.length = 2; // [ [ "Strings" ], empty ] +const keys = await client.KEYS("*"); +for(let key of keys) { + const type = await client.TYPE(key); + // highlight-start + if(type == "string") aoa.push([key, await client.GET(key)]); + // highlight-end +} +``` + +#### Lists + +Lists are unidimensional and can be stored in their own columns. + +``` +XXX| C | +---+---------+ + 1 | List | + 2 | List1 | + 3 | List1V1 | + 4 | List1V2 | +``` + +The SheetJS array-of-arrays representation of lists is a column of values. +`LRANGE` returns a simple array of values. `sheet_add_aoa` interprets the result +as one row. The code transposes the result with `values.map(v => [v])`. + +```js +const values = await client.LRANGE(key, 0, -1); +const aoa = [ ["List"], [key] ].concat(values.map(v => [v])); +``` + +#### Sets + +Sets are unidimensional and can be stored in their own columns. + +``` +XXX| D | +---+-------+ + 1 | Set | + 2 | Set1 | + 3 | Set1A | + 4 | Set1B | +``` + +The SheetJS array-of-arrays representation of sets is a column of values. +`SMEMBERS` returns a simple array of values. `sheet_add_aoa` interprets the result +as one row. The code transposes the result with `values.map(v => [v])`. + +```js +const values = await client.SMEMBERS(key); +const aoa = [ ["Set"], [key] ].concat(values.map(v => [v])); +``` + +#### Sorted Sets + +Sorted Sets have an associated score which can be stored in the second column. 
+ +``` +XXX| E | F | +---+---------+---+ + 1 | Sorted | | + 2 | ZSet1 | | + 3 | Key1 | 1 | + 4 | Key2 | 2 | +``` + +The SheetJS array-of-arrays representation is an array of key/score pairs. +`ZRANGE_WITHSCORES` returns an array of objects which can be reshaped. + +```js +const values = await client.ZRANGE_WITHSCORES(key, 0, -1); +const aoa = [ ["Sorted"], [key] ].concat(values.map(v => [v.value, v.score])); +``` + +#### Hashes + +Hashes are stored like the string table, with key and value columns in order. + +``` +XXX| G | H | +---+-------+-------+ + 1 | Hash | | + 2 | Hash1 | | + 3 | Key1 | Val1 | + 4 | Key2 | Val2 | +``` + +The SheetJS array-of-arrays representation is an array of key/value pairs. +`HGETALL` returns a plain object which can be converted using `Object.entries`: + +```js +const values = await client.HGETALL(key); +const aoa = [ ["Hash"], [key] ].concat(Object.entries(values)); +``` + +## Complete Example + +:::note + +This demo was last tested on 2023 February 23 with Redis 7.0.8, Redis connector +module 4.6.4 and NodeJS 18.14.2. + +::: + +:::warning + +The most recent version of the `redis` node module does not work with most +versions of NodeJS. It is "ESM-only", requiring NodeJS 18 or later. As a result, +this demo also requires NodeJS version 18. + +Questions regarding the `redis` library and the decision to drop traditional +NodeJS "CommonJS" module support should be directed to the Redis team. + +::: + +0) Set up and start a local Redis server. 
On Intel macOS: + +```bash +brew install redis@7.0.8 +``` + +1) Download the following scripts: + +- [`SheetJSRedis.mjs`](pathname:///nosql/SheetJSRedis.mjs) +- [`SheetJSRedisTest.mjs`](pathname:///nosql/SheetJSRedisTest.mjs) + +```bash +curl -LO https://docs.sheetjs.com/nosql/SheetJSRedis.mjs +curl -LO https://docs.sheetjs.com/nosql/SheetJSRedisTest.mjs +``` + +2) Install dependencies and run: + +```bash +npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz redis@4.6.4 +node SheetJSRedisTest.mjs +``` + +Inspect the output and compare with the data in `SheetJSRedisTest.mjs`. + +Open `SheetJSRedis.xlsx` and verify the columns have the correct data diff --git a/docz/docs/03-demos/06-data/29-pouchdb.md b/docz/docs/03-demos/06-data/29-pouchdb.md new file mode 100644 index 0000000..c8c2b0d --- /dev/null +++ b/docz/docs/03-demos/06-data/29-pouchdb.md @@ -0,0 +1,105 @@ +--- +title: PouchDB +pagination_prev: demos/grid +pagination_next: demos/worker +sidebar_custom_props: + type: nosql +--- + +PouchDB is a pure JS database with built-in synchronization features. + +## Integration Details + +`Database#allDocs` is the standard approach for bulk data export. The generated +row objects have additional `_id` and `_rev` keys that should be removed. + +Nested objects must be flattened. The ["Tutorial"](/docs/getting-started/example) +includes an example of constructing a simple array. 
+ +```js +function export_pouchdb_to_xlsx(db) { + /* fetch all rows, including the underlying data */ + db.allDocs({include_docs: true}, function(err, doc) { + + /* pull the individual data rows */ + const aoo = doc.rows.map(r => { + /* `rest` will include every field from `r.doc` except for _id and _rev */ + const { _id, _rev, ...rest } = r.doc; + return rest; + }); + + /* generate worksheet */ + const ws = XLSX.utils.json_to_sheet(aoo); + + /* generate workbook and export */ + const wb = XLSX.utils.book_new(); + XLSX.utils.book_append_sheet(wb, ws, "Sheet1"); + XLSX.writeFile(wb, "SheetJSPouch.xlsx"); + }); +} +``` + +## Complete Example + +0) Download the "Working Version" from the Getting Started guide. + +The ZIP file should have `MD5` checksum `ac4da7cb0cade1be293ba222462f109c`: + +```bash +curl -LO https://github.com/nickcolley/getting-started-todo/archive/master.zip +md5sum master.zip || md5 master.zip +### the checksum will be printed +``` + +If the download is unavailable, a mirror is available at + + +1) Unzip the `master.zip` file and enter the folder: + +```bash +unzip master.zip +cd getting-started-todo-master +``` + +2) Edit `index.html` to reference the SheetJS library and add a button: + +```html title="index.html" + + + + + +
+``` + +3) Just before the end of `app.js`, add a `click` event listener: + +```js title="app.js" + if (remoteCouch) { + sync(); + } + + // highlight-start + document.getElementById("xport").addEventListener("click", function() { + db.allDocs({include_docs: true}, function(err, doc) { + const aoo = doc.rows.map(r => { + const { _id, _rev, ... rest } = r.doc; + return rest; + }); + const ws = XLSX.utils.json_to_sheet(aoo); + const wb = XLSX.utils.book_new(); XLSX.utils.book_append_sheet(wb, ws, "Sheet1"); + XLSX.writeFile(wb, "SheetJSPouch.xlsx"); + }); + }); + // highlight-end +})(); +``` + +4) Start a local web server: + +```bash +npx http-server . +``` + +Access `http://localhost:8080` from your browser. Add a few items and click +the "Export!" button to generate a new file. diff --git a/docz/docs/03-demos/06-data/_category_.json b/docz/docs/03-demos/06-data/_category_.json new file mode 100644 index 0000000..119bcd9 --- /dev/null +++ b/docz/docs/03-demos/06-data/_category_.json @@ -0,0 +1,5 @@ +{ + "label": "Databases and Stores", + "position": 6, + "collapsed": false +} \ No newline at end of file diff --git a/docz/docs/03-demos/06-data/index.md b/docz/docs/03-demos/06-data/index.md new file mode 100644 index 0000000..b1211d0 --- /dev/null +++ b/docz/docs/03-demos/06-data/index.md @@ -0,0 +1,222 @@ +--- +title: Databases and Stores +pagination_prev: demos/grid +pagination_next: demos/worker +--- + +import DocCardList from '@theme/DocCardList'; +import {useCurrentSidebarCategory} from '@docusaurus/theme-common'; + +"Database" is a catch-all term referring to traditional RDBMS as well as K/V +stores, document databases, and other "NoSQL" storages. There are many external +database systems as well as browser APIs like WebSQL and `localStorage` + +## Data Storage + +### Structured Tables + +Database tables are a common import and export target for spreadsheets. 
One +common representation of a database table is an array of JS objects whose keys +are column headers and whose values are the underlying data values. For example, + +| Name | Index | +| :----------- | ----: | +| Barack Obama | 44 | +| Donald Trump | 45 | +| Joseph Biden | 46 | + +is naturally represented as an array of objects: + +```js +[ + { Name: "Barack Obama", Index: 44 }, + { Name: "Donald Trump", Index: 45 }, + { Name: "Joseph Biden", Index: 46 } +] +``` + +The `sheet_to_json` and `json_to_sheet` helper functions work with objects of +similar shape, converting to and from worksheet objects. The corresponding +worksheet would include a header row for the labels: + +``` +XXX| A | B | +---+--------------+-------+ + 1 | Name | Index | + 2 | Barack Obama | 44 | + 3 | Donald Trump | 45 | + 4 | Joseph Biden | 46 | +``` + +### Unstructured Data + +"Schema-less" / "NoSQL" databases allow for arbitrary keys and values within the +entries in the database. K/V stores and Objects add additional restrictions. + +There is no natural way to translate arbitrarily shaped schemas to worksheets +in a workbook. One common trick is to dedicate one worksheet to holding named +keys. For example, considering the JS object: + +```json +{ + "title": "SheetDB", + "metadata": { + "author": "SheetJS", + "code": 7262 + }, + "data": [ + { "Name": "Barack Obama", "Index": 44 }, + { "Name": "Donald Trump", "Index": 45 } + ] +} +``` + +A dedicated worksheet should store the one-off named values: + +``` +XXX| A | B | +---+-----------------+---------+ + 1 | Path | Value | + 2 | title | SheetDB | + 3 | metadata.author | SheetJS | + 4 | metadata.code | 7262 | +``` + +## Data Interchange + +### Exporting Data + +There are NodeJS connector libraries for many popular RDBMS systems. Libraries +have facilities for connecting to a database, executing queries, and obtaining +results as arrays of JS objects that can be passed to `json_to_sheet`. 
The main +differences surround API shape and supported data types. + +For example, `better-sqlite3` is a connector library for SQLite. The result of +a `SELECT` query is an array of objects suitable for `json_to_sheet`: + +```js +var aoo = db.prepare("SELECT * FROM 'Presidents' LIMIT 100000").all(); +// highlight-next-line +var worksheet = XLSX.utils.json_to_sheet(aoo); +``` + +Other databases will require post-processing. For example, MongoDB results +include the Object ID (usually stored in the `_id` key). This can be removed +before generating a worksheet: + +```js +const aoo = await db.collection('coll').find({}).toArray(); +// highlight-next-line +aoo.forEach((x) => delete x._id); +const ws = XLSX.utils.json_to_sheet(aoo); +``` + +### Importing Data + +When a strict schema is needed, the `sheet_to_json` helper function generates +arrays of JS objects that can be scanned to determine the column "types". + +:::note + +Document databases like MongoDB tend not to require schemas. Arrays of objects +can be used directly without setting up a schema: + +```js +const aoo = XLSX.utils.sheet_to_json(ws); +// highlight-next-line +await db.collection('coll').insertMany(aoo, { ordered: true }); +``` + +::: + +The ["SQL Connectors"](/docs/demos/data/sql) demo includes sample functions for +generating SQL CREATE TABLE and INSERT queries. + +## DSV Interchange + +Many databases offer utilities for reading and writing CSV, pipe-separated +documents, and other simple data files. They enable workflows where the library +generates CSV data for the database to process or where the library parses CSV +files created by the database. + +#### Worksheet to CSV + +CSV data can be generated from worksheets using `XLSX.utils.sheet_to_csv`. 
+ +```js +// starting from a worksheet object +const csv = XLSX.utils.sheet_to_csv(ws); + +// whole workbook conversion +const csv_arr = wb.SheetNames.map(n => XLSX.utils.sheet_to_csv(wb.Sheets[n])); +``` + +#### CSV to Worksheet + +`XLSX.read` can read strings with CSV data. It will generate single-sheet +workbooks with worksheet name `Sheet1`. + +Where supported, `XLSX.readFile` can read files. + +```js +// starting from a CSV string +const ws_str = XLSX.read(csv_str, {type: "string"}).Sheets.Sheet1; + +// starting from a CSV binary string (e.g. `FileReader#readAsBinaryString`) +const ws_bstr = XLSX.read(csv_bstr, {type: "binary"}).Sheets.Sheet1; + +// starting from a CSV file in NodeJS or Bun or Deno +const ws_file = XLSX.readFile("test.csv").Sheets.Sheet1; +``` + +## Demos + +### Web APIs + +The following Web APIs are featured in separate demos: + +
    {useCurrentSidebarCategory().items.filter(item => item.customProps?.type == "web").map(item => { + const listyle = (item.customProps?.icon) ? { + listStyleImage: `url("${item.customProps.icon}")` + } : {}; + return (
  • + {item.label}{item.customProps?.summary && (" - " + item.customProps.summary)} +
  • ); +})}
+ +### SQL Databases + +The following SQL-related topics are covered in separate demos: + +
    {useCurrentSidebarCategory().items.filter(item => item.customProps?.sql).map(item => { + const listyle = (item.customProps?.icon) ? { + listStyleImage: `url("${item.customProps.icon}")` + } : {}; + return (
  • + {item.label}{item.customProps?.summary && (" - " + item.customProps.summary)} +
  • ); +})}
+ +### NoSQL Data Stores + +Demos for the following "NoSQL" data stores apply structured access patterns: + +
    {useCurrentSidebarCategory().items.filter(item => item.customProps?.type == "document").map(item => { + const listyle = (item.customProps?.icon) ? { + listStyleImage: `url("${item.customProps.icon}")` + } : {}; + return (
  • + {item.label}{item.customProps?.summary && (" - " + item.customProps.summary)} +
  • ); +})}
+ +Demos for the following "NoSQL" data stores apply unstructured access patterns: + +
    {useCurrentSidebarCategory().items.filter(item => item.customProps?.type == "nosql").map(item => { + const listyle = (item.customProps?.icon) ? { + listStyleImage: `url("${item.customProps.icon}")` + } : {}; + return (
  • + {item.label}{item.customProps?.summary && (" - " + item.customProps.summary)} +
  • ); +})}
diff --git a/docz/docs/03-demos/06-database.md b/docz/docs/03-demos/06-database.md deleted file mode 100644 index 3386b52..0000000 --- a/docz/docs/03-demos/06-database.md +++ /dev/null @@ -1,859 +0,0 @@ ---- -title: Databases and SQL ---- - -import current from '/version.js'; -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - - -"Database" is a catch-all term referring to traditional RDBMS as well as K/V -stores, document databases, and other "NoSQL" storages. There are many external -database systems as well as browser APIs like WebSQL and `localStorage` - -This demo discusses general strategies and provides examples for a variety of -database systems. The examples are merely intended to demonstrate very basic -functionality. - -Key-value stores, unstructured use of Document Databases, and other schema-less -databases are covered in the [NoSQL demo](/docs/demos/nosql). - - -## Structured Tables - -Database tables are a common import and export target for spreadsheets. One -common representation of a database table is an array of JS objects whose keys -are column headers and whose values are the underlying data values. For example, - -| Name | Index | -| :----------- | ----: | -| Barack Obama | 44 | -| Donald Trump | 45 | -| Joseph Biden | 46 | - -is naturally represented as an array of objects - -```js -[ - { Name: "Barack Obama", Index: 44 }, - { Name: "Donald Trump", Index: 45 }, - { Name: "Joseph Biden", Index: 46 } -] -``` - -The `sheet_to_json` and `json_to_sheet` helper functions work with objects of -similar shape, converting to and from worksheet objects. The corresponding -worksheet would include a header row for the labels: - -``` -XXX| A | B | ----+--------------+-------+ - 1 | Name | Index | - 2 | Barack Obama | 44 | - 3 | Donald Trump | 45 | - 3 | Joseph Biden | 46 | -``` - - -### Building Worksheets from Structured Tables - -There are NodeJS connector libraries for many popular RDBMS systems. 
Libraries -have facilities for connecting to a database, executing queries, and obtaining -results as arrays of JS objects that can be passed to `json_to_sheet`. The main -differences surround API shape and supported data types. - -For example, `better-sqlite3` is a connector library for SQLite. The result of -a `SELECT` query is an array of objects suitable for `json_to_sheet`: - -```js -var aoo = db.prepare("SELECT * FROM 'Presidents' LIMIT 100000").all(); -// highlight-next-line -var worksheet = XLSX.utils.json_to_sheet(aoo); -``` - -Other databases will require post-processing. For example, MongoDB results -include the Object ID (usually stored in the `_id` key). This can be removed -before generating a worksheet: - -```js -const aoo = await db.collection('coll').find({}).toArray(); -// highlight-next-line -aoo.forEach((x) => delete x._id); -const ws = XLSX.utils.json_to_sheet(aoo); -``` - -### Building Schemas from Worksheets - -When a strict schema is needed, the `sheet_to_json` helper function generates -arrays of JS objects that can be scanned to determine the column "types". - -:::note - -Document databases like MongoDB tend not to require schemas. Arrays of objects -can be used directly without setting up a schema: - -```js -const aoo = XLSX.utils.sheet_to_json(ws); -// highlight-next-line -await db.collection('coll').insertMany(aoo, { ordered: true }); -``` - -::: - -This example will fetch , scan the columns of the -first worksheet to determine data types, and generate 6 PostgreSQL statements. - -
Explanation (click to show) - -The relevant `generate_sql` function takes a worksheet name and a table name: - -```js -// define mapping between determined types and PostgreSQL types -const PG = { "n": "float8", "s": "text", "b": "boolean" }; - -function generate_sql(ws, wsname) { - - // generate an array of objects from the data - const aoo = XLSX.utils.sheet_to_json(ws); - - // types will map column headers to types, while hdr holds headers in order - const types = {}, hdr = []; - - // loop across each row object - aoo.forEach(row => - // Object.entries returns a row of [key, value] pairs. Loop across those - Object.entries(row).forEach(([k,v]) => { - - // If this is first time seeing key, mark unknown and append header array - if(!types[k]) { types[k] = "?"; hdr.push(k); } - - // skip null and undefined - if(v == null) return; - - // check and resolve type - switch(typeof v) { - case "string": // strings are the broadest type - types[k] = "s"; break; - case "number": // if column is not string, number is the broadest type - if(types[k] != "s") types[k] = "n"; break; - case "boolean": // only mark boolean if column is unknown or boolean - if("?b".includes(types[k])) types[k] = "b"; break; - default: types[k] = "s"; break; // default to string type - } - }) - ); - - // The final array consists of the CREATE TABLE query and a series of INSERTs - return [ - // generate CREATE TABLE query and return batch - `CREATE TABLE \`${wsname}\` (${hdr.map(h => - // column name must be wrapped in backticks - `\`${h}\` ${PG[types[h]]}` - ).join(", ")});` - ].concat(aoo.map(row => { // generate INSERT query for each row - // entries will be an array of [key, value] pairs for the data in the row - const entries = Object.entries(row); - // fields will hold the column names and values will hold the values - const fields = [], values = []; - // check each key/value pair in the row - entries.forEach(([k,v]) => { - // skip null / undefined - if(v == null) return; - // column name must 
be wrapped in backticks - fields.push(`\`${k}\``); - // when the field type is numeric, `true` -> 1 and `false` -> 0 - if(types[k] == "n") values.push(typeof v == "boolean" ? (v ? 1 : 0) : v); - // otherwise, - else values.push(`'${v.toString().replaceAll("'", "''")}'`); - }) - if(fields.length) return `INSERT INTO \`${wsname}\` (${fields.join(", ")}) VALUES (${values.join(", ")})`; - })).filter(x => x); // filter out skipped rows -} -``` - -
- -```jsx live -function SheetJSQLWriter() { - // define mapping between determined types and PostgreSQL types - const PG = { "n": "float8", "s": "text", "b": "boolean" }; - function generate_sql(ws, wsname) { - const aoo = XLSX.utils.sheet_to_json(ws); - const types = {}, hdr = []; - // loop across each key in each column - aoo.forEach(row => Object.entries(row).forEach(([k,v]) => { - // set up type if header hasn't been seen - if(!types[k]) { types[k] = "?"; hdr.push(k); } - // check and resolve type - switch(typeof v) { - case "string": types[k] = "s"; break; - case "number": if(types[k] != "s") types[k] = "n"; break; - case "boolean": if("?b".includes(types[k])) types[k] = "b"; break; - default: types[k] = "s"; break; - } - })); - return [ - // generate CREATE TABLE query and return batch - `CREATE TABLE \`${wsname}\` (${hdr.map(h => `\`${h}\` ${PG[types[h]]}`).join(", ")});` - ].concat(aoo.map(row => { - const entries = Object.entries(row); - const fields = [], values = []; - entries.forEach(([k,v]) => { - if(v == null) return; - fields.push(`\`${k}\``); - if(types[k] == "n") values.push(typeof v == "boolean" ? (v ? 1 : 0) : v); - else values.push(`'${v.toString().replaceAll("'", "''")}'`); - }) - if(fields.length) return `INSERT INTO \`${wsname}\` (${fields.join(", ")}) VALUES (${values.join(", ")})`; - })).filter(x => x).slice(0, 6); - } - const [url, setUrl] = React.useState("https://sheetjs.com/data/cd.xls"); - const set_url = React.useCallback((evt) => setUrl(evt.target.value)); - const [out, setOut] = React.useState(""); - const xport = React.useCallback(async() => { - const ab = await (await fetch(url)).arrayBuffer(); - const wb = XLSX.read(ab), wsname = wb.SheetNames[0]; - setOut(generate_sql(wb.Sheets[wsname], wsname).join("\n")); - }); - - return ( <> {out && (<>{url}
{out}
)} - URL: -
- ); -} -``` - -### DSV Interchange - -Many databases offer utilities for reading and writing CSV, pipe-separated -documents, and other simple data files. They enable workflows where the library -generates CSV data for the database to process or where the library parses CSV -files created by the database. - -#### Worksheet to CSV - -CSV data can be generated from worksheets using `XLSX.utils.sheet_to_csv`. - -```js -// starting from a worksheet object -const csv = XLSX.utils.sheet_to_json(ws); - -// whole workbook conversion -const csv_arr = wb.SheetNames.map(n => XLSX.utils.sheet_to_json(wb.Sheets[n])); -``` - -#### CSV to Worksheet - -`XLSX.read` can read strings with CSV data. It will generate single-sheet -workbooks with worksheet name `Sheet1`. - -Where supported, `XLSX.readFile` can read files. - -```js -// starting from a CSV string -const ws_str = XLSX.read(csv_str, {type: "string"}).Sheets.Sheet1; - -// starting from a CSV binary string (e.g. `FileReader#readAsBinaryString`) -const ws_bstr = XLSX.read(csv_bstr, {type: "binary"}).Sheets.Sheet1; - -// starting from a CSV file in NodeJS or Bun or Deno -const ws_file = XLSX.readFile("test.csv").Sheets.Sheet1; -``` - -## Databases - -### SQLite - -Most platforms offer a simple way to query SQLite databases. - -The following example shows how to query for each table in an SQLite database, -query for the data for each table, add each non-empty table to a workbook, and -export as XLSX. - -[The Northwind database is available in SQLite form](https://raw.githubusercontent.com/jpwhite3/northwind-SQLite3/master/dist/northwind.db). - - - - -The **`better-sqlite3`** module provides a very simple API for working with -SQLite databases. `Statement#all` runs a prepared statement and returns an array -of JS objects. 
- -1) Install the dependencies: - -```bash -npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz better-sqlite3 -``` - -2) Save the following to `node.mjs`: - -```js title="node.mjs" -/* Load SQLite3 connector library */ -import Database from "better-sqlite3"; - -/* Load SheetJS library */ -import * as XLSX from 'xlsx'; -import * as fs from 'fs'; -XLSX.set_fs(fs); - -/* Initialize database */ -var db = Database("northwind.db"); - -/* Create new workbook */ -var wb = XLSX.utils.book_new(); - -/* Get list of table names */ -var sql = db.prepare("SELECT name FROM sqlite_master WHERE type='table'"); -var result = sql.all(); - -/* Loop across each name */ -result.forEach(function(row) { - /* Get first 100K rows */ - var aoo = db.prepare("SELECT * FROM '" + row.name + "' LIMIT 100000").all(); - if(aoo.length > 0) { - /* Create Worksheet from the row objects */ - var ws = XLSX.utils.json_to_sheet(aoo, {dense: true}); - /* Add to Workbook */ - XLSX.utils.book_append_sheet(wb, ws, row.name); - } -}); - -/* Write File */ -XLSX.writeFile(wb, "node.xlsx"); -``` - -3) Run `node node.mjs` and open `node.xlsx` - - - - -Bun ships with a built-in high-performance module `bun:sqlite`. 
- -1) Install the dependencies: - -```bash -npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz -``` - -2) Save the following to `bun.mjs`: - -```js title="bun.mjs" -/* Load SQLite3 connector library */ -import { Database } from "bun:sqlite"; - -/* Load SheetJS library */ -import * as XLSX from 'xlsx'; -import * as fs from 'fs'; -XLSX.set_fs(fs); - -/* Initialize database */ -var db = Database.open("northwind.db"); - -/* Create new workbook */ -var wb = XLSX.utils.book_new(); - -/* Get list of table names */ -var sql = db.prepare("SELECT name FROM sqlite_master WHERE type='table'"); -var result = sql.all(); - -/* Loop across each name */ -result.forEach(function(row) { - /* Get first 100K rows */ - var aoo = db.prepare("SELECT * FROM '" + row.name + "' LIMIT 100000").all(); - if(aoo.length > 0) { - /* Create Worksheet from the row objects */ - var ws = XLSX.utils.json_to_sheet(aoo, {dense: true}); - /* Add to Workbook */ - XLSX.utils.book_append_sheet(wb, ws, row.name); - } -}); - -/* Write File */ -XLSX.writeFile(wb, "bun.xlsx"); -``` - -3) Run `bun bun.mjs` and open `bun.xlsx` - - - - -Deno `sqlite` library returns raw arrays of arrays. 
- -1) Save the following to `deno.ts`: - -```ts title="deno.ts" -/* Load SQLite3 connector library */ -import { DB } from "https://deno.land/x/sqlite/mod.ts"; - -/* Load SheetJS library */ -// @deno-types="https://cdn.sheetjs.com/xlsx-latest/package/types/index.d.ts" -import * as XLSX from 'https://cdn.sheetjs.com/xlsx-latest/package/xlsx.mjs'; - -/* Initialize database */ -var db = new DB("northwind.db"); - -/* Create new workbook */ -var wb = XLSX.utils.book_new(); - -/* Get list of table names */ -var sql = db.prepareQuery("SELECT name FROM sqlite_master WHERE type='table'"); -var result = sql.all(); -/* Loop across each name */ -result.forEach(function(row) { - /* Get first 100K rows */ - var query = db.prepareQuery("SELECT * FROM '" + row[0] + "' LIMIT 100000") - var aoa = query.all(); - if(aoa.length > 0) { - /* Create array of arrays */ - var data = [query.columns().map(x => x.name)].concat(aoa); - /* Create Worksheet from the aoa */ - var ws = XLSX.utils.aoa_to_sheet(data, {dense: true}); - /* Add to Workbook */ - XLSX.utils.book_append_sheet(wb, ws, row[0]); - } -}); - -/* Write File */ -XLSX.writeFile(wb, "deno.xlsx"); -``` - -3) Run `deno run --allow-read --allow-write deno.ts` and open `deno.xlsx` - - - - - -### WebSQL - -:::warning - -This information is included for legacy deployments. Web SQL is deprecated. - -:::: - -WebSQL was a popular SQL-based in-browser database available on Chrome. In -practice, it is powered by SQLite, and most simple SQLite-compatible queries -work as-is in WebSQL. - -The public demo generates a database from workbook. - -Importing data from spreadsheets is straightforward using the `generate_sql` -helper function from ["Building Schemas"](#building-schemas-from-worksheets): - -```js -const db = openDatabase('sheetql', '1.0', 'SheetJS WebSQL Test', 2097152); -const stmts = generate_sql(ws, wsname); -// NOTE: tx.executeSql and db.transaction use callbacks. 
This wraps in Promises -for(var i = 0; i < stmts.length; ++i) await new Promise((res, rej) => { - db.transaction(tx => - tx.executeSql(stmts[i], [], - (tx, data) => res(data), // if the query is successful, return the data - (tx, err) => rej(err) // if the query fails, reject with the error - )); -}); -``` - -The result of a SQL SELECT statement is a `SQLResultSet`. The `rows` property -is a `SQLResultSetRowList`. It is an "array-like" structure that has `length` -and properties like `0`, `1`, etc. However, this is not a real Array object. -A real Array can be created using `Array.from`: - -```js -const db = openDatabase('sheetql', '1.0', 'SheetJS WebSQL Test', 2097152); -db.readTransaction(tx => - tx.executeSQL("SELECT * FROM DatabaseTable", [], (tx, data) => { - // data.rows is "array-like", so `Array.from` can make it a real array - const aoo = Array.from(data.rows); - const ws = XLSX.utils.json_to_sheet(aoo); - // ... it is recommended to perform an export here OR wrap in a Promise - }) -); -``` - -The following demo generates a database with 5 fixed SQL statements. Queries -can be changed in the Live Editor. 
The WebSQL database can be inspected in the -"WebSQL" section of the "Application" Tab of Developer Tools: - -![WebSQL view in Developer Tools](pathname:///files/websql.png) - -```jsx live -function SheetQL() { - const [out, setOut] = React.useState(""); - const queries = [ - 'DROP TABLE IF EXISTS Presidents', - 'CREATE TABLE Presidents (Name TEXT, Idx REAL)', - 'INSERT INTO Presidents (Name, Idx) VALUES ("Barack Obama", 44)', - 'INSERT INTO Presidents (Name, Idx) VALUES ("Donald Trump", 45)', - 'INSERT INTO Presidents (Name, Idx) VALUES ("Joseph Biden", 46)' - ]; - const xport = React.useCallback(async() => { - // prep database - const db = openDatabase('sheetql', '1.0', 'SheetJS WebSQL Test', 2097152); - - for(var i = 0; i < queries.length; ++i) await new Promise((res, rej) => { - db.transaction((tx) => { - tx.executeSql(queries[i], [], (tx, data) => res(data), (tx, err) => rej(err)); - }); - }); - - // pull data and generate rows - db.readTransaction(tx => { - tx.executeSql("SELECT * FROM Presidents", [], (tx, data) => { - const aoo = Array.from(data.rows); - setOut("QUERY RESULT:\n" + aoo.map(r => JSON.stringify(r)).join("\n") + "\n") - const ws = XLSX.utils.json_to_sheet(aoo); - const wb = XLSX.utils.book_new(); - XLSX.utils.book_append_sheet(wb, ws, "Presidents"); - XLSX.writeFile(wb, "SheetQL.xlsx"); - }); - }); - }); - return (
{out}
); -} -``` - -### LocalStorage and SessionStorage - -The Storage API, encompassing `localStorage` and `sessionStorage`, describes -simple key-value stores that only support string values and keys. - -Arrays of objects can be stored using `JSON.stringify` using row index as key: - -```js -const aoo = XLSX.utils.sheet_to_json(ws); -for(var i = 0; i < aoo.length; ++i) localStorage.setItem(i, JSON.stringify(aoo[i])); -``` - -Recovering the array of objects is possible by using `JSON.parse`: - -```js -const aoo = []; -for(var i = 0; i < localStorage.length; ++i) aoo.push(JSON.parse(localStorage.getItem(i))); -const ws = XLSX.utils.json_to_sheet(aoo); -``` - -This example will fetch , fill `localStorage` with -rows, then generate a worksheet from the rows and write to a new file. - -:::caution - -This example is for illustration purposes. If array of objects is available, it -is strongly recommended to convert that array to a worksheet directly. - -::: - -```jsx live -function SheetJStorage() { - const [url, setUrl] = React.useState("https://sheetjs.com/data/cd.xls"); - const set_url = React.useCallback((evt) => setUrl(evt.target.value)); - const [out, setOut] = React.useState(""); - const xport = React.useCallback(async() => { - // get first worksheet data as array of objects - const ab = await (await fetch(url)).arrayBuffer(); - const wb = XLSX.read(ab), wsname = wb.SheetNames[0]; - const aoo = XLSX.utils.sheet_to_json(wb.Sheets[wsname]); - - // reset and populate localStorage - localStorage.clear(); - for(var i = 0; i < aoo.length; ++i) localStorage.setItem(i, JSON.stringify(aoo[i])); - - // create new array of objects from localStorage - const new_aoo = []; - for(var i = 0; i < localStorage.length; ++i) { - const row = JSON.parse(localStorage.getItem(i)); - new_aoo.push(row); - } - - setOut(`Number of rows in LocalStorage: ${localStorage.length}`); - - // create and export workbook - const new_ws = XLSX.utils.json_to_sheet(new_aoo); - const new_wb = 
XLSX.utils.book_new(); - XLSX.utils.book_append_sheet(new_wb, new_ws, "Sheet1"); - XLSX.writeFile(new_wb, "SheetJStorage.xlsx"); - }); - - return ( <> {out && (<>{url}
{out}
)} - URL: -
- ); -} -``` - - -### IndexedDB - -`localForage` is a IndexedDB wrapper that presents an async Storage interface. - -Arrays of objects can be stored using `JSON.stringify` using row index as key: - -```js -const aoo = XLSX.utils.sheet_to_json(ws); -for(var i = 0; i < aoo.length; ++i) await localForage.setItem(i, JSON.stringify(aoo[i])); -``` - -Recovering the array of objects is possible by using `JSON.parse`: - -```js -const aoo = []; -for(var i = 0; i < localForage.length; ++i) aoo.push(JSON.parse(await localForage.getItem(i))); -const wb = XLSX.utils.json_to_sheet(aoo); -``` - -### Other SQL Databases - -The `generate_sql` function from ["Building Schemas from Worksheets"](#building-schemas-from-worksheets) -can be adapted to generate SQL statements for a variety of databases, including: - -**PostgreSQL** - -The `pg` connector library was tested against the `generate_sql` output as-is. - -The `rows` property of a query result is an array of objects that plays nice -with `json_to_sheet`: - -```js -const aoa = await connection.query(`SELECT * FROM DataTable`).rows; -const worksheet = XLSX.utils.json_to_sheet(aoa); -``` - -**MySQL / MariaDB** - -The `mysql2` connector library was tested. The differences are shown below, -primarily stemming from the different quoting requirements and field types. - -
Differences (click to show) - -```js -// highlight-start -// define mapping between determined types and MySQL types -const PG = { "n": "REAL", "s": "TEXT", "b": "TINYINT" }; -// highlight-end - -function generate_sql(ws, wsname) { - - // generate an array of objects from the data - const aoo = XLSX.utils.sheet_to_json(ws); - - // types will map column headers to types, while hdr holds headers in order - const types = {}, hdr = []; - - // loop across each row object - aoo.forEach(row => - // Object.entries returns a row of [key, value] pairs. Loop across those - Object.entries(row).forEach(([k,v]) => { - - // If this is first time seeing key, mark unknown and append header array - if(!types[k]) { types[k] = "?"; hdr.push(k); } - - // skip null and undefined - if(v == null) return; - - // check and resolve type - switch(typeof v) { - case "string": // strings are the broadest type - types[k] = "s"; break; - case "number": // if column is not string, number is the broadest type - if(types[k] != "s") types[k] = "n"; break; - case "boolean": // only mark boolean if column is unknown or boolean - if("?b".includes(types[k])) types[k] = "b"; break; - default: types[k] = "s"; break; // default to string type - } - }) - ); - - // The final array consists of the CREATE TABLE query and a series of INSERTs - return [ - // generate CREATE TABLE query and return batch - // highlight-next-line - `CREATE TABLE ${wsname} (${hdr.map(h => - // highlight-next-line - `${h} ${PG[types[h]]}` - ).join(", ")});` - ].concat(aoo.map(row => { // generate INSERT query for each row - // entries will be an array of [key, value] pairs for the data in the row - const entries = Object.entries(row); - // fields will hold the column names and values will hold the values - const fields = [], values = []; - // check each key/value pair in the row - entries.forEach(([k,v]) => { - // skip null / undefined - if(v == null) return; - // highlight-next-line - fields.push(`${k}`); - // when the field type is 
numeric, `true` -> 1 and `false` -> 0 - if(types[k] == "n") values.push(typeof v == "boolean" ? (v ? 1 : 0) : v); - // otherwise, - // highlight-next-line - else values.push(`"${v.toString().replaceAll('"', '""')}"`); - }) - if(fields.length) return `INSERT INTO \`${wsname}\` (${fields.join(", ")}) VALUES (${values.join(", ")})`; - })).filter(x => x); // filter out skipped rows -} -``` - -
- -The first property of a query result is an array of objects that plays nice -with `json_to_sheet`: - -```js -const aoa = await connection.query(`SELECT * FROM DataTable`)[0]; -const worksheet = XLSX.utils.json_to_sheet(aoa); -``` - - -### Query Builders - -Query builders are designed to simplify query generation and normalize field -types and other database minutiae. - -**Knex** - -The result of a `SELECT` statement is an array of objects: - -```js -const aoo = await connection.select("*").from("DataTable"); -const worksheet = XLSX.utils.json_to_sheet(aoa); -``` - -Knex wraps primitive types when creating a table. `generate_sql` takes a `knex` -connection object and uses the API: - -
Generating a Table (click to show) - -```js -// define mapping between determined types and Knex types -const PG = { "n": "float", "s": "text", "b": "boolean" }; - -async function generate_sql(knex, ws, wsname) { - - // generate an array of objects from the data - const aoo = XLSX.utils.sheet_to_json(ws); - - // types will map column headers to types, while hdr holds headers in order - const types = {}, hdr = []; - - // loop across each row object - aoo.forEach(row => - // Object.entries returns a row of [key, value] pairs. Loop across those - Object.entries(row).forEach(([k,v]) => { - - // If this is first time seeing key, mark unknown and append header array - if(!types[k]) { types[k] = "?"; hdr.push(k); } - - // skip null and undefined - if(v == null) return; - - // check and resolve type - switch(typeof v) { - case "string": // strings are the broadest type - types[k] = "s"; break; - case "number": // if column is not string, number is the broadest type - if(types[k] != "s") types[k] = "n"; break; - case "boolean": // only mark boolean if column is unknown or boolean - if("?b".includes(types[k])) types[k] = "b"; break; - default: types[k] = "s"; break; // default to string type - } - }) - ); - - await knex.schema.dropTableIfExists(wsname); - await knex.schema.createTable(wsname, (table) => { hdr.forEach(h => { table[PG[types[h]] || "text"](h); }); }); - for(let i = 0; i < aoo.length; ++i) { - if(!aoo[i] || !Object.keys(aoo[i]).length) continue; - try { await knex.insert(aoo[i]).into(wsname); } catch(e) {} - } - return knex; -} -``` - -
- - -### MongoDB Structured Collections - -MongoDB is a popular document-oriented database engine. - -It is straightforward to treat collections as worksheets. Each object maps to -a row in the table. - -The official NodeJS connector is **`mongodb`**. - -Worksheets can be generated from collections by using `Collection#find`. A -`projection` can suppress the object ID field: - -```js -/* generate a worksheet from a collection */ -const aoo = await collection.find({}, {projection:{_id:0}}).toArray(); -const ws = utils.json_to_sheet(aoo); -``` - -Collections can be populated with data from a worksheet using `insertMany`: - -```js -/* import data from a worksheet to a collection */ -const aoo = XLSX.utils.sheet_to_json(ws); -await collection.insertMany(aoo, {ordered: true}); -``` - -
Complete Example (click to show) - -:::caution - -When this demo was last tested, the `mongodb` module did not work with Node 18. -It was verified in Node 16.16.0. - -::: - -1) Install the dependencies: - -```bash -npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz mongodb -``` - -2) Start a MongoDB server on `localhost` (follow official instructions) - -3) Save the following to `SheetJSMongoCRUD.mjs` (the key step is highlighted): - -```js title="SheetJSMongoCRUD.mjs" -import { writeFile, set_fs, utils } from 'xlsx'; -import * as fs from 'fs'; set_fs(fs); -import { MongoClient } from 'mongodb'; - -const url = 'mongodb://localhost:27017/sheetjs'; -const db_name = 'sheetjs'; - -(async() => { -/* Connect to mongodb server */ -const client = await MongoClient.connect(url, { useUnifiedTopology: true }); - -/* Sample data table */ -const db = client.db(db_name); -try { await db.collection('pres').drop(); } catch(e) {} -const pres = db.collection('pres'); -await pres.insertMany([ - { name: "Barack Obama", idx: 44 }, - { name: "Donald Trump", idx: 45 }, - { name: "Joseph Biden", idx: 46 } -], {ordered: true}); - -// highlight-start -/* Export database to XLSX */ -const wb = utils.book_new(); -const aoo = await pres.find({}, {projection:{_id:0}}).toArray(); -const ws = utils.json_to_sheet(aoo); -utils.book_append_sheet(wb, ws, "Presidents"); -writeFile(wb, "SheetJSMongoCRUD.xlsx"); -// highlight-end - -/* Close connection */ -client.close(); -})(); -``` - -4) Run `node SheetJSMongoCRUD.mjs` and open `SheetJSMongoCRUD.xlsx` - -
- - diff --git a/docz/docs/03-demos/07-worker.md b/docz/docs/03-demos/07-worker.md index a68b9a8..3b1c4e1 100644 --- a/docz/docs/03-demos/07-worker.md +++ b/docz/docs/03-demos/07-worker.md @@ -1,5 +1,6 @@ --- title: Web Workers +pagination_prev: demos/data/index --- Parsing and writing large spreadsheets takes time. During the process, if the diff --git a/docz/docs/03-demos/11-static/10-astro.md b/docz/docs/03-demos/11-static/10-astro.md index 38f5b00..eaa4ca3 100644 --- a/docz/docs/03-demos/11-static/10-astro.md +++ b/docz/docs/03-demos/11-static/10-astro.md @@ -203,8 +203,16 @@ declare module '*.xlsx' { 6) Add the highlighted lines to `astro.config.mjs`: ```js title="astro.config.mjs" +// highlight-start +/* import `readFileSync` at the top of the script*/ +import { readFileSync } from 'fs'; +// highlight-end +import { defineConfig } from 'astro/config'; + + export default defineConfig({ // highlight-start + /* this vite section should be added as a property of the object */ vite: { // this tells astro which extensions to handle assetsInclude: ['**/*.numbers', '**/*.xlsx'], diff --git a/docz/docs/03-demos/41-nosql.md b/docz/docs/03-demos/41-nosql.md deleted file mode 100644 index ba7c51b..0000000 --- a/docz/docs/03-demos/41-nosql.md +++ /dev/null @@ -1,313 +0,0 @@ ---- -title: NoSQL Data Stores ---- - -import Tabs from '@theme/Tabs'; -import TabItem from '@theme/TabItem'; - -So-called "Schema-less" databases allow for arbitrary keys and values within the -entries in the database. K/V stores and Objects add additional restrictions. - -:::note - -These data stores are capable of storing structured data. Those use cases are -covered in the [Database demo](/docs/demos/database). - -::: - -## Arbitrary Data to Spreadsheets - -There is no natural way to translate arbitrarily shaped schemas to worksheets -in a workbook. One common trick is to dedicate one worksheet to holding named -keys. 
For example, considering the JS object: - -```json -{ - "title": "SheetDB", - "metadata": { - "author": "SheetJS", - "code": 7262 - }, - "data": [ - { "Name": "Barack Obama", "Index": 44 }, - { "Name": "Donald Trump", "Index": 45 }, - ] -} -``` - -A dedicated worksheet should store the one-off named values: - -``` -XXX| A | B | ----+-----------------+---------+ - 1 | Path | Value | - 2 | title | SheetDB | - 3 | metadata.author | SheetJS | - 4 | metadata.code | 7262 | -``` - -## Data Stores - -### Redis - -Redis has 5 core data types: "String", List", "Set", "Sorted Set", and "Hash". -Since the keys and values are limited to simple strings (and numbers), it is -possible to store complete databases in a single worksheet. - -![SheetJSRedis.xlsx](pathname:///nosql/sheetjsredis.png) - -#### Mapping - -The first row holds the data type and the second row holds the property name. - - - - -Strings can be stored in a unified String table. The first column holds keys -and the second column holds values: - -``` -XXX| A | B | ----+---------+-------+ - 1 | Strings | | - 2 | | | - 3 | Hello | World | - 4 | Sheet | JS | -``` - -The SheetJS array-of-arrays representation of the string table is an array of -key/value pairs: - -```js -let aoa = ["Strings"]; aoa.length = 2; // [ "Strings", empty ] -const keys = await client.KEYS("*"); -for(let key of keys) { - const type = await client.TYPE(key); - if(type == "string") aoa.push([key, await client.GET(key)]); -} -``` - - - - -Lists are unidimensional and can be stored in their own columns. - -``` -XXX| C | ----+---------+ - 1 | List | - 2 | List1 | - 3 | List1V1 | - 4 | List1V2 | -``` - -The SheetJS array-of-arrays representation of lists is a column of values. - -```js -if(type == "list") { - let values = await client.LRANGE(key, 0, -1); - aoa = [ ["List"], [key] ].concat(values.map(v => [v])); -} -``` - - - - -Sets are unidimensional and can be stored in their own columns. 
- -``` -XXX| D | ----+-------+ - 1 | Set | - 2 | Set1 | - 3 | Set1A | - 4 | Set1B | -``` - -The SheetJS array-of-arrays representation of sets is a column of values. - -```js -if(type == "set") { - let values = await client.SMEMBERS(key); - aoa = [ ["Set"], [key] ].concat(values.map(v => [v])); -} -``` - - - - -Sorted Sets have an associated score which can be stored in the second column. - -``` -XXX| E | F | ----+---------+---+ - 1 | Sorted | | - 2 | ZSet1 | | - 3 | Key1 | 1 | - 4 | Key2 | 2 | -``` - -The SheetJS array-of-arrays representation is an array of key/score pairs. - -```js -if(type == "zset") { - let values = await client.ZRANGE_WITHSCORES(key, 0, -1); - aoa = [ ["Sorted"], [key] ].concat(values.map(v => [v.value, v.score])); -} -``` - - - - -Hashes are stored like the string table, with key and value columns in order. - -``` -XXX| G | H | ----+-------+-------+ - 1 | Hash | | - 2 | Hash1 | | - 3 | Key1 | Val1 | - 4 | Key2 | Val2 | -``` - -The SheetJS array-of-arrays representation is an array of key/value pairs. - -```js -if(type == "hash") { - let values = await client.HGETALL(key); - aoa = [ ["Hash"], [key] ].concat(Object.entries(values)); -} -``` - - - - -#### Example - -:::warning - -The most recent version of the `redis` node module does not work with most -versions of NodeJS. It is "ESM-only", requiring NodeJS 18 or later. As a result, -this demo also requires NodeJS version 18. - -Questions regarding the `redis` library and the decision to drop traditional -NodeJS "CommonJS" module support should be directed to the Redis team. 
- -::: - -0) Set up and start a local Redis server - -1) Download the following scripts: - -- [`SheetJSRedis.mjs`](pathname:///nosql/SheetJSRedis.mjs) -- [`SheetJSRedisTest.mjs`](pathname:///nosql/SheetJSRedisTest.mjs) - -2) Install dependencies and run: - -```bash -npm i --save https://cdn.sheetjs.com/xlsx-latest/xlsx-latest.tgz redis -node SheetJSRedisTest.mjs -``` - -Inspect the output and compare with the data in `SheetJSRedisTest.mjs`. - -Open `SheetJSRedis.xlsx` and verify the columns have the correct data - - -### PouchDB - -`Database#allDocs` is the standard approach for bulk data export. The generated -row objects have additional `_id` and `_rev` keys that should be removed. - -Nested objects must be flattened. The ["Tutorial"](/docs/getting-started/example) -includes an example of constructing a simple array. - -```js -function export_pouchdb_to_xlsx(db) { - /* fetch all rows, including the underlying data */ - db.allDocs({include_docs: true}, function(err, doc) { - - /* pull the individual data rows */ - const aoo = doc.rows.map(r => { - /* `rest` will include every field from `r` except for _id and _rev */ - const { _id, _rev, ...rest } = r; - return rest; - }); - - /* generate worksheet */ - const ws = XLSX.utils.json_to_sheet(aoo); - - /* generate workbook and export */ - const wb = XLSX.utils.book_new(); - XLSX.utils.book_append_sheet(wb, ws, "Sheet1"); - XLSX.writeFile(wb, "SheetJSPouch.xlsx"); - }); -} -``` - -
Complete Example (click to show) - -0) Download the "Working Version" from the Getting Started guide. - -The ZIP file should have `MD5` checksum `ac4da7cb0cade1be293ba222462f109c`: - -```bash -curl -LO https://github.com/nickcolley/getting-started-todo/archive/master.zip -md5sum master.zip || md5 master.zip -### the checksum will be printed -``` - -If the download is unavailable, a mirror is available at - - -1) Unzip the `master.zip` file and enter the folder: - -```bash -unzip master.zip -cd getting-started-todo-master -``` - -2) Edit `index.html` to reference the SheetJS library and add a button: - -```html title="index.html" - - - - - -
-``` - -3) Just before the end of `app.js`, add a `click` event listener: - -```js title="app.js" - if (remoteCouch) { - sync(); - } - - // highlight-start - document.getElementById("xport").addEventListener("click", function() { - db.allDocs({include_docs: true}, function(err, doc) { - const aoo = doc.rows.map(r => { - const { _id, _rev, ... rest } = r.doc; - return rest; - }); - const ws = XLSX.utils.json_to_sheet(aoo); - const wb = XLSX.utils.book_new(); XLSX.utils.book_append_sheet(wb, ws, "Sheet1"); - XLSX.writeFile(wb, "SheetJSPouch.xlsx"); - }); - }); - // highlight-end -})(); -``` - -4) Start a local web server: - -```bash -npx http-server . -``` - -Access `http://localhost:8080` from your browser. Add a few items and click -the "Export!" button to generate a new file. - -
diff --git a/docz/docs/03-demos/index.md b/docz/docs/03-demos/index.md index 0d3b287..9cba2e2 100644 --- a/docz/docs/03-demos/index.md +++ b/docz/docs/03-demos/index.md @@ -16,9 +16,9 @@ run in the web browser, demos will include interactive examples. - [`Web Workers`](/docs/demos/worker) - [`Typed Arrays for Machine Learning`](/docs/demos/ml) - [`Local File Access`](/docs/demos/localfile) -- [`LocalStorage and SessionStorage`](/docs/demos/database#localstorage-and-sessionstorage) -- [`Web SQL Database`](/docs/demos/database#websql) -- [`IndexedDB`](/docs/demos/database#indexeddb) +- [`LocalStorage and SessionStorage`](/docs/demos/data/storageapi) +- [`Web SQL Database`](/docs/demos/data/websql) +- [`IndexedDB`](/docs/demos/data/indexeddb) ### Web Frameworks @@ -94,8 +94,8 @@ run in the web browser, demos will include interactive examples. - [`NodeJS Server-Side Processing`](/docs/demos/server#nodejs) - [`Deno Server-Side Processing`](/docs/demos/server#deno) - [`Headless Automation`](/docs/demos/headless) -- [`Databases and Structured Data Stores`](/docs/demos/database) -- [`NoSQL and Unstructured Data Stores`](/docs/demos/nosql) +- [`Databases and Structured Data Stores`](/docs/demos/data) +- [`NoSQL and Unstructured Data Stores`](/docs/demos/data) - [`Legacy Internet Explorer`](/docs/demos/frontend/legacy#internet-explorer) ### Bundlers and Tooling diff --git a/docz/docs/06-solutions/01-input.md b/docz/docs/06-solutions/01-input.md index 41767a2..fec7199 100644 --- a/docz/docs/06-solutions/01-input.md +++ b/docz/docs/06-solutions/01-input.md @@ -733,7 +733,7 @@ creating worksheets from ML library exports (datasets stored in Typed Arrays).
Records from a database query (SQL or no-SQL) (click to show) -The [`database` demo](/docs/demos/database/) includes examples of working with +The [`data` demo](/docs/demos/data/) includes examples of working with databases and query results.
diff --git a/docz/docs/06-solutions/05-output.md b/docz/docs/06-solutions/05-output.md index 36e9745..a4966f4 100644 --- a/docz/docs/06-solutions/05-output.md +++ b/docz/docs/06-solutions/05-output.md @@ -655,7 +655,7 @@ generating typed arrays and tensors from worksheet data.
Populating a database (SQL or no-SQL) (click to show) -The [`database` demo](/docs/demos/database/) includes examples of working with databases and query results. +The [`data` demo](/docs/demos/data/) includes examples of working with databases and query results.
diff --git a/docz/docusaurus.config.js b/docz/docusaurus.config.js index aacb4e6..e07ffd0 100644 --- a/docz/docusaurus.config.js +++ b/docz/docusaurus.config.js @@ -198,6 +198,9 @@ const config = { /* hosting */ { from: '/docs/demos/hosting/dropbox', to: '/docs/demos/cloud/dropbox/' }, { from: '/docs/demos/hosting/github', to: '/docs/demos/cloud/github/' }, + /* data */ + { from: '/docs/demos/nosql', to: '/docs/demos/data/' }, + { from: '/docs/demos/database', to: '/docs/demos/data/' }, ] }] ]