From 5e3369edda5e1568c82e10faa2a9e613f051bc1a Mon Sep 17 00:00:00 2001 From: SheetJS Date: Mon, 30 Oct 2023 19:28:40 -0400 Subject: [PATCH] postgresql --- docz/docs/03-demos/07-data/10-sql.md | 10 +- docz/docs/03-demos/07-data/16-postgresql.md | 411 ++++++++++++++++++ docz/docs/08-api/index.md | 10 + .../01-ssf.md | 19 +- docz/docs/12-constellation/11-crc32.md | 16 + .../_category_.json | 2 +- .../index.md | 0 docz/static/postgresql/SheetJSPG.js | 114 +++++ 8 files changed, 566 insertions(+), 16 deletions(-) create mode 100644 docz/docs/03-demos/07-data/16-postgresql.md rename docz/docs/{09-constellation => 12-constellation}/01-ssf.md (71%) create mode 100644 docz/docs/12-constellation/11-crc32.md rename docz/docs/{09-constellation => 12-constellation}/_category_.json (62%) rename docz/docs/{09-constellation => 12-constellation}/index.md (100%) create mode 100644 docz/static/postgresql/SheetJSPG.js diff --git a/docz/docs/03-demos/07-data/10-sql.md b/docz/docs/03-demos/07-data/10-sql.md index 3137175..a8ee416 100644 --- a/docz/docs/03-demos/07-data/10-sql.md +++ b/docz/docs/03-demos/07-data/10-sql.md @@ -152,15 +152,7 @@ can be adapted to generate SQL statements for a variety of databases, including: **PostgreSQL** -The `pg` connector library was tested against the `generate_sql` output as-is. - -The `rows` property of a query result is an array of objects that plays nice -with `json_to_sheet`: - -```js -const aoa = await connection.query(`SELECT * FROM DataTable`).rows; -const worksheet = XLSX.utils.json_to_sheet(aoa); -``` +**[The exposition has been moved to a separate page.](/docs/demos/data/postgresql)** **MySQL / MariaDB** diff --git a/docz/docs/03-demos/07-data/16-postgresql.md b/docz/docs/03-demos/07-data/16-postgresql.md new file mode 100644 index 0000000..8bc8eee --- /dev/null +++ b/docz/docs/03-demos/07-data/16-postgresql.md @@ -0,0 +1,411 @@ +--- +title: Sheets with PostgreSQL +sidebar_label: PostgreSQL +pagination_prev: demos/desktop/index +pagination_next: demos/local/index +sidebar_custom_props: + sql: true +--- + +import current from '/version.js'; +import CodeBlock from '@theme/CodeBlock'; + +[PostgreSQL](https://postgresql.org/) (colloquially referenced as "Postgres") is +an open source object-relational database system. + +[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing +data from spreadsheets. + +This demo uses SheetJS to exchange data between spreadsheets and PostgreSQL +databases. We'll explore how to use save tables from a database to spreadsheets +and how to add data from spreadsheets into a database. + +:::caution pass + +**It is strongly recommended to use PostgreSQL with a query builder or ORM.** + +While it is possible to generate SQL statements directly, there are many subtle +details and pitfalls. Battle-tested solutions generally provide mitigations +against SQL injection and other vulnerabilities. + +::: + +:::note + +This demo was last tested on 2023 October 30 with PostgreSQL 16.0.1. The demo +uses `pg` connector module version 8.11.3. + +::: + +## Integration Details + +The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be +loaded in NodeJS scripts that connect to PostgreSQL databases. + +This demo uses the `pg` connector module[^1], but the same mechanics apply to +other PostgreSQL libraries. + +### Exporting Data + +`Client#query` returns a Promise that resolves to a result set. The `rows` +property of the result is an array of objects. + +The SheetJS `json_to_sheet` method[^2] can generate a worksheet object[^3] from +the array of objects: + +```js +const table_name = "Tabeller1"; // name of table + +/* fetch all data from specified table */ +const res = await client.query(`SELECT * FROM ${table_name}`); + +/* generate a SheetJS worksheet object from the data */ +const worksheet = XLSX.utils.json_to_sheet(res.rows); +``` + +A workbook object can be built from the worksheet using utility functions[^4]. +The workbook can be exported using the SheetJS `writeFile` method[^5]: + +```js +/* create a new workbook and add the worksheet */ +const wb = XLSX.utils.book_new(); +XLSX.utils.book_append_sheet(wb, worksheet, "Sheet1"); + +/* export workbook to XLSX */ +XLSX.writeFile(wb, "SheetJSPGExport.xlsx"); +``` + +### Importing Data + +The SheetJS `sheet_to_json` function[^6] takes a worksheet object and generates +an array of objects. + +Queries must be manually generated from the objects. Assuming the field names +in the object match the column headers, a loop can generate `INSERT` queries. + +:::warning pass + +**PostgreSQL does not allow parameterized queries with variable column names** + +```sql +INSERT INTO table_name (?) VALUES (?); +-- ---------------------^ variable column names are not valid +``` + +Queries are generated manually. To help prevent SQL injection vulnerabilities, +the `pg-format`[^7] module escapes identifiers and fields. + +::: + +```js +/* generate an array of arrays from the worksheet */ +const aoo = XLSX.utils.sheet_to_json(ws); + +const table_name = "Blatte1"; // name of table + +/* loop through the data rows */ +for(let row of aoo) { + + /* generate format helper strings */ + const ent = Object.entries(row); + const Istr = Array.from({length: entries.length}, ()=>"%I").join(", "); + const Lstr = Array.from({length: entries.length}, ()=>"%L").join(", "); + + /* generate INSERT statement */ + let query = format.withArray( + `INSERT INTO %I (${Istr}) VALUES(${Lstr})`, + [ table_name, ...ent.map(x => x[0]), ...ent.map(x => x[1]) ] + ); + + /* execute INSERT statement */ + await client.query(query); +} +``` + +### Creating a Table + +The array of objects can be scanned to determine column names and types. With +the names and types, a `CREATE TABLE` query can be written. + +
Implementation Details (click to show) + +The `aoo_to_pg_table` function: + +- scans each row object to determine column names and types +- drops and creates a new table with the determined column names and types +- loads the entire dataset into the new table + +```js +/* create table and load data given an array of objects and a PostgreSQL client */ +async function aoo_to_pg_table(client, aoo, table_name) { + /* define types that can be converted (e.g. boolean can be stored in float) */ + const T_FLOAT = ["float8", "boolean"]; + const T_BOOL = ["boolean"]; + + /* types is a map from column headers to Knex schema column type */ + const types = {}; + + /* names is an ordered list of the column header names */ + const names = []; + + /* loop across each row object */ + aoo.forEach(row => + /* Object.entries returns a row of [key, value] pairs */ + Object.entries(row).forEach(([k,v]) => { + + /* If this is first occurrence, mark unknown and append header to names */ + if(!types[k]) { types[k] = ""; names.push(k); } + + /* skip null and undefined values */ + if(v == null) return; + + /* check and resolve type */ + switch(typeof v) { + /* change type if it is empty or can be stored in a float */ + case "number": if(!types[k] || T_FLOAT.includes(types[k])) types[k] = "float8"; break; + /* change type if it is empty or can be stored in a boolean */ + case "boolean": if(!types[k] || T_BOOL.includes(types[k])) types[k] = "boolean"; break; + /* no other type can hold strings */ + case "string": types[k] = "text"; break; + default: types[k] = "text"; break; + } + }) + ); + + /* Delete table if it exists in the DB */ + const query = format("DROP TABLE IF EXISTS %I;", table_name); + await client.query(query); + + /* Create table */ + { + const entries = Object.entries(types); + const Istr = entries.map(e => format(`%I ${e[1]}`, e[0])).join(", "); + let query = format.withArray(`CREATE TABLE %I (${Istr});`, [ table_name ]); + await client.query(query); + } + + /* Insert each row */ + for(let row of aoo) { + const ent = Object.entries(row); + const Istr = Array.from({length: ent.length}, ()=>"%I").join(", "); + const Lstr = Array.from({length: ent.length}, ()=>"%L").join(", "); + let query = format.withArray( + `INSERT INTO %I (${Istr}) VALUES (${Lstr});`, + [ table_name, ...ent.map(x => x[0]), ...ent.map(x => x[1]) ] + ); + await client.query(query); + } + + return client; +} +``` + +
+ + +## Complete Example + +0) Install and start the PostgreSQL server. + +
Installation Notes (click to show) + +On macOS, install the `postgresql` formula with Homebrew: + +```bash +brew install postgresql@16 +``` + +The last few lines of the installer explains how to start the database: + +```text +Or, if you don't want/need a background service you can just run: +// highlight-next-line + LC_ALL="C" /usr/local/opt/postgresql@16/bin/postgres -D /usr/local/var/postgresql@16 +``` + +Run the command to start a local database instance. + +
+ +1) Drop any existing database with the name `SheetJSPG`: + +```bash +dropdb SheetJSPG +``` + +:::info pass + +If the server is running elsewhere, or if the username is different from the +current user, command-line flags can override the defaults. + +| Option | Explanation +|:--------------|:--------------------------| +| `-h HOSTNAME` | Name of the server | +| `-p PORT` | specifies the port number | +| `-U USERNAME` | specifies the username | + +::: + +2) Create an empty `SheetJSPG` database using the `createdb` command: + +```bash +createdb SheetJSPG +``` + +:::note pass + +`createdb` supports the same `-h`, `-p`, and `-U` flags as `dropdb`. + +::: + +### Connector Test + +3) Create a project folder: + +```bash +mkdir sheetjs-pg +cd sheetjs-pg +npm init -y +``` + +4) Install the `pg` connector module: + +```bash +npm i --save pg@8.11.3 +``` + +5) Save the following example codeblock to `PGTest.js`: + +```js title="PGTest.js" +const pg = require("pg"); +const client = new pg.Client({ + database:"SheetJSPG", +// highlight-start + host: "127.0.0.1", // localhost + port: 5432, + //user: "", + //password: "" +// highlight-end +}); +(async() => { + +await client.connect(); +const res = await client.query('SELECT $1::text as message', ['Hello world!']); +console.log(res.rows[0].message); // Hello world! +await client.end(); + +})(); +``` + +6) Edit the new `PGTest.js` script and modify the highlighted lines from the +codeblock to reflect the database deployment settings. + +The settings in the codeblock match the default configuration for macOS Homebrew +PostgreSQL server. For other deployments: + +- If the server is not running on your computer, set `host` and `port` to the +correct host name and port number. + +- If the server expects a different username and password, uncomment the `user` +and `password` lines and replace the values with the username and password. + +7) Run the script: + +```bash +node PGTest.js +``` + +It should print `Hello world!` + +:::caution pass + +If the output is not `Hello world!` or if there is an error, please report the +issue to the `pg` connector project for further diagnosis[^8] + +::: + +### Add SheetJS + +8) Install dependencies: + +{`\ +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz pg-format@1.0.4`} + + +9) Download [`SheetJSPG.js`](pathname:///postgresql/SheetJSPG.js): + +```bash +curl -LO https://docs.sheetjs.com/postgresql/SheetJSPG.js +``` + +This script will: +- read and parse the test file `pres.numbers` +- create a connection to the `SheetJSPG` database on a local PostgreSQL server +- load data from the first worksheet into a table with name `Presidents` +- disconnect and reconnect to the database +- dump data from the table `Presidents` +- export the dataset to `SheetJSPG.xlsx` + +10) Edit the `SheetJSPG.js` script. + +The script defines an `opts` object: + +```js title="SheetJSPG.js (configuration lines)" +const XLSX = require("xlsx"); +const opts = { + database:"SheetJSPG", +// highlight-start + host: "127.0.0.1", // localhost + port: 5432, + //user: "", + //password: "" +// highlight-end +}; +``` + +The settings in the codeblock match the default configuration for macOS Homebrew +PostgreSQL server. For other deployments: + +- If the server is not running on your computer, set `host` and `port` to the +correct host name and port number. + +- If the server expects a different username and password, uncomment the `user` +and `password` lines and replace the values with the username and password. + +11) Run the script: + +```bash +node SheetJSPG.js +``` + +12) Verify the result: + +- `SheetJSPG.xlsx` can be opened in a spreadsheet app or tested in the terminal + +```bash +npx xlsx-cli SheetJSPG.xlsx +``` + +- The database server can be queried using the `psql` command line tool. + +If the server is running locally, the command will be: + +```bash +psql SheetJSPG -c 'SELECT * FROM "Presidents";' +``` + +:::note pass + +`psql` supports the same `-h`, `-p`, and `-U` flags as `dropdb` and `createdb`. + +::: + + +[^1]: See [the official `pg` website](https://node-postgres.com/) for more info. +[^2]: See [`json_to_sheet` in "Utilities"](/docs/api/utilities/array#array-of-objects-input) +[^3]: See ["Sheet Objects"](/docs/csf/sheet) in "SheetJS Data Model" for more details. +[^4]: See ["Workbook Helpers" in "Utilities"](/docs/api/utilities/wb) for details on `book_new` and `book_append_sheet`. +[^5]: See [`writeFile` in "Writing Files"](/docs/api/write-options) +[^6]: See [`sheet_to_json` in "Utilities"](/docs/api/utilities/array#array-output) +[^7]: The [`pg-format`](https://npm.im/pg-format) package is available on the public NPM registry. Even though the project is marked as deprecated, the official [`pg` website still recommends `pg-format`](https://node-postgres.com/features/queries#parameterized-query:~:text=use%20pg%2Dformat%20package%20for%20handling%20escaping) +[^8]: The official [issue tracker](https://github.com/brianc/node-postgres/issues) is hosted on GitHub \ No newline at end of file diff --git a/docz/docs/08-api/index.md b/docz/docs/08-api/index.md index bc7c832..304ccc9 100644 --- a/docz/docs/08-api/index.md +++ b/docz/docs/08-api/index.md @@ -8,6 +8,16 @@ import current from '/version.js'; This section lists the functions defined in the library. +:::info pass + +**The ["SheetJS Data Model"](/docs/csf) section covers spreadsheet features.** + +The API functions primarily focus on conversions between data representations. + +::: + +## Library access + Using the ["Standalone" scripts](/docs/getting-started/installation/standalone), `XLSX` is added to the `window` or other `global` object. diff --git a/docz/docs/09-constellation/01-ssf.md b/docz/docs/12-constellation/01-ssf.md similarity index 71% rename from docz/docs/09-constellation/01-ssf.md rename to docz/docs/12-constellation/01-ssf.md index f64697f..8c51ab1 100644 --- a/docz/docs/09-constellation/01-ssf.md +++ b/docz/docs/12-constellation/01-ssf.md @@ -17,7 +17,7 @@ and `sheet_to_json`[^4]. The library is also available for standalone use on the SheetJS CDN[^5]. -The source code and project documentation is hosted on the SheetJS git server at +Source code and project documentation are hosted on the SheetJS git server at ## Live Demo @@ -30,21 +30,28 @@ particular format is not supported. function SheetJSSSF() { const [fmt, setFmt] = React.useState("#,##0"); const [val, setVal] = React.useState(7262); + const [text, setText] = React.useState(""); - const format = (fmt, val) => { try { - return XLSX.SSF.format(fmt, val); - } catch(e) { return "ERROR: " + (e && e.message || e); } }; + React.useEffect(() => { + let v = +val; + if(!isFinite(v)) return setText(`ERROR: ${val} is not a valid number!`); + try { + setText(XLSX.SSF.format(fmt, v)); + } catch(e) { setText("ERROR: " + (e && e.message || e)); } + }, [fmt, val]); + const goodstyle = { backgroundColor: "#C6EFCE", color: "#006100" }; + const badstyle = { backgroundColor: "#FFC7CE", color: "#9C0006" }; return (
Number Format setFmt(e.target.value)}/>
Number Value - setVal(+e.target.value)}/> + setVal(e.target.value)}/>
Formatted Text - {format(fmt, val)} + {text}
); } diff --git a/docz/docs/12-constellation/11-crc32.md b/docz/docs/12-constellation/11-crc32.md new file mode 100644 index 0000000..3ea7453 --- /dev/null +++ b/docz/docs/12-constellation/11-crc32.md @@ -0,0 +1,16 @@ +--- +title: CRC32 Checksum +hide_table_of_contents: true +--- + +The SheetJS `crc-32` library computes standard CRC32 and Castagnoli CRC32C +checksums. It is a core component in ZIP file processing, powering XLSX, XLSB, +ODS, NUMBERS, and other formats. + +The library is also available for standalone use on the SheetJS CDN[^1]. + +Source code and project documentation are hosted on the SheetJS git server at + + + +[^1]: See for more details. \ No newline at end of file diff --git a/docz/docs/09-constellation/_category_.json b/docz/docs/12-constellation/_category_.json similarity index 62% rename from docz/docs/09-constellation/_category_.json rename to docz/docs/12-constellation/_category_.json index d434a61..f8c7867 100644 --- a/docz/docs/09-constellation/_category_.json +++ b/docz/docs/12-constellation/_category_.json @@ -1,4 +1,4 @@ { "label": "Constellation", - "position": 9 + "position": 12 } \ No newline at end of file diff --git a/docz/docs/09-constellation/index.md b/docz/docs/12-constellation/index.md similarity index 100% rename from docz/docs/09-constellation/index.md rename to docz/docs/12-constellation/index.md diff --git a/docz/static/postgresql/SheetJSPG.js b/docz/static/postgresql/SheetJSPG.js new file mode 100644 index 0000000..916115e --- /dev/null +++ b/docz/static/postgresql/SheetJSPG.js @@ -0,0 +1,114 @@ +const pg = require("pg"), format = require("pg-format"); +const XLSX = require("xlsx"); +const opts = { + database:"SheetJSPG", + host: "127.0.0.1", // localhost + port: 5432, + //user: "", + //password: "" +}; + +/* create table and load data given an array of objects and a PostgreSQL client */ +async function aoo_to_pg_table(client, aoo, table_name) { + /* define types that can be converted (e.g. boolean can be stored in float) */ + const T_FLOAT = ["float8", "boolean"]; + const T_BOOL = ["boolean"]; + + /* types is a map from column headers to Knex schema column type */ + const types = {}; + + /* names is an ordered list of the column header names */ + const names = []; + + /* loop across each row object */ + aoo.forEach(row => + /* Object.entries returns a row of [key, value] pairs */ + Object.entries(row).forEach(([k,v]) => { + + /* If this is first occurrence, mark unknown and append header to names */ + if(!types[k]) { types[k] = ""; names.push(k); } + + /* skip null and undefined values */ + if(v == null) return; + + /* check and resolve type */ + switch(typeof v) { + /* change type if it is empty or can be stored in a float */ + case "number": if(!types[k] || T_FLOAT.includes(types[k])) types[k] = "float8"; break; + /* change type if it is empty or can be stored in a boolean */ + case "boolean": if(!types[k] || T_BOOL.includes(types[k])) types[k] = "boolean"; break; + /* no other type can hold strings */ + case "string": types[k] = "text"; break; + default: types[k] = "text"; break; + } + }) + ); + + /* Delete table if it exists in the DB */ + const query = format("DROP TABLE IF EXISTS %I;", table_name); + await client.query(query); + + /* Create table */ + { + const entries = Object.entries(types); + const Istr = entries.map(e => format(`%I ${e[1]}`, e[0])).join(", "); + let query = format.withArray(`CREATE TABLE %I (${Istr});`, [ table_name ]); + await client.query(query); + } + + /* Insert each row */ + for(let row of aoo) { + const ent = Object.entries(row); + const Istr = Array.from({length: ent.length}, ()=>"%I").join(", "); + const Lstr = Array.from({length: ent.length}, ()=>"%L").join(", "); + let query = format.withArray( + `INSERT INTO %I (${Istr}) VALUES (${Lstr});`, + [ table_name, ...ent.map(x => x[0]), ...ent.map(x => x[1]) ] + ); + await client.query(query); + } + + return client; +} + +(async() => { + +/* read file and get first worksheet */ +const oldwb = XLSX.readFile("pres.numbers"); +const oldws = oldwb.Sheets[oldwb.SheetNames[0]]; + +/* import data to postgres */ +let client = new pg.Client(opts); +try { + /* open connection to PostgreSQL database */ + await client.connect(); + + /* generate array of objects from worksheet */ + const aoo = XLSX.utils.sheet_to_json(oldws); + + /* create table and load data */ + await aoo_to_pg_table(client, aoo, "Presidents"); +} finally { + /* disconnect */ + await client.end(); +} + +/* export data to xlsx */ +client = new pg.Client(opts); +try { + /* open connection to PostgreSQL database */ + await client.connect(); + + /* fetch all data from specified table */ + const res = await client.query(format(`SELECT * FROM %I`, "Presidents")); + + /* export to file */ + const newws = XLSX.utils.json_to_sheet(res.rows); + const newwb = XLSX.utils.book_new(); + XLSX.utils.book_append_sheet(newwb, newws, "Export"); + XLSX.writeFile(newwb, "SheetJSPGExport.xlsx"); +} finally { + /* disconnect */ + await client.end(); +} +})();