From 65479660169753b0b34b580d85176ed070e6ddfd Mon Sep 17 00:00:00 2001 From: Asad Date: Wed, 20 Nov 2024 18:33:20 -0500 Subject: [PATCH] new-script: added SheetJSPG.js demo for doc --- .gitignore | 3 +- SheetJSPG.js | 173 +++++++++++++++++++++++++++++++++++++++++++++++++++ sql-types.js | 46 ++++---------- test.js | 2 +- 4 files changed, 189 insertions(+), 35 deletions(-) create mode 100644 SheetJSPG.js diff --git a/.gitignore b/.gitignore index f4c97ae..7157897 100644 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ node_modules/ .vscode/ .log :.prettierrc - +SheetJSPGExport.xlsx +pres.numbers \ No newline at end of file diff --git a/SheetJSPG.js b/SheetJSPG.js new file mode 100644 index 0000000..81ace3f --- /dev/null +++ b/SheetJSPG.js @@ -0,0 +1,173 @@ +const pg = require("pg"), format = require("pg-format"); +const XLSX = require("xlsx"); +const opts = { + database:"SheetJSPG", + host: "127.0.0.1", // localhost + port: 5432, + user: "postgres", + password: "7509" +}; + +function deduceType(cells) { + if (!cells || cells.length === 0) return 'text'; + + const nonEmptyCells = cells.filter(cell => cell && cell.v != null); + if (nonEmptyCells.length === 0) return 'text'; + + // Check for dates by looking at both cell type and formatted value + const isDateCell = cell => cell?.t === 'd' || (cell?.t === 'n' && cell.w && /\d{4}-\d{2}-\d{2}|\d{1,2}\/\d{1,2}\/\d{4}|\d{2}-[A-Za-z]{3}-\d{4}|[A-Za-z]{3}-\d{2}|\d{1,2}-[A-Za-z]{3}/.test(cell.w)); + + if (nonEmptyCells.some(isDateCell)) { return 'date'; } + + // Check for booleans + const allBooleans = nonEmptyCells.every(cell => cell.t === 'b'); + if (allBooleans) { return 'boolean'; } + + // Check for numbers + const allNumbers = nonEmptyCells.every(cell => cell.t === 'n' || (cell.t === 's' && !isNaN(cell.v.replace(/[,$\s%()]/g, '')))); + + if (allNumbers) { + const numbers = nonEmptyCells.map(cell => { + if (cell.t === 'n') return cell.v; + return parseFloat(cell.v.replace(/[,$\s%()]/g, '')); + }); + + const needsPrecision = numbers.some(num => { + const str = num.toString(); + return str.includes('e') || + (str.includes('.') && str.split('.')[1].length > 6) || + Math.abs(num) > 1e15; + }); + + return needsPrecision ? 'numeric' : 'double precision'; + } + return 'text'; // default to string type +} + +function parseValue(cell, type) { + if (!cell || cell.v == null) return null; + + switch (type) { + case 'date': + if (cell.t === 'd') { return cell.v.toISOString().split('T')[0]; } + if (cell.t === 'n') { + const date = new Date((cell.v - 25569) * 86400 * 1000); + return date.toISOString().split('T')[0]; + } + return null; + + case 'numeric': + case 'double precision': + if (cell.t === 'n') return cell.v; + if (cell.t === 's') { + const cleaned = cell.v.replace(/[,$\s%()]/g, ''); + if (!isNaN(cleaned)) return parseFloat(cleaned); + } + return null; + + case 'boolean': + return cell.t === 'b' ? cell.v : null; + + default: + return String(cell.v); + } +} + +/* create table and load data given a worksheet and a PostgreSQL client */ +async function sheet_to_pg_table(client, worksheet, tableName) { + if (!worksheet['!ref']) return; + + const range = XLSX.utils.decode_range(worksheet['!ref']); + + /* Extract headers from first row, clean names for PostgreSQL */ + const headers = []; + for (let col = range.s.c; col <= range.e.c; col++) { + const cellAddress = XLSX.utils.encode_cell({ r: range.s.r, c: col }); + const cell = worksheet[cellAddress]; + const headerValue = cell ? String(cell.v).replace(/[^a-zA-Z0-9_]/g, '_') : `column_${col + 1}`; + headers.push(headerValue.toLowerCase()); + } + + /* Group cell values by column for type deduction */ + const columnValues = headers.map(() => []); + for (let row = range.s.r + 1; row <= range.e.r; row++) { + for (let col = range.s.c; col <= range.e.c; col++) { + const cellAddress = XLSX.utils.encode_cell({ r: row, c: col }); + const cell = worksheet[cellAddress]; + columnValues[col].push(cell); + } + } + + /* Deduce PostgreSQL type for each column */ + const types = {}; + headers.forEach((header, idx) => { + types[header] = deduceType(columnValues[idx]); + }); + + /* Delete table if it exists in the DB */ + await client.query(format('DROP TABLE IF EXISTS %I', tableName)); + + /* Create table */ + const createTableSQL = format( + 'CREATE TABLE %I (%s)', + tableName, + headers.map(header => format('%I %s', header, types[header])).join(', ') + ); + await client.query(createTableSQL); + + /* Insert data row by row */ + for (let row = range.s.r + 1; row <= range.e.r; row++) { + const values = headers.map((header, col) => { + const cellAddress = XLSX.utils.encode_cell({ r: row, c: col }); + const cell = worksheet[cellAddress]; + return parseValue(cell, types[header]); + }); + + const insertSQL = format( + 'INSERT INTO %I (%s) VALUES (%s)', + tableName, + headers.map(h => format('%I', h)).join(', '), + values.map(() => '%L').join(', ') + ); + await client.query(format(insertSQL, ...values)); + } +} + +(async() => { + +/* read file and get first worksheet */ +const oldwb = XLSX.readFile("pres.numbers"); +const oldws = oldwb.Sheets[oldwb.SheetNames[0]]; + +/* import data to postgres */ +let client = new pg.Client(opts); +try { + /* open connection to PostgreSQL database */ + await client.connect(); + + /* create table and load data given a worksheet */ + await sheet_to_pg_table(client, oldws, "Presidents"); +} finally { + /* disconnect */ + await client.end(); +} + +/* export data to xlsx */ +client = new pg.Client(opts); +try { + /* open connection to PostgreSQL database */ + await client.connect(); + + /* fetch all data from specified table */ + const res = await client.query(format(`SELECT * FROM %I`, "Presidents")); + + /* export to file */ + const newws = XLSX.utils.json_to_sheet(res.rows); + const newwb = XLSX.utils.book_new(); + XLSX.utils.book_append_sheet(newwb, newws, "Export"); + XLSX.writeFile(newwb, "SheetJSPGExport.xlsx"); +} finally { + /* disconnect */ + await client.end(); +} +})(); \ No newline at end of file diff --git a/sql-types.js b/sql-types.js index 633855b..b9f8158 100644 --- a/sql-types.js +++ b/sql-types.js @@ -8,36 +8,16 @@ function deduceType(cells) { if (nonEmptyCells.length === 0) return 'text'; // Check for dates by looking at both cell type and formatted value - const isDateCell = cell => { - if (!cell) return false; - if (cell.t === 'd') return true; - // Check if it's a numeric cell with date formatting - if (cell.t === 'n' && cell.w) { - // Common date formats in Excel - const datePatterns = [ - /\d{4}-\d{2}-\d{2}/, // 2024-01-01 - /\d{1,2}\/\d{1,2}\/\d{4}/, // 1/1/2024 - /\d{2}-[A-Za-z]{3}-\d{4}/, // 01-Jan-2024 - /[A-Za-z]{3}-\d{2}/, // Jan-24 - /\d{1,2}-[A-Za-z]{3}/ // 1-Jan - ]; - return datePatterns.some(pattern => pattern.test(cell.w)); - } - return false; - }; + const isDateCell = cell => cell?.t === 'd' || (cell?.t === 'n' && cell.w && /\d{4}-\d{2}-\d{2}|\d{1,2}\/\d{1,2}\/\d{4}|\d{2}-[A-Za-z]{3}-\d{4}|[A-Za-z]{3}-\d{2}|\d{1,2}-[A-Za-z]{3}/.test(cell.w)); - if (nonEmptyCells.some(isDateCell)) { - return 'date'; - } + if (nonEmptyCells.some(isDateCell)) { return 'date'; } // Check for booleans const allBooleans = nonEmptyCells.every(cell => cell.t === 'b'); if (allBooleans) { return 'boolean'; } // Check for numbers - const allNumbers = nonEmptyCells.every(cell => - cell.t === 'n' || (cell.t === 's' && !isNaN(cell.v.replace(/[,$\s%()]/g, ''))) - ); + const allNumbers = nonEmptyCells.every(cell => cell.t === 'n' || (cell.t === 's' && !isNaN(cell.v.replace(/[,$\s%()]/g, '')))); if (allNumbers) { const numbers = nonEmptyCells.map(cell => { @@ -54,7 +34,6 @@ function deduceType(cells) { return needsPrecision ? 'numeric' : 'double precision'; } - return 'text'; // default to string type } @@ -63,9 +42,7 @@ function parseValue(cell, type) { switch (type) { case 'date': - if (cell.t === 'd') { - return cell.v.toISOString().split('T')[0]; - } + if (cell.t === 'd') { return cell.v.toISOString().split('T')[0]; } if (cell.t === 'n') { const date = new Date((cell.v - 25569) * 86400 * 1000); return date.toISOString().split('T')[0]; @@ -89,12 +66,13 @@ function parseValue(cell, type) { } } -async function sheet_to_pg_table(client, worksheet, tableName) { +/* create table and load data given a worksheet and a PostgreSQL client */ +async function sheet_to_pg_table(client, worksheet, tableName) { if (!worksheet['!ref']) return; const range = XLSX.utils.decode_range(worksheet['!ref']); - // Extract headers from first row, clean names for PostgreSQL + /* Extract headers from first row, clean names for PostgreSQL */ const headers = []; for (let col = range.s.c; col <= range.e.c; col++) { const cellAddress = XLSX.utils.encode_cell({ r: range.s.r, c: col }); @@ -103,7 +81,7 @@ async function sheet_to_pg_table(client, worksheet, tableName) { headers.push(headerValue.toLowerCase()); } - // Group cell values by column for type deduction + /* Group cell values by column for type deduction */ const columnValues = headers.map(() => []); for (let row = range.s.r + 1; row <= range.e.r; row++) { for (let col = range.s.c; col <= range.e.c; col++) { @@ -113,14 +91,16 @@ async function sheet_to_pg_table(client, worksheet, tableName) { } } - // Deduc PostgreSQL type for each column + /* Deduce PostgreSQL type for each column */ const types = {}; headers.forEach((header, idx) => { types[header] = deduceType(columnValues[idx]); }); - // Create table + /* Delete table if it exists in the DB */ await client.query(format('DROP TABLE IF EXISTS %I', tableName)); + + /* Create table */ const createTableSQL = format( 'CREATE TABLE %I (%s)', tableName, @@ -128,7 +108,7 @@ async function sheet_to_pg_table(client, worksheet, tableName) { ); await client.query(createTableSQL); - // Insert data + /* Insert data row by row */ for (let row = range.s.r + 1; row <= range.e.r; row++) { const values = headers.map((header, col) => { const cellAddress = XLSX.utils.encode_cell({ r: row, c: col }); diff --git a/test.js b/test.js index 9c33d07..20e1b7c 100644 --- a/test.js +++ b/test.js @@ -57,7 +57,7 @@ async function runAllTests() { await readExcelAndTest('date_formats.xlsx', 'test_dates'); await readExcelAndTest('special_values.xlsx', 'test_special_values'); await readExcelAndTest('precision.xlsx', 'test_precision'); - /* await readExcelAndTest('string_formats.xlsx', 'test_string_formats'); */ + await readExcelAndTest('string_formats.xlsx', 'test_string_formats'); await readExcelAndTest('boolean_formats.xlsx', 'test_boolean_formats'); console.log('\nAll tests completed successfully');