new-script: added SheetJSPG.js demo for doc

This commit is contained in:
Asad 2024-11-20 18:33:20 -05:00
parent d2829ec554
commit 6547966016
4 changed files with 189 additions and 35 deletions

3
.gitignore vendored

@ -3,4 +3,5 @@ node_modules/
.vscode/
.log
:.prettierrc
SheetJSPGExport.xlsx
pres.numbers

173
SheetJSPG.js Normal file

@ -0,0 +1,173 @@
const pg = require("pg"), format = require("pg-format");
const XLSX = require("xlsx");
const opts = {
database:"SheetJSPG",
host: "127.0.0.1", // localhost
port: 5432,
user: "postgres",
password: "7509"
};
function deduceType(cells) {
if (!cells || cells.length === 0) return 'text';
const nonEmptyCells = cells.filter(cell => cell && cell.v != null);
if (nonEmptyCells.length === 0) return 'text';
// Check for dates by looking at both cell type and formatted value
const isDateCell = cell => cell?.t === 'd' || (cell?.t === 'n' && cell.w && /\d{4}-\d{2}-\d{2}|\d{1,2}\/\d{1,2}\/\d{4}|\d{2}-[A-Za-z]{3}-\d{4}|[A-Za-z]{3}-\d{2}|\d{1,2}-[A-Za-z]{3}/.test(cell.w));
if (nonEmptyCells.some(isDateCell)) { return 'date'; }
// Check for booleans
const allBooleans = nonEmptyCells.every(cell => cell.t === 'b');
if (allBooleans) { return 'boolean'; }
// Check for numbers
const allNumbers = nonEmptyCells.every(cell => cell.t === 'n' || (cell.t === 's' && !isNaN(cell.v.replace(/[,$\s%()]/g, ''))));
if (allNumbers) {
const numbers = nonEmptyCells.map(cell => {
if (cell.t === 'n') return cell.v;
return parseFloat(cell.v.replace(/[,$\s%()]/g, ''));
});
const needsPrecision = numbers.some(num => {
const str = num.toString();
return str.includes('e') ||
(str.includes('.') && str.split('.')[1].length > 6) ||
Math.abs(num) > 1e15;
});
return needsPrecision ? 'numeric' : 'double precision';
}
return 'text'; // default to string type
}
function parseValue(cell, type) {
if (!cell || cell.v == null) return null;
switch (type) {
case 'date':
if (cell.t === 'd') { return cell.v.toISOString().split('T')[0]; }
if (cell.t === 'n') {
const date = new Date((cell.v - 25569) * 86400 * 1000);
return date.toISOString().split('T')[0];
}
return null;
case 'numeric':
case 'double precision':
if (cell.t === 'n') return cell.v;
if (cell.t === 's') {
const cleaned = cell.v.replace(/[,$\s%()]/g, '');
if (!isNaN(cleaned)) return parseFloat(cleaned);
}
return null;
case 'boolean':
return cell.t === 'b' ? cell.v : null;
default:
return String(cell.v);
}
}
/* create table and load data given a worksheet and a PostgreSQL client */
async function sheet_to_pg_table(client, worksheet, tableName) {
if (!worksheet['!ref']) return;
const range = XLSX.utils.decode_range(worksheet['!ref']);
/* Extract headers from first row, clean names for PostgreSQL */
const headers = [];
for (let col = range.s.c; col <= range.e.c; col++) {
const cellAddress = XLSX.utils.encode_cell({ r: range.s.r, c: col });
const cell = worksheet[cellAddress];
const headerValue = cell ? String(cell.v).replace(/[^a-zA-Z0-9_]/g, '_') : `column_${col + 1}`;
headers.push(headerValue.toLowerCase());
}
/* Group cell values by column for type deduction */
const columnValues = headers.map(() => []);
for (let row = range.s.r + 1; row <= range.e.r; row++) {
for (let col = range.s.c; col <= range.e.c; col++) {
const cellAddress = XLSX.utils.encode_cell({ r: row, c: col });
const cell = worksheet[cellAddress];
columnValues[col].push(cell);
}
}
/* Deduce PostgreSQL type for each column */
const types = {};
headers.forEach((header, idx) => {
types[header] = deduceType(columnValues[idx]);
});
/* Delete table if it exists in the DB */
await client.query(format('DROP TABLE IF EXISTS %I', tableName));
/* Create table */
const createTableSQL = format(
'CREATE TABLE %I (%s)',
tableName,
headers.map(header => format('%I %s', header, types[header])).join(', ')
);
await client.query(createTableSQL);
/* Insert data row by row */
for (let row = range.s.r + 1; row <= range.e.r; row++) {
const values = headers.map((header, col) => {
const cellAddress = XLSX.utils.encode_cell({ r: row, c: col });
const cell = worksheet[cellAddress];
return parseValue(cell, types[header]);
});
const insertSQL = format(
'INSERT INTO %I (%s) VALUES (%s)',
tableName,
headers.map(h => format('%I', h)).join(', '),
values.map(() => '%L').join(', ')
);
await client.query(format(insertSQL, ...values));
}
}
(async() => {
/* read file and get first worksheet */
const oldwb = XLSX.readFile("pres.numbers");
const oldws = oldwb.Sheets[oldwb.SheetNames[0]];
/* import data to postgres */
let client = new pg.Client(opts);
try {
/* open connection to PostgreSQL database */
await client.connect();
/* create table and load data given a worksheet */
await sheet_to_pg_table(client, oldws, "Presidents");
} finally {
/* disconnect */
await client.end();
}
/* export data to xlsx */
client = new pg.Client(opts);
try {
/* open connection to PostgreSQL database */
await client.connect();
/* fetch all data from specified table */
const res = await client.query(format(`SELECT * FROM %I`, "Presidents"));
/* export to file */
const newws = XLSX.utils.json_to_sheet(res.rows);
const newwb = XLSX.utils.book_new();
XLSX.utils.book_append_sheet(newwb, newws, "Export");
XLSX.writeFile(newwb, "SheetJSPGExport.xlsx");
} finally {
/* disconnect */
await client.end();
}
})();

@ -8,36 +8,16 @@ function deduceType(cells) {
if (nonEmptyCells.length === 0) return 'text';
// Check for dates by looking at both cell type and formatted value
const isDateCell = cell => {
if (!cell) return false;
if (cell.t === 'd') return true;
// Check if it's a numeric cell with date formatting
if (cell.t === 'n' && cell.w) {
// Common date formats in Excel
const datePatterns = [
/\d{4}-\d{2}-\d{2}/, // 2024-01-01
/\d{1,2}\/\d{1,2}\/\d{4}/, // 1/1/2024
/\d{2}-[A-Za-z]{3}-\d{4}/, // 01-Jan-2024
/[A-Za-z]{3}-\d{2}/, // Jan-24
/\d{1,2}-[A-Za-z]{3}/ // 1-Jan
];
return datePatterns.some(pattern => pattern.test(cell.w));
}
return false;
};
const isDateCell = cell => cell?.t === 'd' || (cell?.t === 'n' && cell.w && /\d{4}-\d{2}-\d{2}|\d{1,2}\/\d{1,2}\/\d{4}|\d{2}-[A-Za-z]{3}-\d{4}|[A-Za-z]{3}-\d{2}|\d{1,2}-[A-Za-z]{3}/.test(cell.w));
if (nonEmptyCells.some(isDateCell)) {
return 'date';
}
if (nonEmptyCells.some(isDateCell)) { return 'date'; }
// Check for booleans
const allBooleans = nonEmptyCells.every(cell => cell.t === 'b');
if (allBooleans) { return 'boolean'; }
// Check for numbers
const allNumbers = nonEmptyCells.every(cell =>
cell.t === 'n' || (cell.t === 's' && !isNaN(cell.v.replace(/[,$\s%()]/g, '')))
);
const allNumbers = nonEmptyCells.every(cell => cell.t === 'n' || (cell.t === 's' && !isNaN(cell.v.replace(/[,$\s%()]/g, ''))));
if (allNumbers) {
const numbers = nonEmptyCells.map(cell => {
@ -54,7 +34,6 @@ function deduceType(cells) {
return needsPrecision ? 'numeric' : 'double precision';
}
return 'text'; // default to string type
}
@ -63,9 +42,7 @@ function parseValue(cell, type) {
switch (type) {
case 'date':
if (cell.t === 'd') {
return cell.v.toISOString().split('T')[0];
}
if (cell.t === 'd') { return cell.v.toISOString().split('T')[0]; }
if (cell.t === 'n') {
const date = new Date((cell.v - 25569) * 86400 * 1000);
return date.toISOString().split('T')[0];
@ -89,12 +66,13 @@ function parseValue(cell, type) {
}
}
async function sheet_to_pg_table(client, worksheet, tableName) {
/* create table and load data given a worksheet and a PostgreSQL client */
async function sheet_to_pg_table(client, worksheet, tableName) {
if (!worksheet['!ref']) return;
const range = XLSX.utils.decode_range(worksheet['!ref']);
// Extract headers from first row, clean names for PostgreSQL
/* Extract headers from first row, clean names for PostgreSQL */
const headers = [];
for (let col = range.s.c; col <= range.e.c; col++) {
const cellAddress = XLSX.utils.encode_cell({ r: range.s.r, c: col });
@ -103,7 +81,7 @@ async function sheet_to_pg_table(client, worksheet, tableName) {
headers.push(headerValue.toLowerCase());
}
// Group cell values by column for type deduction
/* Group cell values by column for type deduction */
const columnValues = headers.map(() => []);
for (let row = range.s.r + 1; row <= range.e.r; row++) {
for (let col = range.s.c; col <= range.e.c; col++) {
@ -113,14 +91,16 @@ async function sheet_to_pg_table(client, worksheet, tableName) {
}
}
// Deduc PostgreSQL type for each column
/* Deduce PostgreSQL type for each column */
const types = {};
headers.forEach((header, idx) => {
types[header] = deduceType(columnValues[idx]);
});
// Create table
/* Delete table if it exists in the DB */
await client.query(format('DROP TABLE IF EXISTS %I', tableName));
/* Create table */
const createTableSQL = format(
'CREATE TABLE %I (%s)',
tableName,
@ -128,7 +108,7 @@ async function sheet_to_pg_table(client, worksheet, tableName) {
);
await client.query(createTableSQL);
// Insert data
/* Insert data row by row */
for (let row = range.s.r + 1; row <= range.e.r; row++) {
const values = headers.map((header, col) => {
const cellAddress = XLSX.utils.encode_cell({ r: row, c: col });

@ -57,7 +57,7 @@ async function runAllTests() {
await readExcelAndTest('date_formats.xlsx', 'test_dates');
await readExcelAndTest('special_values.xlsx', 'test_special_values');
await readExcelAndTest('precision.xlsx', 'test_precision');
/* await readExcelAndTest('string_formats.xlsx', 'test_string_formats'); */
await readExcelAndTest('string_formats.xlsx', 'test_string_formats');
await readExcelAndTest('boolean_formats.xlsx', 'test_boolean_formats');
console.log('\nAll tests completed successfully');