Merge pull request 'demo-postgresql-improved-type-detection' (#1) from demo-postgresql-improved-type-detection into main
Reviewed-on: #1
This commit is contained in:
commit
367c94e7b4
2
.gitignore
vendored
2
.gitignore
vendored
@ -4,4 +4,4 @@ node_modules/
|
||||
.log
|
||||
:.prettierrc
|
||||
SheetJSPGExport.xlsx
|
||||
pres.numbers
|
||||
pres.numbers
|
||||
|
173
SheetJSPG.js
173
SheetJSPG.js
@ -1,173 +0,0 @@
|
||||
const pg = require("pg"), format = require("pg-format");
|
||||
const XLSX = require("xlsx");
|
||||
const opts = {
|
||||
database:"SheetJSPG",
|
||||
host: "127.0.0.1", // localhost
|
||||
port: 5432,
|
||||
user: "postgres",
|
||||
password: "7509"
|
||||
};
|
||||
|
||||
function deduceType(cells) {
|
||||
if (!cells || cells.length === 0) return 'text';
|
||||
|
||||
const nonEmptyCells = cells.filter(cell => cell && cell.v != null);
|
||||
if (nonEmptyCells.length === 0) return 'text';
|
||||
|
||||
// Check for dates by looking at both cell type and formatted value
|
||||
const isDateCell = cell => cell?.t === 'd' || (cell?.t === 'n' && cell.w && /\d{4}-\d{2}-\d{2}|\d{1,2}\/\d{1,2}\/\d{4}|\d{2}-[A-Za-z]{3}-\d{4}|[A-Za-z]{3}-\d{2}|\d{1,2}-[A-Za-z]{3}/.test(cell.w));
|
||||
|
||||
if (nonEmptyCells.some(isDateCell)) { return 'date'; }
|
||||
|
||||
// Check for booleans
|
||||
const allBooleans = nonEmptyCells.every(cell => cell.t === 'b');
|
||||
if (allBooleans) { return 'boolean'; }
|
||||
|
||||
// Check for numbers
|
||||
const allNumbers = nonEmptyCells.every(cell => cell.t === 'n' || (cell.t === 's' && !isNaN(cell.v.replace(/[,$\s%()]/g, ''))));
|
||||
|
||||
if (allNumbers) {
|
||||
const numbers = nonEmptyCells.map(cell => {
|
||||
if (cell.t === 'n') return cell.v;
|
||||
return parseFloat(cell.v.replace(/[,$\s%()]/g, ''));
|
||||
});
|
||||
|
||||
const needsPrecision = numbers.some(num => {
|
||||
const str = num.toString();
|
||||
return str.includes('e') ||
|
||||
(str.includes('.') && str.split('.')[1].length > 6) ||
|
||||
Math.abs(num) > 1e15;
|
||||
});
|
||||
|
||||
return needsPrecision ? 'numeric' : 'double precision';
|
||||
}
|
||||
return 'text'; // default to string type
|
||||
}
|
||||
|
||||
function parseValue(cell, type) {
|
||||
if (!cell || cell.v == null) return null;
|
||||
|
||||
switch (type) {
|
||||
case 'date':
|
||||
if (cell.t === 'd') { return cell.v.toISOString().split('T')[0]; }
|
||||
if (cell.t === 'n') {
|
||||
const date = new Date((cell.v - 25569) * 86400 * 1000);
|
||||
return date.toISOString().split('T')[0];
|
||||
}
|
||||
return null;
|
||||
|
||||
case 'numeric':
|
||||
case 'double precision':
|
||||
if (cell.t === 'n') return cell.v;
|
||||
if (cell.t === 's') {
|
||||
const cleaned = cell.v.replace(/[,$\s%()]/g, '');
|
||||
if (!isNaN(cleaned)) return parseFloat(cleaned);
|
||||
}
|
||||
return null;
|
||||
|
||||
case 'boolean':
|
||||
return cell.t === 'b' ? cell.v : null;
|
||||
|
||||
default:
|
||||
return String(cell.v);
|
||||
}
|
||||
}
|
||||
|
||||
/* create table and load data given a worksheet and a PostgreSQL client */
|
||||
async function sheet_to_pg_table(client, worksheet, tableName) {
|
||||
if (!worksheet['!ref']) return;
|
||||
|
||||
const range = XLSX.utils.decode_range(worksheet['!ref']);
|
||||
|
||||
/* Extract headers from first row, clean names for PostgreSQL */
|
||||
const headers = [];
|
||||
for (let col = range.s.c; col <= range.e.c; col++) {
|
||||
const cellAddress = XLSX.utils.encode_cell({ r: range.s.r, c: col });
|
||||
const cell = worksheet[cellAddress];
|
||||
const headerValue = cell ? String(cell.v).replace(/[^a-zA-Z0-9_]/g, '_') : `column_${col + 1}`;
|
||||
headers.push(headerValue.toLowerCase());
|
||||
}
|
||||
|
||||
/* Group cell values by column for type deduction */
|
||||
const columnValues = headers.map(() => []);
|
||||
for (let row = range.s.r + 1; row <= range.e.r; row++) {
|
||||
for (let col = range.s.c; col <= range.e.c; col++) {
|
||||
const cellAddress = XLSX.utils.encode_cell({ r: row, c: col });
|
||||
const cell = worksheet[cellAddress];
|
||||
columnValues[col].push(cell);
|
||||
}
|
||||
}
|
||||
|
||||
/* Deduce PostgreSQL type for each column */
|
||||
const types = {};
|
||||
headers.forEach((header, idx) => {
|
||||
types[header] = deduceType(columnValues[idx]);
|
||||
});
|
||||
|
||||
/* Delete table if it exists in the DB */
|
||||
await client.query(format('DROP TABLE IF EXISTS %I', tableName));
|
||||
|
||||
/* Create table */
|
||||
const createTableSQL = format(
|
||||
'CREATE TABLE %I (%s)',
|
||||
tableName,
|
||||
headers.map(header => format('%I %s', header, types[header])).join(', ')
|
||||
);
|
||||
await client.query(createTableSQL);
|
||||
|
||||
/* Insert data row by row */
|
||||
for (let row = range.s.r + 1; row <= range.e.r; row++) {
|
||||
const values = headers.map((header, col) => {
|
||||
const cellAddress = XLSX.utils.encode_cell({ r: row, c: col });
|
||||
const cell = worksheet[cellAddress];
|
||||
return parseValue(cell, types[header]);
|
||||
});
|
||||
|
||||
const insertSQL = format(
|
||||
'INSERT INTO %I (%s) VALUES (%s)',
|
||||
tableName,
|
||||
headers.map(h => format('%I', h)).join(', '),
|
||||
values.map(() => '%L').join(', ')
|
||||
);
|
||||
await client.query(format(insertSQL, ...values));
|
||||
}
|
||||
}
|
||||
|
||||
(async() => {
|
||||
|
||||
/* read file and get first worksheet */
|
||||
const oldwb = XLSX.readFile("pres.numbers");
|
||||
const oldws = oldwb.Sheets[oldwb.SheetNames[0]];
|
||||
|
||||
/* import data to postgres */
|
||||
let client = new pg.Client(opts);
|
||||
try {
|
||||
/* open connection to PostgreSQL database */
|
||||
await client.connect();
|
||||
|
||||
/* create table and load data given a worksheet */
|
||||
await sheet_to_pg_table(client, oldws, "Presidents");
|
||||
} finally {
|
||||
/* disconnect */
|
||||
await client.end();
|
||||
}
|
||||
|
||||
/* export data to xlsx */
|
||||
client = new pg.Client(opts);
|
||||
try {
|
||||
/* open connection to PostgreSQL database */
|
||||
await client.connect();
|
||||
|
||||
/* fetch all data from specified table */
|
||||
const res = await client.query(format(`SELECT * FROM %I`, "Presidents"));
|
||||
|
||||
/* export to file */
|
||||
const newws = XLSX.utils.json_to_sheet(res.rows);
|
||||
const newwb = XLSX.utils.book_new();
|
||||
XLSX.utils.book_append_sheet(newwb, newws, "Export");
|
||||
XLSX.writeFile(newwb, "SheetJSPGExport.xlsx");
|
||||
} finally {
|
||||
/* disconnect */
|
||||
await client.end();
|
||||
}
|
||||
})();
|
6
test.js
6
test.js
@ -1,6 +1,6 @@
|
||||
const XLSX = require('xlsx');
|
||||
const { Client } = require('pg');
|
||||
const { sheet_to_pg_table } = require('./sql-types');
|
||||
const { sheet_to_pg_table } = require('./index');
|
||||
const path = require('path');
|
||||
|
||||
async function readExcelAndTest(filename, tableName) {
|
||||
@ -19,7 +19,7 @@ async function readExcelAndTest(filename, tableName) {
|
||||
const client = new Client({
|
||||
host: 'localhost',
|
||||
database: 'SheetJSPG',
|
||||
user: 'postgres',
|
||||
user: 'dark',
|
||||
password: '7509'
|
||||
});
|
||||
|
||||
@ -67,4 +67,4 @@ async function runAllTests() {
|
||||
}
|
||||
}
|
||||
|
||||
runAllTests().catch(console.error);
|
||||
runAllTests().catch(console.error);
|
||||
|
Binary file not shown.
Binary file not shown.
@ -2,11 +2,6 @@ import pandas as pd
|
||||
from datetime import datetime
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
def create_test_directory():
|
||||
"""Create a directory for test files if it doesn't exist"""
|
||||
if not os.path.exists('test_files'):
|
||||
os.makedirs('test_files')
|
||||
|
||||
def generate_number_formats_test():
|
||||
"""Test Case 1: Common spreadsheet number formats"""
|
||||
@ -22,8 +17,7 @@ def generate_number_formats_test():
|
||||
]
|
||||
})
|
||||
|
||||
# Create Excel writer with xlsxwriter engine
|
||||
writer = pd.ExcelWriter('test_files/number_formats.xlsx', engine='xlsxwriter')
|
||||
writer = pd.ExcelWriter('number_formats.xlsx', engine='xlsxwriter')
|
||||
df.to_excel(writer, index=False, sheet_name='Sheet1')
|
||||
|
||||
# Get workbook and worksheet objects
|
||||
@ -55,7 +49,7 @@ def generate_date_formats_test():
|
||||
]
|
||||
})
|
||||
|
||||
writer = pd.ExcelWriter('test_files/date_formats.xlsx', engine='xlsxwriter')
|
||||
writer = pd.ExcelWriter('date_formats.xlsx', engine='xlsxwriter')
|
||||
df.to_excel(writer, index=False, sheet_name='Sheet1')
|
||||
|
||||
workbook = writer.book
|
||||
@ -98,7 +92,7 @@ def generate_special_values_test():
|
||||
]
|
||||
})
|
||||
|
||||
writer = pd.ExcelWriter('test_files/special_values.xlsx', engine='xlsxwriter')
|
||||
writer = pd.ExcelWriter('special_values.xlsx', engine='xlsxwriter')
|
||||
df.to_excel(writer, index=False, sheet_name='Sheet1')
|
||||
writer.close()
|
||||
|
||||
@ -117,7 +111,7 @@ def generate_precision_test():
|
||||
]
|
||||
})
|
||||
|
||||
writer = pd.ExcelWriter('test_files/precision.xlsx', engine='xlsxwriter')
|
||||
writer = pd.ExcelWriter('precision.xlsx', engine='xlsxwriter')
|
||||
df.to_excel(writer, index=False, sheet_name='Sheet1')
|
||||
|
||||
workbook = writer.book
|
||||
@ -156,7 +150,7 @@ def generate_string_formats_test():
|
||||
]
|
||||
})
|
||||
|
||||
writer = pd.ExcelWriter('test_files/string_formats.xlsx', engine='xlsxwriter')
|
||||
writer = pd.ExcelWriter('string_formats.xlsx', engine='xlsxwriter')
|
||||
df.to_excel(writer, index=False, sheet_name='Sheet1')
|
||||
|
||||
workbook = writer.book
|
||||
@ -179,7 +173,7 @@ def generate_boolean_formats_test():
|
||||
]
|
||||
})
|
||||
|
||||
writer = pd.ExcelWriter('test_files/boolean_formats.xlsx', engine='xlsxwriter')
|
||||
writer = pd.ExcelWriter('boolean_formats.xlsx', engine='xlsxwriter')
|
||||
df.to_excel(writer, index=False, sheet_name='Sheet1')
|
||||
|
||||
workbook = writer.book
|
||||
@ -203,9 +197,6 @@ def generate_boolean_formats_test():
|
||||
|
||||
|
||||
def main():
|
||||
"""Geneate all test Excel files"""
|
||||
create_test_directory()
|
||||
|
||||
print("Generating test Excel files...")
|
||||
generate_number_formats_test()
|
||||
generate_date_formats_test()
|
||||
@ -213,7 +204,7 @@ def main():
|
||||
generate_precision_test()
|
||||
generate_string_formats_test()
|
||||
generate_boolean_formats_test()
|
||||
print("Test files generated in 'test_files' directory")
|
||||
print("Test files generated")
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
main()
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Loading…
Reference in New Issue
Block a user