const format = require("pg-format"); const XLSX = require('xlsx'); function deduceType(cells) { if (!cells || cells.length === 0) return 'text'; const nonEmptyCells = cells.filter(cell => cell && cell.v != null); if (nonEmptyCells.length === 0) return 'text'; // Check for dates by looking at both cell type and formatted value const isDateCell = cell => { if (!cell) return false; if (cell.t === 'd') return true; // Check if it's a numeric cell with date formatting if (cell.t === 'n' && cell.w) { // Common date formats in Excel const datePatterns = [ /\d{4}-\d{2}-\d{2}/, // 2024-01-01 /\d{1,2}\/\d{1,2}\/\d{4}/, // 1/1/2024 /\d{2}-[A-Za-z]{3}-\d{4}/, // 01-Jan-2024 /[A-Za-z]{3}-\d{2}/, // Jan-24 /\d{1,2}-[A-Za-z]{3}/ // 1-Jan ]; return datePatterns.some(pattern => pattern.test(cell.w)); } return false; }; if (nonEmptyCells.some(isDateCell)) { return 'date'; } // Check for booleans const allBooleans = nonEmptyCells.every(cell => cell.t === 'b'); if (allBooleans) { return 'boolean'; } // Check for numbers const allNumbers = nonEmptyCells.every(cell => cell.t === 'n' || (cell.t === 's' && !isNaN(cell.v.replace(/[,$\s%()]/g, ''))) ); if (allNumbers) { const numbers = nonEmptyCells.map(cell => { if (cell.t === 'n') return cell.v; return parseFloat(cell.v.replace(/[,$\s%()]/g, '')); }); const needsPrecision = numbers.some(num => { const str = num.toString(); return str.includes('e') || (str.includes('.') && str.split('.')[1].length > 6) || Math.abs(num) > 1e15; }); return needsPrecision ? 'numeric' : 'double precision'; } return 'text'; // default to string type } function parseValue(cell, type) { if (!cell || cell.v == null) return null; switch (type) { case 'date': if (cell.t === 'd') { return cell.v.toISOString().split('T')[0]; } if (cell.t === 'n') { const date = new Date((cell.v - 25569) * 86400 * 1000); return date.toISOString().split('T')[0]; } return null; case 'numeric': case 'double precision': if (cell.t === 'n') return cell.v; if (cell.t === 's') { const cleaned = cell.v.replace(/[,$\s%()]/g, ''); if (!isNaN(cleaned)) return parseFloat(cleaned); } return null; case 'boolean': return cell.t === 'b' ? cell.v : null; default: return String(cell.v); } } async function sheet_to_pg_table(client, worksheet, tableName) { if (!worksheet['!ref']) return; const range = XLSX.utils.decode_range(worksheet['!ref']); // Extract headers from first row, clean names for PostgreSQL const headers = []; for (let col = range.s.c; col <= range.e.c; col++) { const cellAddress = XLSX.utils.encode_cell({ r: range.s.r, c: col }); const cell = worksheet[cellAddress]; const headerValue = cell ? String(cell.v).replace(/[^a-zA-Z0-9_]/g, '_') : `column_${col + 1}`; headers.push(headerValue.toLowerCase()); } // Group cell values by column for type deduction const columnValues = headers.map(() => []); for (let row = range.s.r + 1; row <= range.e.r; row++) { for (let col = range.s.c; col <= range.e.c; col++) { const cellAddress = XLSX.utils.encode_cell({ r: row, c: col }); const cell = worksheet[cellAddress]; columnValues[col].push(cell); } } // Deduc PostgreSQL type for each column const types = {}; headers.forEach((header, idx) => { types[header] = deduceType(columnValues[idx]); }); // Create table await client.query(format('DROP TABLE IF EXISTS %I', tableName)); const createTableSQL = format( 'CREATE TABLE %I (%s)', tableName, headers.map(header => format('%I %s', header, types[header])).join(', ') ); await client.query(createTableSQL); // Insert data for (let row = range.s.r + 1; row <= range.e.r; row++) { const values = headers.map((header, col) => { const cellAddress = XLSX.utils.encode_cell({ r: row, c: col }); const cell = worksheet[cellAddress]; return parseValue(cell, types[header]); }); const insertSQL = format( 'INSERT INTO %I (%s) VALUES (%s)', tableName, headers.map(h => format('%I', h)).join(', '), values.map(() => '%L').join(', ') ); await client.query(format(insertSQL, ...values)); } } module.exports = { sheet_to_pg_table };