commit e8fe0658227f7232404dc7ea9b4a8f31b05a82b4 Author: Asad Date: Wed Nov 20 14:41:09 2024 -0500 initial diff --git a/README.md b/README.md new file mode 100644 index 0000000..f432b7f --- /dev/null +++ b/README.md @@ -0,0 +1,69 @@ +# Sheetjs to PostgreSQL Creating a Table Demo + +A Node.js utility that intelligently converts Sheetjs `worksheet` to PostgreSQL tables while preserving appropriate data types. + +> This demo project serves as a refernce implementation for SheetJS + PostgreSQL integration. For more details, vist the [SheetJS Documentation](https://docs.sheetjs.com/docs/demos/data/postgresql/#creating-a-table). + +### Features +* Automatic data type detection from Excel columns +* Support various data formats: + * Numbers (integer and floating-point) + * Dates + * Booleans + * Text +* Handles special number formats (scientific notations, high precision) +* Clean column names for PostgreSQL compatibility + +### Prerequisites +* Node.js +* PostgreSQL (16) +* Python 3.x + +### Installation +1. Install Python dependencies: + +```bash +pip install -r requirements.txt +``` + +2. Install Node.js dependencies: +``` +npm install i +``` + +### Setup +1. Generate test_files: +```bash +python3 gen_test_files.py +``` +2. Configure PostgreSQL connection in `test.js` +```javascript +const client = new Client({ + host: 'localhost', + database: 'SheetJSPG', + user: 'postgres', + password: '7509' +}); +``` + +### Run +```bash +node test.js +``` + +### Test Files +The test suite includes various Excel files testing different data scenarios: + +* `number_formats.xlsx`: Various numeric formats +* `date_formats.xlsx`: Date handling +* `special_values.xlsx`: Edge cases +* `precision.xlsx`: High-precision numbers +* `string_formats.xlsx`: Text handling +* `boolean_formats.xlsx`: Boolean values + +### Type Mapping +* Excel dates → PostgreSQL `date` +* Booleans → PostgreSQL `boolean` +* High-precision numbers → PostgreSQL `numeric` +* Standard numbers → PostgreSQL `double precision` +* Text/other → PostgreSQL `text` \ No newline at end of file diff --git a/gen_test_files.py b/gen_test_files.py new file mode 100644 index 0000000..06a9b1f --- /dev/null +++ b/gen_test_files.py @@ -0,0 +1,219 @@ +import pandas as pd +from datetime import datetime +import numpy as np +import os + +def create_test_directory(): + """Create a directory for test files if it doesn't exist""" + if not os.path.exists('test_files'): + os.makedirs('test_files') + +def generate_number_formats_test(): + """Test Case 1: Common spreadsheet number formats""" + df = pd.DataFrame({ + 'id': range(1, 7), + 'value': [ + 1234.56, # Plain number + '1,234.56', # Thousands separator + 1234.5600, # Fixed decimal places + 0.1234, # Will be formatted as percentage + -1234.56, # Will be formatted as parentheses + -1230 # Will be formatted as scientific + ] + }) + + # Create Excel writer with xlsxwriter engine + writer = pd.ExcelWriter('test_files/number_formats.xlsx', engine='xlsxwriter') + df.to_excel(writer, index=False, sheet_name='Sheet1') + + # Get workbook and worksheet objects + workbook = writer.book + worksheet = writer.sheets['Sheet1'] + + # Add formats + percent_format = workbook.add_format({'num_format': '0.00%'}) + accounting_format = workbook.add_format({'num_format': '_(* #,##0.00_);_(* (#,##0.00);_(* "-"??_);_(@_)'}) + scientific_format = workbook.add_format({'num_format': '0.00E+00'}) + + # Apply formats to specific cells + worksheet.set_column('B:B', 15) # Set column width + worksheet.write('B5', 0.1234, percent_format) # Percentage + worksheet.write('B6', -1234.56, accounting_format) # Parentheses + worksheet.write('B7', -1230, scientific_format) # Scientific + + writer.close() + +def generate_date_formats_test(): + """Test Case 3: Date and timestamp formats""" + df = pd.DataFrame({ + 'id': range(1, 5), + 'date': [ + datetime(2024, 1, 1), # ISO format + datetime(2024, 1, 1), # US format + datetime(2024, 1, 1), # Excel format + 45292, # Excel serial date + ] + }) + + writer = pd.ExcelWriter('test_files/date_formats.xlsx', engine='xlsxwriter') + df.to_excel(writer, index=False, sheet_name='Sheet1') + + workbook = writer.book + worksheet = writer.sheets['Sheet1'] + + # Add different date and timestamp formats + iso_format = workbook.add_format({'num_format': 'yyyy-mm-dd'}) + us_format = workbook.add_format({'num_format': 'm/d/yyyy'}) + excel_format = workbook.add_format({'num_format': 'dd-mmm-yyyy'}) + + # New timestamp formats + datetime_24h_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss'}) + datetime_12h_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss AM/PM'}) + datetime_ms_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss.000'}) + iso_timestamp_format = workbook.add_format({'num_format': 'yyyy-mm-ddThh:mm:ss'}) + + # Set column width to accommodate timestamps + worksheet.set_column('B:B', 25) + + # Apply formats + worksheet.write('B2', datetime(2024, 1, 1), iso_format) + worksheet.write('B3', datetime(2024, 1, 1), us_format) + worksheet.write('B4', datetime(2024, 1, 1), excel_format) + worksheet.write('B5', 45292, excel_format) + + + writer.close() + + +def generate_special_values_test(): + """Test Case 4: Empty and special values""" + df = pd.DataFrame({ + 'id': range(1, 6), + 'value': [ + np.nan, # NULL + '', # Empty string + '#N/A', # Excel error + '#DIV/0!', # Excel error + '-' # Common placeholder + ] + }) + + writer = pd.ExcelWriter('test_files/special_values.xlsx', engine='xlsxwriter') + df.to_excel(writer, index=False, sheet_name='Sheet1') + writer.close() + +def generate_precision_test(): + """Test Case 5: Number precision""" + df = pd.DataFrame({ + 'id': range(1, 8), + 'value': [ + 1.234567890123456, # High precision decimal + 12345678901234567890, # Large integer (> 15 digits) + -0.00000000123456, # Small decimal + 9.99999e20, # Scientific notation large + -1.23456e-10, # Scientific notation small + 123456789.123456789, # Mixed large number with decimals + 1234567890123456.789 # Edge case for precision + ] + }) + + writer = pd.ExcelWriter('test_files/precision.xlsx', engine='xlsxwriter') + df.to_excel(writer, index=False, sheet_name='Sheet1') + + workbook = writer.book + worksheet = writer.sheets['Sheet1'] + + # Add formats for different number types + precision_format = workbook.add_format({'num_format': '0.000000000000000'}) + scientific_format = workbook.add_format({'num_format': '0.000000E+00'}) + + worksheet.write('B3', 1.234567890123456, precision_format) + worksheet.write('B4', 9.99999e20, scientific_format) + + # Apply formats + worksheet.set_column('B:B', 20) + for row in range(1, 8): + if row in [4, 5]: # Scientific notation + worksheet.write(row, 1, df['value'][row-1], scientific_format) + else: + worksheet.write(row, 1, df['value'][row-1], precision_format) + + writer.close() + +def generate_string_formats_test(): + """Test Case 2: String formats and special characters""" + df = pd.DataFrame({ + 'id': range(1, 9), + 'value': [ + 'Simple text', # Plain text + 'Text with spaces ', # Trailing spaces + ' Text with spaces', # Leading spaces + 'Text with\nnewline', # Newline character + 'Text with "quotes"', # Quoted text + 'Text with special chars: @#$%', # Special characters + 'Very long text ' * 10, # Long text + 'Super long text ' * 100 # Super long text + ] + }) + + writer = pd.ExcelWriter('test_files/string_formats.xlsx', engine='xlsxwriter') + df.to_excel(writer, index=False, sheet_name='Sheet1') + + workbook = writer.book + worksheet = writer.sheets['Sheet1'] + + # Set column width to show long text + worksheet.set_column('B:B', 50) + + writer.close() + +def generate_boolean_formats_test(): + """Test Case: Boolean formats in Excel""" + df = pd.DataFrame({ + 'id': range(1, 5), + 'value': [ + True, # Simple True + False, # Simple False + 'TRUE', # String TRUE + 'FALSE' # String FALSE + ] + }) + + writer = pd.ExcelWriter('test_files/boolean_formats.xlsx', engine='xlsxwriter') + df.to_excel(writer, index=False, sheet_name='Sheet1') + + workbook = writer.book + worksheet = writer.sheets['Sheet1'] + + # Add boolean formats + bool_format = workbook.add_format({'num_format': 'BOOLEAN'}) + custom_true_false = workbook.add_format({'num_format': '"True";;"False"'}) + yes_no_format = workbook.add_format({'num_format': '"YES";;NO'}) + + # Set column width + worksheet.set_column('B:B', 15) + + # Apply different boolean formats + worksheet.write('B2', True, bool_format) # Standard TRUE + worksheet.write('B3', False, bool_format) # Standard FALSE + worksheet.write('B4', True, yes_no_format) # Yes/No format + worksheet.write('B5', False, yes_no_format) # Yes/No format + + writer.close() + + +def main(): + """Geneate all test Excel files""" + create_test_directory() + + print("Generating test Excel files...") + generate_number_formats_test() + generate_date_formats_test() + generate_special_values_test() + generate_precision_test() + generate_string_formats_test() + generate_boolean_formats_test() + print("Test files generated in 'test_files' directory") + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/package.json b/package.json new file mode 100644 index 0000000..6e0aaf0 --- /dev/null +++ b/package.json @@ -0,0 +1,17 @@ +{ + "name": "rework-create-table-deduction-solution", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "test": "echo \"Error: no test specified\" && exit 1" + }, + "keywords": [], + "author": "", + "license": "ISC", + "dependencies": { + "assert": "^2.1.0", + "pg": "^8.13.1", + "xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz" + } +} diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml new file mode 100644 index 0000000..4b456fc --- /dev/null +++ b/pnpm-lock.yaml @@ -0,0 +1,335 @@ +lockfileVersion: '6.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +dependencies: + assert: + specifier: ^2.1.0 + version: 2.1.0 + pg: + specifier: ^8.13.1 + version: 8.13.1 + xlsx: + specifier: https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz + version: '@cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz' + +packages: + + /assert@2.1.0: + resolution: {integrity: sha512-eLHpSK/Y4nhMJ07gDaAzoX/XAKS8PSaojml3M0DM4JpV1LAi5JOJ/p6H/XWrl8L+DzVEvVCW1z3vWAaB9oTsQw==} + dependencies: + call-bind: 1.0.7 + is-nan: 1.3.2 + object-is: 1.1.6 + object.assign: 4.1.5 + util: 0.12.5 + dev: false + + /available-typed-arrays@1.0.7: + resolution: {integrity: sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==} + engines: {node: '>= 0.4'} + dependencies: + possible-typed-array-names: 1.0.0 + dev: false + + /call-bind@1.0.7: + resolution: {integrity: sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==} + engines: {node: '>= 0.4'} + dependencies: + es-define-property: 1.0.0 + es-errors: 1.3.0 + function-bind: 1.1.2 + get-intrinsic: 1.2.4 + set-function-length: 1.2.2 + dev: false + + /define-data-property@1.1.4: + resolution: {integrity: sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==} + engines: {node: '>= 0.4'} + dependencies: + es-define-property: 1.0.0 + es-errors: 1.3.0 + gopd: 1.0.1 + dev: false + + /define-properties@1.2.1: + resolution: {integrity: sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==} + engines: {node: '>= 0.4'} + dependencies: + define-data-property: 1.1.4 + has-property-descriptors: 1.0.2 + object-keys: 1.1.1 + dev: false + + /es-define-property@1.0.0: + resolution: {integrity: sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==} + engines: {node: '>= 0.4'} + dependencies: + get-intrinsic: 1.2.4 + dev: false + + /es-errors@1.3.0: + resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==} + engines: {node: '>= 0.4'} + dev: false + + /for-each@0.3.3: + resolution: {integrity: sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw==} + dependencies: + is-callable: 1.2.7 + dev: false + + /function-bind@1.1.2: + resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==} + dev: false + + /get-intrinsic@1.2.4: + resolution: {integrity: sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==} + engines: {node: '>= 0.4'} + dependencies: + es-errors: 1.3.0 + function-bind: 1.1.2 + has-proto: 1.0.3 + has-symbols: 1.0.3 + hasown: 2.0.2 + dev: false + + /gopd@1.0.1: + resolution: {integrity: sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==} + dependencies: + get-intrinsic: 1.2.4 + dev: false + + /has-property-descriptors@1.0.2: + resolution: {integrity: sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==} + dependencies: + es-define-property: 1.0.0 + dev: false + + /has-proto@1.0.3: + resolution: {integrity: sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==} + engines: {node: '>= 0.4'} + dev: false + + /has-symbols@1.0.3: + resolution: {integrity: sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==} + engines: {node: '>= 0.4'} + dev: false + + /has-tostringtag@1.0.2: + resolution: {integrity: sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==} + engines: {node: '>= 0.4'} + dependencies: + has-symbols: 1.0.3 + dev: false + + /hasown@2.0.2: + resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==} + engines: {node: '>= 0.4'} + dependencies: + function-bind: 1.1.2 + dev: false + + /inherits@2.0.4: + resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + dev: false + + /is-arguments@1.1.1: + resolution: {integrity: sha512-8Q7EARjzEnKpt/PCD7e1cgUS0a6X8u5tdSiMqXhojOdoV9TsMsiO+9VLC5vAmO8N7/GmXn7yjR8qnA6bVAEzfA==} + engines: {node: '>= 0.4'} + dependencies: + call-bind: 1.0.7 + has-tostringtag: 1.0.2 + dev: false + + /is-callable@1.2.7: + resolution: {integrity: sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA==} + engines: {node: '>= 0.4'} + dev: false + + /is-generator-function@1.0.10: + resolution: {integrity: sha512-jsEjy9l3yiXEQ+PsXdmBwEPcOxaXWLspKdplFUVI9vq1iZgIekeC0L167qeu86czQaxed3q/Uzuw0swL0irL8A==} + engines: {node: '>= 0.4'} + dependencies: + has-tostringtag: 1.0.2 + dev: false + + /is-nan@1.3.2: + resolution: {integrity: sha512-E+zBKpQ2t6MEo1VsonYmluk9NxGrbzpeeLC2xIViuO2EjU2xsXsBPwTr3Ykv9l08UYEVEdWeRZNouaZqF6RN0w==} + engines: {node: '>= 0.4'} + dependencies: + call-bind: 1.0.7 + define-properties: 1.2.1 + dev: false + + /is-typed-array@1.1.13: + resolution: {integrity: sha512-uZ25/bUAlUY5fR4OKT4rZQEBrzQWYV9ZJYGGsUmEJ6thodVJ1HX64ePQ6Z0qPWP+m+Uq6e9UugrE38jeYsDSMw==} + engines: {node: '>= 0.4'} + dependencies: + which-typed-array: 1.1.15 + dev: false + + /object-is@1.1.6: + resolution: {integrity: sha512-F8cZ+KfGlSGi09lJT7/Nd6KJZ9ygtvYC0/UYYLI9nmQKLMnydpB9yvbv9K1uSkEu7FU9vYPmVwLg328tX+ot3Q==} + engines: {node: '>= 0.4'} + dependencies: + call-bind: 1.0.7 + define-properties: 1.2.1 + dev: false + + /object-keys@1.1.1: + resolution: {integrity: sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==} + engines: {node: '>= 0.4'} + dev: false + + /object.assign@4.1.5: + resolution: {integrity: sha512-byy+U7gp+FVwmyzKPYhW2h5l3crpmGsxl7X2s8y43IgxvG4g3QZ6CffDtsNQy1WsmZpQbO+ybo0AlW7TY6DcBQ==} + engines: {node: '>= 0.4'} + dependencies: + call-bind: 1.0.7 + define-properties: 1.2.1 + has-symbols: 1.0.3 + object-keys: 1.1.1 + dev: false + + /pg-cloudflare@1.1.1: + resolution: {integrity: sha512-xWPagP/4B6BgFO+EKz3JONXv3YDgvkbVrGw2mTo3D6tVDQRh1e7cqVGvyR3BE+eQgAvx1XhW/iEASj4/jCWl3Q==} + requiresBuild: true + dev: false + optional: true + + /pg-connection-string@2.7.0: + resolution: {integrity: sha512-PI2W9mv53rXJQEOb8xNR8lH7Hr+EKa6oJa38zsK0S/ky2er16ios1wLKhZyxzD7jUReiWokc9WK5nxSnC7W1TA==} + dev: false + + /pg-int8@1.0.1: + resolution: {integrity: sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==} + engines: {node: '>=4.0.0'} + dev: false + + /pg-pool@3.7.0(pg@8.13.1): + resolution: {integrity: sha512-ZOBQForurqh4zZWjrgSwwAtzJ7QiRX0ovFkZr2klsen3Nm0aoh33Ls0fzfv3imeH/nw/O27cjdz5kzYJfeGp/g==} + peerDependencies: + pg: '>=8.0' + dependencies: + pg: 8.13.1 + dev: false + + /pg-protocol@1.7.0: + resolution: {integrity: sha512-hTK/mE36i8fDDhgDFjy6xNOG+LCorxLG3WO17tku+ij6sVHXh1jQUJ8hYAnRhNla4QVD2H8er/FOjc/+EgC6yQ==} + dev: false + + /pg-types@2.2.0: + resolution: {integrity: sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==} + engines: {node: '>=4'} + dependencies: + pg-int8: 1.0.1 + postgres-array: 2.0.0 + postgres-bytea: 1.0.0 + postgres-date: 1.0.7 + postgres-interval: 1.2.0 + dev: false + + /pg@8.13.1: + resolution: {integrity: sha512-OUir1A0rPNZlX//c7ksiu7crsGZTKSOXJPgtNiHGIlC9H0lO+NC6ZDYksSgBYY/thSWhnSRBv8w1lieNNGATNQ==} + engines: {node: '>= 8.0.0'} + peerDependencies: + pg-native: '>=3.0.1' + peerDependenciesMeta: + pg-native: + optional: true + dependencies: + pg-connection-string: 2.7.0 + pg-pool: 3.7.0(pg@8.13.1) + pg-protocol: 1.7.0 + pg-types: 2.2.0 + pgpass: 1.0.5 + optionalDependencies: + pg-cloudflare: 1.1.1 + dev: false + + /pgpass@1.0.5: + resolution: {integrity: sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==} + dependencies: + split2: 4.2.0 + dev: false + + /possible-typed-array-names@1.0.0: + resolution: {integrity: sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q==} + engines: {node: '>= 0.4'} + dev: false + + /postgres-array@2.0.0: + resolution: {integrity: sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==} + engines: {node: '>=4'} + dev: false + + /postgres-bytea@1.0.0: + resolution: {integrity: sha512-xy3pmLuQqRBZBXDULy7KbaitYqLcmxigw14Q5sj8QBVLqEwXfeybIKVWiqAXTlcvdvb0+xkOtDbfQMOf4lST1w==} + engines: {node: '>=0.10.0'} + dev: false + + /postgres-date@1.0.7: + resolution: {integrity: sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==} + engines: {node: '>=0.10.0'} + dev: false + + /postgres-interval@1.2.0: + resolution: {integrity: sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==} + engines: {node: '>=0.10.0'} + dependencies: + xtend: 4.0.2 + dev: false + + /set-function-length@1.2.2: + resolution: {integrity: sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==} + engines: {node: '>= 0.4'} + dependencies: + define-data-property: 1.1.4 + es-errors: 1.3.0 + function-bind: 1.1.2 + get-intrinsic: 1.2.4 + gopd: 1.0.1 + has-property-descriptors: 1.0.2 + dev: false + + /split2@4.2.0: + resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==} + engines: {node: '>= 10.x'} + dev: false + + /util@0.12.5: + resolution: {integrity: sha512-kZf/K6hEIrWHI6XqOFUiiMa+79wE/D8Q+NCNAWclkyg3b4d2k7s0QGepNjiABc+aR3N1PAyHL7p6UcLY6LmrnA==} + dependencies: + inherits: 2.0.4 + is-arguments: 1.1.1 + is-generator-function: 1.0.10 + is-typed-array: 1.1.13 + which-typed-array: 1.1.15 + dev: false + + /which-typed-array@1.1.15: + resolution: {integrity: sha512-oV0jmFtUky6CXfkqehVvBP/LSWJ2sy4vWMioiENyJLePrBO/yKyV9OyJySfAKosh+RYkIl5zJCNZ8/4JncrpdA==} + engines: {node: '>= 0.4'} + dependencies: + available-typed-arrays: 1.0.7 + call-bind: 1.0.7 + for-each: 0.3.3 + gopd: 1.0.1 + has-tostringtag: 1.0.2 + dev: false + + /xtend@4.0.2: + resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==} + engines: {node: '>=0.4'} + dev: false + + '@cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz': + resolution: {tarball: https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz} + name: xlsx + version: 0.20.3 + engines: {node: '>=0.8'} + hasBin: true + dev: false diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..0111e7a --- /dev/null +++ b/requirements.txt @@ -0,0 +1,3 @@ +pandas==2.1.0 +xlsxwriter==3.1.2 +numpy==1.24.3 \ No newline at end of file diff --git a/sql-types.js b/sql-types.js new file mode 100644 index 0000000..7682d76 --- /dev/null +++ b/sql-types.js @@ -0,0 +1,113 @@ +const format = require("pg-format"); +const XLSX = require('xlsx'); + +// Utility func for number handling +const isNumeric = value => !isNaN(String(value).replace(/[,$\s]/g,'')) && !isNaN(parseFloat(String(value).replace(/[,$\s]/g,''))); +const cleanNumericValue = value => typeof value === 'string' ? value.replace(/[,$\s]/g, '') : value; + +// Determines PostgreSQL data type based on column values +function deduceType(values) { + if (!values || values.length === 0) return 'text'; + + const isDateCell = cell => cell && (cell.t === 'd' | (cell.t === 'n')); + const isBooleanCell = cell => cell?.t === 'b'; + const isValidNumber = cell => cell && (cell.t === 'n' || isNumeric(cell.v)); + const needsPrecision = num => { + const str = num.toString(); + return str.includes('e') || + (str.includes('.') && str.split('.')[1].length > 6) || + Math.abs(num) > 1e15; + }; + + // Type detection priority: dates > booleans > numbers > text + if (values.some(isDateCell)) return 'date'; + if (values.some(isBooleanCell) && values.every(cell => !cell || isBooleanCell(cell))) return 'boolean'; + + const numberValues = values + .filter(isValidNumber) + .map(cell => parseFloat(cleanNumericValue(cell.v))); + + if (numberValues.length && values.every(cell => !cell || isValidNumber(cell))) { + return numberValues.some(needsPrecision) ? 'numeric' : 'double precision'; + } + return 'text'; +} + +// Converts Sheetjs cell value to PostgreSQL compatible format +function parseValue(cell, type) { + if (!cell || cell.v == null || cell.v === '') return null; + + switch (type) { + case 'date': + if (cell.t === 'd') return cell.v.toISOString().split('T')[0]; + if (cell.t === 'n') return new Date((cell.v - 25569) * 86400 * 1000).toISOString().split('T')[0]; + return null; + case 'double precision': + if (cell.t === 'n') return cell.v; + if (isNumeric(cell.v)) return parseFloat(cleanNumericValue(cell.v)); + return null; + case 'boolean': + return cell.t === 'b' ? cell.v : null; + default: + return String(cell.v); + } +} + +async function sheet_to_pg_table(client, worksheet, tableName) { + if (!worksheet['!ref']) return; + + const range = XLSX.utils.decode_range(worksheet['!ref']); + + // Extract headers from first row, clean names for PostgreSQL + const headers = []; + for (let col = range.s.c; col <= range.e.c; col++) { + const cellAddress = XLSX.utils.encode_cell({ r: range.s.r, c: col }); + const cell = worksheet[cellAddress]; + const headerValue = cell ? String(cell.v).replace(/[^a-zA-Z0-9_]/g, '_') : `column_${col + 1}`; + headers.push(headerValue.toLowerCase()); + } + + // Group cell values by column for type deduction + const columnValues = headers.map(() => []); + for (let row = range.s.r + 1; row <= range.e.r; row++) { + for (let col = range.s.c; col <= range.e.c; col++) { + const cellAddress = XLSX.utils.encode_cell({ r: row, c: col }); + const cell = worksheet[cellAddress]; + columnValues[col].push(cell); + } + } + + // Deduc PostgreSQL type for each column + const types = {}; + headers.forEach((header, idx) => { + types[header] = deduceType(columnValues[idx]); + }); + + await client.query(format('DROP TABLE IF EXISTS %I', tableName)); + + const createTableSQL = format( + 'CREATE TABLE %I (%s)', + tableName, + headers.map(header => format('%I %s', header, types[header])).join(', ') + ); + await client.query(createTableSQL); + + // Insert data row by row + for (let row = range.s.r + 1; row <= range.e.r; row++) { + const values = headers.map((header, col) => { + const cellAddress = XLSX.utils.encode_cell({ r: row, c: col }); + const cell = worksheet[cellAddress]; + return parseValue(cell, types[header]); + }); + + const insertSQL = format( + 'INSERT INTO %I (%s) VALUES (%s)', + tableName, + headers.map(h => format('%I', h)).join(', '), + values.map(() => '%L').join(', ') + ); + await client.query(format(insertSQL, ...values)); + } +} + +module.exports = { sheet_to_pg_table }; \ No newline at end of file diff --git a/test.js b/test.js new file mode 100644 index 0000000..f79106e --- /dev/null +++ b/test.js @@ -0,0 +1,70 @@ +const XLSX = require('xlsx'); +const { Client } = require('pg'); +const { sheet_to_pg_table } = require('./sql-types'); +const path = require('path'); + +async function readExcelAndTest(filename, tableName) { + console.log(`\nTesting ${filename}...`); + + // Read Excel file + const workbook = XLSX.readFile(path.join('test_files', filename), { dense: true } ); + const sheetName = workbook.SheetNames[0]; + const worksheet = workbook.Sheets[sheetName]; + + // Convert to array of objects + const data = XLSX.utils.sheet_to_json(worksheet); // keep number formatting + console.log('Parsed Excel data:', data); + + // Connect to PostgreSQL + const client = new Client({ + host: 'localhost', + database: 'SheetJSPG', + user: 'postgres', + password: '7509' + }); + + try { + await client.connect(); + + // Import data + await sheet_to_pg_table(client, workbook, tableName); + + // Verify table structure + const structure = await client.query(` + SELECT column_name, data_type + FROM information_schema.columns + WHERE table_name = $1 + ORDER BY ordinal_position; + `, [tableName]); + console.log('\nTable structure:', structure.rows); + + // Verify data + const results = await client.query(`SELECT * FROM ${tableName}`); + console.log('\nImported data from DB:', results.rows); + + } catch (error) { + console.error(`Error testing ${filename}:`, error); + throw error; + } finally { + await client.end(); + } +} + +async function runAllTests() { + try { + // Test each Excel file + await readExcelAndTest('number_formats.xlsx', 'test_number_formats'); + await readExcelAndTest('date_formats.xlsx', 'test_dates'); + await readExcelAndTest('special_values.xlsx', 'test_special_values'); + await readExcelAndTest('precision.xlsx', 'test_precision'); + await readExcelAndTest('string_formats.xlsx', 'test_string_formats'); + await readExcelAndTest('boolean_formats.xlsx', 'test_boolean_formats'); + + console.log('\nAll tests completed successfully'); + } catch (error) { + console.error('\nTests failed:', error); + process.exit(1); + } +} + +runAllTests().catch(console.error); \ No newline at end of file diff --git a/test_files/boolean_formats.xlsx b/test_files/boolean_formats.xlsx new file mode 100644 index 0000000..148aa09 Binary files /dev/null and b/test_files/boolean_formats.xlsx differ diff --git a/test_files/date_formats.xlsx b/test_files/date_formats.xlsx new file mode 100644 index 0000000..a84a23a Binary files /dev/null and b/test_files/date_formats.xlsx differ diff --git a/test_files/number_formats.xlsx b/test_files/number_formats.xlsx new file mode 100644 index 0000000..7b9ece4 Binary files /dev/null and b/test_files/number_formats.xlsx differ diff --git a/test_files/precision.xlsx b/test_files/precision.xlsx new file mode 100644 index 0000000..1d900b4 Binary files /dev/null and b/test_files/precision.xlsx differ diff --git a/test_files/special_values.xlsx b/test_files/special_values.xlsx new file mode 100644 index 0000000..7076c8a Binary files /dev/null and b/test_files/special_values.xlsx differ diff --git a/test_files/string_formats.xlsx b/test_files/string_formats.xlsx new file mode 100644 index 0000000..f3e51d8 Binary files /dev/null and b/test_files/string_formats.xlsx differ