This commit is contained in:
Asad 2024-11-20 14:41:09 -05:00
commit e8fe065822
13 changed files with 826 additions and 0 deletions

69
README.md Normal file

@ -0,0 +1,69 @@
# Sheetjs to PostgreSQL Creating a Table Demo
A Node.js utility that intelligently converts Sheetjs `worksheet` to PostgreSQL tables while preserving appropriate data types.
> This demo project serves as a refernce implementation for SheetJS + PostgreSQL integration. For more details, vist the [SheetJS Documentation](https://docs.sheetjs.com/docs/demos/data/postgresql/#creating-a-table).
### Features
* Automatic data type detection from Excel columns
* Support various data formats:
* Numbers (integer and floating-point)
* Dates
* Booleans
* Text
* Handles special number formats (scientific notations, high precision)
* Clean column names for PostgreSQL compatibility
### Prerequisites
* Node.js
* PostgreSQL (16)
* Python 3.x
### Installation
1. Install Python dependencies:
```bash
pip install -r requirements.txt
```
2. Install Node.js dependencies:
```
npm install i
```
### Setup
1. Generate test_files:
```bash
python3 gen_test_files.py
```
2. Configure PostgreSQL connection in `test.js`
```javascript
const client = new Client({
host: 'localhost',
database: 'SheetJSPG',
user: 'postgres',
password: '7509'
});
```
### Run
```bash
node test.js
```
### Test Files
The test suite includes various Excel files testing different data scenarios:
* `number_formats.xlsx`: Various numeric formats
* `date_formats.xlsx`: Date handling
* `special_values.xlsx`: Edge cases
* `precision.xlsx`: High-precision numbers
* `string_formats.xlsx`: Text handling
* `boolean_formats.xlsx`: Boolean values
### Type Mapping
* Excel dates → PostgreSQL `date`
* Booleans → PostgreSQL `boolean`
* High-precision numbers → PostgreSQL `numeric`
* Standard numbers → PostgreSQL `double precision`
* Text/other → PostgreSQL `text`

219
gen_test_files.py Normal file

@ -0,0 +1,219 @@
import pandas as pd
from datetime import datetime
import numpy as np
import os
def create_test_directory():
"""Create a directory for test files if it doesn't exist"""
if not os.path.exists('test_files'):
os.makedirs('test_files')
def generate_number_formats_test():
"""Test Case 1: Common spreadsheet number formats"""
df = pd.DataFrame({
'id': range(1, 7),
'value': [
1234.56, # Plain number
'1,234.56', # Thousands separator
1234.5600, # Fixed decimal places
0.1234, # Will be formatted as percentage
-1234.56, # Will be formatted as parentheses
-1230 # Will be formatted as scientific
]
})
# Create Excel writer with xlsxwriter engine
writer = pd.ExcelWriter('test_files/number_formats.xlsx', engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='Sheet1')
# Get workbook and worksheet objects
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Add formats
percent_format = workbook.add_format({'num_format': '0.00%'})
accounting_format = workbook.add_format({'num_format': '_(* #,##0.00_);_(* (#,##0.00);_(* "-"??_);_(@_)'})
scientific_format = workbook.add_format({'num_format': '0.00E+00'})
# Apply formats to specific cells
worksheet.set_column('B:B', 15) # Set column width
worksheet.write('B5', 0.1234, percent_format) # Percentage
worksheet.write('B6', -1234.56, accounting_format) # Parentheses
worksheet.write('B7', -1230, scientific_format) # Scientific
writer.close()
def generate_date_formats_test():
"""Test Case 3: Date and timestamp formats"""
df = pd.DataFrame({
'id': range(1, 5),
'date': [
datetime(2024, 1, 1), # ISO format
datetime(2024, 1, 1), # US format
datetime(2024, 1, 1), # Excel format
45292, # Excel serial date
]
})
writer = pd.ExcelWriter('test_files/date_formats.xlsx', engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Add different date and timestamp formats
iso_format = workbook.add_format({'num_format': 'yyyy-mm-dd'})
us_format = workbook.add_format({'num_format': 'm/d/yyyy'})
excel_format = workbook.add_format({'num_format': 'dd-mmm-yyyy'})
# New timestamp formats
datetime_24h_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss'})
datetime_12h_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss AM/PM'})
datetime_ms_format = workbook.add_format({'num_format': 'yyyy-mm-dd hh:mm:ss.000'})
iso_timestamp_format = workbook.add_format({'num_format': 'yyyy-mm-ddThh:mm:ss'})
# Set column width to accommodate timestamps
worksheet.set_column('B:B', 25)
# Apply formats
worksheet.write('B2', datetime(2024, 1, 1), iso_format)
worksheet.write('B3', datetime(2024, 1, 1), us_format)
worksheet.write('B4', datetime(2024, 1, 1), excel_format)
worksheet.write('B5', 45292, excel_format)
writer.close()
def generate_special_values_test():
"""Test Case 4: Empty and special values"""
df = pd.DataFrame({
'id': range(1, 6),
'value': [
np.nan, # NULL
'', # Empty string
'#N/A', # Excel error
'#DIV/0!', # Excel error
'-' # Common placeholder
]
})
writer = pd.ExcelWriter('test_files/special_values.xlsx', engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='Sheet1')
writer.close()
def generate_precision_test():
"""Test Case 5: Number precision"""
df = pd.DataFrame({
'id': range(1, 8),
'value': [
1.234567890123456, # High precision decimal
12345678901234567890, # Large integer (> 15 digits)
-0.00000000123456, # Small decimal
9.99999e20, # Scientific notation large
-1.23456e-10, # Scientific notation small
123456789.123456789, # Mixed large number with decimals
1234567890123456.789 # Edge case for precision
]
})
writer = pd.ExcelWriter('test_files/precision.xlsx', engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Add formats for different number types
precision_format = workbook.add_format({'num_format': '0.000000000000000'})
scientific_format = workbook.add_format({'num_format': '0.000000E+00'})
worksheet.write('B3', 1.234567890123456, precision_format)
worksheet.write('B4', 9.99999e20, scientific_format)
# Apply formats
worksheet.set_column('B:B', 20)
for row in range(1, 8):
if row in [4, 5]: # Scientific notation
worksheet.write(row, 1, df['value'][row-1], scientific_format)
else:
worksheet.write(row, 1, df['value'][row-1], precision_format)
writer.close()
def generate_string_formats_test():
"""Test Case 2: String formats and special characters"""
df = pd.DataFrame({
'id': range(1, 9),
'value': [
'Simple text', # Plain text
'Text with spaces ', # Trailing spaces
' Text with spaces', # Leading spaces
'Text with\nnewline', # Newline character
'Text with "quotes"', # Quoted text
'Text with special chars: @#$%', # Special characters
'Very long text ' * 10, # Long text
'Super long text ' * 100 # Super long text
]
})
writer = pd.ExcelWriter('test_files/string_formats.xlsx', engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Set column width to show long text
worksheet.set_column('B:B', 50)
writer.close()
def generate_boolean_formats_test():
"""Test Case: Boolean formats in Excel"""
df = pd.DataFrame({
'id': range(1, 5),
'value': [
True, # Simple True
False, # Simple False
'TRUE', # String TRUE
'FALSE' # String FALSE
]
})
writer = pd.ExcelWriter('test_files/boolean_formats.xlsx', engine='xlsxwriter')
df.to_excel(writer, index=False, sheet_name='Sheet1')
workbook = writer.book
worksheet = writer.sheets['Sheet1']
# Add boolean formats
bool_format = workbook.add_format({'num_format': 'BOOLEAN'})
custom_true_false = workbook.add_format({'num_format': '"True";;"False"'})
yes_no_format = workbook.add_format({'num_format': '"YES";;NO'})
# Set column width
worksheet.set_column('B:B', 15)
# Apply different boolean formats
worksheet.write('B2', True, bool_format) # Standard TRUE
worksheet.write('B3', False, bool_format) # Standard FALSE
worksheet.write('B4', True, yes_no_format) # Yes/No format
worksheet.write('B5', False, yes_no_format) # Yes/No format
writer.close()
def main():
"""Geneate all test Excel files"""
create_test_directory()
print("Generating test Excel files...")
generate_number_formats_test()
generate_date_formats_test()
generate_special_values_test()
generate_precision_test()
generate_string_formats_test()
generate_boolean_formats_test()
print("Test files generated in 'test_files' directory")
if __name__ == "__main__":
main()

17
package.json Normal file

@ -0,0 +1,17 @@
{
"name": "rework-create-table-deduction-solution",
"version": "1.0.0",
"description": "",
"main": "index.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"keywords": [],
"author": "",
"license": "ISC",
"dependencies": {
"assert": "^2.1.0",
"pg": "^8.13.1",
"xlsx": "https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz"
}
}

335
pnpm-lock.yaml Normal file

@ -0,0 +1,335 @@
lockfileVersion: '6.0'
settings:
autoInstallPeers: true
excludeLinksFromLockfile: false
dependencies:
assert:
specifier: ^2.1.0
version: 2.1.0
pg:
specifier: ^8.13.1
version: 8.13.1
xlsx:
specifier: https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz
version: '@cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz'
packages:
/assert@2.1.0:
resolution: {integrity: sha512-eLHpSK/Y4nhMJ07gDaAzoX/XAKS8PSaojml3M0DM4JpV1LAi5JOJ/p6H/XWrl8L+DzVEvVCW1z3vWAaB9oTsQw==}
dependencies:
call-bind: 1.0.7
is-nan: 1.3.2
object-is: 1.1.6
object.assign: 4.1.5
util: 0.12.5
dev: false
/available-typed-arrays@1.0.7:
resolution: {integrity: sha512-wvUjBtSGN7+7SjNpq/9M2Tg350UZD3q62IFZLbRAR1bSMlCo1ZaeW+BJ+D090e4hIIZLBcTDWe4Mh4jvUDajzQ==}
engines: {node: '>= 0.4'}
dependencies:
possible-typed-array-names: 1.0.0
dev: false
/call-bind@1.0.7:
resolution: {integrity: sha512-GHTSNSYICQ7scH7sZ+M2rFopRoLh8t2bLSW6BbgrtLsahOIB5iyAVJf9GjWK3cYTDaMj4XdBpM1cA6pIS0Kv2w==}
engines: {node: '>= 0.4'}
dependencies:
es-define-property: 1.0.0
es-errors: 1.3.0
function-bind: 1.1.2
get-intrinsic: 1.2.4
set-function-length: 1.2.2
dev: false
/define-data-property@1.1.4:
resolution: {integrity: sha512-rBMvIzlpA8v6E+SJZoo++HAYqsLrkg7MSfIinMPFhmkorw7X+dOXVJQs+QT69zGkzMyfDnIMN2Wid1+NbL3T+A==}
engines: {node: '>= 0.4'}
dependencies:
es-define-property: 1.0.0
es-errors: 1.3.0
gopd: 1.0.1
dev: false
/define-properties@1.2.1:
resolution: {integrity: sha512-8QmQKqEASLd5nx0U1B1okLElbUuuttJ/AnYmRXbbbGDWh6uS208EjD4Xqq/I9wK7u0v6O08XhTWnt5XtEbR6Dg==}
engines: {node: '>= 0.4'}
dependencies:
define-data-property: 1.1.4
has-property-descriptors: 1.0.2
object-keys: 1.1.1
dev: false
/es-define-property@1.0.0:
resolution: {integrity: sha512-jxayLKShrEqqzJ0eumQbVhTYQM27CfT1T35+gCgDFoL82JLsXqTJ76zv6A0YLOgEnLUMvLzsDsGIrl8NFpT2gQ==}
engines: {node: '>= 0.4'}
dependencies:
get-intrinsic: 1.2.4
dev: false
/es-errors@1.3.0:
resolution: {integrity: sha512-Zf5H2Kxt2xjTvbJvP2ZWLEICxA6j+hAmMzIlypy4xcBg1vKVnx89Wy0GbS+kf5cwCVFFzdCFh2XSCFNULS6csw==}
engines: {node: '>= 0.4'}
dev: false
/for-each@0.3.3:
resolution: {integrity: sha512-jqYfLp7mo9vIyQf8ykW2v7A+2N4QjeCeI5+Dz9XraiO1ign81wjiH7Fb9vSOWvQfNtmSa4H2RoQTrrXivdUZmw==}
dependencies:
is-callable: 1.2.7
dev: false
/function-bind@1.1.2:
resolution: {integrity: sha512-7XHNxH7qX9xG5mIwxkhumTox/MIRNcOgDrxWsMt2pAr23WHp6MrRlN7FBSFpCpr+oVO0F744iUgR82nJMfG2SA==}
dev: false
/get-intrinsic@1.2.4:
resolution: {integrity: sha512-5uYhsJH8VJBTv7oslg4BznJYhDoRI6waYCxMmCdnTrcCrHA/fCFKoTFz2JKKE0HdDFUF7/oQuhzumXJK7paBRQ==}
engines: {node: '>= 0.4'}
dependencies:
es-errors: 1.3.0
function-bind: 1.1.2
has-proto: 1.0.3
has-symbols: 1.0.3
hasown: 2.0.2
dev: false
/gopd@1.0.1:
resolution: {integrity: sha512-d65bNlIadxvpb/A2abVdlqKqV563juRnZ1Wtk6s1sIR8uNsXR70xqIzVqxVf1eTqDunwT2MkczEeaezCKTZhwA==}
dependencies:
get-intrinsic: 1.2.4
dev: false
/has-property-descriptors@1.0.2:
resolution: {integrity: sha512-55JNKuIW+vq4Ke1BjOTjM2YctQIvCT7GFzHwmfZPGo5wnrgkid0YQtnAleFSqumZm4az3n2BS+erby5ipJdgrg==}
dependencies:
es-define-property: 1.0.0
dev: false
/has-proto@1.0.3:
resolution: {integrity: sha512-SJ1amZAJUiZS+PhsVLf5tGydlaVB8EdFpaSO4gmiUKUOxk8qzn5AIy4ZeJUmh22znIdk/uMAUT2pl3FxzVUH+Q==}
engines: {node: '>= 0.4'}
dev: false
/has-symbols@1.0.3:
resolution: {integrity: sha512-l3LCuF6MgDNwTDKkdYGEihYjt5pRPbEg46rtlmnSPlUbgmB8LOIrKJbYYFBSbnPaJexMKtiPO8hmeRjRz2Td+A==}
engines: {node: '>= 0.4'}
dev: false
/has-tostringtag@1.0.2:
resolution: {integrity: sha512-NqADB8VjPFLM2V0VvHUewwwsw0ZWBaIdgo+ieHtK3hasLz4qeCRjYcqfB6AQrBggRKppKF8L52/VqdVsO47Dlw==}
engines: {node: '>= 0.4'}
dependencies:
has-symbols: 1.0.3
dev: false
/hasown@2.0.2:
resolution: {integrity: sha512-0hJU9SCPvmMzIBdZFqNPXWa6dqh7WdH0cII9y+CyS8rG3nL48Bclra9HmKhVVUHyPWNH5Y7xDwAB7bfgSjkUMQ==}
engines: {node: '>= 0.4'}
dependencies:
function-bind: 1.1.2
dev: false
/inherits@2.0.4:
resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==}
dev: false
/is-arguments@1.1.1:
resolution: {integrity: sha512-8Q7EARjzEnKpt/PCD7e1cgUS0a6X8u5tdSiMqXhojOdoV9TsMsiO+9VLC5vAmO8N7/GmXn7yjR8qnA6bVAEzfA==}
engines: {node: '>= 0.4'}
dependencies:
call-bind: 1.0.7
has-tostringtag: 1.0.2
dev: false
/is-callable@1.2.7:
resolution: {integrity: sha512-1BC0BVFhS/p0qtw6enp8e+8OD0UrK0oFLztSjNzhcKA3WDuJxxAPXzPuPtKkjEY9UUoEWlX/8fgKeu2S8i9JTA==}
engines: {node: '>= 0.4'}
dev: false
/is-generator-function@1.0.10:
resolution: {integrity: sha512-jsEjy9l3yiXEQ+PsXdmBwEPcOxaXWLspKdplFUVI9vq1iZgIekeC0L167qeu86czQaxed3q/Uzuw0swL0irL8A==}
engines: {node: '>= 0.4'}
dependencies:
has-tostringtag: 1.0.2
dev: false
/is-nan@1.3.2:
resolution: {integrity: sha512-E+zBKpQ2t6MEo1VsonYmluk9NxGrbzpeeLC2xIViuO2EjU2xsXsBPwTr3Ykv9l08UYEVEdWeRZNouaZqF6RN0w==}
engines: {node: '>= 0.4'}
dependencies:
call-bind: 1.0.7
define-properties: 1.2.1
dev: false
/is-typed-array@1.1.13:
resolution: {integrity: sha512-uZ25/bUAlUY5fR4OKT4rZQEBrzQWYV9ZJYGGsUmEJ6thodVJ1HX64ePQ6Z0qPWP+m+Uq6e9UugrE38jeYsDSMw==}
engines: {node: '>= 0.4'}
dependencies:
which-typed-array: 1.1.15
dev: false
/object-is@1.1.6:
resolution: {integrity: sha512-F8cZ+KfGlSGi09lJT7/Nd6KJZ9ygtvYC0/UYYLI9nmQKLMnydpB9yvbv9K1uSkEu7FU9vYPmVwLg328tX+ot3Q==}
engines: {node: '>= 0.4'}
dependencies:
call-bind: 1.0.7
define-properties: 1.2.1
dev: false
/object-keys@1.1.1:
resolution: {integrity: sha512-NuAESUOUMrlIXOfHKzD6bpPu3tYt3xvjNdRIQ+FeT0lNb4K8WR70CaDxhuNguS2XG+GjkyMwOzsN5ZktImfhLA==}
engines: {node: '>= 0.4'}
dev: false
/object.assign@4.1.5:
resolution: {integrity: sha512-byy+U7gp+FVwmyzKPYhW2h5l3crpmGsxl7X2s8y43IgxvG4g3QZ6CffDtsNQy1WsmZpQbO+ybo0AlW7TY6DcBQ==}
engines: {node: '>= 0.4'}
dependencies:
call-bind: 1.0.7
define-properties: 1.2.1
has-symbols: 1.0.3
object-keys: 1.1.1
dev: false
/pg-cloudflare@1.1.1:
resolution: {integrity: sha512-xWPagP/4B6BgFO+EKz3JONXv3YDgvkbVrGw2mTo3D6tVDQRh1e7cqVGvyR3BE+eQgAvx1XhW/iEASj4/jCWl3Q==}
requiresBuild: true
dev: false
optional: true
/pg-connection-string@2.7.0:
resolution: {integrity: sha512-PI2W9mv53rXJQEOb8xNR8lH7Hr+EKa6oJa38zsK0S/ky2er16ios1wLKhZyxzD7jUReiWokc9WK5nxSnC7W1TA==}
dev: false
/pg-int8@1.0.1:
resolution: {integrity: sha512-WCtabS6t3c8SkpDBUlb1kjOs7l66xsGdKpIPZsg4wR+B3+u9UAum2odSsF9tnvxg80h4ZxLWMy4pRjOsFIqQpw==}
engines: {node: '>=4.0.0'}
dev: false
/pg-pool@3.7.0(pg@8.13.1):
resolution: {integrity: sha512-ZOBQForurqh4zZWjrgSwwAtzJ7QiRX0ovFkZr2klsen3Nm0aoh33Ls0fzfv3imeH/nw/O27cjdz5kzYJfeGp/g==}
peerDependencies:
pg: '>=8.0'
dependencies:
pg: 8.13.1
dev: false
/pg-protocol@1.7.0:
resolution: {integrity: sha512-hTK/mE36i8fDDhgDFjy6xNOG+LCorxLG3WO17tku+ij6sVHXh1jQUJ8hYAnRhNla4QVD2H8er/FOjc/+EgC6yQ==}
dev: false
/pg-types@2.2.0:
resolution: {integrity: sha512-qTAAlrEsl8s4OiEQY69wDvcMIdQN6wdz5ojQiOy6YRMuynxenON0O5oCpJI6lshc6scgAY8qvJ2On/p+CXY0GA==}
engines: {node: '>=4'}
dependencies:
pg-int8: 1.0.1
postgres-array: 2.0.0
postgres-bytea: 1.0.0
postgres-date: 1.0.7
postgres-interval: 1.2.0
dev: false
/pg@8.13.1:
resolution: {integrity: sha512-OUir1A0rPNZlX//c7ksiu7crsGZTKSOXJPgtNiHGIlC9H0lO+NC6ZDYksSgBYY/thSWhnSRBv8w1lieNNGATNQ==}
engines: {node: '>= 8.0.0'}
peerDependencies:
pg-native: '>=3.0.1'
peerDependenciesMeta:
pg-native:
optional: true
dependencies:
pg-connection-string: 2.7.0
pg-pool: 3.7.0(pg@8.13.1)
pg-protocol: 1.7.0
pg-types: 2.2.0
pgpass: 1.0.5
optionalDependencies:
pg-cloudflare: 1.1.1
dev: false
/pgpass@1.0.5:
resolution: {integrity: sha512-FdW9r/jQZhSeohs1Z3sI1yxFQNFvMcnmfuj4WBMUTxOrAyLMaTcE1aAMBiTlbMNaXvBCQuVi0R7hd8udDSP7ug==}
dependencies:
split2: 4.2.0
dev: false
/possible-typed-array-names@1.0.0:
resolution: {integrity: sha512-d7Uw+eZoloe0EHDIYoe+bQ5WXnGMOpmiZFTuMWCwpjzzkL2nTjcKiAk4hh8TjnGye2TwWOk3UXucZ+3rbmBa8Q==}
engines: {node: '>= 0.4'}
dev: false
/postgres-array@2.0.0:
resolution: {integrity: sha512-VpZrUqU5A69eQyW2c5CA1jtLecCsN2U/bD6VilrFDWq5+5UIEVO7nazS3TEcHf1zuPYO/sqGvUvW62g86RXZuA==}
engines: {node: '>=4'}
dev: false
/postgres-bytea@1.0.0:
resolution: {integrity: sha512-xy3pmLuQqRBZBXDULy7KbaitYqLcmxigw14Q5sj8QBVLqEwXfeybIKVWiqAXTlcvdvb0+xkOtDbfQMOf4lST1w==}
engines: {node: '>=0.10.0'}
dev: false
/postgres-date@1.0.7:
resolution: {integrity: sha512-suDmjLVQg78nMK2UZ454hAG+OAW+HQPZ6n++TNDUX+L0+uUlLywnoxJKDou51Zm+zTCjrCl0Nq6J9C5hP9vK/Q==}
engines: {node: '>=0.10.0'}
dev: false
/postgres-interval@1.2.0:
resolution: {integrity: sha512-9ZhXKM/rw350N1ovuWHbGxnGh/SNJ4cnxHiM0rxE4VN41wsg8P8zWn9hv/buK00RP4WvlOyr/RBDiptyxVbkZQ==}
engines: {node: '>=0.10.0'}
dependencies:
xtend: 4.0.2
dev: false
/set-function-length@1.2.2:
resolution: {integrity: sha512-pgRc4hJ4/sNjWCSS9AmnS40x3bNMDTknHgL5UaMBTMyJnU90EgWh1Rz+MC9eFu4BuN/UwZjKQuY/1v3rM7HMfg==}
engines: {node: '>= 0.4'}
dependencies:
define-data-property: 1.1.4
es-errors: 1.3.0
function-bind: 1.1.2
get-intrinsic: 1.2.4
gopd: 1.0.1
has-property-descriptors: 1.0.2
dev: false
/split2@4.2.0:
resolution: {integrity: sha512-UcjcJOWknrNkF6PLX83qcHM6KHgVKNkV62Y8a5uYDVv9ydGQVwAHMKqHdJje1VTWpljG0WYpCDhrCdAOYH4TWg==}
engines: {node: '>= 10.x'}
dev: false
/util@0.12.5:
resolution: {integrity: sha512-kZf/K6hEIrWHI6XqOFUiiMa+79wE/D8Q+NCNAWclkyg3b4d2k7s0QGepNjiABc+aR3N1PAyHL7p6UcLY6LmrnA==}
dependencies:
inherits: 2.0.4
is-arguments: 1.1.1
is-generator-function: 1.0.10
is-typed-array: 1.1.13
which-typed-array: 1.1.15
dev: false
/which-typed-array@1.1.15:
resolution: {integrity: sha512-oV0jmFtUky6CXfkqehVvBP/LSWJ2sy4vWMioiENyJLePrBO/yKyV9OyJySfAKosh+RYkIl5zJCNZ8/4JncrpdA==}
engines: {node: '>= 0.4'}
dependencies:
available-typed-arrays: 1.0.7
call-bind: 1.0.7
for-each: 0.3.3
gopd: 1.0.1
has-tostringtag: 1.0.2
dev: false
/xtend@4.0.2:
resolution: {integrity: sha512-LKYU1iAXJXUgAXn9URjiu+MWhyUXHsvfp7mcuYm9dSUKK0/CjtrUwFAxD82/mCWbtLsGjFIad0wIsod4zrTAEQ==}
engines: {node: '>=0.4'}
dev: false
'@cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz':
resolution: {tarball: https://cdn.sheetjs.com/xlsx-0.20.3/xlsx-0.20.3.tgz}
name: xlsx
version: 0.20.3
engines: {node: '>=0.8'}
hasBin: true
dev: false

3
requirements.txt Normal file

@ -0,0 +1,3 @@
pandas==2.1.0
xlsxwriter==3.1.2
numpy==1.24.3

113
sql-types.js Normal file

@ -0,0 +1,113 @@
const format = require("pg-format");
const XLSX = require('xlsx');
// Utility func for number handling
const isNumeric = value => !isNaN(String(value).replace(/[,$\s]/g,'')) && !isNaN(parseFloat(String(value).replace(/[,$\s]/g,'')));
const cleanNumericValue = value => typeof value === 'string' ? value.replace(/[,$\s]/g, '') : value;
// Determines PostgreSQL data type based on column values
function deduceType(values) {
if (!values || values.length === 0) return 'text';
const isDateCell = cell => cell && (cell.t === 'd' | (cell.t === 'n'));
const isBooleanCell = cell => cell?.t === 'b';
const isValidNumber = cell => cell && (cell.t === 'n' || isNumeric(cell.v));
const needsPrecision = num => {
const str = num.toString();
return str.includes('e') ||
(str.includes('.') && str.split('.')[1].length > 6) ||
Math.abs(num) > 1e15;
};
// Type detection priority: dates > booleans > numbers > text
if (values.some(isDateCell)) return 'date';
if (values.some(isBooleanCell) && values.every(cell => !cell || isBooleanCell(cell))) return 'boolean';
const numberValues = values
.filter(isValidNumber)
.map(cell => parseFloat(cleanNumericValue(cell.v)));
if (numberValues.length && values.every(cell => !cell || isValidNumber(cell))) {
return numberValues.some(needsPrecision) ? 'numeric' : 'double precision';
}
return 'text';
}
// Converts Sheetjs cell value to PostgreSQL compatible format
function parseValue(cell, type) {
if (!cell || cell.v == null || cell.v === '') return null;
switch (type) {
case 'date':
if (cell.t === 'd') return cell.v.toISOString().split('T')[0];
if (cell.t === 'n') return new Date((cell.v - 25569) * 86400 * 1000).toISOString().split('T')[0];
return null;
case 'double precision':
if (cell.t === 'n') return cell.v;
if (isNumeric(cell.v)) return parseFloat(cleanNumericValue(cell.v));
return null;
case 'boolean':
return cell.t === 'b' ? cell.v : null;
default:
return String(cell.v);
}
}
async function sheet_to_pg_table(client, worksheet, tableName) {
if (!worksheet['!ref']) return;
const range = XLSX.utils.decode_range(worksheet['!ref']);
// Extract headers from first row, clean names for PostgreSQL
const headers = [];
for (let col = range.s.c; col <= range.e.c; col++) {
const cellAddress = XLSX.utils.encode_cell({ r: range.s.r, c: col });
const cell = worksheet[cellAddress];
const headerValue = cell ? String(cell.v).replace(/[^a-zA-Z0-9_]/g, '_') : `column_${col + 1}`;
headers.push(headerValue.toLowerCase());
}
// Group cell values by column for type deduction
const columnValues = headers.map(() => []);
for (let row = range.s.r + 1; row <= range.e.r; row++) {
for (let col = range.s.c; col <= range.e.c; col++) {
const cellAddress = XLSX.utils.encode_cell({ r: row, c: col });
const cell = worksheet[cellAddress];
columnValues[col].push(cell);
}
}
// Deduc PostgreSQL type for each column
const types = {};
headers.forEach((header, idx) => {
types[header] = deduceType(columnValues[idx]);
});
await client.query(format('DROP TABLE IF EXISTS %I', tableName));
const createTableSQL = format(
'CREATE TABLE %I (%s)',
tableName,
headers.map(header => format('%I %s', header, types[header])).join(', ')
);
await client.query(createTableSQL);
// Insert data row by row
for (let row = range.s.r + 1; row <= range.e.r; row++) {
const values = headers.map((header, col) => {
const cellAddress = XLSX.utils.encode_cell({ r: row, c: col });
const cell = worksheet[cellAddress];
return parseValue(cell, types[header]);
});
const insertSQL = format(
'INSERT INTO %I (%s) VALUES (%s)',
tableName,
headers.map(h => format('%I', h)).join(', '),
values.map(() => '%L').join(', ')
);
await client.query(format(insertSQL, ...values));
}
}
module.exports = { sheet_to_pg_table };

70
test.js Normal file

@ -0,0 +1,70 @@
const XLSX = require('xlsx');
const { Client } = require('pg');
const { sheet_to_pg_table } = require('./sql-types');
const path = require('path');
async function readExcelAndTest(filename, tableName) {
console.log(`\nTesting ${filename}...`);
// Read Excel file
const workbook = XLSX.readFile(path.join('test_files', filename), { dense: true } );
const sheetName = workbook.SheetNames[0];
const worksheet = workbook.Sheets[sheetName];
// Convert to array of objects
const data = XLSX.utils.sheet_to_json(worksheet); // keep number formatting
console.log('Parsed Excel data:', data);
// Connect to PostgreSQL
const client = new Client({
host: 'localhost',
database: 'SheetJSPG',
user: 'postgres',
password: '7509'
});
try {
await client.connect();
// Import data
await sheet_to_pg_table(client, workbook, tableName);
// Verify table structure
const structure = await client.query(`
SELECT column_name, data_type
FROM information_schema.columns
WHERE table_name = $1
ORDER BY ordinal_position;
`, [tableName]);
console.log('\nTable structure:', structure.rows);
// Verify data
const results = await client.query(`SELECT * FROM ${tableName}`);
console.log('\nImported data from DB:', results.rows);
} catch (error) {
console.error(`Error testing ${filename}:`, error);
throw error;
} finally {
await client.end();
}
}
async function runAllTests() {
try {
// Test each Excel file
await readExcelAndTest('number_formats.xlsx', 'test_number_formats');
await readExcelAndTest('date_formats.xlsx', 'test_dates');
await readExcelAndTest('special_values.xlsx', 'test_special_values');
await readExcelAndTest('precision.xlsx', 'test_precision');
await readExcelAndTest('string_formats.xlsx', 'test_string_formats');
await readExcelAndTest('boolean_formats.xlsx', 'test_boolean_formats');
console.log('\nAll tests completed successfully');
} catch (error) {
console.error('\nTests failed:', error);
process.exit(1);
}
}
runAllTests().catch(console.error);

Binary file not shown.

Binary file not shown.

Binary file not shown.

BIN
test_files/precision.xlsx Normal file

Binary file not shown.

Binary file not shown.

Binary file not shown.