95 lines
2.1 KiB
Python
95 lines
2.1 KiB
Python
|
import pandas as pd
|
||
|
import numpy as np
|
||
|
from datetime import datetime, timedelta
|
||
|
|
||
|
# Create test data
|
||
|
data = {
|
||
|
'ID': [1, 2, 3, '4', 5, 6, 7, 8, 9, 10],
|
||
|
'Date': [
|
||
|
'2024-01-01',
|
||
|
'01/02/2024',
|
||
|
'Invalid Date',
|
||
|
'2024-01-04',
|
||
|
'05-01-2024',
|
||
|
'2024/01/06',
|
||
|
datetime.now(),
|
||
|
'2024-01-08 ', # Extra space
|
||
|
'2024-01-09',
|
||
|
None
|
||
|
],
|
||
|
'Amount': [
|
||
|
1000.50,
|
||
|
'1,200.75',
|
||
|
'abc',
|
||
|
-999999999, # Outlier
|
||
|
1500.0000, # Extra decimals
|
||
|
1600.50,
|
||
|
None,
|
||
|
' 1800.25 ', # Leading/trailing spaces
|
||
|
1900.50,
|
||
|
2000
|
||
|
],
|
||
|
'Category': [
|
||
|
'Sales',
|
||
|
'Sales ', # Trailing space
|
||
|
'SALES', # Inconsistent case
|
||
|
'Marketing',
|
||
|
'Marketing', # Duplicate
|
||
|
None,
|
||
|
'Finance',
|
||
|
'HR',
|
||
|
'HR',
|
||
|
'' # Empty string
|
||
|
],
|
||
|
'Calculation': [
|
||
|
'=A2/0', # #DIV/0!
|
||
|
'=VLOOKUP("missing",A1:B1,2)', # #N/A
|
||
|
'=INVALID_FUNCTION()', # #NAME?
|
||
|
'=A1', # Valid
|
||
|
None,
|
||
|
'=REF!', # #REF!
|
||
|
'=VALUE("abc")', # #VALUE!
|
||
|
'=1+1',
|
||
|
'=SUM()', # Empty sum
|
||
|
'=NULL' # #NULL!
|
||
|
],
|
||
|
'Percentage': [
|
||
|
'50%',
|
||
|
'0.75', # Inconsistent format
|
||
|
'80.00%',
|
||
|
'.90', # Missing leading zero
|
||
|
'100%',
|
||
|
None,
|
||
|
'120%', # Outlier
|
||
|
'60.5%',
|
||
|
'70%',
|
||
|
'65' # Missing % symbol
|
||
|
]
|
||
|
}
|
||
|
|
||
|
df = pd.DataFrame(data)
|
||
|
|
||
|
# Add duplicate column
|
||
|
df['Category_2'] = df['Category']
|
||
|
|
||
|
# Create Excel writer
|
||
|
writer = pd.ExcelWriter('test_data.xlsx', engine='xlsxwriter')
|
||
|
|
||
|
# Write visible sheet
|
||
|
df.to_excel(writer, sheet_name='Main', index=False)
|
||
|
|
||
|
# Write hidden sheet with duplicates
|
||
|
df.to_excel(writer, sheet_name='Hidden', index=False)
|
||
|
|
||
|
# Get workbook and worksheet
|
||
|
workbook = writer.book
|
||
|
worksheet = writer.sheets['Main']
|
||
|
|
||
|
# Add some formatting inconsistencies
|
||
|
format1 = workbook.add_format({'num_format': '#,##0.00'})
|
||
|
format2 = workbook.add_format({'num_format': '0.0'})
|
||
|
|
||
|
worksheet.set_column('C:C', 15, format1)
|
||
|
worksheet.write('C4', 1234.56, format2)
|
||
|
|
||
|
writer.close()
|