chore: migrate

This commit is contained in:
Asad 2024-12-03 10:17:00 -05:00
parent 13dbb7d44e
commit 427135e75f
18 changed files with 466 additions and 45 deletions

BIN
favicon.ico Executable file

Binary file not shown.

After

Width:  |  Height:  |  Size: 15 KiB

@ -4,6 +4,15 @@
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>SheetSense Analyzer</title>
<!-- Google tag (gtag.js) -->
<script async src="https://www.googletagmanager.com/gtag/js?id=G-1TJSQFTRJF"></script>
<script>
window.dataLayer = window.dataLayer || [];
function gtag(){dataLayer.push(arguments);}
gtag('js', new Date());
gtag('config', 'G-1TJSQFTRJF');
</script>
<script src="https://cdn.sheetjs.com/xlsx-0.20.3/package/dist/xlsx.full.min.js"></script>
<script src="https://unpkg.com/sheetsense/dist/sheetsense.browser.js"></script>
<style>
@ -14,14 +23,49 @@
padding: 20px;
background: #f5f5f5;
}
.upload-container {
.upload-container, .demo-container {
background: white;
padding: 2rem;
padding: 0.50rem 2rem;
border-radius: 8px;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
text-align: center;
margin-bottom: 2rem;
}
.demo-grid {
display: grid;
grid-template-columns: repeat(auto-fit, minmax(300px, 1fr));
gap: 1rem;
margin-top: 1rem;
}
.demo-card {
border: 1px solid #e5e7eb;
border-radius: 8px;
padding: 1rem;
background: #fff;
}
.demo-card h3 {
margin-top: 0;
color: #1f2937;
border-bottom: 2px solid #e5e7eb;
padding-bottom: 0.5rem;
}
.demo-button {
display: block;
width: 100%;
padding: 0.5rem 1rem;
text-align: left;
background: none;
border: none;
border-bottom: 1px solid #e5e7eb;
cursor: pointer;
color: #4b5563;
font-size: 0.875rem;
}
.demo-button:last-child {
border-bottom: none;
}
.demo-button:hover {
background: #f9fafb;
}
#drop-zone {
border: 2px dashed #ccc;
border-radius: 4px;
@ -44,50 +88,70 @@
padding: 1rem;
border-radius: 4px;
}
.error {
background: #fff5f5;
border-left: 4px solid #dc2626;
}
.warning {
background: #fffbeb;
border-left: 4px solid #d97706;
}
.info {
background: #f0f9ff;
border-left: 4px solid #0284c7;
}
.metadata {
background: #f8fafc;
padding: 1rem;
border-radius: 4px;
margin: 1rem 0;
}
.hidden {
display: none;
}
.footer {
.error { background: #fff5f5; border-left: 4px solid #dc2626; }
.warning { background: #fffbeb; border-left: 4px solid #d97706; }
.info { background: #f0f9ff; border-left: 4px solid #0284c7; }
.hidden { display: none; }
.loading {
text-align: center;
margin-top: 2rem;
padding: 1rem;
color: #666;
}
.footer a {
color: #0066cc;
text-decoration: none;
}
.footer a:hover {
text-decoration: underline;
}
</style>
</head>
<body>
<div class="upload-container">
<h1>SheetSense Analyzer </h1>
<h2>SheetSense Analyzer</h2>
<p>Excel workbook validation engine that detects quality issues, ensures consistency, and makes your spreadsheets bulletproof.</p>
<div id="drop-zone">
<p>Drop your Excel file here or click to choose</p>
<p>Drop your Excel (xls or xlsx) file here or click to choose</p>
<input type="file" id="file-input" accept=".xlsx,.xls" class="hidden">
</div>
</div>
<div class="demo-container">
<h2>Test Files</h2>
<div class="test-files-list">
<div class="test-files-category">
<div class="category-header">Circular Reference Tests</div>
<button class="demo-button" data-file="test_files/circular_reference/direct_circular.xlsx">
Direct Circular Reference Test
</button>
<button class="demo-button" data-file="test_files/circular_reference/indirect_circular.xlsx">
Indirect Circular Reference Test
</button>
<button class="demo-button" data-file="test_files/circular_reference/valid_cross_reference.xlsx">
Valid Cross Reference Test
</button>
</div>
<div class="test-files-category">
<div class="category-header">Data Quality Tests</div>
<button class="demo-button" data-file="test_files/data_quality/data_quality_issues.xlsx">
Data Quality Issues Test
</button>
</div>
<div class="test-files-category">
<div class="category-header">Formula Errors</div>
<button class="demo-button" data-file="test_files/formula_errors/formula_errors.xlsx">
Formula Errors Test
</button>
</div>
<div class="test-files-category">
<div class="category-header">Hidden Content</div>
<button class="demo-button" data-file="test_files/hidden_cell/hidden_cells.xlsx">
Hidden Cell
</button>
<button class="demo-button" data-file="test_files/hidden_cell/hidden_rows_columns.xlsx">
Hidden rows and columns
</button>
<!-- Points at the workbook written as 'consecutive_hidden_rows.xlsx' by the hidden-content generator script -->
<button class="demo-button" data-file="test_files/hidden_cell/consecutive_hidden_rows.xlsx">
Consecutive hidden rows
</button>
</div>
</div>
</div>
<div id="results" class="results hidden">
<h2>Analysis Results</h2>
<div class="metadata">
@ -100,13 +164,15 @@
</div>
</div>
<div class="footer">
<div class="footer">
<p>
<a href="https://ezy.ovh/bmicX" target="_blank">View on GitHub</a> |
Created by <a href="https://ezy.ovh/oEaFv" target="_blank">@asadbek064</a>
<a href="https://ezy.ovh/bmicX" target="_blank">View on GitHub</a> |
Created by <a href="https://ezy.ovh/oEaFv" target="_blank">@asadbek064</a>
<a href="https://mylinx.cc/asad" target="_blank">mylinx.cc/asad</a>
</p>
</div>
<script>
const dropZone = document.getElementById('drop-zone');
const fileInput = document.getElementById('file-input');
@ -114,7 +180,62 @@
const metadataContent = document.getElementById('metadata-content');
const issuesContent = document.getElementById('issues-content');
// Drag and drop handlers
// Demo button handlers: each .demo-button fetches the workbook named in its
// data-file attribute, runs the analyzer on it, and shows the outcome.
document.querySelectorAll('.demo-button').forEach(button => {
    button.addEventListener('click', async () => {
        const filePath = button.dataset.file;
        results.classList.add('hidden');

        // Show loading state. textContent is used so the path is never parsed as markup.
        const loadingDiv = document.createElement('div');
        loadingDiv.className = 'loading';
        loadingDiv.textContent = `Loading ${filePath}...`;
        button.parentNode.appendChild(loadingDiv);

        try {
            // Root-relative URL: the file is requested from the root of the deployed site
            const response = await fetch(`/${filePath}`, {
                headers: {
                    'Accept': 'application/octet-stream'
                }
            });

            if (!response.ok) {
                throw new Error(`Failed to load file: ${filePath} (${response.status} ${response.statusText})`);
            }

            const arrayBuffer = await response.arrayBuffer();
            const data = new Uint8Array(arrayBuffer);
            const workbook = XLSX.read(data, {
                type: 'array',
                cellFormula: true,
                cellNF: true,
                cellText: true,
                cellStyles: true,
                cellDates: true,
                raw: true
            });

            const analyzer = new SheetSense.ExcelAnalyzer(workbook);
            const analysis = analyzer.analyze();
            displayResults(analysis);
        } catch (error) {
            console.error('Error loading test file:', error);

            // Build the error card from DOM nodes rather than an HTML string:
            // the message can contain the server's status text, which must be
            // rendered as plain text.
            const issue = document.createElement('div');
            issue.className = 'issue error';

            const title = document.createElement('strong');
            title.textContent = 'Error loading test file';

            const detail = document.createElement('p');
            detail.textContent = error.message;

            issue.appendChild(title);
            issue.appendChild(detail);

            issuesContent.innerHTML = '';
            issuesContent.appendChild(issue);
            results.classList.remove('hidden');
        } finally {
            // Remove the loading indicator whether the load succeeded or failed
            loadingDiv.remove();
        }
    });
});
// Drag-and-drop handlers for the upload zone
dropZone.addEventListener('dragover', (e) => {
e.preventDefault();
dropZone.classList.add('dragover');
@ -131,7 +252,6 @@
handleFile(file);
});
// Click to upload
dropZone.addEventListener('click', () => {
fileInput.click();
});
@ -145,9 +265,16 @@
const reader = new FileReader();
reader.onload = (e) => {
const data = new Uint8Array(e.target.result);
const workbook = XLSX.read(data, { type: 'array' });
const workbook = XLSX.read(data, {
type: 'buffer',
cellFormula: true,
cellNF: true,
cellText: true,
cellStyles: true,
cellDates: true,
raw: true
});
// Use the actual SheetSense package
const analyzer = new SheetSense.ExcelAnalyzer(workbook);
const analysis = analyzer.analyze();
displayResults(analysis);
@ -158,7 +285,6 @@
function displayResults(analysis) {
results.classList.remove('hidden');
// Display metadata
metadataContent.innerHTML = `
<p>Sheets: ${analysis.metadata.sheetCount}</p>
<p>Formulas: ${analysis.metadata.formulaCount}</p>
@ -167,7 +293,6 @@
<p>External References: ${analysis.metadata.externalReferences}</p>
`;
// Display issues
if (analysis.issues.length === 0) {
issuesContent.innerHTML = '<p>No issues found!</p>';
} else {
@ -183,4 +308,4 @@
}
</script>
</body>
</html>
</html>

Binary file not shown.

@ -0,0 +1,77 @@
import openpyxl
from openpyxl import Workbook
def create_direct_circular_reference():
    """Write ``direct_circular.xlsx``: one sheet whose A1 formula refers to A1 itself."""
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Sheet1"

    # A1 depends on its own value: the simplest possible circular reference.
    self_referencing = sheet['A1']
    self_referencing.value = '=A1+1'
    self_referencing.data_type = 'f'  # explicitly mark the cell as a formula

    # Plain save into the current working directory; no extra options are passed.
    workbook.save('direct_circular.xlsx')
    print("Created direct_circular.xlsx")
def create_indirect_circular_reference():
    """Write ``indirect_circular.xlsx`` containing the cycle A1 -> B1 -> C1 -> A1."""
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Sheet1"

    # Each cell adds 1 to the next cell in the chain; C1 closes the loop back to A1.
    chain = (
        ('A1', '=B1+1'),
        ('B1', '=C1+1'),
        ('C1', '=A1+1'),
    )
    for coordinate, formula in chain:
        target = sheet[coordinate]
        target.value = formula
        target.data_type = 'f'  # explicitly mark the cell as a formula

    workbook.save('indirect_circular.xlsx')
    print("Created indirect_circular.xlsx")
def create_valid_cross_reference():
    """Write ``valid_cross_reference.xlsx``: B1 doubles the constant in A1 (no cycle)."""
    workbook = Workbook()
    sheet = workbook.active
    sheet.title = "Sheet1"

    # A1 is a plain number ...
    constant = sheet['A1']
    constant.value = 1
    constant.data_type = 'n'

    # ... and B1 is a formula that only reads A1, so nothing refers back to B1.
    dependent = sheet['B1']
    dependent.value = '=A1*2'
    dependent.data_type = 'f'

    workbook.save('valid_cross_reference.xlsx')
    print("Created valid_cross_reference.xlsx")
def verify_formulas():
    """Reload each generated workbook and print every cell, labelling formulas.

    A cell is reported as a formula when its value is truthy and its string
    form starts with ``=``; every other cell is reported as a plain value.
    """
    generated = (
        'direct_circular.xlsx',
        'indirect_circular.xlsx',
        'valid_cross_reference.xlsx',
    )
    for filename in generated:
        sheet = openpyxl.load_workbook(filename, data_only=False).active
        print(f"\nVerifying {filename}:")
        for cell in (c for row in sheet.iter_rows() for c in row):
            is_formula = bool(cell.value) and str(cell.value).startswith('=')
            label = "Formula" if is_formula else "Value"
            print(f"Cell {cell.coordinate}: {label} = {cell.value}")
def main():
    """Generate the three circular-reference workbooks, then print their contents."""
    print("Generating Excel test files...")
    try:
        generators = (
            create_direct_circular_reference,
            create_indirect_circular_reference,
            create_valid_cross_reference,
        )
        for generate in generators:
            generate()
        print("\nAll files generated successfully!")

        print("\nVerifying formula storage:")
        verify_formulas()
    except Exception as exc:
        # Top-level boundary of the script: report the failure as a message.
        print(f"Error generating files: {exc!s}")


if __name__ == "__main__":
    main()

Binary file not shown.

Binary file not shown.

@ -0,0 +1,15 @@
import pandas as pd
import numpy as np
# Data quality issues dataset: every column deliberately mixes value types,
# notations and missing entries. Each list below holds ten rows.
product_ids = ['A1', 'B2', 123, 'D4', None, 'F6', 'G7', np.nan, 'I9', 'J10']
stock_counts = [100, '200', 'invalid', 400, -999999, 600, '7OO', '800 ', None, 1000]
prices = [10.999, '20.00', 30, '40.0000', 50.5, '60', None, '80.0', 90.99999, '100.']
last_updated = [
    None, '2024-13-01', '01/01/2024', '2024.01.01', 'yesterday',
    '01-01-2024 ', '2024/01/01', '1st Jan 2024', pd.Timestamp.now(), '2024-01-01',
]

data = {
    'Product_ID': product_ids,
    'Stock_Count': stock_counts,
    'Price': prices,
    'Last_Updated': last_updated,
}

# Write the frame without the index column so only the four data columns appear.
df = pd.DataFrame(data)
df.to_excel('data_quality_issues.xlsx', index=False)

Binary file not shown.

@ -0,0 +1,29 @@
import pandas as pd
# Employee records stored as text with deliberately inconsistent notation:
# stray leading/trailing spaces and mixed currency, date and percent styles.
data = {
    'Employee': [' John Doe', 'Jane Smith ', ' Bob Jones', 'Alice Brown ', 'Tom Wilson'],
    'Salary': ['$50,000', '60000', '$70,000.00', '80000.0', '90,000'],
    'Hire_Date': ['01-Jan-2024', '2024/01/02', '03.01.2024', 'Jan 4, 2024', '2024-01-05'],
    'Department': ['IT ', ' HR', 'Finance', ' Marketing ', 'Sales'],
    'Performance': ['95.00%', '87.5', '92', '88.75%', '90.0%']
}

df = pd.DataFrame(data)

# Use the writer as a context manager so it is closed even if a step below
# raises; the original only closed it on the success path.
with pd.ExcelWriter('formatting_issues.xlsx', engine='xlsxwriter') as writer:
    df.to_excel(writer, index=False)

    workbook = writer.book
    worksheet = writer.sheets['Sheet1']

    # Add inconsistent formatting: a different number format per column.
    formats = [
        workbook.add_format({'num_format': '#,##0'}),
        workbook.add_format({'num_format': '0.00'}),
        workbook.add_format({'num_format': '$#,##0.00'}),
        workbook.add_format({'num_format': '0%'})
    ]

    # Four formats are applied to the first four columns (indices 0-3), width 15;
    # the fifth column keeps the default format.
    for col, fmt in enumerate(formats):
        worksheet.set_column(col, col, 15, fmt)

Binary file not shown.

@ -0,0 +1,18 @@
import pandas as pd
import openpyxl
wb = openpyxl.Workbook()
ws = wb.active

# Formula strings to place on the sheet; one inner list per worksheet row,
# one entry per column starting at column A.
formulas = [
    ['=A1/0', '=B1*C1', '=UNKNOWNFUNC()', '=D1+E1'],
    ['=VLOOKUP("x",A1:A2,3,FALSE)', '=SUM(#REF!)', '=VALUE("abc")', '=NULL'],
    ['=1/0', '=NA()', '=NAME?', '=VALUE!'],
    ['=IF(A1="",,)', '=INDIRECT("invalid")', '=1+"text"', '=SUM()']
]

# The sheet starts empty, so appending each list fills rows 1-4 from column A.
for row in formulas:
    ws.append(row)

wb.save('formula_errors.xlsx')

95
test_files/gen_test.py Normal file

@ -0,0 +1,95 @@
import pandas as pd
import numpy as np
from datetime import datetime, timedelta
# Create test data: ten rows per column, each column seeded with the kind of
# problem noted in the inline comments.
data = {
    'ID': [1, 2, 3, '4', 5, 6, 7, 8, 9, 10],
    'Date': [
        '2024-01-01',
        '01/02/2024',
        'Invalid Date',
        '2024-01-04',
        '05-01-2024',
        '2024/01/06',
        datetime.now(),
        '2024-01-08 ',  # Extra space
        '2024-01-09',
        None
    ],
    'Amount': [
        1000.50,
        '1,200.75',
        'abc',
        -999999999,  # Outlier
        1500.0000,  # Extra decimals
        1600.50,
        None,
        ' 1800.25 ',  # Leading/trailing spaces
        1900.50,
        2000
    ],
    'Category': [
        'Sales',
        'Sales ',  # Trailing space
        'SALES',  # Inconsistent case
        'Marketing',
        'Marketing',  # Duplicate
        None,
        'Finance',
        'HR',
        'HR',
        ''  # Empty string
    ],
    'Calculation': [
        '=A2/0',  # #DIV/0!
        '=VLOOKUP("missing",A1:B1,2)',  # #N/A
        '=INVALID_FUNCTION()',  # #NAME?
        '=A1',  # Valid
        None,
        '=REF!',  # #REF!
        '=VALUE("abc")',  # #VALUE!
        '=1+1',
        '=SUM()',  # Empty sum
        '=NULL'  # #NULL!
    ],
    'Percentage': [
        '50%',
        '0.75',  # Inconsistent format
        '80.00%',
        '.90',  # Missing leading zero
        '100%',
        None,
        '120%',  # Outlier
        '60.5%',
        '70%',
        '65'  # Missing % symbol
    ]
}

df = pd.DataFrame(data)

# Add duplicate column
df['Category_2'] = df['Category']

# Use the writer as a context manager so it is closed even if a step below
# raises; the original only closed it on the success path.
with pd.ExcelWriter('test_data.xlsx', engine='xlsxwriter') as writer:
    # Write visible sheet
    df.to_excel(writer, sheet_name='Main', index=False)

    # Write a second copy of the data and actually hide it: the original wrote
    # the 'Hidden' sheet but never marked it hidden, so it stayed visible.
    df.to_excel(writer, sheet_name='Hidden', index=False)
    writer.sheets['Hidden'].hide()

    # Get workbook and worksheet
    workbook = writer.book
    worksheet = writer.sheets['Main']

    # Add some formatting inconsistencies: a column-wide format on C plus one
    # cell written with a different format.
    format1 = workbook.add_format({'num_format': '#,##0.00'})
    format2 = workbook.add_format({'num_format': '0.0'})

    worksheet.set_column('C:C', 15, format1)
    # NOTE(review): with the header in row 1, C4 is the Amount cell of the third
    # data row ('abc'), so this write replaces that text value - confirm intended.
    worksheet.write('C4', 1234.56, format2)

Binary file not shown.

@ -0,0 +1,62 @@
import openpyxl
from openpyxl import Workbook
from openpyxl.styles import Protection
from openpyxl.worksheet.dimensions import DimensionHolder, ColumnDimension, RowDimension
def create_hidden_cells_test():
    """Return a workbook whose protected sheet has B1 flagged ``Protection(hidden=True)``.

    A1 holds the text ``'Visible'`` and B1 the text ``'Hidden'``.
    """
    workbook = Workbook()
    sheet = workbook.active

    for coordinate, text in (('A1', 'Visible'), ('B1', 'Hidden')):
        sheet[coordinate] = text

    # Sheet protection is switched on before the per-cell flag is set on B1.
    sheet.protection.sheet = True
    sheet['B1'].protection = Protection(hidden=True)
    return workbook
def create_hidden_rows_columns_test():
    """Return a workbook with columns A and C and rows 1 and 3 hidden.

    Cells A1, B2 and C3 hold text, so hidden rows and columns contain data.
    """
    wb = Workbook()
    ws = wb.active
    ws['A1'] = 'Content'
    ws['B2'] = 'More'
    ws['C3'] = 'Data'

    # Set the hidden flag on the worksheet's own dimension entries instead of
    # swapping in a hand-built DimensionHolder, so the sheet keeps the
    # column_dimensions container it was created with.
    for column in ('A', 'C'):
        ws.column_dimensions[column].hidden = True

    for row in (1, 3):
        ws.row_dimensions[row].hidden = True

    return wb
def create_consecutive_hidden_rows_test():
    """Return a workbook with rows 1-5 labelled in column A and rows 1, 2, 4, 5 hidden."""
    workbook = Workbook()
    sheet = workbook.active

    # Label A1..A5 as 'Row 1'..'Row 5'.
    for number in range(1, 6):
        sheet[f'A{number}'] = f'Row {number}'

    # Two runs of adjacent hidden rows (1-2 and 4-5); row 3 stays visible.
    hidden_rows = (1, 2, 4, 5)
    for number in hidden_rows:
        sheet.row_dimensions[number] = RowDimension(sheet, index=number, hidden=True)

    return workbook
def main():
    """Build the three hidden-content workbooks, then save each one and report it."""
    builders = (
        ('hidden_cells.xlsx', create_hidden_cells_test),
        ('hidden_rows_columns.xlsx', create_hidden_rows_columns_test),
        ('consecutive_hidden_rows.xlsx', create_consecutive_hidden_rows_test),
    )

    # All workbooks are constructed before the first file is written.
    workbooks = [(filename, build()) for filename, build in builders]

    for filename, workbook in workbooks:
        workbook.save(filename)
        print(f"Created {filename}")


if __name__ == "__main__":
    main()

Binary file not shown.

Binary file not shown.

BIN
test_files/test_data.xlsx Normal file

Binary file not shown.