From 2d6c821261b3639c4f1aaa120844642e8f6f094e Mon Sep 17 00:00:00 2001 From: SheetJS Date: Wed, 2 Oct 2024 01:04:56 -0400 Subject: [PATCH] Parse DIF-esque CSV (fixes #3230 h/t @lowkeyfish) --- CHANGELOG.md | 1 + bits/40_harb.js | 14 ++++++++++++++ bits/87_read.js | 2 +- 3 files changed, 16 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 350b601..633c350 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ changes may not be included if they are not expected to break existing code. * Sheet Visibility for ODS / FODS (h/t @edemaine) * HTML DOM ingress support formulae (`data-f`) * Proper handling of XLSX encoded entities (h/t @inreoh) +* Proper handling of invalid DIF sheets that match heuristics (h/t @lowkeyfish) ## v0.20.3 diff --git a/bits/40_harb.js b/bits/40_harb.js index 4ce011b..9be637a 100644 --- a/bits/40_harb.js +++ b/bits/40_harb.js @@ -1138,3 +1138,17 @@ function read_wb_ID(d, opts) { } } +function read_wb_TABL(d, opts) { + var o = opts || {}, OLD_WTF = !!o.WTF; o.WTF = true; + try { + var out = DIF.to_workbook(d, o); + if(!out || !out.Sheets) throw "DIF bad workbook"; + var ws = out.Sheets[out.SheetNames[0]]; + if(!ws || !ws["!ref"]) throw "DIF empty worksheet"; + o.WTF = OLD_WTF; + return out; + } catch(e) { + o.WTF = OLD_WTF; + return PRN.to_workbook(d, opts); + } +} diff --git a/bits/87_read.js b/bits/87_read.js index 4933c13..fe0e8eb 100644 --- a/bits/87_read.js +++ b/bits/87_read.js @@ -99,7 +99,7 @@ function readSync(data/*:RawData*/, opts/*:?ParseOpts*/)/*:Workbook*/ { if(n[1] === 0x49 && n[2] === 0x2a && n[3] === 0x00) throw new Error("TIFF Image File is not a spreadsheet"); if(n[1] === 0x44) return read_wb_ID(d, o); break; - case 0x54: if(n[1] === 0x41 && n[2] === 0x42 && n[3] === 0x4C) return DIF.to_workbook(d, o); break; + case 0x54: if(n[1] === 0x41 && n[2] === 0x42 && n[3] === 0x4C) return read_wb_TABL(d, o); break; case 0x50: return (n[1] === 0x4B && n[2] < 0x09 && n[3] < 0x09) ? read_zip(d, o) : read_prn(data, d, o, str); case 0xEF: return n[3] === 0x3C ? parse_xlml(d, o) : read_prn(data, d, o, str); case 0xFF: