XLML Streaming Write

- CSV Export only quote leading ID (fixes #2959)
This commit is contained in:
SheetJS 2024-05-31 03:16:53 -04:00
parent 947a5178bd
commit 5ef49e2b96
10 changed files with 184 additions and 19 deletions

@ -243,6 +243,7 @@ function SSF_large_exp(v/*:number*/)/*:string*/ {
}
function SSF_general_num(v/*:number*/)/*:string*/ {
if(!isFinite(v)) return isNaN(v) ? "#VALUE!" : "#DIV/0!";
var V = Math.floor(Math.log(Math.abs(v))*Math.LOG10E), o;
if(V >= -4 && V <= -1) o = v.toPrecision(10+V);

@ -123,8 +123,8 @@ function parse_TypedPropertyValue(blob, type/*:number*/, _opts)/*:any*/ {
case 0x03 /*VT_I4*/: ret = blob.read_shift(4, 'i'); return ret;
case 0x0B /*VT_BOOL*/: return blob.read_shift(4) !== 0x0;
case 0x13 /*VT_UI4*/: ret = blob.read_shift(4); return ret;
case 0x1E /*VT_LPSTR*/: return parse_lpstr(blob, t, 4).replace(chr0,'');
case 0x1F /*VT_LPWSTR*/: return parse_lpwstr(blob);
case 0x1E /*VT_LPSTR*/: blob.l += 4; val = parse_VtString(blob, blob[blob.l-4]).replace(/(^|[^\u0000])\u0000+$/,"$1"); break;
case 0x1F /*VT_LPWSTR*/: blob.l += 4; val = parse_VtString(blob, blob[blob.l-4]).replace(/(^|[^\u0000])\u0000+$/,"$1"); break;
case 0x40 /*VT_FILETIME*/: return parse_FILETIME(blob);
case 0x41 /*VT_BLOB*/: return parse_BLOB(blob);
case 0x47 /*VT_CF*/: return parse_ClipboardData(blob);

@ -1186,9 +1186,12 @@ function write_ws_xlml_table(ws/*:Worksheet*/, opts, idx/*:number*/, wb/*:Workbo
o.push(writextag("Column",null,k));
});
var dense = ws["!data"] != null;
var addr = {r:0,c:0};
for(var R = range.s.r; R <= range.e.r; ++R) {
var row = [write_ws_xlml_row(R, (ws['!rows']||[])[R])];
addr.r = R;
for(var C = range.s.c; C <= range.e.c; ++C) {
addr.c = C;
var skip = false;
for(mi = 0; mi != marr.length; ++mi) {
if(marr[mi].s.c > C) continue;
@ -1199,7 +1202,6 @@ function write_ws_xlml_table(ws/*:Worksheet*/, opts, idx/*:number*/, wb/*:Workbo
break;
}
if(skip) continue;
var addr = {r:R,c:C};
var ref = encode_col(C) + encode_row(R), cell = dense ? (ws["!data"][R]||[])[C] : ws[ref];
row.push(write_ws_xlml_cell(cell, ref, ws, opts, idx, wb, addr));
}
@ -1223,7 +1225,7 @@ function write_ws_xlml(idx/*:number*/, opts, wb/*:Workbook*/)/*:string*/ {
/* WorksheetOptions */
o.push(write_ws_xlml_wsopts(ws, opts, idx, wb));
if(ws["!autofilter"]) o.push('<AutoFilter x:Range="' + a1_to_rc(fix_range(ws["!autofilter"].ref), {r:0,c:0}) + '" xmlns="urn:schemas-microsoft-com:office:excel"></AutoFilter>');
if(ws && ws["!autofilter"]) o.push('<AutoFilter x:Range="' + a1_to_rc(fix_range(ws["!autofilter"].ref), {r:0,c:0}) + '" xmlns="urn:schemas-microsoft-com:office:excel"></AutoFilter>');
return o.join("");
}
@ -1242,11 +1244,10 @@ function write_xlml(wb, opts)/*:string*/ {
d.push(write_props_xlml(wb, opts));
d.push(write_wb_xlml(wb, opts));
d.push("");
d.push("");
d.push(write_names_xlml(wb, opts));
for(var i = 0; i < wb.SheetNames.length; ++i)
d.push(writextag("Worksheet", write_ws_xlml(i, opts, wb), {"ss:Name":escapexml(wb.SheetNames[i])}));
d[2] = write_sty_xlml(wb, opts);
d[3] = write_names_xlml(wb, opts);
return XML_HEADER + writextag("Workbook", d.join(""), {
'xmlns': XLMLNS.ss,
'xmlns:o': XLMLNS.o,

@ -105,7 +105,7 @@ function write_ws_biff2(ba/*:BufArray*/, ws/*:Worksheet*/, idx/*:number*/, opts,
if(range.e.c > 0xFF || range.e.r > 0x3FFF) {
if(opts.WTF) throw new Error("Range " + (ws['!ref'] || "A1") + " exceeds format limit A1:IV16384");
range.e.c = Math.min(range.e.c, 0xFF);
range.e.r = Math.min(range.e.c, 0x3FFF);
range.e.r = Math.min(range.e.r, 0x3FFF);
}
var date1904 = (((wb||{}).Workbook||{}).WBProps||{}).date1904;
var row = [], comments = [];
@ -515,9 +515,9 @@ function write_ws_biff8(idx/*:number*/, opts, wb/*:Workbook*/) {
var range = safe_decode_range(ws['!ref'] || "A1");
var MAX_ROWS = b8 ? 65536 : 16384;
if(range.e.c > 0xFF || range.e.r >= MAX_ROWS) {
if(opts.WTF) throw new Error("Range " + (ws['!ref'] || "A1") + " exceeds format limit A1:IV16384");
if(opts.WTF) throw new Error("Range " + (ws['!ref'] || "A1") + " exceeds format limit A1:IV" + MAX_ROWS);
range.e.c = Math.min(range.e.c, 0xFF);
range.e.r = Math.min(range.e.c, MAX_ROWS-1);
range.e.r = Math.min(range.e.r, MAX_ROWS-1);
}
write_biff_rec(ba, 0x0809, write_BOF(wb, 0x10, opts));
@ -552,12 +552,11 @@ function write_ws_biff8(idx/*:number*/, opts, wb/*:Workbook*/) {
if(dense) row = ws["!data"][R] || [];
rr = encode_row(R);
for(C = range.s.c; C <= range.e.c; ++C) {
ref = cols[C] + rr;
var cell = dense ? row[C] : ws[ref];
var cell = dense ? row[C] : ws[cols[C] + rr];
if(!cell) continue;
/* write cell */
write_ws_biff8_cell(ba, cell, R, C, opts, date1904);
if(b8 && cell.l) ws['!links'].push([ref, cell.l]);
if(b8 && cell.l) ws['!links'].push([cols[C] + rr, cell.l]);
if(cell.c) comments.push([cell.c, R, C]);
}
}
@ -701,6 +700,9 @@ function write_biff_buf(wb/*:Workbook*/, opts/*:WriteOpts*/) {
if(range.e.c > 255) { // note: 255 is IV
if(typeof console != "undefined" && console.error) console.error("Worksheet '" + wb.SheetNames[i] + "' extends beyond column IV (255). Data may be lost.");
}
if(range.e.r > 65535) {
if(typeof console != "undefined" && console.error) console.error("Worksheet '" + wb.SheetNames[i] + "' extends beyond row 65536. Data may be lost.");
}
}
var o = opts || {};

@ -104,7 +104,7 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/) {
}
var qreg = /"/g;
function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array<string>*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, o/*:Sheet2CSVOpts*/)/*:?string*/ {
function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array<string>*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, w/*:number*/, o/*:Sheet2CSVOpts*/)/*:?string*/ {
var isempty = true;
var row/*:Array<string>*/ = [], txt = "", rr = encode_row(R);
var dense = sheet["!data"] != null;
@ -117,7 +117,7 @@ function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Arr
isempty = false;
txt = ''+(o.rawNumbers && val.t == "n" ? val.v : format_cell(val, null, o));
for(var i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34 || o.forceQuotes) {txt = "\"" + txt.replace(qreg, '""') + "\""; break; }
if(txt == "ID") txt = '"ID"';
if(txt == "ID" && w == 0 && row.length == 0) txt = '"ID"';
} else if(val.f != null && !val.F) {
isempty = false;
txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"';
@ -144,7 +144,7 @@ function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/)/*:string*/ {
var w = 0;
for(var R = r.s.r; R <= r.e.r; ++R) {
if ((rowinfo[R]||{}).hidden) continue;
row = make_csv_row(sheet, r, R, cols, fs, rs, FS, o);
row = make_csv_row(sheet, r, R, cols, fs, rs, FS, w, o);
if(row == null) { continue; }
if(row || (o.blankrows !== false)) out.push((w++ ? RS : "") + row);
}

@ -19,7 +19,7 @@ function write_csv_stream(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) {
while(R <= r.e.r) {
++R;
if ((rowinfo[R-1]||{}).hidden) continue;
row = make_csv_row(sheet, r, R-1, cols, fs, rs, FS, o);
row = make_csv_row(sheet, r, R-1, cols, fs, rs, FS, w, o);
if(row != null) {
if(row || (o.blankrows !== false)) return stream.push((w++ ? RS : "") + row);
}
@ -116,9 +116,165 @@ function write_json_stream(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) {
return stream;
}
/**
 * Create a Readable stream that emits a workbook as SpreadsheetML 2003 (XLML).
 *
 * The styles table has to appear before any worksheet, so every cell is
 * visited once up front to populate `opts.cellXfs`; the document itself is
 * then produced lazily by a small state machine driven by `stream._read`:
 *
 *    0  XML header and opening <Workbook> tag
 *    1  document properties and workbook settings
 *    2  styles and workbook-level names
 *    3  opening <Worksheet> tag and worksheet-level names
 *    4  columns and rows (rows are emitted in batches of 10)
 *    5  worksheet options, autofilter, closing </Worksheet>
 *   -1  closing </Workbook>
 *   -2  end of stream
 *
 * Side effects: `wb.SSF` is created when absent, and `revssf`, `ssf` and
 * `cellXfs` are written onto the options object (the caller's object when one
 * is supplied).
 *
 * @param wb workbook to serialize
 * @param o  optional write options
 * @returns the stream object produced by `_Readable()`
 */
function write_xlml_stream(wb/*:Workbook*/, o/*:?Sheet2XLMLOpts*/) {
	var stream = _Readable();
	var opts = o == null ? {} : o;
	if(!wb.SSF) wb.SSF = dup(table_fmt);
	if(wb.SSF) {
		make_ssf(); SSF_load_table(wb.SSF);
		// $FlowIgnore
		opts.revssf = evert_num(wb.SSF); opts.revssf[wb.SSF[65535]] = 0;
		opts.ssf = wb.SSF;
		opts.cellXfs = [];
		get_cell_style(opts.cellXfs, {}, {revssf:{"General":0}});
	}
	/* do one pass to determine styles since they must be added before tables */
	wb.SheetNames.forEach(function(n) {
		var ws = wb.Sheets[n];
		if(!ws || !ws["!ref"]) return;
		var range = decode_range(ws["!ref"]);
		var dense = ws["!data"] != null;
		var ddata = dense ? ws["!data"] : [];
		for(var R = range.s.r; R <= range.e.r; ++R) {
			if(dense && !ddata[R]) continue;
			for(var C = range.s.c; C <= range.e.c; ++C) {
				var cell = dense ? ddata[R][C] : ws[encode_col(C) + encode_row(R)];
				if(!cell) continue;
				/* date cells without a format are registered with table_fmt[14]; the copy keeps the caller's cell untouched */
				if(cell.t == "d" && cell.z == null) { cell = dup(cell); cell.z = table_fmt[14]; }
				void get_cell_style(opts.cellXfs, cell, opts);
			}
		}
	});
	var sty = write_sty_xlml(wb, opts);
	/* `range` is a placeholder here: stage 3 decodes the real range before any row is written */
	var stage = 0, wsidx = 0, ws = wb.Sheets[wb.SheetNames[wsidx]], range = safe_decode_range("A1"), R = -1, T = false;
	var marr = [], mi = 0, dense = false, darr = [], addr = {r:0,c:0};
	stream._read = function() { switch(stage) {
		/* header */
		case 0: {
			stage = 1;
			stream.push(XML_HEADER);
			stream.push("<Workbook" + wxt_helper({
				'xmlns': XLMLNS.ss,
				'xmlns:o': XLMLNS.o,
				'xmlns:x': XLMLNS.x,
				'xmlns:ss': XLMLNS.ss,
				'xmlns:dt': XLMLNS.dt,
				'xmlns:html': XLMLNS.html
			}) + ">");
		} break;
		/* preamble */
		case 1: {
			stage = 2;
			stream.push(write_props_xlml(wb, opts));
			stream.push(write_wb_xlml(wb, opts));
		} break;
		/* style and name tables */
		case 2: {
			stage = 3;
			stream.push(sty);
			stream.push(write_names_xlml(wb, opts));
		} break;
		/* worksheet preamble */
		case 3: {
			T = false;
			if(wsidx >= wb.SheetNames.length) { stage = -1; stream.push(""); break; }
			stream.push("<Worksheet" + wxt_helper({ "ss:Name": escapexml(wb.SheetNames[wsidx])}) + ">");
			ws = wb.Sheets[wb.SheetNames[wsidx]];
			/* a listed sheet with no worksheet object still gets an (empty) element */
			if(!ws) { stream.push("</Worksheet>"); return void ++wsidx; }
			var names = write_ws_xlml_names(ws, opts, wsidx, wb);
			if(names.length) stream.push("<Names>" + names + "</Names>");
			if(!ws["!ref"]) return (stage = 5);
			range = safe_decode_range(ws["!ref"]);
			R = range.s.r;
			stage = 4;
		} break;
		/* worksheet intramble */
		case 4: {
			if(R < 0 || R > range.e.r) { stream.push(T ? "</Table>" : ""); return void (stage = 5); }
			/* tracks whether this invocation has handed any data to the stream */
			var pushed = false;
			if(R <= range.s.r) {
				/* first visit for this worksheet: emit columns and reset per-sheet state */
				if(ws['!cols']) ws['!cols'].forEach(function(n, i) {
					process_col(n);
					var w = !!n.width;
					var p = col_obj_w(i, n);
					var k/*:any*/ = {"ss:Index":i+1};
					if(w) k['ss:Width'] = width2px(p.width);
					if(n.hidden) k['ss:Hidden']="1";
					if(!T) { T = true; stream.push("<Table>"); }
					stream.push(writextag("Column",null,k));
					pushed = true;
				});
				dense = ws["!data"] != null;
				if(dense) darr = ws["!data"];
				/* load this sheet's merges so covered cells are skipped below */
				marr = ws["!merges"] || [];
				addr.r = addr.c = 0;
			}
			/* process 10 rows per invocation; keep going past 10 while nothing has
			   been pushed, since a _read call that pushes no data is not followed
			   by another _read call */
			for(var cnt = 0; R <= range.e.r && (cnt < 10 || !pushed); ++R, ++cnt) {
				var row = [write_ws_xlml_row(R, (ws['!rows']||[])[R])];
				addr.r = R;
				if(!(dense && !darr[R])) for(var C = range.s.c; C <= range.e.c; ++C) {
					addr.c = C;
					var skip = false;
					for(mi = 0; mi != marr.length; ++mi) {
						if(marr[mi].s.c > C) continue;
						if(marr[mi].s.r > R) continue;
						if(marr[mi].e.c < C) continue;
						if(marr[mi].e.r < R) continue;
						/* inside a merge: only the top-left cell is written */
						if(marr[mi].s.c != C || marr[mi].s.r != R) skip = true;
						break;
					}
					if(skip) continue;
					var ref = encode_col(C) + encode_row(R), cell = dense ? darr[R][C] : ws[ref];
					row.push(write_ws_xlml_cell(cell, ref, ws, opts, wsidx, wb, addr));
				}
				row.push("</Row>");
				/* length 2 means only the opening and closing row tags: nothing to emit */
				if(row.length > 2) {
					if(!T) { T = true; stream.push("<Table>"); }
					stream.push(row.join(""));
					pushed = true;
				}
			}
			/* every remaining row was skipped, so the rows are exhausted: close the table in this call */
			if(!pushed) { stream.push(T ? "</Table>" : ""); stage = 5; }
		} break;
		/* worksheet postamble */
		case 5: {
			stream.push(write_ws_xlml_wsopts(ws, opts, wsidx, wb));
			if(ws && ws["!autofilter"]) stream.push('<AutoFilter x:Range="' + a1_to_rc(fix_range(ws["!autofilter"].ref), {r:0,c:0}) + '" xmlns="urn:schemas-microsoft-com:office:excel"></AutoFilter>');
			stream.push("</Worksheet>");
			wsidx++; R = -1;
			return void (stage = 3);
		}
		/* footer */
		case -1: {
			stage = -2;
			stream.push("</Workbook>");
		} break;
		/* exeunt */
		case -2: stream.push(null); break;
	}};
	return stream;
}
var __stream = {
to_json: write_json_stream,
to_html: write_html_stream,
to_csv: write_csv_stream,
to_xlml: write_xlml_stream,
set_readable: set_readable
};

@ -29,6 +29,7 @@ var general_fmt_num = (function make_general_fmt_num() {
}
function general_fmt_num_base(v/*:number*/)/*:string*/ {
if(!isFinite(v)) return isNaN(v) ? "#VALUE!" : "#DIV/0!";
var V = Math.floor(Math.log(Math.abs(v))*Math.LOG10E), o;
if(V >= -4 && V <= -1) o = v.toPrecision(10+V);

@ -254,6 +254,7 @@ var general_fmt_num = (function make_general_fmt_num() {
}
function general_fmt_num_base(v/*:number*/)/*:string*/ {
if(!isFinite(v)) return isNaN(v) ? "#VALUE!" : "#DIV/0!";
var V = Math.floor(Math.log(Math.abs(v))*Math.LOG10E), o;
if(V >= -4 && V <= -1) o = v.toPrecision(10+V);

@ -250,6 +250,7 @@ var general_fmt_num = (function make_general_fmt_num() {
}
function general_fmt_num_base(v) {
if(!isFinite(v)) return isNaN(v) ? "#VALUE!" : "#DIV/0!";
var V = Math.floor(Math.log(Math.abs(v))*Math.LOG10E), o;
if(V >= -4 && V <= -1) o = v.toPrecision(10+V);

@ -23,8 +23,10 @@ describe('oddities', function() {
assert.equal(SSF.format('#,##0.0; (#,##0.0); "-"', NaN), " -");
assert.equal(SSF.format('#,##0.0; (#,##0.0); "-"', Infinity), " -");
assert.equal(SSF.format('#,##0.0; (#,##0.0); "-"', -Infinity), " -");
assert.equal(SSF.format('0.00', NaN), "#VALUE!");
assert.equal(SSF.format('0.00', Infinity), "#DIV/0!");
assert.equal(SSF.format('0.00', -Infinity), "#DIV/0!");
["0.00", "General"].forEach(function(fmt) {
assert.equal(SSF.format(fmt, NaN), "#VALUE!");
assert.equal(SSF.format(fmt, Infinity), "#DIV/0!");
assert.equal(SSF.format(fmt, -Infinity), "#DIV/0!");
});
});
});