diff --git a/README.md b/README.md index 142a20a..c934e6f 100644 --- a/README.md +++ b/README.md @@ -29,8 +29,10 @@ with a unified JS representation, and ES3/ES5 browser compatibility back to IE6. * [Optional Modules](#optional-modules) * [ECMAScript 5 Compatibility](#ecmascript-5-compatibility) - [Parsing Workbooks](#parsing-workbooks) + * [Note on Streaming Read](#note-on-streaming-read) - [Working with the Workbook](#working-with-the-workbook) - [Writing Workbooks](#writing-workbooks) + * [Streaming Write](#streaming-write) - [Interface](#interface) * [Parsing functions](#parsing-functions) * [Writing functions](#writing-functions) @@ -298,6 +300,39 @@ function handleFile(e) { input_dom_element.addEventListener('change', handleFile, false); ``` +**Complete examples:** + +- HTML5 File API / Base64 Text / Web Workers + +Note that older versions of IE do not support HTML5 File API, so the base64 mode +is used for testing. On OSX you can get the base64 encoding with: + +```bash +$ certutil -encode target_file target_file.b64 +``` + +(note: You have to open the file and remove the header and footer lines) + +- XMLHttpRequest + +### Note on Streaming Read + +The most common and interesting formats (XLS, XLSX/M, XLSB, ODS) are ultimately +ZIP or CFB containers of files. Neither format puts the directory structure at +the beginning of the file: ZIP files place the Central Directory records at the +end of the logical file, while CFB files can place the FAT structure anywhere in +the file! As a result, to properly handle these formats, a streaming function +would have to buffer the entire file before commencing. That belies the +expectations of streaming, so we do not provide any streaming read API. If you +really want to stream, there are node modules like `concat-stream` that will do +the buffering for you. + ## Working with the Workbook The full object format is described later in this README. @@ -320,25 +355,6 @@ var desired_value = (desired_cell ? desired_cell.v : undefined); **Complete examples:** -- HTML5 File API / Base64 Text / Web Workers - -Note that older versions of IE do not support HTML5 File API, so the base64 mode -is used for testing. On OSX you can get the base64 encoding with: - -```bash -$ certutil -encode target_file target_file.b64 -``` - -(note: You have to open the file and remove the header and footer lines) - -- XMLHttpRequest - - node The node version installs a command line tool `xlsx` which can read spreadsheet @@ -392,6 +408,12 @@ saveAs(new Blob([s2ab(wbout)],{type:"application/octet-stream"}), "test.xlsx"); - writing an array of arrays in nodejs - exporting an HTML table +### Streaming Write + +`XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`. It +takes the same arguments but returns a readable stream. + + pipes CSV write stream to nodejs response. ## Interface `XLSX` is the exposed variable in the browser and the exported node variable @@ -769,7 +791,7 @@ worksheet['C1'] = { t:'n', f: "SUM(A1:A3*B1:B3)", F:"C1:C1" }; ``` For a multi-cell array formula, every cell has the same array range but only the -first cell has content. Consider `D1:D3=A1:A3*B1:B3`: +first cell specifies the formula. Consider `D1:D3=A1:A3*B1:B3`: ```js worksheet['D1'] = { t:'n', F:"D1:D3", f:"A1:A3*B1:B3" }; @@ -977,7 +999,8 @@ Plaintext format guessing follows the priority order: | Format | Test | |:-------|:--------------------------------------------------------------------| -| XML | starts with < | +| HTML | starts with \\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g; +var attregexg=/([^"\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:')|([^'">\s]+))/g; var tagregex=/<[^>]*>/g; var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/; function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ { @@ -7,11 +7,13 @@ function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ { for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break; if(!skip_root) z[0] = tag.substr(0, eq); if(eq === tag.length) return z; - var m = tag.match(attregexg), j=0, v="", i=0, q="", cc=""; + var m = tag.match(attregexg), j=0, v="", i=0, q="", cc="", quot = 1; if(m) for(i = 0; i != m.length; ++i) { cc = m[i]; for(c=0; c != cc.length; ++c) if(cc.charCodeAt(c) === 61) break; - q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1); + q = cc.substr(0,c); + quot = ((eq=cc.charCodeAt(c+1)) == 34 || eq == 39) ? 1 : 0; + v = cc.substring(c+1+quot, cc.length-quot); for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break; if(j===q.length) { if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods @@ -54,9 +56,10 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{ } function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); } +var htmlcharegex = /[\u0000-\u001f]/g; function escapehtml(text){ var s = text + ''; - return s.replace(decregex, function(y) { return rencoding[y]; }); + return s.replace(decregex, function(y) { return rencoding[y]; }).replace(htmlcharegex,function(s) { return "&#x" + ("000"+s.charCodeAt(0).toString(16)).slice(-4) + ";"; }); } /* TODO: handle codepages */ diff --git a/bits/41_lotus.js b/bits/41_lotus.js index 1bf7f5b..8b66deb 100644 --- a/bits/41_lotus.js +++ b/bits/41_lotus.js @@ -28,7 +28,7 @@ var WK_ = (function() { if(!d) return d; var o = opts || {}; if(DENSE != null && o.dense == null) o.dense = DENSE; - var s = (o.dense ? [] : {}), n = "Sheet1", sidx = 0; + var s/*:Worksheet*/ = ((o.dense ? [] : {})/*:any*/), n = "Sheet1", sidx = 0; var sheets = {}, snames = [n]; var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; diff --git a/bits/43_sstbin.js b/bits/43_sstbin.js index a084bea..9d158ac 100644 --- a/bits/43_sstbin.js +++ b/bits/43_sstbin.js @@ -22,8 +22,8 @@ function parse_sst_bin(data, opts)/*:SST*/ { pass = false; break; default: - if(R_n.indexOf("Begin") > 0) state.push(R_n); - else if(R_n.indexOf("End") > 0) state.pop(); + if(R_n.indexOf("Begin") > 0){} + else if(R_n.indexOf("End") > 0){} if(!pass || opts.WTF) throw new Error("Unexpected record " + RT + " " + R_n); } }); diff --git a/bits/61_fcommon.js b/bits/61_fcommon.js index 1694626..a23bb9a 100644 --- a/bits/61_fcommon.js +++ b/bits/61_fcommon.js @@ -1,7 +1,7 @@ /* TODO: it will be useful to parse the function str */ var rc_to_a1 = (function(){ var rcregex = /(^|[^A-Za-z])R(\[?)(-?\d+|)\]?C(\[?)(-?\d+|)\]?/g; - var rcbase; + var rcbase/*:Cell*/ = ({r:0,c:0}/*:any*/); function rcfunc($$,$1,$2,$3,$4,$5) { var R = $3.length>0?parseInt($3,10)|0:0, C = $5.length>0?parseInt($5,10)|0:0; if(C<0 && $4.length === 0) C=0; @@ -10,7 +10,7 @@ var rc_to_a1 = (function(){ if($2.length > 0 || $3.length == 0) rRel = true; if(rRel) R += rcbase.r; else --R; return $1 + (cRel ? "" : "$") + encode_col(C) + (rRel ? "" : "$") + encode_row(R); } - return function rc_to_a1(fstr, base) { + return function rc_to_a1(fstr/*:string*/, base/*:Cell*/)/*:string*/ { rcbase = base; return fstr.replace(rcregex, rcfunc); }; diff --git a/bits/72_wbxml.js b/bits/72_wbxml.js index b08a69e..a45302b 100644 --- a/bits/72_wbxml.js +++ b/bits/72_wbxml.js @@ -160,6 +160,8 @@ function write_wb_xml(wb/*:Workbook*/, opts/*:?WriteOpts*/)/*:string*/ { var o = [XML_HEADER]; o[o.length] = WB_XML_ROOT; + var write_names = (wb.Workbook && (wb.Workbook.Names||[]).length > 0); + /* fileVersion */ /* fileSharing */ @@ -185,9 +187,9 @@ function write_wb_xml(wb/*:Workbook*/, opts/*:?WriteOpts*/)/*:string*/ { /* functionGroups */ /* externalReferences */ - if(wb.Workbook && (wb.Workbook.Names||[]).length > 0) { + if(write_names) { o[o.length] = ""; - wb.Workbook.Names.forEach(function(n) { + if(wb.Workbook && wb.Workbook.Names) wb.Workbook.Names.forEach(function(n) { var d = {name:n.Name}; if(n.Comment) d.comment = n.Comment; if(n.Sheet != null) d.localSheetId = ""+n.Sheet; diff --git a/bits/75_xlml.js b/bits/75_xlml.js index eef9ed5..c91a228 100644 --- a/bits/75_xlml.js +++ b/bits/75_xlml.js @@ -171,7 +171,7 @@ function parse_xlml_xml(d, opts)/*:Workbook*/ { make_ssf(SSF); var str = debom(xlml_normalize(d)); if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str)); - if(str.substr(0,1000).indexOf("= 0) return parse_html(str, opts); + if(str.substr(0,1000).indexOf("= 0) return HTML_.to_workbook(str, opts); var Rn; var state = [], tmp; if(DENSE != null && opts.dense == null) opts.dense = DENSE; diff --git a/bits/79_html.js b/bits/79_html.js index 428b51c..3c48b89 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -1,46 +1,93 @@ /* TODO: in browser attach to DOM; in node use an html parser */ -function parse_html(str/*:string*/, _opts)/*:Workbook*/ { - var opts = _opts || {}; - if(DENSE != null && opts.dense == null) opts.dense = DENSE; - var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/); - var o/*:Workbook*/ = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} }; - var i = str.indexOf(" / pair"); - var rows = str.slice(i, j).split(/]*>/); - var R = 0, C = 0; - var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}}; - for(i = 0; i < rows.length; ++i) { - if(rows[i].substr(0,3) != ""); - for(j = 0; j < cells.length; ++j) { - if(cells[j].substr(0,3) != "")) > -1) m = m.slice(cc+1); - while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<")); - /* TODO: generate stub cells */ - if(!m.length) continue; - if(range.s.r > R) range.s.r = R; - if(range.e.r < R) range.e.r = R; - if(range.s.c > C) range.s.c = C; - if(range.e.c < C) range.e.c = C; - if(opts.dense) { - if(!ws[R]) ws[R] = []; - if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m}; - else ws[R][C] = {t:'s', v:m}; - } else { - var coord/*:string*/ = encode_cell({r:R, c:C}); - /* TODO: value parsing */ - if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m}; - else ws[coord] = {t:'s', v:m}; +var HTML_ = (function() { + function html_to_sheet(str/*:string*/, _opts)/*:Workbook*/ { + var opts = _opts || {}; + if(DENSE != null && opts.dense == null) opts.dense = DENSE; + var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/); + var i = str.indexOf(" / pair"); + var rows = str.slice(i, j).split(/(:?]*>)/); + var R = -1, C = 0, RS = 0, CS = 0; + var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}}; + var merges = [], midx = 0; + for(i = 0; i < rows.length; ++i) { + var row = rows[i].trim(); + if(row.substr(0,3) == ""); + for(j = 0; j < cells.length; ++j) { + var cell = cells[j].trim(); + if(cell.substr(0,3) != "")) > -1) m = m.slice(cc+1); + while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<")); + var tag = parsexmltag(cell.slice(0, cell.indexOf(">"))); + CS = tag.colspan ? +tag.colspan : 1; + if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + /* TODO: generate stub cells */ + if(!m.length) { C += CS; continue; } + m = unescapexml(m).replace(/[\r\n]/g,""); + if(range.s.r > R) range.s.r = R; + if(range.e.r < R) range.e.r = R; + if(range.s.c > C) range.s.c = C; + if(range.e.c < C) range.e.c = C; + if(opts.dense) { + if(!ws[R]) ws[R] = []; + if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m}; + else ws[R][C] = {t:'s', v:m}; + } else { + var coord/*:string*/ = encode_cell({r:R, c:C}); + /* TODO: value parsing */ + if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m}; + else ws[coord] = {t:'s', v:m}; + } + C += CS; } } - ++R; C = 0; + ws['!ref'] = encode_range(range); + return ws; } - ws['!ref'] = encode_range(range); - return o; -} + function html_to_book(str/*:string*/, opts)/*:Workbook*/ { + return sheet_to_workbook(html_to_sheet(str, opts), opts); + } + function sheet_to_html(ws/*:Worksheet*/, opts)/*:string*/ { + var o/*:Array*/ = []; + var r = decode_range(ws['!ref']), cell/*:Cell*/; + var dense = Array.isArray(ws); + var M = (ws['!merges'] ||[]); + for(var R = r.s.r; R <= r.e.r; ++R) { + var oo = []; + for(var C = r.s.c; C <= r.e.c; ++C) { + var RS = 0, CS = 0; + for(var j = 0; j < M.length; ++j) { + if(M[j].s.r > R || M[j].s.c > C) continue; + if(M[j].e.r < R || M[j].e.c < C) continue; + if(M[j].s.r < R || M[j].s.c < C) { RS = -1; break; } + RS = M[j].e.r - M[j].s.r + 1; CS = M[j].e.c - M[j].s.c + 1; break; + } + if(RS < 0) continue; + var coord = encode_cell({r:R,c:C}); + cell = dense ? (ws[R]||[])[C] : ws[coord]; + if(!cell || cell.v == null) { oo.push(""); continue; } + /* TODO: html entities */ + var w = cell.h || escapexml(cell.w || (format_cell(cell), cell.w) || ""); + var sp = {}; + if(RS > 1) sp.rowspan = RS; + if(CS > 1) sp.colspan = CS; + oo.push(writextag('td', w, sp)); + } + o.push("" + oo.join("") + ""); + } + return "" + o.join("") + "
"; + } + + return { + to_workbook: html_to_book, + to_sheet: html_to_sheet, + from_sheet: sheet_to_html + }; +})(); function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { var opts = _opts || {}; @@ -61,7 +108,7 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.getAttribute("colspan") || 1; - if((RS = +elt.getAttribute("rowspan"))>0) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}}); + if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); var o = {t:'s', v:v}; if(v != null && v.length && !isNaN(Number(v))) o = {t:'n', v:Number(v)}; if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; } diff --git a/bits/82_sheeter.js b/bits/82_sheeter.js index ba8e6f0..ab908ea 100644 --- a/bits/82_sheeter.js +++ b/bits/82_sheeter.js @@ -8,6 +8,7 @@ function write_obj_str(factory/*:WriteObjStrFactory*/) { }; } +var write_htm_str = write_obj_str(HTML_); var write_csv_str = write_obj_str({from_sheet:sheet_to_csv}); var write_slk_str = write_obj_str(SYLK); var write_dif_str = write_obj_str(DIF); diff --git a/bits/88_write.js b/bits/88_write.js index ab5561d..0423f5c 100644 --- a/bits/88_write.js +++ b/bits/88_write.js @@ -64,6 +64,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) { case 'xlml': return write_string_type(write_xlml(wb, o), o); case 'slk': case 'sylk': return write_string_type(write_slk_str(wb, o), o); + case 'html': return write_string_type(write_htm_str(wb, o), o); case 'txt': return write_bstr_type(write_txt_str(wb, o), o); case 'csv': return write_string_type(write_csv_str(wb, o), o); case 'dif': return write_string_type(write_dif_str(wb, o), o); @@ -86,6 +87,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) { case '.fods': o.bookType = 'fods'; break; case '.xlml': o.bookType = 'xlml'; break; case '.sylk': o.bookType = 'sylk'; break; + case '.html': o.bookType = 'html'; break; case '.xls': o.bookType = 'biff2'; break; case '.xml': o.bookType = 'xml'; break; case '.ods': o.bookType = 'ods'; break; @@ -94,6 +96,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) { case '.dif': o.bookType = 'dif'; break; case '.prn': o.bookType = 'prn'; break; case '.slk': o.bookType = 'sylk'; break; + case '.htm': o.bookType = 'html'; break; } } diff --git a/bits/90_utils.js b/bits/90_utils.js index 0c50d26..fd6d84f 100644 --- a/bits/90_utils.js +++ b/bits/90_utils.js @@ -154,42 +154,47 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ return out; } -function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) { - var out = "", txt = "", qreg = /"/g; +var qreg = /"/g; +function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, o/*:Sheet2CSVOpts*/)/*:?string*/ { + var isempty = true; + var row = "", txt = "", rr = encode_row(R); + for(var C = r.s.c; C <= r.e.c; ++C) { + var val = o.dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; + if(val == null) txt = ""; + else if(val.v != null) { + isempty = false; + txt = ''+format_cell(val, null, o); + for(var i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) { + txt = "\"" + txt.replace(qreg, '""') + "\""; break; } + } else if(val.f != null && !val.F) { + isempty = false; + txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"'; + } else txt = ""; + /* NOTE: Excel CSV does not support array formulae */ + row += (C === r.s.c ? "" : FS) + txt; + } + if(o.blankrows === false && isempty) return null; + return row; +} + +function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/)/*:string*/ { + var out = ""; var o = opts == null ? {} : opts; if(sheet == null || sheet["!ref"] == null) return ""; var r = safe_decode_range(sheet["!ref"]); var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0); var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0); var endregex = new RegExp((FS=="|" ? "\\|" : FS)+"+$"); - var row = "", rr = "", cols = []; - var i = 0, cc = 0, val; - var R = 0, C = 0; - var dense = Array.isArray(sheet); - for(C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); - for(R = r.s.r; R <= r.e.r; ++R) { - var isempty = true; - row = ""; - rr = encode_row(R); - for(C = r.s.c; C <= r.e.c; ++C) { - val = dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; - if(val == null) txt = ""; - else if(val.v != null) { - isempty = false; - txt = ''+format_cell(val, null, o); - for(i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) { - txt = "\"" + txt.replace(qreg, '""') + "\""; break; } - } else if(val.f != null && !val.F) { - isempty = false; - txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"'; - } else txt = ""; - /* NOTE: Excel CSV does not support array formulae */ - row += (C === r.s.c ? "" : FS) + txt; - } - if(o.blankrows === false && isempty) continue; + var row = "", cols = []; + o.dense = Array.isArray(sheet); + for(var C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); + for(var R = r.s.r; R <= r.e.r; ++R) { + row = make_csv_row(sheet, r, R, cols, fs, rs, FS, o); + if(row == null) { continue; } if(o.strip) row = row.replace(endregex,""); out += row + RS; } + delete o.dense; return out; } diff --git a/bits/97_node.js b/bits/97_node.js index 91a64e6..46e5b65 100644 --- a/bits/97_node.js +++ b/bits/97_node.js @@ -3,45 +3,26 @@ if(has_buf && typeof require != 'undefined') (function() { var write_csv_stream = function(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) { var stream = Readable(); - var out = "", txt = "", qreg = /"/g; + var out = ""; var o = opts == null ? {} : opts; if(sheet == null || sheet["!ref"] == null) { stream.push(null); return stream; } var r = safe_decode_range(sheet["!ref"]); var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0); var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0); var endregex = new RegExp((FS=="|" ? "\\|" : FS)+"+$"); - var row = "", rr = "", cols = []; - var i = 0, cc = 0, val; - var R = 0, C = 0; - var dense = Array.isArray(sheet); - for(C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); - R = r.s.r; + var row = "", cols = []; + o.dense = Array.isArray(sheet); + for(var C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); + var R = r.s.r; stream._read = function() { if(R > r.e.r) return stream.push(null); - while(true) { - var isempty = true; - row = ""; - rr = encode_row(R); - for(C = r.s.c; C <= r.e.c; ++C) { - val = dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; - if(val == null) txt = ""; - else if(val.v != null) { - isempty = false; - txt = ''+format_cell(val, null, o); - for(i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) { - txt = "\"" + txt.replace(qreg, '""') + "\""; break; } - } else if(val.f != null && !val.F) { - isempty = false; - txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"'; - } else txt = ""; - /* NOTE: Excel CSV does not support array formulae */ - row += (C === r.s.c ? "" : FS) + txt; - } - if(o.blankrows === false && isempty) { ++R; continue; } - if(o.strip) row = row.replace(endregex,""); - stream.push(row + RS); - ++R; - break; + while(R <= r.e.r) { + row = make_csv_row(sheet, r, R, cols, fs, rs, FS, o); + if(row == null) { ++R; continue; } + if(o.strip) row = row.replace(endregex,""); + stream.push(row + RS); + ++R; + break; } }; return stream; diff --git a/docbits/20_import.md b/docbits/20_import.md index 26ac49f..9c42ef5 100644 --- a/docbits/20_import.md +++ b/docbits/20_import.md @@ -124,3 +124,24 @@ function handleFile(e) { input_dom_element.addEventListener('change', handleFile, false); ``` +**Complete examples:** + +- HTML5 File API / Base64 Text / Web Workers + +Note that older versions of IE do not support HTML5 File API, so the base64 mode +is used for testing. On OSX you can get the base64 encoding with: + +```bash +$ certutil -encode target_file target_file.b64 +``` + +(note: You have to open the file and remove the header and footer lines) + +- XMLHttpRequest + diff --git a/docbits/21_readstream.md b/docbits/21_readstream.md new file mode 100644 index 0000000..0528a20 --- /dev/null +++ b/docbits/21_readstream.md @@ -0,0 +1,12 @@ +### Note on Streaming Read + +The most common and interesting formats (XLS, XLSX/M, XLSB, ODS) are ultimately +ZIP or CFB containers of files. Neither format puts the directory structure at +the beginning of the file: ZIP files place the Central Directory records at the +end of the logical file, while CFB files can place the FAT structure anywhere in +the file! As a result, to properly handle these formats, a streaming function +would have to buffer the entire file before commencing. That belies the +expectations of streaming, so we do not provide any streaming read API. If you +really want to stream, there are node modules like `concat-stream` that will do +the buffering for you. + diff --git a/docbits/25_manip.md b/docbits/25_manip.md index d9f5048..b9b78f6 100644 --- a/docbits/25_manip.md +++ b/docbits/25_manip.md @@ -20,25 +20,6 @@ var desired_value = (desired_cell ? desired_cell.v : undefined); **Complete examples:** -- HTML5 File API / Base64 Text / Web Workers - -Note that older versions of IE do not support HTML5 File API, so the base64 mode -is used for testing. On OSX you can get the base64 encoding with: - -```bash -$ certutil -encode target_file target_file.b64 -``` - -(note: You have to open the file and remove the header and footer lines) - -- XMLHttpRequest - - node The node version installs a command line tool `xlsx` which can read spreadsheet diff --git a/docbits/31_writestream.md b/docbits/31_writestream.md new file mode 100644 index 0000000..d9cae3d --- /dev/null +++ b/docbits/31_writestream.md @@ -0,0 +1,6 @@ +### Streaming Write + +`XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`. It +takes the same arguments but returns a readable stream. + + pipes CSV write stream to nodejs response. diff --git a/docbits/61_formulae.md b/docbits/61_formulae.md index a2e8ae1..35df471 100644 --- a/docbits/61_formulae.md +++ b/docbits/61_formulae.md @@ -45,7 +45,7 @@ worksheet['C1'] = { t:'n', f: "SUM(A1:A3*B1:B3)", F:"C1:C1" }; ``` For a multi-cell array formula, every cell has the same array range but only the -first cell has content. Consider `D1:D3=A1:A3*B1:B3`: +first cell specifies the formula. Consider `D1:D3=A1:A3*B1:B3`: ```js worksheet['D1'] = { t:'n', F:"D1:D3", f:"A1:A3*B1:B3" }; diff --git a/docbits/80_parseopts.md b/docbits/80_parseopts.md index 2d7890b..1056919 100644 --- a/docbits/80_parseopts.md +++ b/docbits/80_parseopts.md @@ -77,7 +77,8 @@ Plaintext format guessing follows the priority order: | Format | Test | |:-------|:--------------------------------------------------------------------| -| XML | starts with < | +| HTML | starts with \ txt dbf -> csf html -> csf + csf -> html } } diff --git a/formats.png b/formats.png index 145c98e..af17b09 100644 Binary files a/formats.png and b/formats.png differ diff --git a/misc/docs/SUMMARY.md b/misc/docs/SUMMARY.md index 57b5bf7..613f855 100644 --- a/misc/docs/SUMMARY.md +++ b/misc/docs/SUMMARY.md @@ -6,8 +6,10 @@ * [Optional Modules](README.md#optional-modules) * [ECMAScript 5 Compatibility](README.md#ecmascript-5-compatibility) - [Parsing Workbooks](README.md#parsing-workbooks) + * [Note on Streaming Read](README.md#note-on-streaming-read) - [Working with the Workbook](README.md#working-with-the-workbook) - [Writing Workbooks](README.md#writing-workbooks) + * [Streaming Write](README.md#streaming-write) - [Interface](README.md#interface) * [Parsing functions](README.md#parsing-functions) * [Writing functions](README.md#writing-functions) diff --git a/test.js b/test.js index 2b8e928..eafe902 100644 --- a/test.js +++ b/test.js @@ -961,7 +961,7 @@ describe('parse features', function() { var bef = (function() { ws = X.utils.aoa_to_sheet([ ["a","b","c"], - ["&","<",">"] + ["&","<",">","\n"] ]); wb = {SheetNames:["Sheet1"],Sheets:{Sheet1:ws}}; }); @@ -972,6 +972,7 @@ describe('parse features', function() { assert.equal(get_cell(wb2.Sheets.Sheet1, "A2").h, "&"); assert.equal(get_cell(wb2.Sheets.Sheet1, "B2").h, "<"); assert.equal(get_cell(wb2.Sheets.Sheet1, "C2").h, ">"); + assert.equal(get_cell(wb2.Sheets.Sheet1, "D2").h, " "); }); }); }); @@ -1135,6 +1136,15 @@ describe('write features', function() { }); }); }); }); + describe('HTML', function() { + it('should use `h` value when present', function() { + var sheet = X.utils.aoa_to_sheet([["abc"]]); + get_cell(sheet, "A1").h = "abc"; + var wb = {SheetNames:["Sheet1"], Sheets:{Sheet1:sheet}}; + var str = X.write(wb, {bookType:"html", type:"binary"}); + assert(str.indexOf("abc") > 0); + }); + }); }); function seq(end, start) { diff --git a/xlsx.flow.js b/xlsx.flow.js index a6ecf85..68c0a85 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -1549,7 +1549,7 @@ function resolve_path(path/*:string*/, base/*:string*/)/*:string*/ { } return result.join('/'); } -var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g; +var attregexg=/([^"\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:')|([^'">\s]+))/g; var tagregex=/<[^>]*>/g; var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/; function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ { @@ -1558,11 +1558,13 @@ function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ { for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break; if(!skip_root) z[0] = tag.substr(0, eq); if(eq === tag.length) return z; - var m = tag.match(attregexg), j=0, v="", i=0, q="", cc=""; + var m = tag.match(attregexg), j=0, v="", i=0, q="", cc="", quot = 1; if(m) for(i = 0; i != m.length; ++i) { cc = m[i]; for(c=0; c != cc.length; ++c) if(cc.charCodeAt(c) === 61) break; - q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1); + q = cc.substr(0,c); + quot = ((eq=cc.charCodeAt(c+1)) == 34 || eq == 39) ? 1 : 0; + v = cc.substring(c+1+quot, cc.length-quot); for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break; if(j===q.length) { if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods @@ -1605,9 +1607,10 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{ } function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); } +var htmlcharegex = /[\u0000-\u001f]/g; function escapehtml(text){ var s = text + ''; - return s.replace(decregex, function(y) { return rencoding[y]; }); + return s.replace(decregex, function(y) { return rencoding[y]; }).replace(htmlcharegex,function(s) { return "&#x" + ("000"+s.charCodeAt(0).toString(16)).slice(-4) + ";"; }); } /* TODO: handle codepages */ @@ -5332,7 +5335,7 @@ var WK_ = (function() { if(!d) return d; var o = opts || {}; if(DENSE != null && o.dense == null) o.dense = DENSE; - var s = (o.dense ? [] : {}), n = "Sheet1", sidx = 0; + var s/*:Worksheet*/ = ((o.dense ? [] : {})/*:any*/), n = "Sheet1", sidx = 0; var sheets = {}, snames = [n]; var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; @@ -5862,8 +5865,8 @@ function parse_sst_bin(data, opts)/*:SST*/ { pass = false; break; default: - if(R_n.indexOf("Begin") > 0) state.push(R_n); - else if(R_n.indexOf("End") > 0) state.pop(); + if(R_n.indexOf("Begin") > 0){} + else if(R_n.indexOf("End") > 0){} if(!pass || opts.WTF) throw new Error("Unexpected record " + RT + " " + R_n); } }); @@ -7462,7 +7465,7 @@ function parse_ms_xml() { return {'!type':'macro'}; } /* TODO: it will be useful to parse the function str */ var rc_to_a1 = (function(){ var rcregex = /(^|[^A-Za-z])R(\[?)(-?\d+|)\]?C(\[?)(-?\d+|)\]?/g; - var rcbase; + var rcbase/*:Cell*/ = ({r:0,c:0}/*:any*/); function rcfunc($$,$1,$2,$3,$4,$5) { var R = $3.length>0?parseInt($3,10)|0:0, C = $5.length>0?parseInt($5,10)|0:0; if(C<0 && $4.length === 0) C=0; @@ -7471,7 +7474,7 @@ var rc_to_a1 = (function(){ if($2.length > 0 || $3.length == 0) rRel = true; if(rRel) R += rcbase.r; else --R; return $1 + (cRel ? "" : "$") + encode_col(C) + (rRel ? "" : "$") + encode_row(R); } - return function rc_to_a1(fstr, base) { + return function rc_to_a1(fstr/*:string*/, base/*:Cell*/)/*:string*/ { rcbase = base; return fstr.replace(rcregex, rcfunc); }; @@ -11527,6 +11530,8 @@ function write_wb_xml(wb/*:Workbook*/, opts/*:?WriteOpts*/)/*:string*/ { var o = [XML_HEADER]; o[o.length] = WB_XML_ROOT; + var write_names = (wb.Workbook && (wb.Workbook.Names||[]).length > 0); + /* fileVersion */ /* fileSharing */ @@ -11552,9 +11557,9 @@ function write_wb_xml(wb/*:Workbook*/, opts/*:?WriteOpts*/)/*:string*/ { /* functionGroups */ /* externalReferences */ - if(wb.Workbook && (wb.Workbook.Names||[]).length > 0) { + if(write_names) { o[o.length] = ""; - wb.Workbook.Names.forEach(function(n) { + if(wb.Workbook && wb.Workbook.Names) wb.Workbook.Names.forEach(function(n) { var d = {name:n.Name}; if(n.Comment) d.comment = n.Comment; if(n.Sheet != null) d.localSheetId = ""+n.Sheet; @@ -12080,7 +12085,7 @@ function parse_xlml_xml(d, opts)/*:Workbook*/ { make_ssf(SSF); var str = debom(xlml_normalize(d)); if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str)); - if(str.substr(0,1000).indexOf("= 0) return parse_html(str, opts); + if(str.substr(0,1000).indexOf("= 0) return HTML_.to_workbook(str, opts); var Rn; var state = [], tmp; if(DENSE != null && opts.dense == null) opts.dense = DENSE; @@ -15054,48 +15059,95 @@ function write_biff_buf(wb/*:Workbook*/, opts/*:WriteOpts*/) { return ba.end(); } /* TODO: in browser attach to DOM; in node use an html parser */ -function parse_html(str/*:string*/, _opts)/*:Workbook*/ { - var opts = _opts || {}; - if(DENSE != null && opts.dense == null) opts.dense = DENSE; - var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/); - var o/*:Workbook*/ = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} }; - var i = str.indexOf(" / pair"); - var rows = str.slice(i, j).split(/]*>/); - var R = 0, C = 0; - var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}}; - for(i = 0; i < rows.length; ++i) { - if(rows[i].substr(0,3) != ""); - for(j = 0; j < cells.length; ++j) { - if(cells[j].substr(0,3) != "")) > -1) m = m.slice(cc+1); - while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<")); - /* TODO: generate stub cells */ - if(!m.length) continue; - if(range.s.r > R) range.s.r = R; - if(range.e.r < R) range.e.r = R; - if(range.s.c > C) range.s.c = C; - if(range.e.c < C) range.e.c = C; - if(opts.dense) { - if(!ws[R]) ws[R] = []; - if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m}; - else ws[R][C] = {t:'s', v:m}; - } else { - var coord/*:string*/ = encode_cell({r:R, c:C}); - /* TODO: value parsing */ - if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m}; - else ws[coord] = {t:'s', v:m}; +var HTML_ = (function() { + function html_to_sheet(str/*:string*/, _opts)/*:Workbook*/ { + var opts = _opts || {}; + if(DENSE != null && opts.dense == null) opts.dense = DENSE; + var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/); + var i = str.indexOf(" / pair"); + var rows = str.slice(i, j).split(/(:?]*>)/); + var R = -1, C = 0, RS = 0, CS = 0; + var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}}; + var merges = [], midx = 0; + for(i = 0; i < rows.length; ++i) { + var row = rows[i].trim(); + if(row.substr(0,3) == ""); + for(j = 0; j < cells.length; ++j) { + var cell = cells[j].trim(); + if(cell.substr(0,3) != "")) > -1) m = m.slice(cc+1); + while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<")); + var tag = parsexmltag(cell.slice(0, cell.indexOf(">"))); + CS = tag.colspan ? +tag.colspan : 1; + if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + /* TODO: generate stub cells */ + if(!m.length) { C += CS; continue; } + m = unescapexml(m).replace(/[\r\n]/g,""); + if(range.s.r > R) range.s.r = R; + if(range.e.r < R) range.e.r = R; + if(range.s.c > C) range.s.c = C; + if(range.e.c < C) range.e.c = C; + if(opts.dense) { + if(!ws[R]) ws[R] = []; + if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m}; + else ws[R][C] = {t:'s', v:m}; + } else { + var coord/*:string*/ = encode_cell({r:R, c:C}); + /* TODO: value parsing */ + if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m}; + else ws[coord] = {t:'s', v:m}; + } + C += CS; } } - ++R; C = 0; + ws['!ref'] = encode_range(range); + return ws; } - ws['!ref'] = encode_range(range); - return o; -} + function html_to_book(str/*:string*/, opts)/*:Workbook*/ { + return sheet_to_workbook(html_to_sheet(str, opts), opts); + } + function sheet_to_html(ws/*:Worksheet*/, opts)/*:string*/ { + var o/*:Array*/ = []; + var r = decode_range(ws['!ref']), cell/*:Cell*/; + var dense = Array.isArray(ws); + var M = (ws['!merges'] ||[]); + for(var R = r.s.r; R <= r.e.r; ++R) { + var oo = []; + for(var C = r.s.c; C <= r.e.c; ++C) { + var RS = 0, CS = 0; + for(var j = 0; j < M.length; ++j) { + if(M[j].s.r > R || M[j].s.c > C) continue; + if(M[j].e.r < R || M[j].e.c < C) continue; + if(M[j].s.r < R || M[j].s.c < C) { RS = -1; break; } + RS = M[j].e.r - M[j].s.r + 1; CS = M[j].e.c - M[j].s.c + 1; break; + } + if(RS < 0) continue; + var coord = encode_cell({r:R,c:C}); + cell = dense ? (ws[R]||[])[C] : ws[coord]; + if(!cell || cell.v == null) { oo.push(""); continue; } + /* TODO: html entities */ + var w = cell.h || escapexml(cell.w || (format_cell(cell), cell.w) || ""); + var sp = {}; + if(RS > 1) sp.rowspan = RS; + if(CS > 1) sp.colspan = CS; + oo.push(writextag('td', w, sp)); + } + o.push("" + oo.join("") + ""); + } + return "" + o.join("") + "
"; + } + + return { + to_workbook: html_to_book, + to_sheet: html_to_sheet, + from_sheet: sheet_to_html + }; +})(); function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { var opts = _opts || {}; @@ -15116,7 +15168,7 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.getAttribute("colspan") || 1; - if((RS = +elt.getAttribute("rowspan"))>0) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}}); + if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); var o = {t:'s', v:v}; if(v != null && v.length && !isNaN(Number(v))) o = {t:'n', v:Number(v)}; if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; } @@ -15652,6 +15704,7 @@ function write_obj_str(factory/*:WriteObjStrFactory*/) { }; } +var write_htm_str = write_obj_str(HTML_); var write_csv_str = write_obj_str({from_sheet:sheet_to_csv}); var write_slk_str = write_obj_str(SYLK); var write_dif_str = write_obj_str(DIF); @@ -16213,6 +16266,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) { case 'xlml': return write_string_type(write_xlml(wb, o), o); case 'slk': case 'sylk': return write_string_type(write_slk_str(wb, o), o); + case 'html': return write_string_type(write_htm_str(wb, o), o); case 'txt': return write_bstr_type(write_txt_str(wb, o), o); case 'csv': return write_string_type(write_csv_str(wb, o), o); case 'dif': return write_string_type(write_dif_str(wb, o), o); @@ -16235,6 +16289,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) { case '.fods': o.bookType = 'fods'; break; case '.xlml': o.bookType = 'xlml'; break; case '.sylk': o.bookType = 'sylk'; break; + case '.html': o.bookType = 'html'; break; case '.xls': o.bookType = 'biff2'; break; case '.xml': o.bookType = 'xml'; break; case '.ods': o.bookType = 'ods'; break; @@ -16243,6 +16298,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) { case '.dif': o.bookType = 'dif'; break; case '.prn': o.bookType = 'prn'; break; case '.slk': o.bookType = 'sylk'; break; + case '.htm': o.bookType = 'html'; break; } } @@ -16417,42 +16473,47 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){ return out; } -function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) { - var out = "", txt = "", qreg = /"/g; +var qreg = /"/g; +function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, o/*:Sheet2CSVOpts*/)/*:?string*/ { + var isempty = true; + var row = "", txt = "", rr = encode_row(R); + for(var C = r.s.c; C <= r.e.c; ++C) { + var val = o.dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; + if(val == null) txt = ""; + else if(val.v != null) { + isempty = false; + txt = ''+format_cell(val, null, o); + for(var i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) { + txt = "\"" + txt.replace(qreg, '""') + "\""; break; } + } else if(val.f != null && !val.F) { + isempty = false; + txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"'; + } else txt = ""; + /* NOTE: Excel CSV does not support array formulae */ + row += (C === r.s.c ? "" : FS) + txt; + } + if(o.blankrows === false && isempty) return null; + return row; +} + +function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/)/*:string*/ { + var out = ""; var o = opts == null ? {} : opts; if(sheet == null || sheet["!ref"] == null) return ""; var r = safe_decode_range(sheet["!ref"]); var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0); var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0); var endregex = new RegExp((FS=="|" ? "\\|" : FS)+"+$"); - var row = "", rr = "", cols = []; - var i = 0, cc = 0, val; - var R = 0, C = 0; - var dense = Array.isArray(sheet); - for(C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); - for(R = r.s.r; R <= r.e.r; ++R) { - var isempty = true; - row = ""; - rr = encode_row(R); - for(C = r.s.c; C <= r.e.c; ++C) { - val = dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; - if(val == null) txt = ""; - else if(val.v != null) { - isempty = false; - txt = ''+format_cell(val, null, o); - for(i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) { - txt = "\"" + txt.replace(qreg, '""') + "\""; break; } - } else if(val.f != null && !val.F) { - isempty = false; - txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"'; - } else txt = ""; - /* NOTE: Excel CSV does not support array formulae */ - row += (C === r.s.c ? "" : FS) + txt; - } - if(o.blankrows === false && isempty) continue; + var row = "", cols = []; + o.dense = Array.isArray(sheet); + for(var C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); + for(var R = r.s.r; R <= r.e.r; ++R) { + row = make_csv_row(sheet, r, R, cols, fs, rs, FS, o); + if(row == null) { continue; } if(o.strip) row = row.replace(endregex,""); out += row + RS; } + delete o.dense; return out; } @@ -16528,45 +16589,26 @@ if(has_buf && typeof require != 'undefined') (function() { var write_csv_stream = function(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) { var stream = Readable(); - var out = "", txt = "", qreg = /"/g; + var out = ""; var o = opts == null ? {} : opts; if(sheet == null || sheet["!ref"] == null) { stream.push(null); return stream; } var r = safe_decode_range(sheet["!ref"]); var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0); var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0); var endregex = new RegExp((FS=="|" ? "\\|" : FS)+"+$"); - var row = "", rr = "", cols = []; - var i = 0, cc = 0, val; - var R = 0, C = 0; - var dense = Array.isArray(sheet); - for(C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); - R = r.s.r; + var row = "", cols = []; + o.dense = Array.isArray(sheet); + for(var C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); + var R = r.s.r; stream._read = function() { if(R > r.e.r) return stream.push(null); - while(true) { - var isempty = true; - row = ""; - rr = encode_row(R); - for(C = r.s.c; C <= r.e.c; ++C) { - val = dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; - if(val == null) txt = ""; - else if(val.v != null) { - isempty = false; - txt = ''+format_cell(val, null, o); - for(i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) { - txt = "\"" + txt.replace(qreg, '""') + "\""; break; } - } else if(val.f != null && !val.F) { - isempty = false; - txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"'; - } else txt = ""; - /* NOTE: Excel CSV does not support array formulae */ - row += (C === r.s.c ? "" : FS) + txt; - } - if(o.blankrows === false && isempty) { ++R; continue; } - if(o.strip) row = row.replace(endregex,""); - stream.push(row + RS); - ++R; - break; + while(R <= r.e.r) { + row = make_csv_row(sheet, r, R, cols, fs, rs, FS, o); + if(row == null) { ++R; continue; } + if(o.strip) row = row.replace(endregex,""); + stream.push(row + RS); + ++R; + break; } }; return stream; diff --git a/xlsx.js b/xlsx.js index e56cf7c..a0e786b 100644 --- a/xlsx.js +++ b/xlsx.js @@ -1498,7 +1498,7 @@ function resolve_path(path, base) { } return result.join('/'); } -var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g; +var attregexg=/([^"\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:')|([^'">\s]+))/g; var tagregex=/<[^>]*>/g; var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/; function parsexmltag(tag, skip_root) { @@ -1507,11 +1507,13 @@ function parsexmltag(tag, skip_root) { for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break; if(!skip_root) z[0] = tag.substr(0, eq); if(eq === tag.length) return z; - var m = tag.match(attregexg), j=0, v="", i=0, q="", cc=""; + var m = tag.match(attregexg), j=0, v="", i=0, q="", cc="", quot = 1; if(m) for(i = 0; i != m.length; ++i) { cc = m[i]; for(c=0; c != cc.length; ++c) if(cc.charCodeAt(c) === 61) break; - q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1); + q = cc.substr(0,c); + quot = ((eq=cc.charCodeAt(c+1)) == 34 || eq == 39) ? 1 : 0; + v = cc.substring(c+1+quot, cc.length-quot); for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break; if(j===q.length) { if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods @@ -1554,9 +1556,10 @@ function escapexml(text, xml){ } function escapexmltag(text){ return escapexml(text).replace(/ /g,"_x0020_"); } +var htmlcharegex = /[\u0000-\u001f]/g; function escapehtml(text){ var s = text + ''; - return s.replace(decregex, function(y) { return rencoding[y]; }); + return s.replace(decregex, function(y) { return rencoding[y]; }).replace(htmlcharegex,function(s) { return "&#x" + ("000"+s.charCodeAt(0).toString(16)).slice(-4) + ";"; }); } /* TODO: handle codepages */ @@ -5276,7 +5279,7 @@ var WK_ = (function() { if(!d) return d; var o = opts || {}; if(DENSE != null && o.dense == null) o.dense = DENSE; - var s = (o.dense ? [] : {}), n = "Sheet1", sidx = 0; + var s = ((o.dense ? [] : {})), n = "Sheet1", sidx = 0; var sheets = {}, snames = [n]; var refguess = {s: {r:0, c:0}, e: {r:0, c:0} }; @@ -5806,8 +5809,8 @@ function parse_sst_bin(data, opts) { pass = false; break; default: - if(R_n.indexOf("Begin") > 0) state.push(R_n); - else if(R_n.indexOf("End") > 0) state.pop(); + if(R_n.indexOf("Begin") > 0){} + else if(R_n.indexOf("End") > 0){} if(!pass || opts.WTF) throw new Error("Unexpected record " + RT + " " + R_n); } }); @@ -7406,7 +7409,7 @@ function parse_ms_xml() { return {'!type':'macro'}; } /* TODO: it will be useful to parse the function str */ var rc_to_a1 = (function(){ var rcregex = /(^|[^A-Za-z])R(\[?)(-?\d+|)\]?C(\[?)(-?\d+|)\]?/g; - var rcbase; + var rcbase = ({r:0,c:0}); function rcfunc($$,$1,$2,$3,$4,$5) { var R = $3.length>0?parseInt($3,10)|0:0, C = $5.length>0?parseInt($5,10)|0:0; if(C<0 && $4.length === 0) C=0; @@ -11470,6 +11473,8 @@ function write_wb_xml(wb, opts) { var o = [XML_HEADER]; o[o.length] = WB_XML_ROOT; + var write_names = (wb.Workbook && (wb.Workbook.Names||[]).length > 0); + /* fileVersion */ /* fileSharing */ @@ -11495,9 +11500,9 @@ function write_wb_xml(wb, opts) { /* functionGroups */ /* externalReferences */ - if(wb.Workbook && (wb.Workbook.Names||[]).length > 0) { + if(write_names) { o[o.length] = ""; - wb.Workbook.Names.forEach(function(n) { + if(wb.Workbook && wb.Workbook.Names) wb.Workbook.Names.forEach(function(n) { var d = {name:n.Name}; if(n.Comment) d.comment = n.Comment; if(n.Sheet != null) d.localSheetId = ""+n.Sheet; @@ -12021,7 +12026,7 @@ function parse_xlml_xml(d, opts) { make_ssf(SSF); var str = debom(xlml_normalize(d)); if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str)); - if(str.substr(0,1000).indexOf("= 0) return parse_html(str, opts); + if(str.substr(0,1000).indexOf("= 0) return HTML_.to_workbook(str, opts); var Rn; var state = [], tmp; if(DENSE != null && opts.dense == null) opts.dense = DENSE; @@ -14993,48 +14998,95 @@ function write_biff_buf(wb, opts) { return ba.end(); } /* TODO: in browser attach to DOM; in node use an html parser */ -function parse_html(str, _opts) { - var opts = _opts || {}; - if(DENSE != null && opts.dense == null) opts.dense = DENSE; - var ws = opts.dense ? ([]) : ({}); - var o = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} }; - var i = str.indexOf(" / pair"); - var rows = str.slice(i, j).split(/]*>/); - var R = 0, C = 0; - var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}}; - for(i = 0; i < rows.length; ++i) { - if(rows[i].substr(0,3) != ""); - for(j = 0; j < cells.length; ++j) { - if(cells[j].substr(0,3) != "")) > -1) m = m.slice(cc+1); - while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<")); - /* TODO: generate stub cells */ - if(!m.length) continue; - if(range.s.r > R) range.s.r = R; - if(range.e.r < R) range.e.r = R; - if(range.s.c > C) range.s.c = C; - if(range.e.c < C) range.e.c = C; - if(opts.dense) { - if(!ws[R]) ws[R] = []; - if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m}; - else ws[R][C] = {t:'s', v:m}; - } else { - var coord = encode_cell({r:R, c:C}); - /* TODO: value parsing */ - if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m}; - else ws[coord] = {t:'s', v:m}; +var HTML_ = (function() { + function html_to_sheet(str, _opts) { + var opts = _opts || {}; + if(DENSE != null && opts.dense == null) opts.dense = DENSE; + var ws = opts.dense ? ([]) : ({}); + var i = str.indexOf(" / pair"); + var rows = str.slice(i, j).split(/(:?]*>)/); + var R = -1, C = 0, RS = 0, CS = 0; + var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}}; + var merges = [], midx = 0; + for(i = 0; i < rows.length; ++i) { + var row = rows[i].trim(); + if(row.substr(0,3) == ""); + for(j = 0; j < cells.length; ++j) { + var cell = cells[j].trim(); + if(cell.substr(0,3) != "")) > -1) m = m.slice(cc+1); + while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<")); + var tag = parsexmltag(cell.slice(0, cell.indexOf(">"))); + CS = tag.colspan ? +tag.colspan : 1; + if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + /* TODO: generate stub cells */ + if(!m.length) { C += CS; continue; } + m = unescapexml(m).replace(/[\r\n]/g,""); + if(range.s.r > R) range.s.r = R; + if(range.e.r < R) range.e.r = R; + if(range.s.c > C) range.s.c = C; + if(range.e.c < C) range.e.c = C; + if(opts.dense) { + if(!ws[R]) ws[R] = []; + if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m}; + else ws[R][C] = {t:'s', v:m}; + } else { + var coord = encode_cell({r:R, c:C}); + /* TODO: value parsing */ + if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m}; + else ws[coord] = {t:'s', v:m}; + } + C += CS; } } - ++R; C = 0; + ws['!ref'] = encode_range(range); + return ws; } - ws['!ref'] = encode_range(range); - return o; -} + function html_to_book(str, opts) { + return sheet_to_workbook(html_to_sheet(str, opts), opts); + } + function sheet_to_html(ws, opts) { + var o = []; + var r = decode_range(ws['!ref']), cell; + var dense = Array.isArray(ws); + var M = (ws['!merges'] ||[]); + for(var R = r.s.r; R <= r.e.r; ++R) { + var oo = []; + for(var C = r.s.c; C <= r.e.c; ++C) { + var RS = 0, CS = 0; + for(var j = 0; j < M.length; ++j) { + if(M[j].s.r > R || M[j].s.c > C) continue; + if(M[j].e.r < R || M[j].e.c < C) continue; + if(M[j].s.r < R || M[j].s.c < C) { RS = -1; break; } + RS = M[j].e.r - M[j].s.r + 1; CS = M[j].e.c - M[j].s.c + 1; break; + } + if(RS < 0) continue; + var coord = encode_cell({r:R,c:C}); + cell = dense ? (ws[R]||[])[C] : ws[coord]; + if(!cell || cell.v == null) { oo.push(""); continue; } + /* TODO: html entities */ + var w = cell.h || escapexml(cell.w || (format_cell(cell), cell.w) || ""); + var sp = {}; + if(RS > 1) sp.rowspan = RS; + if(CS > 1) sp.colspan = CS; + oo.push(writextag('td', w, sp)); + } + o.push("" + oo.join("") + ""); + } + return "" + o.join("") + "
"; + } + + return { + to_workbook: html_to_book, + to_sheet: html_to_sheet, + from_sheet: sheet_to_html + }; +})(); function parse_dom_table(table, _opts) { var opts = _opts || {}; @@ -15055,7 +15107,7 @@ function parse_dom_table(table, _opts) { } /* TODO: figure out how to extract nonstandard mso- style */ CS = +elt.getAttribute("colspan") || 1; - if((RS = +elt.getAttribute("rowspan"))>0) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}}); + if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); var o = {t:'s', v:v}; if(v != null && v.length && !isNaN(Number(v))) o = {t:'n', v:Number(v)}; if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; } @@ -15591,6 +15643,7 @@ function write_obj_str(factory) { }; } +var write_htm_str = write_obj_str(HTML_); var write_csv_str = write_obj_str({from_sheet:sheet_to_csv}); var write_slk_str = write_obj_str(SYLK); var write_dif_str = write_obj_str(DIF); @@ -16148,6 +16201,7 @@ function writeSync(wb, opts) { case 'xlml': return write_string_type(write_xlml(wb, o), o); case 'slk': case 'sylk': return write_string_type(write_slk_str(wb, o), o); + case 'html': return write_string_type(write_htm_str(wb, o), o); case 'txt': return write_bstr_type(write_txt_str(wb, o), o); case 'csv': return write_string_type(write_csv_str(wb, o), o); case 'dif': return write_string_type(write_dif_str(wb, o), o); @@ -16170,6 +16224,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) { case '.fods': o.bookType = 'fods'; break; case '.xlml': o.bookType = 'xlml'; break; case '.sylk': o.bookType = 'sylk'; break; + case '.html': o.bookType = 'html'; break; case '.xls': o.bookType = 'biff2'; break; case '.xml': o.bookType = 'xml'; break; case '.ods': o.bookType = 'ods'; break; @@ -16178,6 +16233,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) { case '.dif': o.bookType = 'dif'; break; case '.prn': o.bookType = 'prn'; break; case '.slk': o.bookType = 'sylk'; break; + case '.htm': o.bookType = 'html'; break; } } @@ -16347,42 +16403,47 @@ function sheet_to_json(sheet, opts){ return out; } +var qreg = /"/g; +function make_csv_row(sheet, r, R, cols, fs, rs, FS, o) { + var isempty = true; + var row = "", txt = "", rr = encode_row(R); + for(var C = r.s.c; C <= r.e.c; ++C) { + var val = o.dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; + if(val == null) txt = ""; + else if(val.v != null) { + isempty = false; + txt = ''+format_cell(val, null, o); + for(var i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) { + txt = "\"" + txt.replace(qreg, '""') + "\""; break; } + } else if(val.f != null && !val.F) { + isempty = false; + txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"'; + } else txt = ""; + /* NOTE: Excel CSV does not support array formulae */ + row += (C === r.s.c ? "" : FS) + txt; + } + if(o.blankrows === false && isempty) return null; + return row; +} + function sheet_to_csv(sheet, opts) { - var out = "", txt = "", qreg = /"/g; + var out = ""; var o = opts == null ? {} : opts; if(sheet == null || sheet["!ref"] == null) return ""; var r = safe_decode_range(sheet["!ref"]); var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0); var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0); var endregex = new RegExp((FS=="|" ? "\\|" : FS)+"+$"); - var row = "", rr = "", cols = []; - var i = 0, cc = 0, val; - var R = 0, C = 0; - var dense = Array.isArray(sheet); - for(C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); - for(R = r.s.r; R <= r.e.r; ++R) { - var isempty = true; - row = ""; - rr = encode_row(R); - for(C = r.s.c; C <= r.e.c; ++C) { - val = dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; - if(val == null) txt = ""; - else if(val.v != null) { - isempty = false; - txt = ''+format_cell(val, null, o); - for(i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) { - txt = "\"" + txt.replace(qreg, '""') + "\""; break; } - } else if(val.f != null && !val.F) { - isempty = false; - txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"'; - } else txt = ""; - /* NOTE: Excel CSV does not support array formulae */ - row += (C === r.s.c ? "" : FS) + txt; - } - if(o.blankrows === false && isempty) continue; + var row = "", cols = []; + o.dense = Array.isArray(sheet); + for(var C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); + for(var R = r.s.r; R <= r.e.r; ++R) { + row = make_csv_row(sheet, r, R, cols, fs, rs, FS, o); + if(row == null) { continue; } if(o.strip) row = row.replace(endregex,""); out += row + RS; } + delete o.dense; return out; } @@ -16458,45 +16519,26 @@ if(has_buf && typeof require != 'undefined') (function() { var write_csv_stream = function(sheet, opts) { var stream = Readable(); - var out = "", txt = "", qreg = /"/g; + var out = ""; var o = opts == null ? {} : opts; if(sheet == null || sheet["!ref"] == null) { stream.push(null); return stream; } var r = safe_decode_range(sheet["!ref"]); var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0); var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0); var endregex = new RegExp((FS=="|" ? "\\|" : FS)+"+$"); - var row = "", rr = "", cols = []; - var i = 0, cc = 0, val; - var R = 0, C = 0; - var dense = Array.isArray(sheet); - for(C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); - R = r.s.r; + var row = "", cols = []; + o.dense = Array.isArray(sheet); + for(var C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C); + var R = r.s.r; stream._read = function() { if(R > r.e.r) return stream.push(null); - while(true) { - var isempty = true; - row = ""; - rr = encode_row(R); - for(C = r.s.c; C <= r.e.c; ++C) { - val = dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr]; - if(val == null) txt = ""; - else if(val.v != null) { - isempty = false; - txt = ''+format_cell(val, null, o); - for(i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) { - txt = "\"" + txt.replace(qreg, '""') + "\""; break; } - } else if(val.f != null && !val.F) { - isempty = false; - txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"'; - } else txt = ""; - /* NOTE: Excel CSV does not support array formulae */ - row += (C === r.s.c ? "" : FS) + txt; - } - if(o.blankrows === false && isempty) { ++R; continue; } - if(o.strip) row = row.replace(endregex,""); - stream.push(row + RS); - ++R; - break; + while(R <= r.e.r) { + row = make_csv_row(sheet, r, R, cols, fs, rs, FS, o); + if(row == null) { ++R; continue; } + if(o.strip) row = row.replace(endregex,""); + stream.push(row + RS); + ++R; + break; } }; return stream;