HTML Write support
- 'html' bookType write format - basic HTML entity encoding (fixes #629 h/t @xkr47) - HTML string and table merge cell fixes - doc notes on nodejs streaming
This commit is contained in:
parent
616d2e534f
commit
1d61054602
68
README.md
68
README.md
@ -29,8 +29,10 @@ with a unified JS representation, and ES3/ES5 browser compatibility back to IE6.
|
||||
* [Optional Modules](#optional-modules)
|
||||
* [ECMAScript 5 Compatibility](#ecmascript-5-compatibility)
|
||||
- [Parsing Workbooks](#parsing-workbooks)
|
||||
* [Note on Streaming Read](#note-on-streaming-read)
|
||||
- [Working with the Workbook](#working-with-the-workbook)
|
||||
- [Writing Workbooks](#writing-workbooks)
|
||||
* [Streaming Write](#streaming-write)
|
||||
- [Interface](#interface)
|
||||
* [Parsing functions](#parsing-functions)
|
||||
* [Writing functions](#writing-functions)
|
||||
@ -298,6 +300,39 @@ function handleFile(e) {
|
||||
input_dom_element.addEventListener('change', handleFile, false);
|
||||
```
|
||||
|
||||
**Complete examples:**
|
||||
|
||||
- <http://oss.sheetjs.com/js-xlsx/> HTML5 File API / Base64 Text / Web Workers
|
||||
|
||||
Note that older versions of IE do not support HTML5 File API, so the base64 mode
|
||||
is used for testing. On OSX you can get the base64 encoding with:
|
||||
|
||||
```bash
|
||||
$ <target_file base64 | pbcopy
|
||||
```
|
||||
|
||||
On Windows XP and up you can get the base64 encoding using `certutil`:
|
||||
|
||||
```cmd
|
||||
> certutil -encode target_file target_file.b64
|
||||
```
|
||||
|
||||
(note: You have to open the file and remove the header and footer lines)
|
||||
|
||||
- <http://oss.sheetjs.com/js-xlsx/ajax.html> XMLHttpRequest
|
||||
|
||||
### Note on Streaming Read
|
||||
|
||||
The most common and interesting formats (XLS, XLSX/M, XLSB, ODS) are ultimately
|
||||
ZIP or CFB containers of files. Neither format puts the directory structure at
|
||||
the beginning of the file: ZIP files place the Central Directory records at the
|
||||
end of the logical file, while CFB files can place the FAT structure anywhere in
|
||||
the file! As a result, to properly handle these formats, a streaming function
|
||||
would have to buffer the entire file before commencing. That belies the
|
||||
expectations of streaming, so we do not provide any streaming read API. If you
|
||||
really want to stream, there are node modules like `concat-stream` that will do
|
||||
the buffering for you.
|
||||
|
||||
## Working with the Workbook
|
||||
|
||||
The full object format is described later in this README.
|
||||
@ -320,25 +355,6 @@ var desired_value = (desired_cell ? desired_cell.v : undefined);
|
||||
|
||||
**Complete examples:**
|
||||
|
||||
- <http://oss.sheetjs.com/js-xlsx/> HTML5 File API / Base64 Text / Web Workers
|
||||
|
||||
Note that older versions of IE do not support HTML5 File API, so the base64 mode
|
||||
is used for testing. On OSX you can get the base64 encoding with:
|
||||
|
||||
```bash
|
||||
$ <target_file base64 | pbcopy
|
||||
```
|
||||
|
||||
On Windows XP and up you can get the base64 encoding using `certutil`:
|
||||
|
||||
```cmd
|
||||
> certutil -encode target_file target_file.b64
|
||||
```
|
||||
|
||||
(note: You have to open the file and remove the header and footer lines)
|
||||
|
||||
- <http://oss.sheetjs.com/js-xlsx/ajax.html> XMLHttpRequest
|
||||
|
||||
- <https://github.com/SheetJS/js-xlsx/blob/master/bin/xlsx.njs> node
|
||||
|
||||
The node version installs a command line tool `xlsx` which can read spreadsheet
|
||||
@ -392,6 +408,12 @@ saveAs(new Blob([s2ab(wbout)],{type:"application/octet-stream"}), "test.xlsx");
|
||||
- <http://git.io/WEK88Q> writing an array of arrays in nodejs
|
||||
- <http://sheetjs.com/demos/table.html> exporting an HTML table
|
||||
|
||||
### Streaming Write
|
||||
|
||||
`XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`. It
|
||||
takes the same arguments but returns a readable stream.
|
||||
|
||||
<https://github.com/sheetjs/sheetaki> pipes the CSV write stream to the nodejs response.
|
||||
## Interface
|
||||
|
||||
`XLSX` is the exposed variable in the browser and the exported node variable
|
||||
@ -769,7 +791,7 @@ worksheet['C1'] = { t:'n', f: "SUM(A1:A3*B1:B3)", F:"C1:C1" };
|
||||
```
|
||||
|
||||
For a multi-cell array formula, every cell has the same array range but only the
|
||||
first cell has content. Consider `D1:D3=A1:A3*B1:B3`:
|
||||
first cell specifies the formula. Consider `D1:D3=A1:A3*B1:B3`:
|
||||
|
||||
```js
|
||||
worksheet['D1'] = { t:'n', F:"D1:D3", f:"A1:A3*B1:B3" };
|
||||
@ -977,7 +999,8 @@ Plaintext format guessing follows the priority order:
|
||||
|
||||
| Format | Test |
|
||||
|:-------|:--------------------------------------------------------------------|
|
||||
| XML | starts with < |
|
||||
| HTML | starts with \<html |
|
||||
| XML | starts with \< |
|
||||
| DSV | starts with `/sep=.$/`, separator is the specified character |
|
||||
| TSV | one of the first 1024 characters is a tab char `"\t"` |
|
||||
| CSV | one of the first 1024 characters is a comma char `","` |
|
||||
@ -1024,6 +1047,7 @@ output formats. The specific file type is controlled with `bookType` option:
|
||||
| `csv` | `.csv` | none | single | Comma Separated Values |
|
||||
| `txt` | `.txt` | none | single | UTF-16 Unicode Text (TXT) |
|
||||
| `sylk` | `.sylk` | none | single | Symbolic Link (SYLK) |
|
||||
| `html` | `.html` | none | single | HTML Document |
|
||||
| `dif` | `.dif` | none | single | Data Interchange Format (DIF) |
|
||||
| `prn` | `.prn` | none | single | Lotus Formatted Text |
|
||||
|
||||
@ -1277,7 +1301,7 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats:
|
||||
| Lotus 1-2-3 (WKS/WK1/WK2/WK3/WK4/123) | :o: | |
|
||||
| Quattro Pro Spreadsheet (WQ1/WQ2/WB1/WB2/WB3/QPW) | :o: | |
|
||||
| **Other Common Spreadsheet Output Formats** |:-----:|:-----:|
|
||||
| HTML Tables | :o: | |
|
||||
| HTML Tables | :o: | :o: |
|
||||
|
||||
### Excel 2007+ XML (XLSX/XLSM)
|
||||
|
||||
|
@ -27,6 +27,7 @@ program
|
||||
.option('-j, --json', 'emit formatted JSON (all fields text)')
|
||||
.option('-J, --raw-js', 'emit raw JS object (raw numbers)')
|
||||
.option('-A, --arrays', 'emit rows as JS objects (raw numbers)')
|
||||
.option('-H, --html', 'emit HTML')
|
||||
.option('-D, --dif', 'emit data interchange format (dif)')
|
||||
.option('-K, --sylk', 'emit symbolic link (sylk)')
|
||||
.option('-P, --prn', 'emit formatted text (prn)')
|
||||
@ -160,6 +161,7 @@ if(program.readOnly) process.exit(0);
|
||||
[
|
||||
['biff2', '.xls'],
|
||||
['sylk', '.slk'],
|
||||
['html', '.html'],
|
||||
['prn', '.prn'],
|
||||
['txt', '.txt'],
|
||||
['dif', '.dif']
|
||||
|
@ -1,4 +1,4 @@
|
||||
var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g;
|
||||
var attregexg=/([^"\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:')|([^'">\s]+))/g;
|
||||
var tagregex=/<[^>]*>/g;
|
||||
var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/;
|
||||
function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ {
|
||||
@ -7,11 +7,13 @@ function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ {
|
||||
for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break;
|
||||
if(!skip_root) z[0] = tag.substr(0, eq);
|
||||
if(eq === tag.length) return z;
|
||||
var m = tag.match(attregexg), j=0, v="", i=0, q="", cc="";
|
||||
var m = tag.match(attregexg), j=0, v="", i=0, q="", cc="", quot = 1;
|
||||
if(m) for(i = 0; i != m.length; ++i) {
|
||||
cc = m[i];
|
||||
for(c=0; c != cc.length; ++c) if(cc.charCodeAt(c) === 61) break;
|
||||
q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1);
|
||||
q = cc.substr(0,c);
|
||||
quot = ((eq=cc.charCodeAt(c+1)) == 34 || eq == 39) ? 1 : 0;
|
||||
v = cc.substring(c+1+quot, cc.length-quot);
|
||||
for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break;
|
||||
if(j===q.length) {
|
||||
if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods
|
||||
@ -54,9 +56,10 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{
|
||||
}
|
||||
function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); }
|
||||
|
||||
var htmlcharegex = /[\u0000-\u001f]/g;
|
||||
function escapehtml(text){
|
||||
var s = text + '';
|
||||
return s.replace(decregex, function(y) { return rencoding[y]; });
|
||||
return s.replace(decregex, function(y) { return rencoding[y]; }).replace(htmlcharegex,function(s) { return "&#x" + ("000"+s.charCodeAt(0).toString(16)).slice(-4) + ";"; });
|
||||
}
|
||||
|
||||
/* TODO: handle codepages */
|
||||
|
@ -28,7 +28,7 @@ var WK_ = (function() {
|
||||
if(!d) return d;
|
||||
var o = opts || {};
|
||||
if(DENSE != null && o.dense == null) o.dense = DENSE;
|
||||
var s = (o.dense ? [] : {}), n = "Sheet1", sidx = 0;
|
||||
var s/*:Worksheet*/ = ((o.dense ? [] : {})/*:any*/), n = "Sheet1", sidx = 0;
|
||||
var sheets = {}, snames = [n];
|
||||
|
||||
var refguess = {s: {r:0, c:0}, e: {r:0, c:0} };
|
||||
|
@ -22,8 +22,8 @@ function parse_sst_bin(data, opts)/*:SST*/ {
|
||||
pass = false; break;
|
||||
|
||||
default:
|
||||
if(R_n.indexOf("Begin") > 0) state.push(R_n);
|
||||
else if(R_n.indexOf("End") > 0) state.pop();
|
||||
if(R_n.indexOf("Begin") > 0){}
|
||||
else if(R_n.indexOf("End") > 0){}
|
||||
if(!pass || opts.WTF) throw new Error("Unexpected record " + RT + " " + R_n);
|
||||
}
|
||||
});
|
||||
|
@ -1,7 +1,7 @@
|
||||
/* TODO: it will be useful to parse the function str */
|
||||
var rc_to_a1 = (function(){
|
||||
var rcregex = /(^|[^A-Za-z])R(\[?)(-?\d+|)\]?C(\[?)(-?\d+|)\]?/g;
|
||||
var rcbase;
|
||||
var rcbase/*:Cell*/ = ({r:0,c:0}/*:any*/);
|
||||
function rcfunc($$,$1,$2,$3,$4,$5) {
|
||||
var R = $3.length>0?parseInt($3,10)|0:0, C = $5.length>0?parseInt($5,10)|0:0;
|
||||
if(C<0 && $4.length === 0) C=0;
|
||||
@ -10,7 +10,7 @@ var rc_to_a1 = (function(){
|
||||
if($2.length > 0 || $3.length == 0) rRel = true; if(rRel) R += rcbase.r; else --R;
|
||||
return $1 + (cRel ? "" : "$") + encode_col(C) + (rRel ? "" : "$") + encode_row(R);
|
||||
}
|
||||
return function rc_to_a1(fstr, base) {
|
||||
return function rc_to_a1(fstr/*:string*/, base/*:Cell*/)/*:string*/ {
|
||||
rcbase = base;
|
||||
return fstr.replace(rcregex, rcfunc);
|
||||
};
|
||||
|
@ -160,6 +160,8 @@ function write_wb_xml(wb/*:Workbook*/, opts/*:?WriteOpts*/)/*:string*/ {
|
||||
var o = [XML_HEADER];
|
||||
o[o.length] = WB_XML_ROOT;
|
||||
|
||||
var write_names = (wb.Workbook && (wb.Workbook.Names||[]).length > 0);
|
||||
|
||||
/* fileVersion */
|
||||
/* fileSharing */
|
||||
|
||||
@ -185,9 +187,9 @@ function write_wb_xml(wb/*:Workbook*/, opts/*:?WriteOpts*/)/*:string*/ {
|
||||
/* functionGroups */
|
||||
/* externalReferences */
|
||||
|
||||
if(wb.Workbook && (wb.Workbook.Names||[]).length > 0) {
|
||||
if(write_names) {
|
||||
o[o.length] = "<definedNames>";
|
||||
wb.Workbook.Names.forEach(function(n) {
|
||||
if(wb.Workbook && wb.Workbook.Names) wb.Workbook.Names.forEach(function(n) {
|
||||
var d = {name:n.Name};
|
||||
if(n.Comment) d.comment = n.Comment;
|
||||
if(n.Sheet != null) d.localSheetId = ""+n.Sheet;
|
||||
|
@ -171,7 +171,7 @@ function parse_xlml_xml(d, opts)/*:Workbook*/ {
|
||||
make_ssf(SSF);
|
||||
var str = debom(xlml_normalize(d));
|
||||
if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str));
|
||||
if(str.substr(0,1000).indexOf("<html") >= 0) return parse_html(str, opts);
|
||||
if(str.substr(0,1000).indexOf("<html") >= 0) return HTML_.to_workbook(str, opts);
|
||||
var Rn;
|
||||
var state = [], tmp;
|
||||
if(DENSE != null && opts.dense == null) opts.dense = DENSE;
|
||||
|
127
bits/79_html.js
127
bits/79_html.js
@ -1,46 +1,93 @@
|
||||
/* TODO: in browser attach to DOM; in node use an html parser */
|
||||
function parse_html(str/*:string*/, _opts)/*:Workbook*/ {
|
||||
var opts = _opts || {};
|
||||
if(DENSE != null && opts.dense == null) opts.dense = DENSE;
|
||||
var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/);
|
||||
var o/*:Workbook*/ = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} };
|
||||
var i = str.indexOf("<table"), j = str.indexOf("</table");
|
||||
if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
|
||||
var rows = str.slice(i, j).split(/<tr[^>]*>/);
|
||||
var R = 0, C = 0;
|
||||
var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
|
||||
for(i = 0; i < rows.length; ++i) {
|
||||
if(rows[i].substr(0,3) != "<td") continue;
|
||||
var cells = rows[i].split("</td>");
|
||||
for(j = 0; j < cells.length; ++j) {
|
||||
if(cells[j].substr(0,3) != "<td") continue;
|
||||
++C;
|
||||
var m = cells[j], cc = 0;
|
||||
/* TODO: parse styles etc */
|
||||
while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
|
||||
while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
|
||||
/* TODO: generate stub cells */
|
||||
if(!m.length) continue;
|
||||
if(range.s.r > R) range.s.r = R;
|
||||
if(range.e.r < R) range.e.r = R;
|
||||
if(range.s.c > C) range.s.c = C;
|
||||
if(range.e.c < C) range.e.c = C;
|
||||
if(opts.dense) {
|
||||
if(!ws[R]) ws[R] = [];
|
||||
if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m};
|
||||
else ws[R][C] = {t:'s', v:m};
|
||||
} else {
|
||||
var coord/*:string*/ = encode_cell({r:R, c:C});
|
||||
/* TODO: value parsing */
|
||||
if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m};
|
||||
else ws[coord] = {t:'s', v:m};
|
||||
var HTML_ = (function() {
|
||||
/*
 * Parse the first <table>...</table> region of an HTML string into a worksheet.
 *
 * str   - HTML text; must contain both "<table" and "</table"
 * _opts - optional options object; `dense` selects array-of-arrays storage
 *         (filled in from the module-level DENSE default when unset)
 *
 * Returns the worksheet with cell objects, `!ref`, and (when any cell spans
 * more than one row or column) `!merges`.
 * Throws Error when no <table> / </table> pair is present.
 */
function html_to_sheet(str/*:string*/, _opts)/*:Worksheet*/ {
	var opts = _opts || {};
	if(DENSE != null && opts.dense == null) opts.dense = DENSE;
	var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/);
	var i = str.indexOf("<table"), j = str.indexOf("</table");
	if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
	/* the capture group keeps each <tr ...> tag in the split output so rows can be counted */
	var rows = str.slice(i, j).split(/(<tr[^>]*>)/);
	var R = -1, C = 0, RS = 0, CS = 0;
	var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
	var merges = [];
	for(i = 0; i < rows.length; ++i) {
		var row = rows[i].trim();
		/* a <tr> token starts a new row; the chunk that follows holds its cells */
		if(row.substr(0,3) == "<tr") { ++R; C = 0; continue; }
		if(row.substr(0,3) != "<td") continue;
		var cells = row.split("</td>");
		for(j = 0; j < cells.length; ++j) {
			var cell = cells[j].trim();
			if(cell.substr(0,3) != "<td") continue;
			var m = cell, cc = 0;
			/* TODO: parse styles etc */
			/* strip leading tags, then anything from the last "<" onward, leaving the text */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
			while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
			var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
			/* missing, zero, or non-numeric colspan counts as one column (same rule as parse_dom_table) */
			CS = +tag.colspan || 1;
			/* only record real spans; rowspan="1" with a single column is not a merge */
			if((RS = +tag.rowspan)>1 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
			/* TODO: generate stub cells */
			if(!m.length) { C += CS; continue; }
			m = unescapexml(m).replace(/[\r\n]/g,"");
			if(range.s.r > R) range.s.r = R;
			if(range.e.r < R) range.e.r = R;
			if(range.s.c > C) range.s.c = C;
			if(range.e.c < C) range.e.c = C;
			/* TODO: value parsing */
			/* Number(m) == Number(m) is false only for NaN, i.e. non-numeric text */
			var o = (Number(m) == Number(m)) ? {t:'n', v:+m} : {t:'s', v:m};
			if(opts.dense) {
				if(!ws[R]) ws[R] = [];
				ws[R][C] = o;
			} else {
				ws[encode_cell({r:R, c:C})] = o;
			}
			C += CS;
		}
	}
	ws['!ref'] = encode_range(range);
	/* expose the spans that were collected; previously they were dropped */
	if(merges.length) ws['!merges'] = merges;
	return ws;
}
|
||||
ws['!ref'] = encode_range(range);
|
||||
return o;
|
||||
}
|
||||
/* Parse an HTML string into a worksheet and wrap it with sheet_to_workbook; opts is forwarded to both steps */
function html_to_book(str/*:string*/, opts)/*:Workbook*/ {
	var ws = html_to_sheet(str, opts);
	return sheet_to_workbook(ws, opts);
}
|
||||
/*
 * Render a worksheet as an HTML document containing a single <table>.
 *
 * ws   - worksheet (keyed by A1-style address, or array of rows when dense)
 * opts - accepted for interface symmetry with the other writers; not read here
 *
 * Cells covered by a merge (other than its top-left anchor) are omitted and
 * the anchor carries rowspan / colspan. A cell's `h` value is emitted as-is;
 * otherwise the formatted text is escaped with escapexml.
 * A worksheet without `!ref` yields an empty table.
 */
function sheet_to_html(ws/*:Worksheet*/, opts)/*:string*/ {
	var o/*:Array<string>*/ = [];
	/* same guard as sheet_to_csv: nothing to render without a range */
	if(ws == null || ws['!ref'] == null) return "<html><body><table></table></body></html>";
	var r = decode_range(ws['!ref']), cell/*:Cell*/;
	var dense = Array.isArray(ws);
	var M = (ws['!merges'] ||[]);
	for(var R = r.s.r; R <= r.e.r; ++R) {
		var oo = [];
		for(var C = r.s.c; C <= r.e.c; ++C) {
			/* RS < 0 marks a cell hidden under a merge; RS/CS > 1 mark a merge anchor */
			var RS = 0, CS = 0;
			for(var j = 0; j < M.length; ++j) {
				if(M[j].s.r > R || M[j].s.c > C) continue;
				if(M[j].e.r < R || M[j].e.c < C) continue;
				if(M[j].s.r < R || M[j].s.c < C) { RS = -1; break; }
				RS = M[j].e.r - M[j].s.r + 1; CS = M[j].e.c - M[j].s.c + 1; break;
			}
			if(RS < 0) continue;
			var coord = encode_cell({r:R,c:C});
			cell = dense ? (ws[R]||[])[C] : ws[coord];
			var sp = {};
			if(RS > 1) sp.rowspan = RS;
			if(CS > 1) sp.colspan = CS;
			if(!cell || cell.v == null) {
				/* an empty merge anchor must still carry its span, since the covered cells are skipped */
				if(RS > 1 || CS > 1) oo.push(writextag('td', "", sp));
				else oo.push("<td></td>");
				continue;
			}
			/* TODO: html entities */
			var w = cell.h || escapexml(cell.w || (format_cell(cell), cell.w) || "");
			oo.push(writextag('td', w, sp));
		}
		o.push("<tr>" + oo.join("") + "</tr>");
	}
	return "<html><body><table>" + o.join("") + "</table></body></html>";
}
|
||||
|
||||
return {
|
||||
to_workbook: html_to_book,
|
||||
to_sheet: html_to_sheet,
|
||||
from_sheet: sheet_to_html
|
||||
};
|
||||
})();
|
||||
|
||||
function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
|
||||
var opts = _opts || {};
|
||||
@ -61,7 +108,7 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
|
||||
}
|
||||
/* TODO: figure out how to extract nonstandard mso- style */
|
||||
CS = +elt.getAttribute("colspan") || 1;
|
||||
if((RS = +elt.getAttribute("rowspan"))>0) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}});
|
||||
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
|
||||
var o = {t:'s', v:v};
|
||||
if(v != null && v.length && !isNaN(Number(v))) o = {t:'n', v:Number(v)};
|
||||
if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
|
||||
|
@ -8,6 +8,7 @@ function write_obj_str(factory/*:WriteObjStrFactory*/) {
|
||||
};
|
||||
}
|
||||
|
||||
var write_htm_str = write_obj_str(HTML_);
|
||||
var write_csv_str = write_obj_str({from_sheet:sheet_to_csv});
|
||||
var write_slk_str = write_obj_str(SYLK);
|
||||
var write_dif_str = write_obj_str(DIF);
|
||||
|
@ -64,6 +64,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) {
|
||||
case 'xlml': return write_string_type(write_xlml(wb, o), o);
|
||||
case 'slk':
|
||||
case 'sylk': return write_string_type(write_slk_str(wb, o), o);
|
||||
case 'html': return write_string_type(write_htm_str(wb, o), o);
|
||||
case 'txt': return write_bstr_type(write_txt_str(wb, o), o);
|
||||
case 'csv': return write_string_type(write_csv_str(wb, o), o);
|
||||
case 'dif': return write_string_type(write_dif_str(wb, o), o);
|
||||
@ -86,6 +87,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) {
|
||||
case '.fods': o.bookType = 'fods'; break;
|
||||
case '.xlml': o.bookType = 'xlml'; break;
|
||||
case '.sylk': o.bookType = 'sylk'; break;
|
||||
case '.html': o.bookType = 'html'; break;
|
||||
case '.xls': o.bookType = 'biff2'; break;
|
||||
case '.xml': o.bookType = 'xml'; break;
|
||||
case '.ods': o.bookType = 'ods'; break;
|
||||
@ -94,6 +96,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) {
|
||||
case '.dif': o.bookType = 'dif'; break;
|
||||
case '.prn': o.bookType = 'prn'; break;
|
||||
case '.slk': o.bookType = 'sylk'; break;
|
||||
case '.htm': o.bookType = 'html'; break;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -154,42 +154,47 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
|
||||
return out;
|
||||
}
|
||||
|
||||
function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) {
|
||||
var out = "", txt = "", qreg = /"/g;
|
||||
var qreg = /"/g;
|
||||
/*
 * Build the delimited text for one worksheet row.
 *
 * sheet - worksheet (dense array-of-rows when o.dense is truthy)
 * r     - decoded range; columns r.s.c through r.e.c are emitted
 * R     - zero-based row index
 * cols  - column names indexed by column number
 * fs/rs - char codes of the field / record separators (fields containing
 *         either, or a double quote, are quoted with quotes doubled)
 * FS    - field separator text placed between fields
 * o     - options; forwarded to format_cell, `blankrows` is read here
 *
 * Returns the row text, or null when o.blankrows === false and no cell in the
 * row had a value or a (non-array) formula.
 */
function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array<string>*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, o/*:Sheet2CSVOpts*/)/*:?string*/ {
	var blank = true;
	var line = "";
	var rowname = encode_row(R);
	for(var C = r.s.c; C <= r.e.c; ++C) {
		var cell = o.dense ? (sheet[R]||[])[C]: sheet[cols[C] + rowname];
		var field = "";
		if(cell != null) {
			if(cell.v != null) {
				blank = false;
				field = ''+format_cell(cell, null, o);
				/* quote the whole field on the first separator or quote character found */
				for(var k = 0; k !== field.length; ++k) {
					var code = field.charCodeAt(k);
					if(code === fs || code === rs || code === 34) {
						field = "\"" + field.replace(qreg, '""') + "\"";
						break;
					}
				}
			} else if(cell.f != null && !cell.F) {
				/* NOTE: Excel CSV does not support array formulae */
				blank = false;
				field = '=' + cell.f;
				if(field.indexOf(",") >= 0) field = '"' + field.replace(qreg, '""') + '"';
			}
		}
		line += (C === r.s.c ? "" : FS) + field;
	}
	if(o.blankrows === false && blank) return null;
	return line;
}
|
||||
|
||||
function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/)/*:string*/ {
|
||||
var out = "";
|
||||
var o = opts == null ? {} : opts;
|
||||
if(sheet == null || sheet["!ref"] == null) return "";
|
||||
var r = safe_decode_range(sheet["!ref"]);
|
||||
var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0);
|
||||
var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0);
|
||||
var endregex = new RegExp((FS=="|" ? "\\|" : FS)+"+$");
|
||||
var row = "", rr = "", cols = [];
|
||||
var i = 0, cc = 0, val;
|
||||
var R = 0, C = 0;
|
||||
var dense = Array.isArray(sheet);
|
||||
for(C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C);
|
||||
for(R = r.s.r; R <= r.e.r; ++R) {
|
||||
var isempty = true;
|
||||
row = "";
|
||||
rr = encode_row(R);
|
||||
for(C = r.s.c; C <= r.e.c; ++C) {
|
||||
val = dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr];
|
||||
if(val == null) txt = "";
|
||||
else if(val.v != null) {
|
||||
isempty = false;
|
||||
txt = ''+format_cell(val, null, o);
|
||||
for(i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) {
|
||||
txt = "\"" + txt.replace(qreg, '""') + "\""; break; }
|
||||
} else if(val.f != null && !val.F) {
|
||||
isempty = false;
|
||||
txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"';
|
||||
} else txt = "";
|
||||
/* NOTE: Excel CSV does not support array formulae */
|
||||
row += (C === r.s.c ? "" : FS) + txt;
|
||||
}
|
||||
if(o.blankrows === false && isempty) continue;
|
||||
var row = "", cols = [];
|
||||
o.dense = Array.isArray(sheet);
|
||||
for(var C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C);
|
||||
for(var R = r.s.r; R <= r.e.r; ++R) {
|
||||
row = make_csv_row(sheet, r, R, cols, fs, rs, FS, o);
|
||||
if(row == null) { continue; }
|
||||
if(o.strip) row = row.replace(endregex,"");
|
||||
out += row + RS;
|
||||
}
|
||||
delete o.dense;
|
||||
return out;
|
||||
}
|
||||
|
||||
|
@ -3,45 +3,26 @@ if(has_buf && typeof require != 'undefined') (function() {
|
||||
|
||||
var write_csv_stream = function(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) {
|
||||
var stream = Readable();
|
||||
var out = "", txt = "", qreg = /"/g;
|
||||
var out = "";
|
||||
var o = opts == null ? {} : opts;
|
||||
if(sheet == null || sheet["!ref"] == null) { stream.push(null); return stream; }
|
||||
var r = safe_decode_range(sheet["!ref"]);
|
||||
var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0);
|
||||
var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0);
|
||||
var endregex = new RegExp((FS=="|" ? "\\|" : FS)+"+$");
|
||||
var row = "", rr = "", cols = [];
|
||||
var i = 0, cc = 0, val;
|
||||
var R = 0, C = 0;
|
||||
var dense = Array.isArray(sheet);
|
||||
for(C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C);
|
||||
R = r.s.r;
|
||||
var row = "", cols = [];
|
||||
o.dense = Array.isArray(sheet);
|
||||
for(var C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C);
|
||||
var R = r.s.r;
|
||||
stream._read = function() {
|
||||
if(R > r.e.r) return stream.push(null);
|
||||
while(true) {
|
||||
var isempty = true;
|
||||
row = "";
|
||||
rr = encode_row(R);
|
||||
for(C = r.s.c; C <= r.e.c; ++C) {
|
||||
val = dense ? (sheet[R]||[])[C]: sheet[cols[C] + rr];
|
||||
if(val == null) txt = "";
|
||||
else if(val.v != null) {
|
||||
isempty = false;
|
||||
txt = ''+format_cell(val, null, o);
|
||||
for(i = 0, cc = 0; i !== txt.length; ++i) if((cc = txt.charCodeAt(i)) === fs || cc === rs || cc === 34) {
|
||||
txt = "\"" + txt.replace(qreg, '""') + "\""; break; }
|
||||
} else if(val.f != null && !val.F) {
|
||||
isempty = false;
|
||||
txt = '=' + val.f; if(txt.indexOf(",") >= 0) txt = '"' + txt.replace(qreg, '""') + '"';
|
||||
} else txt = "";
|
||||
/* NOTE: Excel CSV does not support array formulae */
|
||||
row += (C === r.s.c ? "" : FS) + txt;
|
||||
}
|
||||
if(o.blankrows === false && isempty) { ++R; continue; }
|
||||
if(o.strip) row = row.replace(endregex,"");
|
||||
stream.push(row + RS);
|
||||
++R;
|
||||
break;
|
||||
while(R <= r.e.r) {
|
||||
row = make_csv_row(sheet, r, R, cols, fs, rs, FS, o);
|
||||
if(row == null) { ++R; continue; }
|
||||
if(o.strip) row = row.replace(endregex,"");
|
||||
stream.push(row + RS);
|
||||
++R;
|
||||
break;
|
||||
}
|
||||
};
|
||||
return stream;
|
||||
|
@ -124,3 +124,24 @@ function handleFile(e) {
|
||||
input_dom_element.addEventListener('change', handleFile, false);
|
||||
```
|
||||
|
||||
**Complete examples:**
|
||||
|
||||
- <http://oss.sheetjs.com/js-xlsx/> HTML5 File API / Base64 Text / Web Workers
|
||||
|
||||
Note that older versions of IE do not support HTML5 File API, so the base64 mode
|
||||
is used for testing. On OSX you can get the base64 encoding with:
|
||||
|
||||
```bash
|
||||
$ <target_file base64 | pbcopy
|
||||
```
|
||||
|
||||
On Windows XP and up you can get the base64 encoding using `certutil`:
|
||||
|
||||
```cmd
|
||||
> certutil -encode target_file target_file.b64
|
||||
```
|
||||
|
||||
(note: You have to open the file and remove the header and footer lines)
|
||||
|
||||
- <http://oss.sheetjs.com/js-xlsx/ajax.html> XMLHttpRequest
|
||||
|
||||
|
12
docbits/21_readstream.md
Normal file
12
docbits/21_readstream.md
Normal file
@ -0,0 +1,12 @@
|
||||
### Note on Streaming Read
|
||||
|
||||
The most common and interesting formats (XLS, XLSX/M, XLSB, ODS) are ultimately
|
||||
ZIP or CFB containers of files. Neither format puts the directory structure at
|
||||
the beginning of the file: ZIP files place the Central Directory records at the
|
||||
end of the logical file, while CFB files can place the FAT structure anywhere in
|
||||
the file! As a result, to properly handle these formats, a streaming function
|
||||
would have to buffer the entire file before commencing. That belies the
|
||||
expectations of streaming, so we do not provide any streaming read API. If you
|
||||
really want to stream, there are node modules like `concat-stream` that will do
|
||||
the buffering for you.
|
||||
|
@ -20,25 +20,6 @@ var desired_value = (desired_cell ? desired_cell.v : undefined);
|
||||
|
||||
**Complete examples:**
|
||||
|
||||
- <http://oss.sheetjs.com/js-xlsx/> HTML5 File API / Base64 Text / Web Workers
|
||||
|
||||
Note that older versions of IE do not support HTML5 File API, so the base64 mode
|
||||
is used for testing. On OSX you can get the base64 encoding with:
|
||||
|
||||
```bash
|
||||
$ <target_file base64 | pbcopy
|
||||
```
|
||||
|
||||
On Windows XP and up you can get the base64 encoding using `certutil`:
|
||||
|
||||
```cmd
|
||||
> certutil -encode target_file target_file.b64
|
||||
```
|
||||
|
||||
(note: You have to open the file and remove the header and footer lines)
|
||||
|
||||
- <http://oss.sheetjs.com/js-xlsx/ajax.html> XMLHttpRequest
|
||||
|
||||
- <https://github.com/SheetJS/js-xlsx/blob/master/bin/xlsx.njs> node
|
||||
|
||||
The node version installs a command line tool `xlsx` which can read spreadsheet
|
||||
|
6
docbits/31_writestream.md
Normal file
6
docbits/31_writestream.md
Normal file
@ -0,0 +1,6 @@
|
||||
### Streaming Write
|
||||
|
||||
`XLSX.stream.to_csv` is the streaming version of `XLSX.utils.sheet_to_csv`. It
|
||||
takes the same arguments but returns a readable stream.
|
||||
|
||||
<https://github.com/sheetjs/sheetaki> pipes the CSV write stream to the nodejs response.
|
@ -45,7 +45,7 @@ worksheet['C1'] = { t:'n', f: "SUM(A1:A3*B1:B3)", F:"C1:C1" };
|
||||
```
|
||||
|
||||
For a multi-cell array formula, every cell has the same array range but only the
|
||||
first cell has content. Consider `D1:D3=A1:A3*B1:B3`:
|
||||
first cell specifies the formula. Consider `D1:D3=A1:A3*B1:B3`:
|
||||
|
||||
```js
|
||||
worksheet['D1'] = { t:'n', F:"D1:D3", f:"A1:A3*B1:B3" };
|
||||
|
@ -77,7 +77,8 @@ Plaintext format guessing follows the priority order:
|
||||
|
||||
| Format | Test |
|
||||
|:-------|:--------------------------------------------------------------------|
|
||||
| XML | starts with < |
|
||||
| HTML | starts with \<html |
|
||||
| XML | starts with \< |
|
||||
| DSV | starts with `/sep=.$/`, separator is the specified character |
|
||||
| TSV | one of the first 1024 characters is a tab char `"\t"` |
|
||||
| CSV | one of the first 1024 characters is a comma char `","` |
|
||||
|
@ -39,6 +39,7 @@ output formats. The specific file type is controlled with `bookType` option:
|
||||
| `csv` | `.csv` | none | single | Comma Separated Values |
|
||||
| `txt` | `.txt` | none | single | UTF-16 Unicode Text (TXT) |
|
||||
| `sylk` | `.sylk` | none | single | Symbolic Link (SYLK) |
|
||||
| `html` | `.html` | none | single | HTML Document |
|
||||
| `dif` | `.dif` | none | single | Data Interchange Format (DIF) |
|
||||
| `prn` | `.prn` | none | single | Lotus Formatted Text |
|
||||
|
||||
|
@ -27,7 +27,7 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats:
|
||||
| Lotus 1-2-3 (WKS/WK1/WK2/WK3/WK4/123) | :o: | |
|
||||
| Quattro Pro Spreadsheet (WQ1/WQ2/WB1/WB2/WB3/QPW) | :o: | |
|
||||
| **Other Common Spreadsheet Output Formats** |:-----:|:-----:|
|
||||
| HTML Tables | :o: | |
|
||||
| HTML Tables | :o: | :o: |
|
||||
|
||||
### Excel 2007+ XML (XLSX/XLSM)
|
||||
|
||||
|
@ -69,5 +69,6 @@ digraph G {
|
||||
csf -> txt
|
||||
dbf -> csf
|
||||
html -> csf
|
||||
csf -> html
|
||||
}
|
||||
}
|
||||
|
BIN
formats.png
BIN
formats.png
Binary file not shown.
Before Width: | Height: | Size: 168 KiB After Width: | Height: | Size: 168 KiB |
@ -6,8 +6,10 @@
|
||||
* [Optional Modules](README.md#optional-modules)
|
||||
* [ECMAScript 5 Compatibility](README.md#ecmascript-5-compatibility)
|
||||
- [Parsing Workbooks](README.md#parsing-workbooks)
|
||||
* [Note on Streaming Read](README.md#note-on-streaming-read)
|
||||
- [Working with the Workbook](README.md#working-with-the-workbook)
|
||||
- [Writing Workbooks](README.md#writing-workbooks)
|
||||
* [Streaming Write](README.md#streaming-write)
|
||||
- [Interface](README.md#interface)
|
||||
* [Parsing functions](README.md#parsing-functions)
|
||||
* [Writing functions](README.md#writing-functions)
|
||||
|
12
test.js
12
test.js
@ -961,7 +961,7 @@ describe('parse features', function() {
|
||||
var bef = (function() {
|
||||
ws = X.utils.aoa_to_sheet([
|
||||
["a","b","c"],
|
||||
["&","<",">"]
|
||||
["&","<",">","\n"]
|
||||
]);
|
||||
wb = {SheetNames:["Sheet1"],Sheets:{Sheet1:ws}};
|
||||
});
|
||||
@ -972,6 +972,7 @@ describe('parse features', function() {
|
||||
assert.equal(get_cell(wb2.Sheets.Sheet1, "A2").h, "&");
|
||||
assert.equal(get_cell(wb2.Sheets.Sheet1, "B2").h, "<");
|
||||
assert.equal(get_cell(wb2.Sheets.Sheet1, "C2").h, ">");
|
||||
assert.equal(get_cell(wb2.Sheets.Sheet1, "D2").h, "
");
|
||||
}); });
|
||||
});
|
||||
|
||||
@ -1135,6 +1136,15 @@ describe('write features', function() {
|
||||
}); });
|
||||
});
|
||||
});
|
||||
describe('HTML', function() {
	/* a cell's pre-rendered `h` markup should appear verbatim in the written HTML */
	it('should use `h` value when present', function() {
		var ws = X.utils.aoa_to_sheet([["abc"]]);
		var a1 = get_cell(ws, "A1");
		a1.h = "<b>abc</b>";
		var book = {SheetNames:["Sheet1"], Sheets:{Sheet1:ws}};
		var html = X.write(book, {bookType:"html", type:"binary"});
		assert(html.indexOf("<b>abc</b>") > 0);
	});
});
|
||||
});
|
||||
|
||||
function seq(end, start) {
|
||||
|
262
xlsx.flow.js
262
xlsx.flow.js
@ -1549,7 +1549,7 @@ function resolve_path(path/*:string*/, base/*:string*/)/*:string*/ {
|
||||
}
|
||||
return result.join('/');
|
||||
}
|
||||
var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g;
|
||||
var attregexg=/([^"\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:')|([^'">\s]+))/g;
|
||||
var tagregex=/<[^>]*>/g;
|
||||
var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/;
|
||||
function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ {
|
||||
@ -1558,11 +1558,13 @@ function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ {
|
||||
for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break;
|
||||
if(!skip_root) z[0] = tag.substr(0, eq);
|
||||
if(eq === tag.length) return z;
|
||||
var m = tag.match(attregexg), j=0, v="", i=0, q="", cc="";
|
||||
var m = tag.match(attregexg), j=0, v="", i=0, q="", cc="", quot = 1;
|
||||
if(m) for(i = 0; i != m.length; ++i) {
|
||||
cc = m[i];
|
||||
for(c=0; c != cc.length; ++c) if(cc.charCodeAt(c) === 61) break;
|
||||
q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1);
|
||||
q = cc.substr(0,c);
|
||||
quot = ((eq=cc.charCodeAt(c+1)) == 34 || eq == 39) ? 1 : 0;
|
||||
v = cc.substring(c+1+quot, cc.length-quot);
|
||||
for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break;
|
||||
if(j===q.length) {
|
||||
if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods
|
||||
@ -1605,9 +1607,10 @@ function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{
|
||||
}
|
||||
function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); }
|
||||
|
||||
/* matches the C0 control characters U+0000 through U+001F */
var htmlcharegex = /[\u0000-\u001f]/g;
/*
 * Escape a value for inclusion in HTML text.
 *
 * The argument is coerced to a string, every `decregex` match is replaced by
 * its `rencoding` entry (both are defined elsewhere in this file), and each
 * remaining control character is written as a four-digit hexadecimal numeric
 * character reference such as "&#x000a;".
 */
function escapehtml(text){
	var s = text + '';
	var escaped = s.replace(decregex, function(y) { return rencoding[y]; });
	/* "000" + hex, sliced to the last 4 characters, left-pads the code to 4 digits */
	return escaped.replace(htmlcharegex, function(ch) {
		return "&#x" + ("000" + ch.charCodeAt(0).toString(16)).slice(-4) + ";";
	});
}
|
||||
|
||||
/* TODO: handle codepages */
|
||||
@ -5332,7 +5335,7 @@ var WK_ = (function() {
|
||||
if(!d) return d;
|
||||
var o = opts || {};
|
||||
if(DENSE != null && o.dense == null) o.dense = DENSE;
|
||||
var s = (o.dense ? [] : {}), n = "Sheet1", sidx = 0;
|
||||
var s/*:Worksheet*/ = ((o.dense ? [] : {})/*:any*/), n = "Sheet1", sidx = 0;
|
||||
var sheets = {}, snames = [n];
|
||||
|
||||
var refguess = {s: {r:0, c:0}, e: {r:0, c:0} };
|
||||
@ -5862,8 +5865,8 @@ function parse_sst_bin(data, opts)/*:SST*/ {
|
||||
pass = false; break;
|
||||
|
||||
default:
|
||||
if(R_n.indexOf("Begin") > 0) state.push(R_n);
|
||||
else if(R_n.indexOf("End") > 0) state.pop();
|
||||
if(R_n.indexOf("Begin") > 0){}
|
||||
else if(R_n.indexOf("End") > 0){}
|
||||
if(!pass || opts.WTF) throw new Error("Unexpected record " + RT + " " + R_n);
|
||||
}
|
||||
});
|
||||
@ -7462,7 +7465,7 @@ function parse_ms_xml() { return {'!type':'macro'}; }
|
||||
/* TODO: it will be useful to parse the function str */
|
||||
var rc_to_a1 = (function(){
|
||||
var rcregex = /(^|[^A-Za-z])R(\[?)(-?\d+|)\]?C(\[?)(-?\d+|)\]?/g;
|
||||
var rcbase;
|
||||
var rcbase/*:Cell*/ = ({r:0,c:0}/*:any*/);
|
||||
function rcfunc($$,$1,$2,$3,$4,$5) {
|
||||
var R = $3.length>0?parseInt($3,10)|0:0, C = $5.length>0?parseInt($5,10)|0:0;
|
||||
if(C<0 && $4.length === 0) C=0;
|
||||
@ -7471,7 +7474,7 @@ var rc_to_a1 = (function(){
|
||||
if($2.length > 0 || $3.length == 0) rRel = true; if(rRel) R += rcbase.r; else --R;
|
||||
return $1 + (cRel ? "" : "$") + encode_col(C) + (rRel ? "" : "$") + encode_row(R);
|
||||
}
|
||||
return function rc_to_a1(fstr, base) {
|
||||
return function rc_to_a1(fstr/*:string*/, base/*:Cell*/)/*:string*/ {
|
||||
rcbase = base;
|
||||
return fstr.replace(rcregex, rcfunc);
|
||||
};
|
||||
@ -11527,6 +11530,8 @@ function write_wb_xml(wb/*:Workbook*/, opts/*:?WriteOpts*/)/*:string*/ {
|
||||
var o = [XML_HEADER];
|
||||
o[o.length] = WB_XML_ROOT;
|
||||
|
||||
var write_names = (wb.Workbook && (wb.Workbook.Names||[]).length > 0);
|
||||
|
||||
/* fileVersion */
|
||||
/* fileSharing */
|
||||
|
||||
@ -11552,9 +11557,9 @@ function write_wb_xml(wb/*:Workbook*/, opts/*:?WriteOpts*/)/*:string*/ {
|
||||
/* functionGroups */
|
||||
/* externalReferences */
|
||||
|
||||
if(wb.Workbook && (wb.Workbook.Names||[]).length > 0) {
|
||||
if(write_names) {
|
||||
o[o.length] = "<definedNames>";
|
||||
wb.Workbook.Names.forEach(function(n) {
|
||||
if(wb.Workbook && wb.Workbook.Names) wb.Workbook.Names.forEach(function(n) {
|
||||
var d = {name:n.Name};
|
||||
if(n.Comment) d.comment = n.Comment;
|
||||
if(n.Sheet != null) d.localSheetId = ""+n.Sheet;
|
||||
@ -12080,7 +12085,7 @@ function parse_xlml_xml(d, opts)/*:Workbook*/ {
|
||||
make_ssf(SSF);
|
||||
var str = debom(xlml_normalize(d));
|
||||
if(opts && opts.type == 'binary' && typeof cptable !== 'undefined') str = cptable.utils.decode(65001, char_codes(str));
|
||||
if(str.substr(0,1000).indexOf("<html") >= 0) return parse_html(str, opts);
|
||||
if(str.substr(0,1000).indexOf("<html") >= 0) return HTML_.to_workbook(str, opts);
|
||||
var Rn;
|
||||
var state = [], tmp;
|
||||
if(DENSE != null && opts.dense == null) opts.dense = DENSE;
|
||||
@ -15054,48 +15059,95 @@ function write_biff_buf(wb/*:Workbook*/, opts/*:WriteOpts*/) {
|
||||
return ba.end();
|
||||
}
|
||||
/* TODO: in browser attach to DOM; in node use an html parser */
|
||||
function parse_html(str/*:string*/, _opts)/*:Workbook*/ {
|
||||
var opts = _opts || {};
|
||||
if(DENSE != null && opts.dense == null) opts.dense = DENSE;
|
||||
var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/);
|
||||
var o/*:Workbook*/ = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} };
|
||||
var i = str.indexOf("<table"), j = str.indexOf("</table");
|
||||
if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
|
||||
var rows = str.slice(i, j).split(/<tr[^>]*>/);
|
||||
var R = 0, C = 0;
|
||||
var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
|
||||
for(i = 0; i < rows.length; ++i) {
|
||||
if(rows[i].substr(0,3) != "<td") continue;
|
||||
var cells = rows[i].split("</td>");
|
||||
for(j = 0; j < cells.length; ++j) {
|
||||
if(cells[j].substr(0,3) != "<td") continue;
|
||||
++C;
|
||||
var m = cells[j], cc = 0;
|
||||
/* TODO: parse styles etc */
|
||||
while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
|
||||
while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
|
||||
/* TODO: generate stub cells */
|
||||
if(!m.length) continue;
|
||||
if(range.s.r > R) range.s.r = R;
|
||||
if(range.e.r < R) range.e.r = R;
|
||||
if(range.s.c > C) range.s.c = C;
|
||||
if(range.e.c < C) range.e.c = C;
|
||||
if(opts.dense) {
|
||||
if(!ws[R]) ws[R] = [];
|
||||
if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m};
|
||||
else ws[R][C] = {t:'s', v:m};
|
||||
} else {
|
||||
var coord/*:string*/ = encode_cell({r:R, c:C});
|
||||
/* TODO: value parsing */
|
||||
if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m};
|
||||
else ws[coord] = {t:'s', v:m};
|
||||
var HTML_ = (function() {
|
||||
/*
 * Parse the first <table> ... </table> span of an HTML string into a worksheet.
 *
 * str:   HTML text; must contain both "<table" and "</table"
 * _opts: optional options object; `dense` selects array-of-arrays storage and
 *        defaults to the file-level DENSE setting when that is set
 *
 * Returns the worksheet with '!ref' set and, when any cell carried a
 * rowspan/colspan, '!merges' set to the collected ranges.
 * Throws Error when the <table> / </table> pair is missing.
 *
 * Only <td> cells are recognized; nested markup inside a cell is stripped.
 */
function html_to_sheet(str/*:string*/, _opts)/*:Worksheet*/ {
	var opts = _opts || {};
	if(DENSE != null && opts.dense == null) opts.dense = DENSE;
	var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/);
	var i = str.indexOf("<table"), j = str.indexOf("</table");
	if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
	/* the capture group keeps every <tr ...> opener in the split output so that rows can be counted */
	var rows = str.slice(i, j).split(/(<tr[^>]*>)/);
	/* R starts at -1 because the first <tr> opener advances it to row 0 */
	var R = -1, C = 0, RS = 0, CS = 0;
	var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
	var merges = [];
	for(i = 0; i < rows.length; ++i) {
		var row = rows[i].trim();
		if(row.substr(0,3) == "<tr") { ++R; C = 0; continue; }
		if(row.substr(0,3) != "<td") continue;
		var cells = row.split("</td>");
		for(j = 0; j < cells.length; ++j) {
			var cell = cells[j].trim();
			if(cell.substr(0,3) != "<td") continue;
			var m = cell, cc = 0;
			/* TODO: parse styles etc */
			/* drop leading tags, then everything from the last remaining tag onward */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
			while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
			var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
			CS = tag.colspan ? +tag.colspan : 1;
			/* a missing rowspan yields NaN, so (RS||1) treats it as a single row */
			if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
			/* TODO: generate stub cells */
			if(!m.length) { C += CS; continue; }
			m = unescapexml(m).replace(/[\r\n]/g,"");
			if(range.s.r > R) range.s.r = R;
			if(range.e.r < R) range.e.r = R;
			if(range.s.c > C) range.s.c = C;
			if(range.e.c < C) range.e.c = C;
			if(opts.dense) {
				if(!ws[R]) ws[R] = [];
				if(Number(m) == Number(m)) ws[R][C] = {t:'n', v:+m};
				else ws[R][C] = {t:'s', v:m};
			} else {
				var coord/*:string*/ = encode_cell({r:R, c:C});
				/* TODO: value parsing */
				if(Number(m) == Number(m)) ws[coord] = {t:'n', v:+m};
				else ws[coord] = {t:'s', v:m};
			}
			C += CS;
		}
	}
	ws['!ref'] = encode_range(range);
	/* publish the collected merge ranges; without this the spans parsed above are lost */
	if(merges.length) ws['!merges'] = merges;
	return ws;
}
|
||||
ws['!ref'] = encode_range(range);
|
||||
return o;
|
||||
}
|
||||
/* Parse an HTML string into a worksheet, then wrap that worksheet via sheet_to_workbook. */
function html_to_book(str/*:string*/, opts)/*:Workbook*/ {
	var sheet = html_to_sheet(str, opts);
	return sheet_to_workbook(sheet, opts);
}
|
||||
/*
 * Render a worksheet as a standalone HTML document containing one <table>.
 *
 * ws:   worksheet (sparse object or dense array of rows); '!ref' gives the
 *       range to emit and '!merges' (optional) the merged ranges
 * opts: accepted for interface compatibility; not read here
 *
 * Each row of the range becomes a <tr>. A cell's `h` string is emitted as-is
 * when present; otherwise the formatted text `w` is passed through escapexml.
 * The top-left cell of a merge carries rowspan/colspan; the cells it covers
 * are omitted.
 */
function sheet_to_html(ws/*:Worksheet*/, opts)/*:string*/ {
	var o/*:Array<string>*/ = [];
	var r = decode_range(ws['!ref']), cell/*:Cell*/;
	var dense = Array.isArray(ws);
	var M = (ws['!merges'] ||[]);
	for(var R = r.s.r; R <= r.e.r; ++R) {
		var oo = [];
		for(var C = r.s.c; C <= r.e.c; ++C) {
			/* RS < 0 marks a cell covered by a merge anchored elsewhere */
			var RS = 0, CS = 0;
			for(var j = 0; j < M.length; ++j) {
				if(M[j].s.r > R || M[j].s.c > C) continue;
				if(M[j].e.r < R || M[j].e.c < C) continue;
				if(M[j].s.r < R || M[j].s.c < C) { RS = -1; break; }
				RS = M[j].e.r - M[j].s.r + 1; CS = M[j].e.c - M[j].s.c + 1; break;
			}
			if(RS < 0) continue;
			var sp = {};
			if(RS > 1) sp.rowspan = RS;
			if(CS > 1) sp.colspan = CS;
			var coord = encode_cell({r:R,c:C});
			cell = dense ? (ws[R]||[])[C] : ws[coord];
			if(!cell || cell.v == null) {
				/* an empty merge anchor must still carry its spans, otherwise the covered cells skipped above leave the row short */
				oo.push("<td" + (RS > 1 ? ' rowspan="' + RS + '"' : "") + (CS > 1 ? ' colspan="' + CS + '"' : "") + "></td>");
				continue;
			}
			/* TODO: html entities */
			var w = cell.h || escapexml(cell.w || (format_cell(cell), cell.w) || "");
			oo.push(writextag('td', w, sp));
		}
		o.push("<tr>" + oo.join("") + "</tr>");
	}
	return "<html><body><table>" + o.join("") + "</table></body></html>";
}
|
||||
|
||||
return {
|
||||
to_workbook: html_to_book,
|
||||
to_sheet: html_to_sheet,
|
||||
from_sheet: sheet_to_html
|
||||
};
|
||||
})();
|
||||
|
||||
function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
|
||||
var opts = _opts || {};
|
||||
@ -15116,7 +15168,7 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
|
||||
}
|
||||
/* TODO: figure out how to extract nonstandard mso- style */
|
||||
CS = +elt.getAttribute("colspan") || 1;
|
||||
if((RS = +elt.getAttribute("rowspan"))>0) merges.push({s:{r:R,c:C},e:{r:R + RS - 1, c:C + CS - 1}});
|
||||
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
|
||||
var o = {t:'s', v:v};
|
||||
if(v != null && v.length && !isNaN(Number(v))) o = {t:'n', v:Number(v)};
|
||||
if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
|
||||
@ -15652,6 +15704,7 @@ function write_obj_str(factory/*:WriteObjStrFactory*/) {
|
||||
};
|
||||
}
|
||||
|
||||
var write_htm_str = write_obj_str(HTML_);
|
||||
var write_csv_str = write_obj_str({from_sheet:sheet_to_csv});
|
||||
var write_slk_str = write_obj_str(SYLK);
|
||||
var write_dif_str = write_obj_str(DIF);
|
||||
@ -16213,6 +16266,7 @@ function writeSync(wb/*:Workbook*/, opts/*:?WriteOpts*/) {
|
||||
case 'xlml': return write_string_type(write_xlml(wb, o), o);
|
||||
case 'slk':
|
||||
case 'sylk': return write_string_type(write_slk_str(wb, o), o);
|
||||
case 'html': return write_string_type(write_htm_str(wb, o), o);
|
||||
case 'txt': return write_bstr_type(write_txt_str(wb, o), o);
|
||||
case 'csv': return write_string_type(write_csv_str(wb, o), o);
|
||||
case 'dif': return write_string_type(write_dif_str(wb, o), o);
|
||||
@ -16235,6 +16289,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) {
|
||||
case '.fods': o.bookType = 'fods'; break;
|
||||
case '.xlml': o.bookType = 'xlml'; break;
|
||||
case '.sylk': o.bookType = 'sylk'; break;
|
||||
case '.html': o.bookType = 'html'; break;
|
||||
case '.xls': o.bookType = 'biff2'; break;
|
||||
case '.xml': o.bookType = 'xml'; break;
|
||||
case '.ods': o.bookType = 'ods'; break;
|
||||
@ -16243,6 +16298,7 @@ function resolve_book_type(o/*?WriteFileOpts*/) {
|
||||
case '.dif': o.bookType = 'dif'; break;
|
||||
case '.prn': o.bookType = 'prn'; break;
|
||||
case '.slk': o.bookType = 'sylk'; break;
|
||||
case '.htm': o.bookType = 'html'; break;
|
||||
}
|
||||
}
|
||||
|
||||
@ -16417,42 +16473,47 @@ function sheet_to_json(sheet/*:Worksheet*/, opts/*:?Sheet2JSONOpts*/){
|
||||
return out;
|
||||
}
|
||||
|
||||
function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/) {
|
||||
var out = "", txt = "", qreg = /"/g;
|
||||
/* matches every double quote so it can be doubled inside a quoted field */
var qreg = /"/g;
/*
 * Build the delimited text for worksheet row R.
 *
 * sheet: worksheet (dense rows when o.dense is set, otherwise keyed by A1 address)
 * r:     range whose column bounds (r.s.c .. r.e.c) are emitted
 * R:     zero-based row index
 * cols:  column labels indexed by column number
 * fs/rs: char codes that force a value to be quoted (along with 34, the double quote)
 * FS:    separator string placed between fields
 * o:     options; `dense` and `blankrows` are read here and o is passed to format_cell
 *
 * Returns the row text, or null when o.blankrows === false and no cell in the
 * row has a value or a (non-array) formula.
 */
function make_csv_row(sheet/*:Worksheet*/, r/*:Range*/, R/*:number*/, cols/*:Array<string>*/, fs/*:number*/, rs/*:number*/, FS/*:string*/, o/*:Sheet2CSVOpts*/)/*:?string*/ {
	var rowlabel = encode_row(R);
	var line = "";
	var blank = true;
	for(var C = r.s.c; C <= r.e.c; ++C) {
		var cell = o.dense ? (sheet[R]||[])[C]: sheet[cols[C] + rowlabel];
		var text = "";
		if(cell != null) {
			if(cell.v != null) {
				blank = false;
				text = ''+format_cell(cell, null, o);
				/* quote the value as soon as one character collides with a delimiter or a quote */
				for(var k = 0; k !== text.length; ++k) {
					var code = text.charCodeAt(k);
					if(code === fs || code === rs || code === 34) {
						text = "\"" + text.replace(qreg, '""') + "\"";
						break;
					}
				}
			} else if(cell.f != null && !cell.F) {
				/* NOTE: Excel CSV does not support array formulae */
				blank = false;
				text = '=' + cell.f;
				if(text.indexOf(",") >= 0) text = '"' + text.replace(qreg, '""') + '"';
			}
		}
		line += (C === r.s.c ? "" : FS) + text;
	}
	if(o.blankrows === false && blank) return null;
	return line;
}
|
||||
|
||||
function sheet_to_csv(sheet/*:Worksheet*/, opts/*:?Sheet2CSVOpts*/)/*:string*/ {
|
||||
var out = "";
|
||||
var o = opts == null ? {} : opts;
|
||||
if(sheet == null || sheet["!ref"] == null) return "";
|
||||
var r = safe_decode_range(sheet["!ref"]);
|
||||
var FS = o.FS !== undefined ? o.FS : ",", fs = FS.charCodeAt(0);
|
||||
var RS = o.RS !== undefined ? o.RS : "\n", rs = RS.charCodeAt(0);
|
||||
var endregex = new RegExp((FS=="|" ? "\\|" : FS)+"+$");
|
||||
var row = "", rr = "", cols = [];
|
||||
var i = 0, cc = 0, val;
|
||||
var R = 0, C = 0;
|
||||
var dense = Array.isArray(sheet);
|
||||
for(C = r.s.c; C <= r.e.c; ++C) cols[C] = encode_col(C);
|
||||
for(R = r.s.r; R <= r.e.r; ++ |