version bump 0.8.8: HTML

- read MSO HTML (fixes  h/t @vineetl fixes  h/t @tienne)
- roll out xml namespace fix (closes  h/t @PierreOCXP)
- cellDates clarifications
This commit is contained in:
SheetJS 2017-03-09 00:24:32 -05:00
parent d2b5506dec
commit 7b6fb7b327
38 changed files with 286 additions and 114 deletions

@ -24,6 +24,8 @@ File format support for known spreadsheet data formats:
| OpenDocument Spreadsheet (ODS) | :o: | :o: |
| Flat XML ODF Spreadsheet (FODS) | :o: | :o: |
| Uniform Office Format Spreadsheet (标文通 UOS1/UOS2) | :o: | |
| **Other Common Spreadsheet Output Formats** |:-----:|:-----:|
| HTML Tables | :o: | |
Demo: <http://oss.sheetjs.com/js-xlsx>
@ -565,7 +567,7 @@ file but Excel will know how to handle it. This library applies similar logic:
|:-------|:--------------|:----------------------------------------------------|
| `0xD0` | CFB Container | BIFF 5/8 or password-protected XLSX/XLSB |
| `0x09` | BIFF Stream | BIFF 2/3/4/5 |
| `0x3C` | XML | SpreadsheetML or Flat ODS or UOS1 |
| `0x3C` | XML/HTML | SpreadsheetML or Flat ODS or UOS1 or HTML |
| `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 |
| `0xFE` | UTF8 Text | SpreadsheetML or Flat ODS or UOS1 |
@ -679,6 +681,11 @@ Excel CSV deviates from RFC4180 in a number of important ways. The generated
CSV files should generally work in Excel although they may not work in RFC4180
compatible readers.
### HTML
Excel HTML worksheets include special metadata encoded in styles. For example,
`mso-number-format` is a localized string containing the number format. Despite
the metadata, the output is valid HTML, although Excel does accept bare `&` symbols.
## Tested Environments

@ -1 +1 @@
XLSX.version = '0.8.7';
XLSX.version = '0.8.8';

@ -29,10 +29,9 @@ function evert_arr(obj/*:any*/)/*:EvertArrType*/ {
return o;
}
/* TODO: date1904 logic */
function datenum(v/*:number*/, date1904/*:?boolean*/)/*:number*/ {
if(date1904) v+=1462;
var epoch = Date.parse(v);
function datenum(v/*:Date*/, date1904/*:?boolean*/)/*:number*/ {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}

@ -23,6 +23,7 @@ function get_cell_style(styles, cell, opts) {
}
function safe_format(p, fmtid, fillid, opts) {
if(p.t === 'd' && typeof p.v === 'string') p.v = new Date(p.v);
try {
if(p.t === 'e') p.w = p.w || BErr[p.v];
else if(fmtid === 0) {

@ -137,7 +137,7 @@ function write_ws_xml_cell(cell, ref, ws, opts, idx, wb) {
if(opts.cellDates) vv = new Date(cell.v).toISOString();
else {
cell.t = 'n';
vv = ''+(cell.v = datenum(cell.v));
vv = ''+(cell.v = datenum(new Date(cell.v)));
if(typeof cell.z === 'undefined') cell.z = SSF._table[14];
}
break;
@ -265,7 +265,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess) {
break; // inline string
case 'b': p.v = parsexmlbool(p.v); break;
case 'd':
if(!opts.cellDates) { p.v = datenum(p.v); p.t = 'n'; }
if(!opts.cellDates) { p.v = datenum(new Date(p.v)); p.t = 'n'; }
break;
/* error string in .v, number in .v */
case 'e': p.w = p.v; p.v = RBErr[p.v]; break;

@ -157,8 +157,9 @@ function xlml_normalize(d)/*:string*/ {
/* TODO: Everything */
var xlmlregex = /<(\/?)([a-z0-9]*:|)(\w+)[^>]*>/mg;
function parse_xlml_xml(d, opts) {
function parse_xlml_xml(d, opts)/*:Workbook*/ {
var str = debom(xlml_normalize(d));
if(str.substr(0,1000).indexOf("<html") >= 0) return parse_html(str, opts);
var Rn;
var state = [], tmp;
var sheets = {}, sheetnames = [], cursheet = {}, sheetname = "";

35
bits/79_html.js Normal file

@ -0,0 +1,35 @@
/* TODO: in browser attach to DOM; in node use an html parser */
/*
 * Parse the first <table> found in an HTML string into a one-sheet workbook.
 *
 * Rows are found by splitting on <tr ...> tags and cells by splitting on
 * "</td>"; only chunks that begin with "<td" (after leading whitespace) are
 * treated as rows / cells.  The cell text is what remains after stripping
 * leading and trailing tags.  Text that compares equal to its numeric
 * coercion is stored as a number cell (t:'n'), everything else as a string
 * cell (t:'s').  Empty cells are skipped but still occupy a column.
 *
 * str:  HTML source text
 * opts: parse options (not read by this function)
 * Returns a workbook whose only sheet is named "Sheet1".
 * Throws Error when no "<table" / "</table" pair is present.
 */
function parse_html(str/*:string*/, opts)/*:Workbook*/ {
	var ws/*:Worksheet*/ = ({}/*:any*/);
	var o/*:Workbook*/ = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} };
	var i = str.indexOf("<table"), j = str.indexOf("</table");
	if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
	var rows = str.slice(i, j).split(/<tr[^>]*>/);
	var R = 0, C = 0;
	/* sentinel start values are replaced by the first recorded cell */
	var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
	for(i = 0; i < rows.length; ++i) {
		/* tolerate whitespace / newlines between <tr> and the first <td> */
		var row = rows[i].replace(/^\s+/, "");
		if(row.substr(0,3) != "<td") continue;
		var cells = row.split("</td>");
		for(j = 0, C = 0; j < cells.length; ++j) {
			/* tolerate whitespace / newlines between </td> and the next <td> */
			var m = cells[j].replace(/^\s+/, ""), cc = 0;
			if(m.substr(0,3) != "<td") continue;
			/* TODO: parse styles etc */
			/* drop leading tags (the <td ...> itself and any directly nested opening tags) */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
			/* drop trailing tags */
			while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
			/* TODO: generate stub cells */
			if(m.length) {
				if(range.s.r > R) range.s.r = R;
				if(range.e.r < R) range.e.r = R;
				if(range.s.c > C) range.s.c = C;
				if(range.e.c < C) range.e.c = C;
				var coord/*:string*/ = encode_cell({r:R, c:C});
				/* TODO: value parsing */
				/* whitespace-only text coerces to 0, so require a non-blank character */
				if(/\S/.test(m) && m == +m) ws[coord] = {t:'n', v:+m};
				else ws[coord] = {t:'s', v:m};
			}
			/* advance after addressing so the first cell of a row is column 0 (A) */
			++C;
		}
		++R;
	}
	/* no cell recorded: collapse the sentinel to A1 instead of emitting a bogus range */
	if(range.s.r > range.e.r) range.s = {r:0, c:0};
	ws['!ref'] = encode_range(range);
	return o;
}

@ -19,12 +19,12 @@ function write_string_type(out/*:string*/, opts/*:WriteOpts*/) {
switch(opts.type) {
case "base64": return Base64.encode(out);
case "binary": return out;
case "file": return _fs.writeFileSync(opts.file, out, {encoding:'utf8'});
case "file": return _fs.writeFileSync(opts.file, out, 'utf8');
case "buffer": {
if(has_buf) return new Buffer(out, 'utf8');
else return out.split("").map(function(c) { return c.charCodeAt(0); });
} break;
default: return out;
default: throw new Error("Unrecognized type " + opts.type);
}
}

14
dist/ods.js vendored

@ -114,7 +114,8 @@ var rencoding = {
var rencstr = "&<>'\"".split("");
// TODO: CP remap (need to read file version to determine OS)
var encregex = /&[a-z]*;/g, coderegex = /_x([\da-fA-F]+)_/g;
/* 22.4.2.4 bstr (Basic String) */
var encregex = /&[a-z]*;/g, coderegex = /_x([\da-fA-F]{4})_/g;
function unescapexml(text){
var s = text + '';
return s.replace(encregex, function($$) { return encodings[$$]; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));});
@ -133,8 +134,9 @@ function parsexmlbool(value) {
}
}
function datenum(v) {
var epoch = Date.parse(v);
function datenum(v, date1904) {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}
@ -275,7 +277,8 @@ var parse_content_xml = (function() {
"day-of-week": ["ddd", "dddd"]
};
return function pcx(d, opts) {
return function pcx(d, _opts) {
var opts = _opts || {};
var str = xlml_normalize(d);
var state = [], tmp;
var tag;
@ -363,7 +366,7 @@ var parse_content_xml = (function() {
case 'float': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'percentage': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'currency': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'date': q.t = 'n'; q.v = datenum(ctag['date-value']); q.z = 'm/d/yy'; break;
case 'date': q.t = 'n'; q.v = datenum(new Date(ctag['date-value'])); q.z = 'm/d/yy'; break;
case 'time': q.t = 'n'; q.v = parse_isodur(ctag['time-value'])/86400; break;
case 'number': q.t = 'n'; q.v = parseFloat(ctag['数据数值']); break;
default:
@ -699,6 +702,7 @@ function parse_ods(zip, opts) {
var ods = !!safegetzipfile(zip, 'objectdata');
if(ods) var manifest = parse_manifest(getzipdata(zip, 'META-INF/manifest.xml'), opts);
var content = getzipdata(zip, 'content.xml');
if(!content) throw new Error("Missing content.xml in " + (ods ? "ODS" : "UOF")+ " file");
return parse_content_xml(ods ? content : utf8read(content), opts);
}

2
dist/ods.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/ods.min.map vendored

File diff suppressed because one or more lines are too long

22
dist/xlsx.core.min.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

18
dist/xlsx.full.min.js vendored

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

61
dist/xlsx.js vendored

@ -4,7 +4,7 @@
/*jshint funcscope:true, eqnull:true */
var XLSX = {};
(function make_xlsx(XLSX){
XLSX.version = '0.8.7';
XLSX.version = '0.8.8';
var current_codepage = 1200, current_cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('./dist/cpexcel.js');
@ -1293,10 +1293,9 @@ function evert_arr(obj) {
return o;
}
/* TODO: date1904 logic */
function datenum(v, date1904) {
if(date1904) v+=1462;
var epoch = Date.parse(v);
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}
@ -1415,7 +1414,8 @@ var rencstr = "&<>'\"".split("");
// TODO: CP remap (need to read file version to determine OS)
var unescapexml = (function() {
var encregex = /&[a-z]*;/g, coderegex = /_x([\da-fA-F]+)_/g;
/* 22.4.2.4 bstr (Basic String) */
var encregex = /&[a-z]*;/g, coderegex = /_x([\da-fA-F]{4})_/g;
return function unescapexml(text) {
var s = text + '';
return s.replace(encregex, function($$) { return encodings[$$]; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));});
@ -4320,6 +4320,7 @@ var parse_rs = (function parse_rs_factory() {
/* ** not required . */
case '<shadow':
/* falls through */
case '<shadow>':
case '<shadow/>': break;
/* 18.4.1 charset CT_IntProperty TODO */
@ -4331,6 +4332,7 @@ var parse_rs = (function parse_rs_factory() {
/* 18.4.2 outline CT_BooleanProperty TODO */
case '<outline':
/* falls through */
case '<outline>':
case '<outline/>': break;
/* 18.4.5 rFont CT_FontName */
@ -4343,6 +4345,7 @@ var parse_rs = (function parse_rs_factory() {
case '<strike':
if(!y.val) break;
/* falls through */
case '<strike>':
case '<strike/>': font.strike = 1; break;
case '</strike>': break;
@ -4350,6 +4353,7 @@ var parse_rs = (function parse_rs_factory() {
case '<u':
if(y.val == '0') break;
/* falls through */
case '<u>':
case '<u/>': font.u = 1; break;
case '</u>': break;
@ -4357,6 +4361,7 @@ var parse_rs = (function parse_rs_factory() {
case '<b':
if(y.val == '0') break;
/* falls through */
case '<b>':
case '<b/>': font.b = 1; break;
case '</b>': break;
@ -4364,6 +4369,7 @@ var parse_rs = (function parse_rs_factory() {
case '<i':
if(y.val == '0') break;
/* falls through */
case '<i>':
case '<i/>': font.i = 1; break;
case '</i>': break;
@ -7852,6 +7858,7 @@ function get_cell_style(styles, cell, opts) {
}
function safe_format(p, fmtid, fillid, opts) {
if(p.t === 'd' && typeof p.v === 'string') p.v = new Date(p.v);
try {
if(p.t === 'e') p.w = p.w || BErr[p.v];
else if(fmtid === 0) {
@ -8022,7 +8029,7 @@ function write_ws_xml_cell(cell, ref, ws, opts, idx, wb) {
if(opts.cellDates) vv = new Date(cell.v).toISOString();
else {
cell.t = 'n';
vv = ''+(cell.v = datenum(cell.v));
vv = ''+(cell.v = datenum(new Date(cell.v)));
if(typeof cell.z === 'undefined') cell.z = SSF._table[14];
}
break;
@ -8150,7 +8157,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess) {
break; // inline string
case 'b': p.v = parsexmlbool(p.v); break;
case 'd':
if(!opts.cellDates) { p.v = datenum(p.v); p.t = 'n'; }
if(!opts.cellDates) { p.v = datenum(new Date(p.v)); p.t = 'n'; }
break;
/* error string in .v, number in .v */
case 'e': p.w = p.v; p.v = RBErr[p.v]; break;
@ -9479,6 +9486,7 @@ function xlml_normalize(d) {
var xlmlregex = /<(\/?)([a-z0-9]*:|)(\w+)[^>]*>/mg;
function parse_xlml_xml(d, opts) {
var str = debom(xlml_normalize(d));
if(str.substr(0,1000).indexOf("<html") >= 0) return parse_html(str, opts);
var Rn;
var state = [], tmp;
var sheets = {}, sheetnames = [], cursheet = {}, sheetname = "";
@ -12102,6 +12110,41 @@ function write_biff_buf(wb, o) {
// TODO
return ba.end();
}
/* TODO: in browser attach to DOM; in node use an html parser */
/*
 * Parse the first <table> found in an HTML string into a one-sheet workbook.
 *
 * Rows are found by splitting on <tr ...> tags and cells by splitting on
 * "</td>"; only chunks that begin with "<td" (after leading whitespace) are
 * treated as rows / cells.  The cell text is what remains after stripping
 * leading and trailing tags.  Text that compares equal to its numeric
 * coercion is stored as a number cell (t:'n'), everything else as a string
 * cell (t:'s').  Empty cells are skipped but still occupy a column.
 *
 * str:  HTML source text
 * opts: parse options (not read by this function)
 * Returns a workbook whose only sheet is named "Sheet1".
 * Throws Error when no "<table" / "</table" pair is present.
 */
function parse_html(str, opts) {
	var ws = ({});
	var o = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} };
	var i = str.indexOf("<table"), j = str.indexOf("</table");
	if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
	var rows = str.slice(i, j).split(/<tr[^>]*>/);
	var R = 0, C = 0;
	/* sentinel start values are replaced by the first recorded cell */
	var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
	for(i = 0; i < rows.length; ++i) {
		/* tolerate whitespace / newlines between <tr> and the first <td> */
		var row = rows[i].replace(/^\s+/, "");
		if(row.substr(0,3) != "<td") continue;
		var cells = row.split("</td>");
		for(j = 0, C = 0; j < cells.length; ++j) {
			/* tolerate whitespace / newlines between </td> and the next <td> */
			var m = cells[j].replace(/^\s+/, ""), cc = 0;
			if(m.substr(0,3) != "<td") continue;
			/* TODO: parse styles etc */
			/* drop leading tags (the <td ...> itself and any directly nested opening tags) */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
			/* drop trailing tags */
			while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
			/* TODO: generate stub cells */
			if(m.length) {
				if(range.s.r > R) range.s.r = R;
				if(range.e.r < R) range.e.r = R;
				if(range.s.c > C) range.s.c = C;
				if(range.e.c < C) range.e.c = C;
				var coord = encode_cell({r:R, c:C});
				/* TODO: value parsing */
				/* whitespace-only text coerces to 0, so require a non-blank character */
				if(/\S/.test(m) && m == +m) ws[coord] = {t:'n', v:+m};
				else ws[coord] = {t:'s', v:m};
			}
			/* advance after addressing so the first cell of a row is column 0 (A) */
			++C;
		}
		++R;
	}
	/* no cell recorded: collapse the sentinel to A1 instead of emitting a bogus range */
	if(range.s.r > range.e.r) range.s = {r:0, c:0};
	ws['!ref'] = encode_range(range);
	return o;
}
/* actual implementation in utils, wrappers are for read/write */
function write_csv_str(wb, o) {
var idx = 0;
@ -12469,12 +12512,12 @@ function write_string_type(out, opts) {
switch(opts.type) {
case "base64": return Base64.encode(out);
case "binary": return out;
case "file": return _fs.writeFileSync(opts.file, out, {encoding:'utf8'});
case "file": return _fs.writeFileSync(opts.file, out, 'utf8');
case "buffer": {
if(has_buf) return new Buffer(out, 'utf8');
else return out.split("").map(function(c) { return c.charCodeAt(0); });
} break;
default: return out;
default: throw new Error("Unrecognized type " + opts.type);
}
}

18
dist/xlsx.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/xlsx.min.map vendored

File diff suppressed because one or more lines are too long

@ -40,7 +40,7 @@ Use readAsBinaryString: (when available) <input type="checkbox" name="userabs" c
<pre id="out"></pre>
<br />
<!-- uncomment the next line here and in xlsxworker.js for encoding support -->
<!--<script src="dist/cpexcel.js"></script>-->
<script src="dist/cpexcel.js"></script>
<script src="shim.js"></script>
<script src="jszip.js"></script>
<script src="xlsx.js"></script>

@ -5,6 +5,7 @@ declare module 'xlsx' { declare var exports:XLSXModule; };
declare module '../' { declare var exports:XLSXModule; };
declare module 'commander' { declare var exports:any; };
declare module './jszip.js' { declare var exports:any; };
type ZIP = any;
*/

@ -136,8 +136,9 @@ function parsexmlbool(value) {
}
}
function datenum(v) {
var epoch = Date.parse(v);
function datenum(v/*:Date*/, date1904/*:?boolean*/)/*:number*/ {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}
@ -170,7 +171,7 @@ function parse_isodur(s) {
var XML_HEADER = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\r\n';
/* copied from js-xls (C) SheetJS Apache2 license */
function xlml_normalize(d) {
function xlml_normalize(d)/*:string*/ {
if(has_buf &&/*::typeof Buffer !== "undefined" && d != null &&*/ Buffer.isBuffer(d)) return d.toString('utf8');
if(typeof d === 'string') return d;
throw "badf";
@ -237,7 +238,7 @@ var parse_text_p = function(text, tag) {
return unescapexml(text.replace(/<text:s\/>/g," ").replace(/<[^>]*>/g,""));
};
var utf8read = function utf8reada(orig) {
var utf8read = function utf8reada(orig/*:string*/)/*:string*/ {
var out = "", i = 0, c = 0, d = 0, e = 0, f = 0, w = 0;
while (i < orig.length) {
c = orig.charCodeAt(i++);
@ -278,12 +279,13 @@ var parse_content_xml = (function() {
"day-of-week": ["ddd", "dddd"]
};
return function pcx(d, opts) {
return function pcx(d, _opts) {
var opts = _opts || {};
var str = xlml_normalize(d);
var state/*:Array<any>*/ = [], tmp;
var tag/*:: = {}*/;
var NFtag = {name:""}, NF = "", pidx = 0;
var sheetag/*:: = {name:""}*/;
var sheetag/*:: = {name:"", '名称':""}*/;
var rowtag/*:: = {'行号':""}*/;
var Sheets = {}, SheetNames/*:Array<string>*/ = [], ws = {};
var Rn, q/*:: = ({t:"", v:null, z:null, w:""}:any)*/;
@ -366,7 +368,7 @@ var parse_content_xml = (function() {
case 'float': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'percentage': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'currency': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'date': q.t = 'n'; q.v = datenum(ctag['date-value']); q.z = 'm/d/yy'; break;
case 'date': q.t = 'n'; q.v = datenum(new Date(ctag['date-value'])); q.z = 'm/d/yy'; break;
case 'time': q.t = 'n'; q.v = parse_isodur(ctag['time-value'])/86400; break;
case 'number': q.t = 'n'; q.v = parseFloat(ctag['数据数值']); break;
default:
@ -702,6 +704,7 @@ function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/) {
var ods = !!safegetzipfile(zip, 'objectdata');
if(ods) var manifest = parse_manifest(getzipdata(zip, 'META-INF/manifest.xml'), opts);
var content = getzipdata(zip, 'content.xml');
if(!content) throw new Error("Missing content.xml in " + (ods ? "ODS" : "UOF")+ " file");
return parse_content_xml(ods ? content : utf8read(content), opts);
}

11
ods.js

@ -134,8 +134,9 @@ function parsexmlbool(value) {
}
}
function datenum(v) {
var epoch = Date.parse(v);
function datenum(v, date1904) {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}
@ -276,7 +277,8 @@ var parse_content_xml = (function() {
"day-of-week": ["ddd", "dddd"]
};
return function pcx(d, opts) {
return function pcx(d, _opts) {
var opts = _opts || {};
var str = xlml_normalize(d);
var state = [], tmp;
var tag;
@ -364,7 +366,7 @@ var parse_content_xml = (function() {
case 'float': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'percentage': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'currency': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'date': q.t = 'n'; q.v = datenum(ctag['date-value']); q.z = 'm/d/yy'; break;
case 'date': q.t = 'n'; q.v = datenum(new Date(ctag['date-value'])); q.z = 'm/d/yy'; break;
case 'time': q.t = 'n'; q.v = parse_isodur(ctag['time-value'])/86400; break;
case 'number': q.t = 'n'; q.v = parseFloat(ctag['数据数值']); break;
default:
@ -700,6 +702,7 @@ function parse_ods(zip, opts) {
var ods = !!safegetzipfile(zip, 'objectdata');
if(ods) var manifest = parse_manifest(getzipdata(zip, 'META-INF/manifest.xml'), opts);
var content = getzipdata(zip, 'content.xml');
if(!content) throw new Error("Missing content.xml in " + (ods ? "ODS" : "UOF")+ " file");
return parse_content_xml(ods ? content : utf8read(content), opts);
}

@ -64,8 +64,9 @@ function parsexmlbool(value) {
}
}
function datenum(v) {
var epoch = Date.parse(v);
function datenum(v/*:Date*/, date1904/*:?boolean*/)/*:number*/ {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}

@ -1,5 +1,5 @@
/* copied from js-xls (C) SheetJS Apache2 license */
function xlml_normalize(d) {
function xlml_normalize(d)/*:string*/ {
if(has_buf &&/*::typeof Buffer !== "undefined" && d != null &&*/ Buffer.isBuffer(d)) return d.toString('utf8');
if(typeof d === 'string') return d;
throw "badf";

@ -2,7 +2,7 @@ var parse_text_p = function(text, tag) {
return unescapexml(text.replace(/<text:s\/>/g," ").replace(/<[^>]*>/g,""));
};
var utf8read = function utf8reada(orig) {
var utf8read = function utf8reada(orig/*:string*/)/*:string*/ {
var out = "", i = 0, c = 0, d = 0, e = 0, f = 0, w = 0;
while (i < orig.length) {
c = orig.charCodeAt(i++);

@ -12,12 +12,13 @@ var parse_content_xml = (function() {
"day-of-week": ["ddd", "dddd"]
};
return function pcx(d, opts) {
return function pcx(d, _opts) {
var opts = _opts || {};
var str = xlml_normalize(d);
var state/*:Array<any>*/ = [], tmp;
var tag/*:: = {}*/;
var NFtag = {name:""}, NF = "", pidx = 0;
var sheetag/*:: = {name:""}*/;
var sheetag/*:: = {name:"", '名称':""}*/;
var rowtag/*:: = {'行号':""}*/;
var Sheets = {}, SheetNames/*:Array<string>*/ = [], ws = {};
var Rn, q/*:: = ({t:"", v:null, z:null, w:""}:any)*/;
@ -100,7 +101,7 @@ var parse_content_xml = (function() {
case 'float': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'percentage': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'currency': q.t = 'n'; q.v = parseFloat(ctag.value); break;
case 'date': q.t = 'n'; q.v = datenum(ctag['date-value']); q.z = 'm/d/yy'; break;
case 'date': q.t = 'n'; q.v = datenum(new Date(ctag['date-value'])); q.z = 'm/d/yy'; break;
case 'time': q.t = 'n'; q.v = parse_isodur(ctag['time-value'])/86400; break;
case 'number': q.t = 'n'; q.v = parseFloat(ctag['数据数值']); break;
default:

@ -4,6 +4,7 @@ function parse_ods(zip/*:ZIPFile*/, opts/*:?ParseOpts*/) {
var ods = !!safegetzipfile(zip, 'objectdata');
if(ods) var manifest = parse_manifest(getzipdata(zip, 'META-INF/manifest.xml'), opts);
var content = getzipdata(zip, 'content.xml');
if(!content) throw new Error("Missing content.xml in " + (ods ? "ODS" : "UOF")+ " file");
return parse_content_xml(ods ? content : utf8read(content), opts);
}

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.8.7",
"version": "0.8.8",
"author": "sheetjs",
"description": "Excel (XLSB/XLSX/XLSM/XLS/XML) and ODS (ODS/FODS/UOS) spreadsheet parser and writer",
"keywords": [ "excel", "xls", "xlsx", "xlsb", "xlsm", "ods", "office", "spreadsheet" ],

13
test.js

@ -869,8 +869,9 @@ describe('roundtrip features', function() {
else if(dj && dk && !di); /* TODO: convert to date */
else assert.equal(m[1].t, 'n');
if(m[0].t === m[1].t) assert.equal(m[0].v, m[1].v);
else if(m[0].t === 'd') assert(Math.abs(datenum(new Date(m[0].v)) - m[1].v) < 0.01); /* TODO: 1sec adjustment */
if(m[0].t === 'n' && m[1].t === 'n') assert.equal(m[0].v, m[1].v);
else if(m[0].t === 'd' && m[1].t === 'd') assert.equal(m[0].v.toString(), m[1].v.toString());
else if(m[1].t === 'n') assert(Math.abs(datenum(new Date(m[0].v)) - m[1].v) < 0.01); /* TODO: 1sec adjustment */
});
});
});
@ -932,10 +933,10 @@ describe('invalid files', function() {
});
});
function datenum(v, date1904) {
if(date1904) v+=1462;
var epoch = Date.parse(v);
return (epoch - new Date(Date.UTC(1899, 11, 30))) / (24 * 60 * 60 * 1000);
function datenum(v/*:Date*/, date1904/*:?boolean*/)/*:number*/ {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}
function sheet_from_array_of_arrays(data, opts) {
var ws = {};

@ -37,11 +37,10 @@ function Workbook() {
var wb = new Workbook();
/* TODO: date1904 logic */
function datenum(v, date1904) {
if(date1904) v+=1462;
var epoch = Date.parse(v);
return (epoch - new Date(Date.UTC(1899, 11, 30))) / (24 * 60 * 60 * 1000);
function datenum(v/*:Date*/, date1904/*:?boolean*/)/*:number*/ {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}
/* convert an array of arrays in JS to a CSF spreadsheet */

@ -4,7 +4,7 @@
/*jshint funcscope:true, eqnull:true */
var XLSX = {};
(function make_xlsx(XLSX){
XLSX.version = '0.8.7';
XLSX.version = '0.8.8';
var current_codepage = 1200, current_cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('./dist/cpexcel.js');
@ -1334,10 +1334,9 @@ function evert_arr(obj/*:any*/)/*:EvertArrType*/ {
return o;
}
/* TODO: date1904 logic */
function datenum(v/*:number*/, date1904/*:?boolean*/)/*:number*/ {
if(date1904) v+=1462;
var epoch = Date.parse(v);
function datenum(v/*:Date*/, date1904/*:?boolean*/)/*:number*/ {
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}
@ -7901,6 +7900,7 @@ function get_cell_style(styles, cell, opts) {
}
function safe_format(p, fmtid, fillid, opts) {
if(p.t === 'd' && typeof p.v === 'string') p.v = new Date(p.v);
try {
if(p.t === 'e') p.w = p.w || BErr[p.v];
else if(fmtid === 0) {
@ -8071,7 +8071,7 @@ function write_ws_xml_cell(cell, ref, ws, opts, idx, wb) {
if(opts.cellDates) vv = new Date(cell.v).toISOString();
else {
cell.t = 'n';
vv = ''+(cell.v = datenum(cell.v));
vv = ''+(cell.v = datenum(new Date(cell.v)));
if(typeof cell.z === 'undefined') cell.z = SSF._table[14];
}
break;
@ -8199,7 +8199,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess) {
break; // inline string
case 'b': p.v = parsexmlbool(p.v); break;
case 'd':
if(!opts.cellDates) { p.v = datenum(p.v); p.t = 'n'; }
if(!opts.cellDates) { p.v = datenum(new Date(p.v)); p.t = 'n'; }
break;
/* error string in .v, number in .v */
case 'e': p.w = p.v; p.v = RBErr[p.v]; break;
@ -9528,8 +9528,9 @@ function xlml_normalize(d)/*:string*/ {
/* TODO: Everything */
var xlmlregex = /<(\/?)([a-z0-9]*:|)(\w+)[^>]*>/mg;
function parse_xlml_xml(d, opts) {
function parse_xlml_xml(d, opts)/*:Workbook*/ {
var str = debom(xlml_normalize(d));
if(str.substr(0,1000).indexOf("<html") >= 0) return parse_html(str, opts);
var Rn;
var state = [], tmp;
var sheets = {}, sheetnames = [], cursheet = {}, sheetname = "";
@ -12153,6 +12154,41 @@ function write_biff_buf(wb/*:Workbook*/, o/*:WriteOpts*/) {
// TODO
return ba.end();
}
/* TODO: in browser attach to DOM; in node use an html parser */
/*
 * Parse the first <table> found in an HTML string into a one-sheet workbook.
 *
 * Rows are found by splitting on <tr ...> tags and cells by splitting on
 * "</td>"; only chunks that begin with "<td" (after leading whitespace) are
 * treated as rows / cells.  The cell text is what remains after stripping
 * leading and trailing tags.  Text that compares equal to its numeric
 * coercion is stored as a number cell (t:'n'), everything else as a string
 * cell (t:'s').  Empty cells are skipped but still occupy a column.
 *
 * str:  HTML source text
 * opts: parse options (not read by this function)
 * Returns a workbook whose only sheet is named "Sheet1".
 * Throws Error when no "<table" / "</table" pair is present.
 */
function parse_html(str/*:string*/, opts)/*:Workbook*/ {
	var ws/*:Worksheet*/ = ({}/*:any*/);
	var o/*:Workbook*/ = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} };
	var i = str.indexOf("<table"), j = str.indexOf("</table");
	if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
	var rows = str.slice(i, j).split(/<tr[^>]*>/);
	var R = 0, C = 0;
	/* sentinel start values are replaced by the first recorded cell */
	var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
	for(i = 0; i < rows.length; ++i) {
		/* tolerate whitespace / newlines between <tr> and the first <td> */
		var row = rows[i].replace(/^\s+/, "");
		if(row.substr(0,3) != "<td") continue;
		var cells = row.split("</td>");
		for(j = 0, C = 0; j < cells.length; ++j) {
			/* tolerate whitespace / newlines between </td> and the next <td> */
			var m = cells[j].replace(/^\s+/, ""), cc = 0;
			if(m.substr(0,3) != "<td") continue;
			/* TODO: parse styles etc */
			/* drop leading tags (the <td ...> itself and any directly nested opening tags) */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
			/* drop trailing tags */
			while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
			/* TODO: generate stub cells */
			if(m.length) {
				if(range.s.r > R) range.s.r = R;
				if(range.e.r < R) range.e.r = R;
				if(range.s.c > C) range.s.c = C;
				if(range.e.c < C) range.e.c = C;
				var coord/*:string*/ = encode_cell({r:R, c:C});
				/* TODO: value parsing */
				/* whitespace-only text coerces to 0, so require a non-blank character */
				if(/\S/.test(m) && m == +m) ws[coord] = {t:'n', v:+m};
				else ws[coord] = {t:'s', v:m};
			}
			/* advance after addressing so the first cell of a row is column 0 (A) */
			++C;
		}
		++R;
	}
	/* no cell recorded: collapse the sentinel to A1 instead of emitting a bogus range */
	if(range.s.r > range.e.r) range.s = {r:0, c:0};
	ws['!ref'] = encode_range(range);
	return o;
}
/* actual implementation in utils, wrappers are for read/write */
function write_csv_str(wb/*:Workbook*/, o/*:WriteOpts*/) {
var idx = 0;
@ -12522,12 +12558,12 @@ function write_string_type(out/*:string*/, opts/*:WriteOpts*/) {
switch(opts.type) {
case "base64": return Base64.encode(out);
case "binary": return out;
case "file": return _fs.writeFileSync(opts.file, out, {encoding:'utf8'});
case "file": return _fs.writeFileSync(opts.file, out, 'utf8');
case "buffer": {
if(has_buf) return new Buffer(out, 'utf8');
else return out.split("").map(function(c) { return c.charCodeAt(0); });
} break;
default: return out;
default: throw new Error("Unrecognized type " + opts.type);
}
}

52
xlsx.js

@ -4,7 +4,7 @@
/*jshint funcscope:true, eqnull:true */
var XLSX = {};
(function make_xlsx(XLSX){
XLSX.version = '0.8.7';
XLSX.version = '0.8.8';
var current_codepage = 1200, current_cptable;
if(typeof module !== "undefined" && typeof require !== 'undefined') {
if(typeof cptable === 'undefined') cptable = require('./dist/cpexcel.js');
@ -1293,10 +1293,9 @@ function evert_arr(obj) {
return o;
}
/* TODO: date1904 logic */
function datenum(v, date1904) {
if(date1904) v+=1462;
var epoch = Date.parse(v);
var epoch = v.getTime();
if(date1904) epoch += 1462*24*60*60*1000;
return (epoch + 2209161600000) / (24 * 60 * 60 * 1000);
}
@ -7859,6 +7858,7 @@ function get_cell_style(styles, cell, opts) {
}
function safe_format(p, fmtid, fillid, opts) {
if(p.t === 'd' && typeof p.v === 'string') p.v = new Date(p.v);
try {
if(p.t === 'e') p.w = p.w || BErr[p.v];
else if(fmtid === 0) {
@ -8029,7 +8029,7 @@ function write_ws_xml_cell(cell, ref, ws, opts, idx, wb) {
if(opts.cellDates) vv = new Date(cell.v).toISOString();
else {
cell.t = 'n';
vv = ''+(cell.v = datenum(cell.v));
vv = ''+(cell.v = datenum(new Date(cell.v)));
if(typeof cell.z === 'undefined') cell.z = SSF._table[14];
}
break;
@ -8157,7 +8157,7 @@ return function parse_ws_xml_data(sdata, s, opts, guess) {
break; // inline string
case 'b': p.v = parsexmlbool(p.v); break;
case 'd':
if(!opts.cellDates) { p.v = datenum(p.v); p.t = 'n'; }
if(!opts.cellDates) { p.v = datenum(new Date(p.v)); p.t = 'n'; }
break;
/* error string in .v, number in .v */
case 'e': p.w = p.v; p.v = RBErr[p.v]; break;
@ -9486,6 +9486,7 @@ function xlml_normalize(d) {
var xlmlregex = /<(\/?)([a-z0-9]*:|)(\w+)[^>]*>/mg;
function parse_xlml_xml(d, opts) {
var str = debom(xlml_normalize(d));
if(str.substr(0,1000).indexOf("<html") >= 0) return parse_html(str, opts);
var Rn;
var state = [], tmp;
var sheets = {}, sheetnames = [], cursheet = {}, sheetname = "";
@ -12109,6 +12110,41 @@ function write_biff_buf(wb, o) {
// TODO
return ba.end();
}
/* TODO: in browser attach to DOM; in node use an html parser */
/*
 * Parse the first <table> found in an HTML string into a one-sheet workbook.
 *
 * Rows are found by splitting on <tr ...> tags and cells by splitting on
 * "</td>"; only chunks that begin with "<td" (after leading whitespace) are
 * treated as rows / cells.  The cell text is what remains after stripping
 * leading and trailing tags.  Text that compares equal to its numeric
 * coercion is stored as a number cell (t:'n'), everything else as a string
 * cell (t:'s').  Empty cells are skipped but still occupy a column.
 *
 * str:  HTML source text
 * opts: parse options (not read by this function)
 * Returns a workbook whose only sheet is named "Sheet1".
 * Throws Error when no "<table" / "</table" pair is present.
 */
function parse_html(str, opts) {
	var ws = ({});
	var o = { SheetNames: ["Sheet1"], Sheets: {Sheet1:ws} };
	var i = str.indexOf("<table"), j = str.indexOf("</table");
	if(i == -1 || j == -1) throw new Error("Invalid HTML: missing <table> / </table> pair");
	var rows = str.slice(i, j).split(/<tr[^>]*>/);
	var R = 0, C = 0;
	/* sentinel start values are replaced by the first recorded cell */
	var range = {s:{r:10000000, c:10000000},e:{r:0,c:0}};
	for(i = 0; i < rows.length; ++i) {
		/* tolerate whitespace / newlines between <tr> and the first <td> */
		var row = rows[i].replace(/^\s+/, "");
		if(row.substr(0,3) != "<td") continue;
		var cells = row.split("</td>");
		for(j = 0, C = 0; j < cells.length; ++j) {
			/* tolerate whitespace / newlines between </td> and the next <td> */
			var m = cells[j].replace(/^\s+/, ""), cc = 0;
			if(m.substr(0,3) != "<td") continue;
			/* TODO: parse styles etc */
			/* drop leading tags (the <td ...> itself and any directly nested opening tags) */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc+1);
			/* drop trailing tags */
			while(m.indexOf(">") > -1) m = m.slice(0, m.lastIndexOf("<"));
			/* TODO: generate stub cells */
			if(m.length) {
				if(range.s.r > R) range.s.r = R;
				if(range.e.r < R) range.e.r = R;
				if(range.s.c > C) range.s.c = C;
				if(range.e.c < C) range.e.c = C;
				var coord = encode_cell({r:R, c:C});
				/* TODO: value parsing */
				/* whitespace-only text coerces to 0, so require a non-blank character */
				if(/\S/.test(m) && m == +m) ws[coord] = {t:'n', v:+m};
				else ws[coord] = {t:'s', v:m};
			}
			/* advance after addressing so the first cell of a row is column 0 (A) */
			++C;
		}
		++R;
	}
	/* no cell recorded: collapse the sentinel to A1 instead of emitting a bogus range */
	if(range.s.r > range.e.r) range.s = {r:0, c:0};
	ws['!ref'] = encode_range(range);
	return o;
}
/* actual implementation in utils, wrappers are for read/write */
function write_csv_str(wb, o) {
var idx = 0;
@ -12476,12 +12512,12 @@ function write_string_type(out, opts) {
switch(opts.type) {
case "base64": return Base64.encode(out);
case "binary": return out;
case "file": return _fs.writeFileSync(opts.file, out, {encoding:'utf8'});
case "file": return _fs.writeFileSync(opts.file, out, 'utf8');
case "buffer": {
if(has_buf) return new Buffer(out, 'utf8');
else return out.split("").map(function(c) { return c.charCodeAt(0); });
} break;
default: return out;
default: throw new Error("Unrecognized type " + opts.type);
}
}

@ -2,7 +2,7 @@
/* uncomment the next line for encoding support */
/*:: declare var XLSX: XLSXModule; */
/*:: declare var self: DedicatedWorkerGlobalScope; */
//importScripts('dist/cpexcel.js');
importScripts('dist/cpexcel.js');
importScripts('jszip.js');
importScripts('xlsx.js');
/* uncomment the next line for ODS support */

@ -1,6 +1,6 @@
/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */
/* uncomment the next line for encoding support */
//importScripts('dist/cpexcel.js');
importScripts('dist/cpexcel.js');
importScripts('jszip.js');
importScripts('xlsx.js');
/* uncomment the next line for ODS support */

@ -2,7 +2,7 @@
/* uncomment the next line for encoding support */
/*:: declare var XLSX: XLSXModule; */
/*:: declare var self: DedicatedWorkerGlobalScope; */
//importScripts('dist/cpexcel.js');
importScripts('dist/cpexcel.js');
importScripts('jszip.js');
importScripts('xlsx.js');
/* uncomment the next line for ODS support */

@ -1,6 +1,6 @@
/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */
/* uncomment the next line for encoding support */
//importScripts('dist/cpexcel.js');
importScripts('dist/cpexcel.js');
importScripts('jszip.js');
importScripts('xlsx.js');
/* uncomment the next line for ODS support */

@ -2,7 +2,7 @@
/* uncomment the next line for encoding support */
/*:: declare var XLSX: XLSXModule; */
/*:: declare var self: DedicatedWorkerGlobalScope; */
//importScripts('dist/cpexcel.js');
importScripts('dist/cpexcel.js');
importScripts('jszip.js');
importScripts('xlsx.js');
/* uncomment the next line for ODS support */

@ -1,6 +1,6 @@
/* xlsx.js (C) 2013-present SheetJS -- http://sheetjs.com */
/* uncomment the next line for encoding support */
//importScripts('dist/cpexcel.js');
importScripts('dist/cpexcel.js');
importScripts('jszip.js');
importScripts('xlsx.js');
/* uncomment the next line for ODS support */