diff --git a/README.md b/README.md
index 7906be6..262bf34 100644
--- a/README.md
+++ b/README.md
@@ -1493,7 +1493,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
-|`codepage` | 1252 | If specified, use code page when appropriate ** |
+|`codepage` | | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
@@ -2230,6 +2230,11 @@ Excel HTML worksheets include special metadata encoded in styles. For example,
`mso-number-format` is a localized string containing the number format. Despite
the metadata the output is valid HTML, although it does accept bare `&` symbols.
+The writer adds type metadata to the TD elements via the `t` tag. The parser
+looks for those tags and overrides the default interpretation. For example, text
+like `<td>12345</td>` will be parsed as numbers but `<td t="s">12345</td>` will
+be parsed as text.
+
#### Rich Text Format (RTF)
diff --git a/bits/79_html.js b/bits/79_html.js
index ea6e672..5e56fcd 100644
--- a/bits/79_html.js
+++ b/bits/79_html.js
@@ -28,32 +28,25 @@ var HTML_ = (function() {
var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
CS = tag.colspan ? +tag.colspan : 1;
if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
+ var _t/*:string*/ = tag.t || "";
/* TODO: generate stub cells */
if(!m.length) { C += CS; continue; }
m = htmldecode(unescapexml(m));
- if(range.s.r > R) range.s.r = R;
- if(range.e.r < R) range.e.r = R;
- if(range.s.c > C) range.s.c = C;
- if(range.e.c < C) range.e.c = C;
- if(opts.dense) {
- if(!ws[R]) ws[R] = [];
- if(!m.length){}
- else if(opts.raw || !m.trim().length) ws[R][C] = {t:'s', v:m};
- else if(m === 'TRUE') ws[R][C] = {t:'b', v:true};
- else if(m === 'FALSE') ws[R][C] = {t:'b', v:false};
- else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
- else ws[R][C] = {t:'s', v:m};
- } else {
- var coord/*:string*/ = encode_cell({r:R, c:C});
- /* TODO: value parsing */
- if(!m.length){}
- else if(opts.raw) ws[coord] = {t:'s', v:m};
- else if(opts.raw || !m.trim().length) ws[coord] = {t:'s', v:m};
- else if(m === 'TRUE') ws[coord] = {t:'b', v:true};
- else if(m === 'FALSE') ws[coord] = {t:'b', v:false};
- else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
- else ws[coord] = {t:'s', v:m};
+ if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R;
+ if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C;
+ if(!m.length) continue;
+ var o/*:Cell*/ = {t:'s', v:m};
+ if(opts.raw || !m.trim().length || _t == 's'){}
+ else if(m === 'TRUE') o = {t:'b', v:true};
+ else if(m === 'FALSE') o = {t:'b', v:false};
+ else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)};
+ else if(!isNaN(fuzzydate(m).getDate())) {
+ o = ({t:'d', v:parseDate(m)}/*:any*/);
+ if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);
+ o.z = opts.dateNF || SSF._table[14];
}
+ if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
+ else ws[encode_cell({r:R, c:C})] = o;
C += CS;
}
}
@@ -84,6 +77,7 @@ var HTML_ = (function() {
var sp = {};
if(RS > 1) sp.rowspan = RS;
if(CS > 1) sp.colspan = CS;
+ sp.t = cell.t;
if(o.editable) w = '' + w + '';
sp.id = "sjs-" + coord;
oo.push(writextag('td', w, sp));
@@ -142,10 +136,10 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
CS = +elt.getAttribute("colspan") || 1;
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o/*:Cell*/ = {t:'s', v:v};
+ var _t/*:string*/ = elt.getAttribute("t") || "";
if(v != null) {
- if(v.length == 0) o.t = 'z';
- else if(opts.raw){}
- else if(v.trim().length == 0) o.t = 's';
+ if(v.length == 0) o.t = _t || 'z';
+ else if(opts.raw || v.trim().length == 0 || _t == "s"){}
else if(v === 'TRUE') o = {t:'b', v:true};
else if(v === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
diff --git a/docbits/85_filetype.md b/docbits/85_filetype.md
index e75f205..8aa4592 100644
--- a/docbits/85_filetype.md
+++ b/docbits/85_filetype.md
@@ -253,6 +253,11 @@ Excel HTML worksheets include special metadata encoded in styles. For example,
`mso-number-format` is a localized string containing the number format. Despite
the metadata the output is valid HTML, although it does accept bare `&` symbols.
+The writer adds type metadata to the TD elements via the `t` tag. The parser
+looks for those tags and overrides the default interpretation. For example, text
+like `<td>12345</td>` will be parsed as numbers but `<td t="s">12345</td>` will
+be parsed as text.
+
#### Rich Text Format (RTF)
diff --git a/misc/docs/README.md b/misc/docs/README.md
index 0f1bf4b..0bd4214 100644
--- a/misc/docs/README.md
+++ b/misc/docs/README.md
@@ -1364,7 +1364,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
-|`codepage` | 1252 | If specified, use code page when appropriate ** |
+|`codepage` | | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
@@ -2027,6 +2027,11 @@ Excel HTML worksheets include special metadata encoded in styles. For example,
`mso-number-format` is a localized string containing the number format. Despite
the metadata the output is valid HTML, although it does accept bare `&` symbols.
+The writer adds type metadata to the TD elements via the `t` tag. The parser
+looks for those tags and overrides the default interpretation. For example, text
+like `<td>12345</td>` will be parsed as numbers but `<td t="s">12345</td>` will
+be parsed as text.
+
#### Rich Text Format (RTF)
diff --git a/test.js b/test.js
index cce7b48..159c927 100644
--- a/test.js
+++ b/test.js
@@ -1881,6 +1881,20 @@ describe('HTML', function() {
assert.equal(get_cell(ws, "A1").v, "A&B");
assert.equal(get_cell(ws, "B1").v, "A·B");
});
+ describe('type override', function() { /* a `t` attribute on a TD must override the inferred cell type */
+ function chk(ws) { /* A1 must stay text, B1 must be parsed as a number */
+ assert.equal(get_cell(ws, "A1").t, "s");
+ assert.equal(get_cell(ws, "A1").v, "1234567890");
+ assert.equal(get_cell(ws, "B1").t, "n");
+ assert.equal(get_cell(ws, "B1").v, 1234567890);
+ }
+ var html = "<table><tr><td t=\"s\">1234567890</td><td>1234567890</td></tr></table>"; /* A1 carries t="s"; B1 has no override */
+ it('HTML string', function() {
+ var ws = X.read(html, {type:'string'}).Sheets.Sheet1; chk(ws);
+ chk(X.read(X.utils.sheet_to_html(ws), {type:'string'}).Sheets.Sheet1); /* round trip through the writer output */
+ });
+ if(domtest) it('DOM', function() { chk(X.utils.table_to_sheet(get_dom_element(html))); });
+ });
});
describe('js -> file -> js', function() {
diff --git a/tests/core.js b/tests/core.js
index cce7b48..159c927 100644
--- a/tests/core.js
+++ b/tests/core.js
@@ -1881,6 +1881,20 @@ describe('HTML', function() {
assert.equal(get_cell(ws, "A1").v, "A&B");
assert.equal(get_cell(ws, "B1").v, "A·B");
});
+ describe('type override', function() { /* a `t` attribute on a TD must override the inferred cell type */
+ function chk(ws) { /* A1 must stay text, B1 must be parsed as a number */
+ assert.equal(get_cell(ws, "A1").t, "s");
+ assert.equal(get_cell(ws, "A1").v, "1234567890");
+ assert.equal(get_cell(ws, "B1").t, "n");
+ assert.equal(get_cell(ws, "B1").v, 1234567890);
+ }
+ var html = "<table><tr><td t=\"s\">1234567890</td><td>1234567890</td></tr></table>"; /* A1 carries t="s"; B1 has no override */
+ it('HTML string', function() {
+ var ws = X.read(html, {type:'string'}).Sheets.Sheet1; chk(ws);
+ chk(X.read(X.utils.sheet_to_html(ws), {type:'string'}).Sheets.Sheet1); /* round trip through the writer output */
+ });
+ if(domtest) it('DOM', function() { chk(X.utils.table_to_sheet(get_dom_element(html))); });
+ });
});
describe('js -> file -> js', function() {
diff --git a/xlsx.flow.js b/xlsx.flow.js
index 49651a3..248821e 100644
--- a/xlsx.flow.js
+++ b/xlsx.flow.js
@@ -17358,32 +17358,25 @@ var HTML_ = (function() {
var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
CS = tag.colspan ? +tag.colspan : 1;
if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
+ var _t/*:string*/ = tag.t || "";
/* TODO: generate stub cells */
if(!m.length) { C += CS; continue; }
m = htmldecode(unescapexml(m));
- if(range.s.r > R) range.s.r = R;
- if(range.e.r < R) range.e.r = R;
- if(range.s.c > C) range.s.c = C;
- if(range.e.c < C) range.e.c = C;
- if(opts.dense) {
- if(!ws[R]) ws[R] = [];
- if(!m.length){}
- else if(opts.raw || !m.trim().length) ws[R][C] = {t:'s', v:m};
- else if(m === 'TRUE') ws[R][C] = {t:'b', v:true};
- else if(m === 'FALSE') ws[R][C] = {t:'b', v:false};
- else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
- else ws[R][C] = {t:'s', v:m};
- } else {
- var coord/*:string*/ = encode_cell({r:R, c:C});
- /* TODO: value parsing */
- if(!m.length){}
- else if(opts.raw) ws[coord] = {t:'s', v:m};
- else if(opts.raw || !m.trim().length) ws[coord] = {t:'s', v:m};
- else if(m === 'TRUE') ws[coord] = {t:'b', v:true};
- else if(m === 'FALSE') ws[coord] = {t:'b', v:false};
- else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
- else ws[coord] = {t:'s', v:m};
+ if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R;
+ if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C;
+ if(!m.length) continue;
+ var o/*:Cell*/ = {t:'s', v:m};
+ if(opts.raw || !m.trim().length || _t == 's'){}
+ else if(m === 'TRUE') o = {t:'b', v:true};
+ else if(m === 'FALSE') o = {t:'b', v:false};
+ else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)};
+ else if(!isNaN(fuzzydate(m).getDate())) {
+ o = ({t:'d', v:parseDate(m)}/*:any*/);
+ if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);
+ o.z = opts.dateNF || SSF._table[14];
}
+ if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
+ else ws[encode_cell({r:R, c:C})] = o;
C += CS;
}
}
@@ -17414,6 +17407,7 @@ var HTML_ = (function() {
var sp = {};
if(RS > 1) sp.rowspan = RS;
if(CS > 1) sp.colspan = CS;
+ sp.t = cell.t;
if(o.editable) w = '' + w + '';
sp.id = "sjs-" + coord;
oo.push(writextag('td', w, sp));
@@ -17472,10 +17466,10 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
CS = +elt.getAttribute("colspan") || 1;
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o/*:Cell*/ = {t:'s', v:v};
+ var _t/*:string*/ = elt.getAttribute("t") || "";
if(v != null) {
- if(v.length == 0) o.t = 'z';
- else if(opts.raw){}
- else if(v.trim().length == 0) o.t = 's';
+ if(v.length == 0) o.t = _t || 'z';
+ else if(opts.raw || v.trim().length == 0 || _t == "s"){}
else if(v === 'TRUE') o = {t:'b', v:true};
else if(v === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
diff --git a/xlsx.js b/xlsx.js
index e10fecd..dc8b0e6 100644
--- a/xlsx.js
+++ b/xlsx.js
@@ -17257,32 +17257,25 @@ var HTML_ = (function() {
var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
CS = tag.colspan ? +tag.colspan : 1;
if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
+ var _t = tag.t || "";
/* TODO: generate stub cells */
if(!m.length) { C += CS; continue; }
m = htmldecode(unescapexml(m));
- if(range.s.r > R) range.s.r = R;
- if(range.e.r < R) range.e.r = R;
- if(range.s.c > C) range.s.c = C;
- if(range.e.c < C) range.e.c = C;
- if(opts.dense) {
- if(!ws[R]) ws[R] = [];
- if(!m.length){}
- else if(opts.raw || !m.trim().length) ws[R][C] = {t:'s', v:m};
- else if(m === 'TRUE') ws[R][C] = {t:'b', v:true};
- else if(m === 'FALSE') ws[R][C] = {t:'b', v:false};
- else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
- else ws[R][C] = {t:'s', v:m};
- } else {
- var coord = encode_cell({r:R, c:C});
- /* TODO: value parsing */
- if(!m.length){}
- else if(opts.raw) ws[coord] = {t:'s', v:m};
- else if(opts.raw || !m.trim().length) ws[coord] = {t:'s', v:m};
- else if(m === 'TRUE') ws[coord] = {t:'b', v:true};
- else if(m === 'FALSE') ws[coord] = {t:'b', v:false};
- else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
- else ws[coord] = {t:'s', v:m};
+ if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R;
+ if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C;
+ if(!m.length) continue;
+ var o = {t:'s', v:m};
+ if(opts.raw || !m.trim().length || _t == 's'){}
+ else if(m === 'TRUE') o = {t:'b', v:true};
+ else if(m === 'FALSE') o = {t:'b', v:false};
+ else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)};
+ else if(!isNaN(fuzzydate(m).getDate())) {
+ o = ({t:'d', v:parseDate(m)});
+ if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)});
+ o.z = opts.dateNF || SSF._table[14];
}
+ if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
+ else ws[encode_cell({r:R, c:C})] = o;
C += CS;
}
}
@@ -17313,6 +17306,7 @@ var HTML_ = (function() {
var sp = {};
if(RS > 1) sp.rowspan = RS;
if(CS > 1) sp.colspan = CS;
+ sp.t = cell.t;
if(o.editable) w = '' + w + '';
sp.id = "sjs-" + coord;
oo.push(writextag('td', w, sp));
@@ -17371,10 +17365,10 @@ function parse_dom_table(table, _opts) {
CS = +elt.getAttribute("colspan") || 1;
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o = {t:'s', v:v};
+ var _t = elt.getAttribute("t") || "";
if(v != null) {
- if(v.length == 0) o.t = 'z';
- else if(opts.raw){}
- else if(v.trim().length == 0) o.t = 's';
+ if(v.length == 0) o.t = _t || 'z';
+ else if(opts.raw || v.trim().length == 0 || _t == "s"){}
else if(v === 'TRUE') o = {t:'b', v:true};
else if(v === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};