HTML TD 't' attribute (fixes #917)

note: @sheetjsdev authored commit, original PR date/author used
This commit is contained in:
大黄蜂coder 2017-12-12 14:21:28 +08:00 committed by SheetJS
parent c9cab8078c
commit b17a09849a
8 changed files with 102 additions and 77 deletions

View File

@ -1493,7 +1493,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
|`codepage` | 1252 | If specified, use code page when appropriate ** |
|`codepage` | | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
@ -2230,6 +2230,11 @@ Excel HTML worksheets include special metadata encoded in styles. For example,
`mso-number-format` is a localized string containing the number format. Despite
the metadata the output is valid HTML, although it does accept bare `&` symbols.
The writer adds type metadata to the TD elements via the `t` attribute. The
parser looks for that attribute and overrides the default interpretation. For
example, text like `<td>12345</td>` will be parsed as numbers but
`<td t="s">12345</td>` will be parsed as text.
</details>
#### Rich Text Format (RTF)

View File

@ -28,32 +28,25 @@ var HTML_ = (function() {
var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
CS = tag.colspan ? +tag.colspan : 1;
if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var _t/*:string*/ = tag.t || "";
/* TODO: generate stub cells */
if(!m.length) { C += CS; continue; }
m = htmldecode(unescapexml(m));
if(range.s.r > R) range.s.r = R;
if(range.e.r < R) range.e.r = R;
if(range.s.c > C) range.s.c = C;
if(range.e.c < C) range.e.c = C;
if(opts.dense) {
if(!ws[R]) ws[R] = [];
if(!m.length){}
else if(opts.raw || !m.trim().length) ws[R][C] = {t:'s', v:m};
else if(m === 'TRUE') ws[R][C] = {t:'b', v:true};
else if(m === 'FALSE') ws[R][C] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
else ws[R][C] = {t:'s', v:m};
} else {
var coord/*:string*/ = encode_cell({r:R, c:C});
/* TODO: value parsing */
if(!m.length){}
else if(opts.raw) ws[coord] = {t:'s', v:m};
else if(opts.raw || !m.trim().length) ws[coord] = {t:'s', v:m};
else if(m === 'TRUE') ws[coord] = {t:'b', v:true};
else if(m === 'FALSE') ws[coord] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
else ws[coord] = {t:'s', v:m};
if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R;
if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C;
if(!m.length) continue;
var o/*:Cell*/ = {t:'s', v:m};
if(opts.raw || !m.trim().length || _t == 's'){}
else if(m === 'TRUE') o = {t:'b', v:true};
else if(m === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)};
else if(!isNaN(fuzzydate(m).getDate())) {
o = ({t:'d', v:parseDate(m)}/*:any*/);
if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);
o.z = opts.dateNF || SSF._table[14];
}
if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
else ws[encode_cell({r:R, c:C})] = o;
C += CS;
}
}
@ -84,6 +77,7 @@ var HTML_ = (function() {
var sp = {};
if(RS > 1) sp.rowspan = RS;
if(CS > 1) sp.colspan = CS;
sp.t = cell.t;
if(o.editable) w = '<span contenteditable="true">' + w + '</span>';
sp.id = "sjs-" + coord;
oo.push(writextag('td', w, sp));
@ -142,10 +136,10 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
CS = +elt.getAttribute("colspan") || 1;
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o/*:Cell*/ = {t:'s', v:v};
var _t/*:string*/ = elt.getAttribute("t") || "";
if(v != null) {
if(v.length == 0) o.t = 'z';
else if(opts.raw){}
else if(v.trim().length == 0) o.t = 's';
if(v.length == 0) o.t = _t || 'z';
else if(opts.raw || v.trim().length == 0 || _t == "s"){}
else if(v === 'TRUE') o = {t:'b', v:true};
else if(v === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};

View File

@ -253,6 +253,11 @@ Excel HTML worksheets include special metadata encoded in styles. For example,
`mso-number-format` is a localized string containing the number format. Despite
the metadata the output is valid HTML, although it does accept bare `&` symbols.
The writer adds type metadata to the TD elements via the `t` attribute. The
parser looks for that attribute and overrides the default interpretation. For
example, text like `<td>12345</td>` will be parsed as numbers but
`<td t="s">12345</td>` will be parsed as text.
</details>
#### Rich Text Format (RTF)

View File

@ -1364,7 +1364,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
|`codepage` | 1252 | If specified, use code page when appropriate ** |
|`codepage` | | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
@ -2027,6 +2027,11 @@ Excel HTML worksheets include special metadata encoded in styles. For example,
`mso-number-format` is a localized string containing the number format. Despite
the metadata the output is valid HTML, although it does accept bare `&` symbols.
The writer adds type metadata to the TD elements via the `t` attribute. The
parser looks for that attribute and overrides the default interpretation. For
example, text like `<td>12345</td>` will be parsed as numbers but
`<td t="s">12345</td>` will be parsed as text.
#### Rich Text Format (RTF)

14
test.js
View File

@ -1881,6 +1881,20 @@ describe('HTML', function() {
assert.equal(get_cell(ws, "A1").v, "A&B");
assert.equal(get_cell(ws, "B1").v, "A·B");
});
describe('type override', function() {
function chk(ws) {
assert.equal(get_cell(ws, "A1").t, "s");
assert.equal(get_cell(ws, "A1").v, "1234567890");
assert.equal(get_cell(ws, "B1").t, "n");
assert.equal(get_cell(ws, "B1").v, 1234567890);
}
var html = "<table><tr><td t=\"s\">1234567890</td><td>1234567890</td></tr></table>";
it('HTML string', function() {
var ws = X.read(html, {type:'string'}).Sheets.Sheet1; chk(ws);
chk(X.read(X.utils.sheet_to_html(ws), {type:'string'}).Sheets.Sheet1);
});
if(domtest) it('DOM', function() { chk(X.utils.table_to_sheet(get_dom_element(html))); });
});
});
describe('js -> file -> js', function() {

View File

@ -1881,6 +1881,20 @@ describe('HTML', function() {
assert.equal(get_cell(ws, "A1").v, "A&B");
assert.equal(get_cell(ws, "B1").v, "A·B");
});
describe('type override', function() {
function chk(ws) {
assert.equal(get_cell(ws, "A1").t, "s");
assert.equal(get_cell(ws, "A1").v, "1234567890");
assert.equal(get_cell(ws, "B1").t, "n");
assert.equal(get_cell(ws, "B1").v, 1234567890);
}
var html = "<table><tr><td t=\"s\">1234567890</td><td>1234567890</td></tr></table>";
it('HTML string', function() {
var ws = X.read(html, {type:'string'}).Sheets.Sheet1; chk(ws);
chk(X.read(X.utils.sheet_to_html(ws), {type:'string'}).Sheets.Sheet1);
});
if(domtest) it('DOM', function() { chk(X.utils.table_to_sheet(get_dom_element(html))); });
});
});
describe('js -> file -> js', function() {

View File

@ -17358,32 +17358,25 @@ var HTML_ = (function() {
var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
CS = tag.colspan ? +tag.colspan : 1;
if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var _t/*:string*/ = tag.t || "";
/* TODO: generate stub cells */
if(!m.length) { C += CS; continue; }
m = htmldecode(unescapexml(m));
if(range.s.r > R) range.s.r = R;
if(range.e.r < R) range.e.r = R;
if(range.s.c > C) range.s.c = C;
if(range.e.c < C) range.e.c = C;
if(opts.dense) {
if(!ws[R]) ws[R] = [];
if(!m.length){}
else if(opts.raw || !m.trim().length) ws[R][C] = {t:'s', v:m};
else if(m === 'TRUE') ws[R][C] = {t:'b', v:true};
else if(m === 'FALSE') ws[R][C] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
else ws[R][C] = {t:'s', v:m};
} else {
var coord/*:string*/ = encode_cell({r:R, c:C});
/* TODO: value parsing */
if(!m.length){}
else if(opts.raw) ws[coord] = {t:'s', v:m};
else if(opts.raw || !m.trim().length) ws[coord] = {t:'s', v:m};
else if(m === 'TRUE') ws[coord] = {t:'b', v:true};
else if(m === 'FALSE') ws[coord] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
else ws[coord] = {t:'s', v:m};
if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R;
if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C;
if(!m.length) continue;
var o/*:Cell*/ = {t:'s', v:m};
if(opts.raw || !m.trim().length || _t == 's'){}
else if(m === 'TRUE') o = {t:'b', v:true};
else if(m === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)};
else if(!isNaN(fuzzydate(m).getDate())) {
o = ({t:'d', v:parseDate(m)}/*:any*/);
if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);
o.z = opts.dateNF || SSF._table[14];
}
if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
else ws[encode_cell({r:R, c:C})] = o;
C += CS;
}
}
@ -17414,6 +17407,7 @@ var HTML_ = (function() {
var sp = {};
if(RS > 1) sp.rowspan = RS;
if(CS > 1) sp.colspan = CS;
sp.t = cell.t;
if(o.editable) w = '<span contenteditable="true">' + w + '</span>';
sp.id = "sjs-" + coord;
oo.push(writextag('td', w, sp));
@ -17472,10 +17466,10 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
CS = +elt.getAttribute("colspan") || 1;
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o/*:Cell*/ = {t:'s', v:v};
var _t/*:string*/ = elt.getAttribute("t") || "";
if(v != null) {
if(v.length == 0) o.t = 'z';
else if(opts.raw){}
else if(v.trim().length == 0) o.t = 's';
if(v.length == 0) o.t = _t || 'z';
else if(opts.raw || v.trim().length == 0 || _t == "s"){}
else if(v === 'TRUE') o = {t:'b', v:true};
else if(v === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};

44
xlsx.js generated
View File

@ -17257,32 +17257,25 @@ var HTML_ = (function() {
var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
CS = tag.colspan ? +tag.colspan : 1;
if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var _t = tag.t || "";
/* TODO: generate stub cells */
if(!m.length) { C += CS; continue; }
m = htmldecode(unescapexml(m));
if(range.s.r > R) range.s.r = R;
if(range.e.r < R) range.e.r = R;
if(range.s.c > C) range.s.c = C;
if(range.e.c < C) range.e.c = C;
if(opts.dense) {
if(!ws[R]) ws[R] = [];
if(!m.length){}
else if(opts.raw || !m.trim().length) ws[R][C] = {t:'s', v:m};
else if(m === 'TRUE') ws[R][C] = {t:'b', v:true};
else if(m === 'FALSE') ws[R][C] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
else ws[R][C] = {t:'s', v:m};
} else {
var coord = encode_cell({r:R, c:C});
/* TODO: value parsing */
if(!m.length){}
else if(opts.raw) ws[coord] = {t:'s', v:m};
else if(opts.raw || !m.trim().length) ws[coord] = {t:'s', v:m};
else if(m === 'TRUE') ws[coord] = {t:'b', v:true};
else if(m === 'FALSE') ws[coord] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
else ws[coord] = {t:'s', v:m};
if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R;
if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C;
if(!m.length) continue;
var o = {t:'s', v:m};
if(opts.raw || !m.trim().length || _t == 's'){}
else if(m === 'TRUE') o = {t:'b', v:true};
else if(m === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)};
else if(!isNaN(fuzzydate(m).getDate())) {
o = ({t:'d', v:parseDate(m)});
if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)});
o.z = opts.dateNF || SSF._table[14];
}
if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
else ws[encode_cell({r:R, c:C})] = o;
C += CS;
}
}
@ -17313,6 +17306,7 @@ var HTML_ = (function() {
var sp = {};
if(RS > 1) sp.rowspan = RS;
if(CS > 1) sp.colspan = CS;
sp.t = cell.t;
if(o.editable) w = '<span contenteditable="true">' + w + '</span>';
sp.id = "sjs-" + coord;
oo.push(writextag('td', w, sp));
@ -17371,10 +17365,10 @@ function parse_dom_table(table, _opts) {
CS = +elt.getAttribute("colspan") || 1;
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o = {t:'s', v:v};
var _t = elt.getAttribute("t") || "";
if(v != null) {
if(v.length == 0) o.t = 'z';
else if(opts.raw){}
else if(v.trim().length == 0) o.t = 's';
if(v.length == 0) o.t = _t || 'z';
else if(opts.raw || v.trim().length == 0 || _t == "s"){}
else if(v === 'TRUE') o = {t:'b', v:true};
else if(v === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};