From b17a09849a0f0bbd58715d14c5254eb2477c6dff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E5=A4=A7=E9=BB=84=E8=9C=82coder?= Date: Tue, 12 Dec 2017 14:21:28 +0800 Subject: [PATCH] HTML TD 't' attribute (fixes #917) note: @sheetjsdev authored commit, original PR date/author used --- README.md | 7 ++++++- bits/79_html.js | 44 ++++++++++++++++++------------------------ docbits/85_filetype.md | 5 +++++ misc/docs/README.md | 7 ++++++- test.js | 14 ++++++++++++++ tests/core.js | 14 ++++++++++++++ xlsx.flow.js | 44 ++++++++++++++++++------------------------ xlsx.js | 44 ++++++++++++++++++------------------------ 8 files changed, 102 insertions(+), 77 deletions(-) diff --git a/README.md b/README.md index 7906be6..262bf34 100644 --- a/README.md +++ b/README.md @@ -1493,7 +1493,7 @@ The exported `read` and `readFile` functions accept an options argument: | :---------- | ------: | :--------------------------------------------------- | |`type` | | Input data encoding (see Input Type below) | |`raw` | false | If true, plain text parsing will not parse values ** | -|`codepage` | 1252 | If specified, use code page when appropriate ** | +|`codepage` | | If specified, use code page when appropriate ** | |`cellFormula`| true | Save formulae to the .f field | |`cellHTML` | true | Parse rich text and save HTML to the `.h` field | |`cellNF` | false | Save number format string to the `.z` field | @@ -2230,6 +2230,11 @@ Excel HTML worksheets include special metadata encoded in styles. For example, `mso-number-format` is a localized string containing the number format. Despite the metadata the output is valid HTML, although it does accept bare `&` symbols. +The writer adds type metadata to the TD elements via the `t` tag. The parser +looks for those tags and overrides the default interpretation. For example, text +like `<td>12345</td>` will be parsed as numbers but `<td t="s">12345</td>` will +be parsed as text. 
+ #### Rich Text Format (RTF) diff --git a/bits/79_html.js b/bits/79_html.js index ea6e672..5e56fcd 100644 --- a/bits/79_html.js +++ b/bits/79_html.js @@ -28,32 +28,25 @@ var HTML_ = (function() { var tag = parsexmltag(cell.slice(0, cell.indexOf(">"))); CS = tag.colspan ? +tag.colspan : 1; if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + var _t/*:string*/ = tag.t || ""; /* TODO: generate stub cells */ if(!m.length) { C += CS; continue; } m = htmldecode(unescapexml(m)); - if(range.s.r > R) range.s.r = R; - if(range.e.r < R) range.e.r = R; - if(range.s.c > C) range.s.c = C; - if(range.e.c < C) range.e.c = C; - if(opts.dense) { - if(!ws[R]) ws[R] = []; - if(!m.length){} - else if(opts.raw || !m.trim().length) ws[R][C] = {t:'s', v:m}; - else if(m === 'TRUE') ws[R][C] = {t:'b', v:true}; - else if(m === 'FALSE') ws[R][C] = {t:'b', v:false}; - else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)}; - else ws[R][C] = {t:'s', v:m}; - } else { - var coord/*:string*/ = encode_cell({r:R, c:C}); - /* TODO: value parsing */ - if(!m.length){} - else if(opts.raw) ws[coord] = {t:'s', v:m}; - else if(opts.raw || !m.trim().length) ws[coord] = {t:'s', v:m}; - else if(m === 'TRUE') ws[coord] = {t:'b', v:true}; - else if(m === 'FALSE') ws[coord] = {t:'b', v:false}; - else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)}; - else ws[coord] = {t:'s', v:m}; + if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R; + if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C; + if(!m.length) continue; + var o/*:Cell*/ = {t:'s', v:m}; + if(opts.raw || !m.trim().length || _t == 's'){} + else if(m === 'TRUE') o = {t:'b', v:true}; + else if(m === 'FALSE') o = {t:'b', v:false}; + else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)}; + else if(!isNaN(fuzzydate(m).getDate())) { + o = ({t:'d', v:parseDate(m)}/*:any*/); + if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/); + o.z = opts.dateNF || 
SSF._table[14]; } + if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; } + else ws[encode_cell({r:R, c:C})] = o; C += CS; } } @@ -84,6 +77,7 @@ var HTML_ = (function() { var sp = {}; if(RS > 1) sp.rowspan = RS; if(CS > 1) sp.colspan = CS; + sp.t = cell.t; if(o.editable) w = '' + w + ''; sp.id = "sjs-" + coord; oo.push(writextag('td', w, sp)); @@ -142,10 +136,10 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { CS = +elt.getAttribute("colspan") || 1; if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); var o/*:Cell*/ = {t:'s', v:v}; + var _t/*:string*/ = elt.getAttribute("t") || ""; if(v != null) { - if(v.length == 0) o.t = 'z'; - else if(opts.raw){} - else if(v.trim().length == 0) o.t = 's'; + if(v.length == 0) o.t = _t || 'z'; + else if(opts.raw || v.trim().length == 0 || _t == "s"){} else if(v === 'TRUE') o = {t:'b', v:true}; else if(v === 'FALSE') o = {t:'b', v:false}; else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)}; diff --git a/docbits/85_filetype.md b/docbits/85_filetype.md index e75f205..8aa4592 100644 --- a/docbits/85_filetype.md +++ b/docbits/85_filetype.md @@ -253,6 +253,11 @@ Excel HTML worksheets include special metadata encoded in styles. For example, `mso-number-format` is a localized string containing the number format. Despite the metadata the output is valid HTML, although it does accept bare `&` symbols. +The writer adds type metadata to the TD elements via the `t` tag. The parser +looks for those tags and overrides the default interpretation. For example, text +like `<td>12345</td>` will be parsed as numbers but `<td t="s">12345</td>` will +be parsed as text. 
+ #### Rich Text Format (RTF) diff --git a/misc/docs/README.md b/misc/docs/README.md index 0f1bf4b..0bd4214 100644 --- a/misc/docs/README.md +++ b/misc/docs/README.md @@ -1364,7 +1364,7 @@ The exported `read` and `readFile` functions accept an options argument: | :---------- | ------: | :--------------------------------------------------- | |`type` | | Input data encoding (see Input Type below) | |`raw` | false | If true, plain text parsing will not parse values ** | -|`codepage` | 1252 | If specified, use code page when appropriate ** | +|`codepage` | | If specified, use code page when appropriate ** | |`cellFormula`| true | Save formulae to the .f field | |`cellHTML` | true | Parse rich text and save HTML to the `.h` field | |`cellNF` | false | Save number format string to the `.z` field | @@ -2027,6 +2027,11 @@ Excel HTML worksheets include special metadata encoded in styles. For example, `mso-number-format` is a localized string containing the number format. Despite the metadata the output is valid HTML, although it does accept bare `&` symbols. +The writer adds type metadata to the TD elements via the `t` tag. The parser +looks for those tags and overrides the default interpretation. For example, text +like `<td>12345</td>` will be parsed as numbers but `<td t="s">12345</td>` will +be parsed as text. + #### Rich Text Format (RTF) diff --git a/test.js b/test.js index cce7b48..159c927 100644 --- a/test.js +++ b/test.js @@ -1881,6 +1881,20 @@ describe('HTML', function() { assert.equal(get_cell(ws, "A1").v, "A&B"); assert.equal(get_cell(ws, "B1").v, "A·B"); }); + describe('type override', function() { + function chk(ws) { + assert.equal(get_cell(ws, "A1").t, "s"); + assert.equal(get_cell(ws, "A1").v, "1234567890"); + assert.equal(get_cell(ws, "B1").t, "n"); + assert.equal(get_cell(ws, "B1").v, 1234567890); + } + var html = "
<table><tr><td t=\"s\">1234567890</td><td>1234567890</td></tr></table>
"; + it('HTML string', function() { + var ws = X.read(html, {type:'string'}).Sheets.Sheet1; chk(ws); + chk(X.read(X.utils.sheet_to_html(ws), {type:'string'}).Sheets.Sheet1); + }); + if(domtest) it('DOM', function() { chk(X.utils.table_to_sheet(get_dom_element(html))); }); + }); }); describe('js -> file -> js', function() { diff --git a/tests/core.js b/tests/core.js index cce7b48..159c927 100644 --- a/tests/core.js +++ b/tests/core.js @@ -1881,6 +1881,20 @@ describe('HTML', function() { assert.equal(get_cell(ws, "A1").v, "A&B"); assert.equal(get_cell(ws, "B1").v, "A·B"); }); + describe('type override', function() { + function chk(ws) { + assert.equal(get_cell(ws, "A1").t, "s"); + assert.equal(get_cell(ws, "A1").v, "1234567890"); + assert.equal(get_cell(ws, "B1").t, "n"); + assert.equal(get_cell(ws, "B1").v, 1234567890); + } + var html = "
<table><tr><td t=\"s\">1234567890</td><td>1234567890</td></tr></table>
"; + it('HTML string', function() { + var ws = X.read(html, {type:'string'}).Sheets.Sheet1; chk(ws); + chk(X.read(X.utils.sheet_to_html(ws), {type:'string'}).Sheets.Sheet1); + }); + if(domtest) it('DOM', function() { chk(X.utils.table_to_sheet(get_dom_element(html))); }); + }); }); describe('js -> file -> js', function() { diff --git a/xlsx.flow.js b/xlsx.flow.js index 49651a3..248821e 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -17358,32 +17358,25 @@ var HTML_ = (function() { var tag = parsexmltag(cell.slice(0, cell.indexOf(">"))); CS = tag.colspan ? +tag.colspan : 1; if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + var _t/*:string*/ = tag.t || ""; /* TODO: generate stub cells */ if(!m.length) { C += CS; continue; } m = htmldecode(unescapexml(m)); - if(range.s.r > R) range.s.r = R; - if(range.e.r < R) range.e.r = R; - if(range.s.c > C) range.s.c = C; - if(range.e.c < C) range.e.c = C; - if(opts.dense) { - if(!ws[R]) ws[R] = []; - if(!m.length){} - else if(opts.raw || !m.trim().length) ws[R][C] = {t:'s', v:m}; - else if(m === 'TRUE') ws[R][C] = {t:'b', v:true}; - else if(m === 'FALSE') ws[R][C] = {t:'b', v:false}; - else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)}; - else ws[R][C] = {t:'s', v:m}; - } else { - var coord/*:string*/ = encode_cell({r:R, c:C}); - /* TODO: value parsing */ - if(!m.length){} - else if(opts.raw) ws[coord] = {t:'s', v:m}; - else if(opts.raw || !m.trim().length) ws[coord] = {t:'s', v:m}; - else if(m === 'TRUE') ws[coord] = {t:'b', v:true}; - else if(m === 'FALSE') ws[coord] = {t:'b', v:false}; - else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)}; - else ws[coord] = {t:'s', v:m}; + if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R; + if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C; + if(!m.length) continue; + var o/*:Cell*/ = {t:'s', v:m}; + if(opts.raw || !m.trim().length || _t == 's'){} + else if(m === 'TRUE') o = {t:'b', 
v:true}; + else if(m === 'FALSE') o = {t:'b', v:false}; + else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)}; + else if(!isNaN(fuzzydate(m).getDate())) { + o = ({t:'d', v:parseDate(m)}/*:any*/); + if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/); + o.z = opts.dateNF || SSF._table[14]; } + if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; } + else ws[encode_cell({r:R, c:C})] = o; C += CS; } } @@ -17414,6 +17407,7 @@ var HTML_ = (function() { var sp = {}; if(RS > 1) sp.rowspan = RS; if(CS > 1) sp.colspan = CS; + sp.t = cell.t; if(o.editable) w = '' + w + ''; sp.id = "sjs-" + coord; oo.push(writextag('td', w, sp)); @@ -17472,10 +17466,10 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ { CS = +elt.getAttribute("colspan") || 1; if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); var o/*:Cell*/ = {t:'s', v:v}; + var _t/*:string*/ = elt.getAttribute("t") || ""; if(v != null) { - if(v.length == 0) o.t = 'z'; - else if(opts.raw){} - else if(v.trim().length == 0) o.t = 's'; + if(v.length == 0) o.t = _t || 'z'; + else if(opts.raw || v.trim().length == 0 || _t == "s"){} else if(v === 'TRUE') o = {t:'b', v:true}; else if(v === 'FALSE') o = {t:'b', v:false}; else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)}; diff --git a/xlsx.js b/xlsx.js index e10fecd..dc8b0e6 100644 --- a/xlsx.js +++ b/xlsx.js @@ -17257,32 +17257,25 @@ var HTML_ = (function() { var tag = parsexmltag(cell.slice(0, cell.indexOf(">"))); CS = tag.colspan ? 
+tag.colspan : 1; if((RS = +tag.rowspan)>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); + var _t = tag.t || ""; /* TODO: generate stub cells */ if(!m.length) { C += CS; continue; } m = htmldecode(unescapexml(m)); - if(range.s.r > R) range.s.r = R; - if(range.e.r < R) range.e.r = R; - if(range.s.c > C) range.s.c = C; - if(range.e.c < C) range.e.c = C; - if(opts.dense) { - if(!ws[R]) ws[R] = []; - if(!m.length){} - else if(opts.raw || !m.trim().length) ws[R][C] = {t:'s', v:m}; - else if(m === 'TRUE') ws[R][C] = {t:'b', v:true}; - else if(m === 'FALSE') ws[R][C] = {t:'b', v:false}; - else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)}; - else ws[R][C] = {t:'s', v:m}; - } else { - var coord = encode_cell({r:R, c:C}); - /* TODO: value parsing */ - if(!m.length){} - else if(opts.raw) ws[coord] = {t:'s', v:m}; - else if(opts.raw || !m.trim().length) ws[coord] = {t:'s', v:m}; - else if(m === 'TRUE') ws[coord] = {t:'b', v:true}; - else if(m === 'FALSE') ws[coord] = {t:'b', v:false}; - else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)}; - else ws[coord] = {t:'s', v:m}; + if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R; + if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C; + if(!m.length) continue; + var o = {t:'s', v:m}; + if(opts.raw || !m.trim().length || _t == 's'){} + else if(m === 'TRUE') o = {t:'b', v:true}; + else if(m === 'FALSE') o = {t:'b', v:false}; + else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)}; + else if(!isNaN(fuzzydate(m).getDate())) { + o = ({t:'d', v:parseDate(m)}); + if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}); + o.z = opts.dateNF || SSF._table[14]; } + if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; } + else ws[encode_cell({r:R, c:C})] = o; C += CS; } } @@ -17313,6 +17306,7 @@ var HTML_ = (function() { var sp = {}; if(RS > 1) sp.rowspan = RS; if(CS > 1) sp.colspan = CS; + sp.t = cell.t; if(o.editable) w = '' + w + ''; sp.id = "sjs-" + coord; 
oo.push(writextag('td', w, sp)); @@ -17371,10 +17365,10 @@ function parse_dom_table(table, _opts) { CS = +elt.getAttribute("colspan") || 1; if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}}); var o = {t:'s', v:v}; + var _t = elt.getAttribute("t") || ""; if(v != null) { - if(v.length == 0) o.t = 'z'; - else if(opts.raw){} - else if(v.trim().length == 0) o.t = 's'; + if(v.length == 0) o.t = _t || 'z'; + else if(opts.raw || v.trim().length == 0 || _t == "s"){} else if(v === 'TRUE') o = {t:'b', v:true}; else if(v === 'FALSE') o = {t:'b', v:false}; else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};