diff --git a/.travis.yml b/.travis.yml
index aaf734d..278255a 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -32,6 +32,8 @@ before_install:
- "npm install blanket"
- "npm install xlsjs"
- "npm install coveralls mocha-lcov-reporter"
+# note: jsdom 11.x expects node >= 6 but is missing engines.node
+ - "npm install jsdom@11.x"
before_script:
- "make init"
- "cd test_files; make all; cd -"
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 332de5f..8c7d18a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,10 @@ This log is intended to keep track of backwards-incompatible changes, including
but not limited to API changes and file location changes. Minor behavioral
changes may not be included if they are not expected to break existing code.
+## Unreleased (2017-08-??)
+
+* XLS cell ixfe/XF removed
+
## 0.11.0 (2017-07-31)
* Strip `require` statements from minified version
diff --git a/README.md b/README.md
index e6d9ca2..09f07ad 100644
--- a/README.md
+++ b/README.md
@@ -1385,7 +1385,7 @@ The exported `read` and `readFile` functions accept an options argument:
| Option Name | Default | Description |
| :---------- | ------: | :--------------------------------------------------- |
| type | | Input data encoding (see Input Type below) |
-| raw | | If true, plaintext parsing will not parse values ** |
+| raw | false | If true, plaintext parsing will not parse values ** |
| cellFormula | true | Save formulae to the .f field |
| cellHTML | true | Parse rich text and save HTML to the `.h` field |
| cellNF | false | Save number format string to the `.z` field |
@@ -1473,8 +1473,8 @@ Plaintext format guessing follows the priority order:
| XML | starts with `<` |
| RTF | starts with `{\rt` |
| DSV | starts with `/sep=.$/`, separator is the specified character |
+| CSV | more unquoted `","` characters than `"\t"` chars in the first 1024 |
| TSV | one of the first 1024 characters is a tab char `"\t"` |
-| CSV | one of the first 1024 characters is a comma char `","` |
| PRN | (default) |
- HTML tags include: `html`, `table`, `head`, `meta`, `script`, `style`, `div`
@@ -1964,6 +1964,14 @@ writer proactively generates cells for formulae if values are unavailable.
Excel TXT uses tab as the delimiter and codepage 1200.
+Notes:
+
+- Like in Excel, files starting with `0x49 0x44 ("ID")` are treated as Symbolic
+ Link files. Unlike Excel, if the file does not have a valid SYLK header, it
+ will be proactively reinterpreted as CSV. There are some files with semicolon
+ delimiter that align with a valid SYLK file. For the broadest compatibility,
+ all cells with the value of `ID` are automatically wrapped in double-quotes.
+
### Other Workbook Formats
diff --git a/bits/40_harb.js b/bits/40_harb.js
index 01e83aa..abb14cf 100644
--- a/bits/40_harb.js
+++ b/bits/40_harb.js
@@ -512,6 +512,16 @@ var PRN = (function() {
return arr;
}
+ function guess_sep(str) { /* guess the field separator ("," or "\t") of the sample text str */
+   var cnt = [], instr = false, end = 0, cc = 0;
+   for(;end < str.length;++end) {
+     if((cc=str.charCodeAt(end)) == 0x22) instr = !instr; /* a double quote toggles the in-quotes state */
+     else if(!instr) cnt[cc] = (cnt[cc]||0)+1; /* tally char codes seen outside quotes only */
+   }
+   if(!cnt[0x09] || cnt[0x2C] > cnt[0x09]) return ","; /* no unquoted tab, or commas outnumber tabs */
+   return "\t"; /* unquoted tabs present and at least as frequent as commas */
+ }
+
function dsv_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
var o = opts || {};
var sep = "";
@@ -519,9 +529,8 @@ var PRN = (function() {
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:0}}/*:any*/);
- /* known sep */
if(str.substr(0,4) == "sep=" && str.charCodeAt(5) == 10) { sep = str.charAt(4); str = str.substr(6); }
- else if(str.substr(0,1024).indexOf("\t") == -1) sep = ","; else sep = "\t";
+ else sep = guess_sep(str.substr(0,1024));
var R = 0, C = 0, v = 0;
var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0;
str = str.replace(/\r\n/mg, "\n");
@@ -529,24 +538,30 @@ var PRN = (function() {
function finish_cell() {
var s = str.slice(start, end);
var cell = ({}/*:any*/);
- if(o.raw) { cell.t = 's'; cell.v = s; }
- else if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); }
+ if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"');
+ if(s.length == 0) cell.t = 'z';
+ else if(o.raw) { cell.t = 's'; cell.v = s; }
+ else if(s.charCodeAt(0) == 0x3D) {
+ if(s.charCodeAt(1) == 0x22 && s.charCodeAt(s.length - 1) == 0x22) { cell.t = 's'; cell.v = s.slice(2,-1).replace(/""/g,'"'); }
+ else if(fuzzyfmla(s)) { cell.t = 'n'; cell.f = s.substr(1); }
+ else { cell.t = 's'; cell.v = s; } }
else if(s == "TRUE") { cell.t = 'b'; cell.v = true; }
else if(s == "FALSE") { cell.t = 'b'; cell.v = false; }
- else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; cell.w = s; cell.v = v; }
+ else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; if(o.cellText !== false) cell.w = s; cell.v = v; }
else if(!isNaN(fuzzydate(s).getDate()) || _re && s.match(_re)) {
cell.z = o.dateNF || SSF._table[14];
var k = 0;
if(_re && s.match(_re)){ s=dateNF_fix(s, o.dateNF, (s.match(_re)||[])); k=1; }
if(o.cellDates) { cell.t = 'd'; cell.v = parseDate(s, k); }
else { cell.t = 'n'; cell.v = datenum(parseDate(s, k)); }
- cell.w = SSF.format(cell.z, cell.v instanceof Date ? datenum(cell.v):cell.v);
+ if(o.cellText !== false) cell.w = SSF.format(cell.z, cell.v instanceof Date ? datenum(cell.v):cell.v);
+ if(!o.cellNF) delete cell.z;
} else {
cell.t = 's';
- if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"');
cell.v = s;
}
- if(o.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = cell; }
+ if(cell.t == 'z'){}
+ else if(o.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = cell; }
else ws[encode_cell({c:C,r:R})] = cell;
start = end+1;
if(range.e.c < C) range.e.c = C;
@@ -579,7 +594,7 @@ var PRN = (function() {
case 'array': str = cc2str(d); break;
default: throw new Error("Unrecognized type " + opts.type);
}
- if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str);
+ if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3));
return prn_to_sheet_str(str, opts);
}
diff --git a/bits/61_fcommon.js b/bits/61_fcommon.js
index a23bb9a..1590c40 100644
--- a/bits/61_fcommon.js
+++ b/bits/61_fcommon.js
@@ -40,3 +40,9 @@ function shift_formula_xlsx(f/*:string*/, range/*:string*/, cell/*:string*/)/*:s
var delta = {r:c.r - s.r, c:c.c - s.c};
return shift_formula_str(f, delta);
}
+
+/* TODO: parse formula -- for now every text except a single character is accepted */
+function fuzzyfmla(f/*:string*/)/*:boolean*/ {
+  /* the DSV parser passes the cell text including its leading "=", so a lone "=" has length 1 */
+  return f.length != 1;
+}
diff --git a/bits/76_xls.js b/bits/76_xls.js
index 60f26ed..32287b5 100644
--- a/bits/76_xls.js
+++ b/bits/76_xls.js
@@ -131,6 +131,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
if(file_depth > 1) return;
if(!cell_valid) return;
if(options.cellStyles && line.XF && line.XF.data) process_cell_style(cell, line, options);
+ delete line.ixfe; delete line.XF;
lastcell = cell;
last_cell = encode_cell(cell);
if(range.s) {
@@ -240,8 +241,11 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
case 'FileSharing': break; //TODO
case 'CodePage':
/* overrides based on test cases */
- if(val === 0x5212) val = 1200;
- else if(val === 0x8001) val = 1252;
+ switch(val) {
+ case 0x5212: val = 1200; break;
+ case 0x8000: val = 10000; break;
+ case 0x8001: val = 1252; break;
+ }
opts.codepage = val;
set_cp(val);
break;
diff --git a/bits/79_html.js b/bits/79_html.js
index 8ff4bf6..ef1f835 100644
--- a/bits/79_html.js
+++ b/bits/79_html.js
@@ -37,13 +37,19 @@ var HTML_ = (function() {
if(range.e.c < C) range.e.c = C;
if(opts.dense) {
if(!ws[R]) ws[R] = [];
- if(opts.raw) ws[R][C] = {t:'s', v:m};
+ if(!m.length){}
+ else if(opts.raw) ws[R][C] = {t:'s', v:m};
+ else if(m === 'TRUE') ws[R][C] = {t:'b', v:true};
+ else if(m === 'FALSE') ws[R][C] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
else ws[R][C] = {t:'s', v:m};
} else {
var coord/*:string*/ = encode_cell({r:R, c:C});
/* TODO: value parsing */
- if(opts.raw) ws[coord] = {t:'s', v:m};
+ if(!m.length){}
+ else if(opts.raw) ws[coord] = {t:'s', v:m};
+ else if(m === 'TRUE') ws[coord] = {t:'b', v:true};
+ else if(m === 'FALSE') ws[coord] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
else ws[coord] = {t:'s', v:m};
}
@@ -126,7 +132,7 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
var row = rows[R];
var elts = row.children;
for(_C = C = 0; _C < elts.length; ++_C) {
- var elt = elts[_C], v = elts[_C].innerText || elts[_C].textContent;
+ var elt = elts[_C], v = elts[_C].innerText || elts[_C].textContent || "";
for(midx = 0; midx < merges.length; ++midx) {
var m = merges[midx];
if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
@@ -135,8 +141,11 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
CS = +elt.getAttribute("colspan") || 1;
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o/*:Cell*/ = {t:'s', v:v};
- if(v != null && v.length) {
- if(opts.raw) o = {t:'s', v:v};
+ if(v != null) {
+ if(v.length == 0) o.t = 'z';
+ else if(opts.raw){}
+ else if(v === 'TRUE') o = {t:'b', v:true};
+ else if(v === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
else if(!isNaN(fuzzydate(v).getDate())) {
o = ({t:'d', v:parseDate(v)}/*:any*/);
diff --git a/docbits/80_parseopts.md b/docbits/80_parseopts.md
index c4c4b1c..0dce235 100644
--- a/docbits/80_parseopts.md
+++ b/docbits/80_parseopts.md
@@ -5,7 +5,7 @@ The exported `read` and `readFile` functions accept an options argument:
| Option Name | Default | Description |
| :---------- | ------: | :--------------------------------------------------- |
| type | | Input data encoding (see Input Type below) |
-| raw | | If true, plaintext parsing will not parse values ** |
+| raw | false | If true, plaintext parsing will not parse values ** |
| cellFormula | true | Save formulae to the .f field |
| cellHTML | true | Parse rich text and save HTML to the `.h` field |
| cellNF | false | Save number format string to the `.z` field |
@@ -93,8 +93,8 @@ Plaintext format guessing follows the priority order:
| XML | starts with `<` |
| RTF | starts with `{\rt` |
| DSV | starts with `/sep=.$/`, separator is the specified character |
+| CSV | more unquoted `","` characters than `"\t"` chars in the first 1024 |
| TSV | one of the first 1024 characters is a tab char `"\t"` |
-| CSV | one of the first 1024 characters is a comma char `","` |
| PRN | (default) |
- HTML tags include: `html`, `table`, `head`, `meta`, `script`, `style`, `div`
diff --git a/docbits/85_filetype.md b/docbits/85_filetype.md
index cbffded..1772a8c 100644
--- a/docbits/85_filetype.md
+++ b/docbits/85_filetype.md
@@ -113,6 +113,14 @@ writer proactively generates cells for formulae if values are unavailable.
Excel TXT uses tab as the delimiter and codepage 1200.
+Notes:
+
+- Like in Excel, files starting with `0x49 0x44 ("ID")` are treated as Symbolic
+ Link files. Unlike Excel, if the file does not have a valid SYLK header, it
+ will be proactively reinterpreted as CSV. There are some files with semicolon
+ delimiter that align with a valid SYLK file. For the broadest compatibility,
+ all cells with the value of `ID` are automatically wrapped in double-quotes.
+
### Other Workbook Formats
diff --git a/misc/docs/README.md b/misc/docs/README.md
index 901bd87..ea0fe00 100644
--- a/misc/docs/README.md
+++ b/misc/docs/README.md
@@ -1274,7 +1274,7 @@ The exported `read` and `readFile` functions accept an options argument:
| Option Name | Default | Description |
| :---------- | ------: | :--------------------------------------------------- |
| type | | Input data encoding (see Input Type below) |
-| raw | | If true, plaintext parsing will not parse values ** |
+| raw | false | If true, plaintext parsing will not parse values ** |
| cellFormula | true | Save formulae to the .f field |
| cellHTML | true | Parse rich text and save HTML to the `.h` field |
| cellNF | false | Save number format string to the `.z` field |
@@ -1360,8 +1360,8 @@ Plaintext format guessing follows the priority order:
| XML | starts with `<` |
| RTF | starts with `{\rt` |
| DSV | starts with `/sep=.$/`, separator is the specified character |
+| CSV | more unquoted `","` characters than `"\t"` chars in the first 1024 |
| TSV | one of the first 1024 characters is a tab char `"\t"` |
-| CSV | one of the first 1024 characters is a comma char `","` |
| PRN | (default) |
- HTML tags include: `html`, `table`, `head`, `meta`, `script`, `style`, `div`
@@ -1809,6 +1809,14 @@ writer proactively generates cells for formulae if values are unavailable.
Excel TXT uses tab as the delimiter and codepage 1200.
+Notes:
+
+- Like in Excel, files starting with `0x49 0x44 ("ID")` are treated as Symbolic
+ Link files. Unlike Excel, if the file does not have a valid SYLK header, it
+ will be proactively reinterpreted as CSV. There are some files with semicolon
+ delimiter that align with a valid SYLK file. For the broadest compatibility,
+ all cells with the value of `ID` are automatically wrapped in double-quotes.
+
### Other Workbook Formats
diff --git a/package.json b/package.json
index c7c952e..c771ecd 100644
--- a/package.json
+++ b/package.json
@@ -31,6 +31,7 @@
"@sheetjs/uglify-js":"~2.7.3",
"@types/node":"^8.0.7",
"@types/commander":"^2.9.0",
+ "jsdom": "~11.1.0",
"dtslint": "^0.1.2",
"typescript": "2.2.0"
},
diff --git a/test.js b/test.js
index 780aa7b..57934de 100644
--- a/test.js
+++ b/test.js
@@ -1723,6 +1723,39 @@ describe('json output', function() {
});
});
+
+var codes = [["あ 1", "\u00E3\u0081\u0082 1"]]
+var plaintext_val = [
+ ["A1", 'n', -0.08, "-0.08"],
+ ["B1", 'n', 4001, "4,001"],
+ ["C1", 's', "あ 1", "あ 1"],
+ ["A2", 'n', 41.08, "$41.08"],
+ ["B2", 'n', 0.11, "11%"],
+ ["B3", 'b', true, "TRUE"],
+ ["C3", 'b', false, "FALSE"],
+ ["A3"]];
+function plaintext_test(wb, raw, sn) { /* compare a parsed workbook against the plaintext_val table */
+  var ws = wb.Sheets[sn || wb.SheetNames[0]]; /* sheet named sn, else the first sheet */
+  for(var i = 0; i < plaintext_val.length; ++i) {
+    var row = plaintext_val[i], cell = get_cell(ws, row[0]);
+    if(row.length == 1) { if(cell) { assert.equal(cell.t, 'z'); assert(!cell.v); } continue; } /* address-only row: cell must be absent or a valueless stub */
+    assert.equal(cell.v, row[raw ? 3 : 2]); assert.equal(cell.t, raw ? 's' : row[1]); /* raw mode expects the text form typed 's' */
+  }
+}
+function make_html_str(idx) { return ["<table>", /* HTML table fixture; idx picks the codes[0] variant used for C1 */
+  "<tr><td>-0.08</td><td>4,001</td><td>", codes[0][idx], "</td></tr>",
+  "<tr><td>$41.08</td><td>11%</td></tr>",
+  "<tr><td></td><td>TRUE</td><td>FALSE</td></tr>", /* empty first cell: plaintext_val expects A3 to be blank */
+"</table>" ].join(""); }
+function make_csv_str(idx) { /* CSV fixture: BOM characters followed by three newline-joined rows */
+  var bom = '\u00EF\u00BB\u00BF'; /* the UTF-8 byte order mark written as three binary-string characters */
+  var rows = [ bom + '-0.08,"4,001",' + codes[0][idx], '$41.08,11%', ',TRUE,FALSE' ];
+  return rows.join("\n");
+}
+var html_bstr = make_html_str(1), html_str = make_html_str(0);
+var csv_bstr = make_csv_str(1);
+
+
describe('csv', function() {
describe('input', function(){
var b = "1,2,3,\nTRUE,FALSE,,sheetjs\nfoo,bar,2/19/14,0.3\n,,,\nbaz,,qux,\n";
@@ -1769,6 +1802,17 @@ describe('csv', function() {
assert.equal(cell.v.getMonth(), 2);
assert.equal(cell.w, "2/3/14");
});
+ it('should interpret values by default', function() { plaintext_test(X.read(csv_bstr, {type:"binary"}), false); });
+ it('should generate strings if raw option is passed', function() { plaintext_test(X.read(csv_bstr, {type:"binary", raw:true}), true); });
+ it('should handle formulae', function() {
+ var bb = '=,=1+1,="100"';
+ var sheet = X.read(bb, {type:"binary"}).Sheets.Sheet1;
+ assert.equal(get_cell(sheet, "A1").t, 's');
+ assert.equal(get_cell(sheet, "A1").v, '=');
+ assert.equal(get_cell(sheet, "B1").f, '1+1');
+ assert.equal(get_cell(sheet, "C1").t, 's');
+ assert.equal(get_cell(sheet, "C1").v, '100');
+ });
});
describe('output', function(){
var data, ws;
@@ -1845,37 +1889,26 @@ describe('csv', function() {
});
});
+var JSDOM = null;
+var domtest = browser || (function(){try{return !!(JSDOM=require('jsdom').JSDOM);}catch(e){return 0;}})();
+
+function get_dom_element(html) { /* turn an HTML string into a DOM element for the table_to_book tests */
+  /* outside the browser JSDOM parses the markup; */
+  /* the first element under <body> is handed back */
+  if(!browser) return new JSDOM(html).window.document.body.children[0];
+  var wrapper = document.createElement('div'); /* browser: parse through a <div> that is never attached */
+  wrapper.innerHTML = html;
+  return wrapper; /* the wrapper <div> itself is returned, not its first child */
+}
+
describe('HTML', function() {
- describe('input', function(){
- var b = "-0.08 | 4,001 | \u00e3\u0081\u0082 1 |
$41.08 | 11% |
";
- it('should generate numbers by default', function() {
- var sheet = X.read(b, {type:"binary"}).Sheets.Sheet1;
- var cell = get_cell(sheet, "A1");
- assert.equal(cell.v, -0.08);
- assert.equal(cell.t, 'n');
- cell = get_cell(sheet, "B1");
- assert.equal(cell.v, 4001);
- cell = get_cell(sheet, "C1");
- assert.equal(cell.v, "あ 1");
- cell = get_cell(sheet, "A2");
- assert.equal(cell.v, 41.08);
- cell = get_cell(sheet, "B2");
- assert.equal(cell.v, .11);
- });
- it('should generate strings if raw option is passed', function() {
- var sheet = X.read(b, {type:"binary", raw:true}).Sheets.Sheet1;
- var cell = get_cell(sheet, "A1");
- assert.equal(cell.v, "-0.08");
- assert.equal(cell.t, 's');
- cell = get_cell(sheet, "B1");
- assert.equal(cell.v, "4,001");
- cell = get_cell(sheet, "C1");
- assert.equal(cell.v, "あ 1");
- cell = get_cell(sheet, "A2");
- assert.equal(cell.v, "$41.08");
- cell = get_cell(sheet, "B2");
- assert.equal(cell.v, "11%");
- });
+ describe('input string', function(){
+ it('should interpret values by default', function() { plaintext_test(X.read(html_bstr, {type:"binary"}), false); });
+ it('should generate strings if raw option is passed', function() { plaintext_test(X.read(html_bstr, {type:"binary", raw:true}), true); });
+ });
+ (domtest ? describe : describe.skip)('input DOM', function() {
+ it('should interpret values by default', function() { plaintext_test(X.utils.table_to_book(get_dom_element(html_str)), false); });
+ it('should generate strings if raw option is passed', function() { plaintext_test(X.utils.table_to_book(get_dom_element(html_str), {raw:true}), true); });
});
});
diff --git a/tests/core.js b/tests/core.js
index c4aec3e..d5a7079 100644
--- a/tests/core.js
+++ b/tests/core.js
@@ -1723,6 +1723,39 @@ describe('json output', function() {
});
});
+
+var codes = [["あ 1", "\u00E3\u0081\u0082 1"]]
+var plaintext_val = [
+ ["A1", 'n', -0.08, "-0.08"],
+ ["B1", 'n', 4001, "4,001"],
+ ["C1", 's', "あ 1", "あ 1"],
+ ["A2", 'n', 41.08, "$41.08"],
+ ["B2", 'n', 0.11, "11%"],
+ ["B3", 'b', true, "TRUE"],
+ ["C3", 'b', false, "FALSE"],
+ ["A3"]];
+function plaintext_test(wb, raw, sn) { /* compare a parsed workbook against the plaintext_val table */
+  var ws = wb.Sheets[sn || wb.SheetNames[0]]; /* sheet named sn, else the first sheet */
+  for(var i = 0; i < plaintext_val.length; ++i) {
+    var row = plaintext_val[i], cell = get_cell(ws, row[0]);
+    if(row.length == 1) { if(cell) { assert.equal(cell.t, 'z'); assert(!cell.v); } continue; } /* address-only row: cell must be absent or a valueless stub */
+    assert.equal(cell.v, row[raw ? 3 : 2]); assert.equal(cell.t, raw ? 's' : row[1]); /* raw mode expects the text form typed 's' */
+  }
+}
+function make_html_str(idx) { return ["<table>", /* HTML table fixture; idx picks the codes[0] variant used for C1 */
+  "<tr><td>-0.08</td><td>4,001</td><td>", codes[0][idx], "</td></tr>",
+  "<tr><td>$41.08</td><td>11%</td></tr>",
+  "<tr><td></td><td>TRUE</td><td>FALSE</td></tr>", /* empty first cell: plaintext_val expects A3 to be blank */
+"</table>" ].join(""); }
+function make_csv_str(idx) { /* CSV fixture: BOM characters followed by three newline-joined rows */
+  var bom = '\u00EF\u00BB\u00BF'; /* the UTF-8 byte order mark written as three binary-string characters */
+  var rows = [ bom + '-0.08,"4,001",' + codes[0][idx], '$41.08,11%', ',TRUE,FALSE' ];
+  return rows.join("\n");
+}
+var html_bstr = make_html_str(1), html_str = make_html_str(0);
+var csv_bstr = make_csv_str(1);
+
+
describe('csv', function() {
describe('input', function(){
var b = "1,2,3,\nTRUE,FALSE,,sheetjs\nfoo,bar,2/19/14,0.3\n,,,\nbaz,,qux,\n";
@@ -1769,6 +1802,17 @@ describe('csv', function() {
assert.equal(cell.v.getMonth(), 2);
assert.equal(cell.w, "2/3/14");
});
+ it('should interpret values by default', function() { plaintext_test(X.read(csv_bstr, {type:"binary"}), false); });
+ it('should generate strings if raw option is passed', function() { plaintext_test(X.read(csv_bstr, {type:"binary", raw:true}), true); });
+ it('should handle formulae', function() {
+ var bb = '=,=1+1,="100"';
+ var sheet = X.read(bb, {type:"binary"}).Sheets.Sheet1;
+ assert.equal(get_cell(sheet, "A1").t, 's');
+ assert.equal(get_cell(sheet, "A1").v, '=');
+ assert.equal(get_cell(sheet, "B1").f, '1+1');
+ assert.equal(get_cell(sheet, "C1").t, 's');
+ assert.equal(get_cell(sheet, "C1").v, '100');
+ });
});
describe('output', function(){
var data, ws;
@@ -1845,37 +1889,26 @@ describe('csv', function() {
});
});
+var JSDOM = null;
+var domtest = browser || (function(){try{return !!(JSDOM=require('jsdom').JSDOM);}catch(e){return 0;}})();
+
+function get_dom_element(html) { /* turn an HTML string into a DOM element for the table_to_book tests */
+  /* outside the browser JSDOM parses the markup; */
+  /* the first element under <body> is handed back */
+  if(!browser) return new JSDOM(html).window.document.body.children[0];
+  var wrapper = document.createElement('div'); /* browser: parse through a <div> that is never attached */
+  wrapper.innerHTML = html;
+  return wrapper; /* the wrapper <div> itself is returned, not its first child */
+}
+
describe('HTML', function() {
- describe('input', function(){
- var b = "-0.08 | 4,001 | \u00e3\u0081\u0082 1 |
$41.08 | 11% |
";
- it('should generate numbers by default', function() {
- var sheet = X.read(b, {type:"binary"}).Sheets.Sheet1;
- var cell = get_cell(sheet, "A1");
- assert.equal(cell.v, -0.08);
- assert.equal(cell.t, 'n');
- cell = get_cell(sheet, "B1");
- assert.equal(cell.v, 4001);
- cell = get_cell(sheet, "C1");
- assert.equal(cell.v, "あ 1");
- cell = get_cell(sheet, "A2");
- assert.equal(cell.v, 41.08);
- cell = get_cell(sheet, "B2");
- assert.equal(cell.v, .11);
- });
- it('should generate strings if raw option is passed', function() {
- var sheet = X.read(b, {type:"binary", raw:true}).Sheets.Sheet1;
- var cell = get_cell(sheet, "A1");
- assert.equal(cell.v, "-0.08");
- assert.equal(cell.t, 's');
- cell = get_cell(sheet, "B1");
- assert.equal(cell.v, "4,001");
- cell = get_cell(sheet, "C1");
- assert.equal(cell.v, "あ 1");
- cell = get_cell(sheet, "A2");
- assert.equal(cell.v, "$41.08");
- cell = get_cell(sheet, "B2");
- assert.equal(cell.v, "11%");
- });
+ describe('input string', function(){
+ it('should interpret values by default', function() { plaintext_test(X.read(html_bstr, {type:"binary"}), false); });
+ it('should generate strings if raw option is passed', function() { plaintext_test(X.read(html_bstr, {type:"binary", raw:true}), true); });
+ });
+ (domtest ? describe : describe.skip)('input DOM', function() {
+ it('should interpret values by default', function() { plaintext_test(X.utils.table_to_book(get_dom_element(html_str)), false); });
+ it('should generate strings if raw option is passed', function() { plaintext_test(X.utils.table_to_book(get_dom_element(html_str), {raw:true}), true); });
});
});
diff --git a/types/write.ts b/types/write.ts
index ae83389..540f7ef 100644
--- a/types/write.ts
+++ b/types/write.ts
@@ -80,7 +80,7 @@ ws['!rows'] = wsrows;
/* TEST: hyperlink note: Excel does not automatically style hyperlinks */
(ws['A3']).l = { Target: "http://sheetjs.com", Tooltip: "Visit us " };
-XLSX.utils.cell_set_hyperlink(ws['A3'], "http://sheetjs.com", "Visit us " );
+XLSX.utils.cell_set_hyperlink(ws['A3'], "http://sheetjs.com", "Visit us ");
/* TEST: built-in format */
(ws['B1']).z = "0%"; // Format Code 9
diff --git a/xlsx.flow.js b/xlsx.flow.js
index 038b89b..68aa9ad 100644
--- a/xlsx.flow.js
+++ b/xlsx.flow.js
@@ -5786,6 +5786,16 @@ var PRN = (function() {
return arr;
}
+ function guess_sep(str) { /* guess the field separator ("," or "\t") of the sample text str */
+   var cnt = [], instr = false, end = 0, cc = 0;
+   for(;end < str.length;++end) {
+     if((cc=str.charCodeAt(end)) == 0x22) instr = !instr; /* a double quote toggles the in-quotes state */
+     else if(!instr) cnt[cc] = (cnt[cc]||0)+1; /* tally char codes seen outside quotes only */
+   }
+   if(!cnt[0x09] || cnt[0x2C] > cnt[0x09]) return ","; /* no unquoted tab, or commas outnumber tabs */
+   return "\t"; /* unquoted tabs present and at least as frequent as commas */
+ }
+
function dsv_to_sheet_str(str/*:string*/, opts)/*:Worksheet*/ {
var o = opts || {};
var sep = "";
@@ -5793,9 +5803,8 @@ var PRN = (function() {
var ws/*:Worksheet*/ = o.dense ? ([]/*:any*/) : ({}/*:any*/);
var range/*:Range*/ = ({s: {c:0, r:0}, e: {c:0, r:0}}/*:any*/);
- /* known sep */
if(str.substr(0,4) == "sep=" && str.charCodeAt(5) == 10) { sep = str.charAt(4); str = str.substr(6); }
- else if(str.substr(0,1024).indexOf("\t") == -1) sep = ","; else sep = "\t";
+ else sep = guess_sep(str.substr(0,1024));
var R = 0, C = 0, v = 0;
var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0;
str = str.replace(/\r\n/mg, "\n");
@@ -5803,24 +5812,30 @@ var PRN = (function() {
function finish_cell() {
var s = str.slice(start, end);
var cell = ({}/*:any*/);
- if(o.raw) { cell.t = 's'; cell.v = s; }
- else if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); }
+ if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"');
+ if(s.length == 0) cell.t = 'z';
+ else if(o.raw) { cell.t = 's'; cell.v = s; }
+ else if(s.charCodeAt(0) == 0x3D) {
+ if(s.charCodeAt(1) == 0x22 && s.charCodeAt(s.length - 1) == 0x22) { cell.t = 's'; cell.v = s.slice(2,-1).replace(/""/g,'"'); }
+ else if(fuzzyfmla(s)) { cell.t = 'n'; cell.f = s.substr(1); }
+ else { cell.t = 's'; cell.v = s; } }
else if(s == "TRUE") { cell.t = 'b'; cell.v = true; }
else if(s == "FALSE") { cell.t = 'b'; cell.v = false; }
- else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; cell.w = s; cell.v = v; }
+ else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; if(o.cellText !== false) cell.w = s; cell.v = v; }
else if(!isNaN(fuzzydate(s).getDate()) || _re && s.match(_re)) {
cell.z = o.dateNF || SSF._table[14];
var k = 0;
if(_re && s.match(_re)){ s=dateNF_fix(s, o.dateNF, (s.match(_re)||[])); k=1; }
if(o.cellDates) { cell.t = 'd'; cell.v = parseDate(s, k); }
else { cell.t = 'n'; cell.v = datenum(parseDate(s, k)); }
- cell.w = SSF.format(cell.z, cell.v instanceof Date ? datenum(cell.v):cell.v);
+ if(o.cellText !== false) cell.w = SSF.format(cell.z, cell.v instanceof Date ? datenum(cell.v):cell.v);
+ if(!o.cellNF) delete cell.z;
} else {
cell.t = 's';
- if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"');
cell.v = s;
}
- if(o.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = cell; }
+ if(cell.t == 'z'){}
+ else if(o.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = cell; }
else ws[encode_cell({c:C,r:R})] = cell;
start = end+1;
if(range.e.c < C) range.e.c = C;
@@ -5853,7 +5868,7 @@ var PRN = (function() {
case 'array': str = cc2str(d); break;
default: throw new Error("Unrecognized type " + opts.type);
}
- if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str);
+ if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3));
return prn_to_sheet_str(str, opts);
}
@@ -8466,6 +8481,12 @@ function shift_formula_xlsx(f/*:string*/, range/*:string*/, cell/*:string*/)/*:s
var delta = {r:c.r - s.r, c:c.c - s.c};
return shift_formula_str(f, delta);
}
+
+/* TODO: parse formula -- for now every text except a single character is accepted */
+function fuzzyfmla(f/*:string*/)/*:boolean*/ {
+  /* the DSV parser passes the cell text including its leading "=", so a lone "=" has length 1 */
+  return f.length != 1;
+}
/* --- formula references point to MS-XLS --- */
/* Small helpers */
function parseread(l) { return function(blob, length) { blob.l+=l; return; }; }
@@ -14285,6 +14306,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
if(file_depth > 1) return;
if(!cell_valid) return;
if(options.cellStyles && line.XF && line.XF.data) process_cell_style(cell, line, options);
+ delete line.ixfe; delete line.XF;
lastcell = cell;
last_cell = encode_cell(cell);
if(range.s) {
@@ -14394,8 +14416,11 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
case 'FileSharing': break; //TODO
case 'CodePage':
/* overrides based on test cases */
- if(val === 0x5212) val = 1200;
- else if(val === 0x8001) val = 1252;
+ switch(val) {
+ case 0x5212: val = 1200; break;
+ case 0x8000: val = 10000; break;
+ case 0x8001: val = 1252; break;
+ }
opts.codepage = val;
set_cp(val);
break;
@@ -16412,13 +16437,19 @@ var HTML_ = (function() {
if(range.e.c < C) range.e.c = C;
if(opts.dense) {
if(!ws[R]) ws[R] = [];
- if(opts.raw) ws[R][C] = {t:'s', v:m};
+ if(!m.length){}
+ else if(opts.raw) ws[R][C] = {t:'s', v:m};
+ else if(m === 'TRUE') ws[R][C] = {t:'b', v:true};
+ else if(m === 'FALSE') ws[R][C] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
else ws[R][C] = {t:'s', v:m};
} else {
var coord/*:string*/ = encode_cell({r:R, c:C});
/* TODO: value parsing */
- if(opts.raw) ws[coord] = {t:'s', v:m};
+ if(!m.length){}
+ else if(opts.raw) ws[coord] = {t:'s', v:m};
+ else if(m === 'TRUE') ws[coord] = {t:'b', v:true};
+ else if(m === 'FALSE') ws[coord] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
else ws[coord] = {t:'s', v:m};
}
@@ -16501,7 +16532,7 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
var row = rows[R];
var elts = row.children;
for(_C = C = 0; _C < elts.length; ++_C) {
- var elt = elts[_C], v = elts[_C].innerText || elts[_C].textContent;
+ var elt = elts[_C], v = elts[_C].innerText || elts[_C].textContent || "";
for(midx = 0; midx < merges.length; ++midx) {
var m = merges[midx];
if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
@@ -16510,8 +16541,11 @@ function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
CS = +elt.getAttribute("colspan") || 1;
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o/*:Cell*/ = {t:'s', v:v};
- if(v != null && v.length) {
- if(opts.raw) o = {t:'s', v:v};
+ if(v != null) {
+ if(v.length == 0) o.t = 'z';
+ else if(opts.raw){}
+ else if(v === 'TRUE') o = {t:'b', v:true};
+ else if(v === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
else if(!isNaN(fuzzydate(v).getDate())) {
o = ({t:'d', v:parseDate(v)}/*:any*/);
diff --git a/xlsx.js b/xlsx.js
index 8ee8c93..5bccca1 100644
--- a/xlsx.js
+++ b/xlsx.js
@@ -5712,6 +5712,16 @@ var PRN = (function() {
return arr;
}
+ function guess_sep(str) { /* guess the field separator ("," or "\t") of the sample text str */
+   var cnt = [], instr = false, end = 0, cc = 0;
+   for(;end < str.length;++end) {
+     if((cc=str.charCodeAt(end)) == 0x22) instr = !instr; /* a double quote toggles the in-quotes state */
+     else if(!instr) cnt[cc] = (cnt[cc]||0)+1; /* tally char codes seen outside quotes only */
+   }
+   if(!cnt[0x09] || cnt[0x2C] > cnt[0x09]) return ","; /* no unquoted tab, or commas outnumber tabs */
+   return "\t"; /* unquoted tabs present and at least as frequent as commas */
+ }
+
function dsv_to_sheet_str(str, opts) {
var o = opts || {};
var sep = "";
@@ -5719,9 +5729,8 @@ var PRN = (function() {
var ws = o.dense ? ([]) : ({});
var range = ({s: {c:0, r:0}, e: {c:0, r:0}});
- /* known sep */
if(str.substr(0,4) == "sep=" && str.charCodeAt(5) == 10) { sep = str.charAt(4); str = str.substr(6); }
- else if(str.substr(0,1024).indexOf("\t") == -1) sep = ","; else sep = "\t";
+ else sep = guess_sep(str.substr(0,1024));
var R = 0, C = 0, v = 0;
var start = 0, end = 0, sepcc = sep.charCodeAt(0), instr = false, cc=0;
str = str.replace(/\r\n/mg, "\n");
@@ -5729,24 +5738,30 @@ var PRN = (function() {
function finish_cell() {
var s = str.slice(start, end);
var cell = ({});
- if(o.raw) { cell.t = 's'; cell.v = s; }
- else if(s.charCodeAt(0) == 0x3D) { cell.t = 'n'; cell.f = s.substr(1); }
+ if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"');
+ if(s.length == 0) cell.t = 'z';
+ else if(o.raw) { cell.t = 's'; cell.v = s; }
+ else if(s.charCodeAt(0) == 0x3D) {
+ if(s.charCodeAt(1) == 0x22 && s.charCodeAt(s.length - 1) == 0x22) { cell.t = 's'; cell.v = s.slice(2,-1).replace(/""/g,'"'); }
+ else if(fuzzyfmla(s)) { cell.t = 'n'; cell.f = s.substr(1); }
+ else { cell.t = 's'; cell.v = s; } }
else if(s == "TRUE") { cell.t = 'b'; cell.v = true; }
else if(s == "FALSE") { cell.t = 'b'; cell.v = false; }
- else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; cell.w = s; cell.v = v; }
+ else if(!isNaN(v = fuzzynum(s))) { cell.t = 'n'; if(o.cellText !== false) cell.w = s; cell.v = v; }
else if(!isNaN(fuzzydate(s).getDate()) || _re && s.match(_re)) {
cell.z = o.dateNF || SSF._table[14];
var k = 0;
if(_re && s.match(_re)){ s=dateNF_fix(s, o.dateNF, (s.match(_re)||[])); k=1; }
if(o.cellDates) { cell.t = 'd'; cell.v = parseDate(s, k); }
else { cell.t = 'n'; cell.v = datenum(parseDate(s, k)); }
- cell.w = SSF.format(cell.z, cell.v instanceof Date ? datenum(cell.v):cell.v);
+ if(o.cellText !== false) cell.w = SSF.format(cell.z, cell.v instanceof Date ? datenum(cell.v):cell.v);
+ if(!o.cellNF) delete cell.z;
} else {
cell.t = 's';
- if(s.charAt(0) == '"' && s.charAt(s.length - 1) == '"') s = s.slice(1,-1).replace(/""/g,'"');
cell.v = s;
}
- if(o.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = cell; }
+ if(cell.t == 'z'){}
+ else if(o.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = cell; }
else ws[encode_cell({c:C,r:R})] = cell;
start = end+1;
if(range.e.c < C) range.e.c = C;
@@ -5779,7 +5794,7 @@ var PRN = (function() {
case 'array': str = cc2str(d); break;
default: throw new Error("Unrecognized type " + opts.type);
}
- if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str);
+ if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3));
return prn_to_sheet_str(str, opts);
}
@@ -8389,6 +8404,12 @@ function shift_formula_xlsx(f, range, cell) {
var delta = {r:c.r - s.r, c:c.c - s.c};
return shift_formula_str(f, delta);
}
+
+/* TODO: parse formula -- for now every text except a single character is accepted */
+function fuzzyfmla(f) {
+  /* the DSV parser passes the cell text including its leading "=", so a lone "=" has length 1 */
+  return f.length != 1;
+}
/* --- formula references point to MS-XLS --- */
/* Small helpers */
function parseread(l) { return function(blob, length) { blob.l+=l; return; }; }
@@ -14200,6 +14221,7 @@ function parse_workbook(blob, options) {
if(file_depth > 1) return;
if(!cell_valid) return;
if(options.cellStyles && line.XF && line.XF.data) process_cell_style(cell, line, options);
+ delete line.ixfe; delete line.XF;
lastcell = cell;
last_cell = encode_cell(cell);
if(range.s) {
@@ -14308,8 +14330,11 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
case 'FileSharing': break; //TODO
case 'CodePage':
/* overrides based on test cases */
- if(val === 0x5212) val = 1200;
- else if(val === 0x8001) val = 1252;
+ switch(val) {
+ case 0x5212: val = 1200; break;
+ case 0x8000: val = 10000; break;
+ case 0x8001: val = 1252; break;
+ }
opts.codepage = val;
set_cp(val);
break;
@@ -16326,13 +16351,19 @@ var HTML_ = (function() {
if(range.e.c < C) range.e.c = C;
if(opts.dense) {
if(!ws[R]) ws[R] = [];
- if(opts.raw) ws[R][C] = {t:'s', v:m};
+ if(!m.length){}
+ else if(opts.raw) ws[R][C] = {t:'s', v:m};
+ else if(m === 'TRUE') ws[R][C] = {t:'b', v:true};
+ else if(m === 'FALSE') ws[R][C] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[R][C] = {t:'n', v:fuzzynum(m)};
else ws[R][C] = {t:'s', v:m};
} else {
var coord = encode_cell({r:R, c:C});
/* TODO: value parsing */
- if(opts.raw) ws[coord] = {t:'s', v:m};
+ if(!m.length){}
+ else if(opts.raw) ws[coord] = {t:'s', v:m};
+ else if(m === 'TRUE') ws[coord] = {t:'b', v:true};
+ else if(m === 'FALSE') ws[coord] = {t:'b', v:false};
else if(!isNaN(fuzzynum(m))) ws[coord] = {t:'n', v:fuzzynum(m)};
else ws[coord] = {t:'s', v:m};
}
@@ -16415,7 +16446,7 @@ function parse_dom_table(table, _opts) {
var row = rows[R];
var elts = row.children;
for(_C = C = 0; _C < elts.length; ++_C) {
- var elt = elts[_C], v = elts[_C].innerText || elts[_C].textContent;
+ var elt = elts[_C], v = elts[_C].innerText || elts[_C].textContent || "";
for(midx = 0; midx < merges.length; ++midx) {
var m = merges[midx];
if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c+1; midx = -1; }
@@ -16424,8 +16455,11 @@ function parse_dom_table(table, _opts) {
CS = +elt.getAttribute("colspan") || 1;
if((RS = +elt.getAttribute("rowspan"))>0 || CS>1) merges.push({s:{r:R,c:C},e:{r:R + (RS||1) - 1, c:C + CS - 1}});
var o = {t:'s', v:v};
- if(v != null && v.length) {
- if(opts.raw) o = {t:'s', v:v};
+ if(v != null) {
+ if(v.length == 0) o.t = 'z';
+ else if(opts.raw){}
+ else if(v === 'TRUE') o = {t:'b', v:true};
+ else if(v === 'FALSE') o = {t:'b', v:false};
else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
else if(!isNaN(fuzzydate(v).getDate())) {
o = ({t:'d', v:parseDate(v)});