version bump 0.11.13: codepage

- binary CSV `codepage` read option (fixes #907 h/t @popovserhii)
- BIFF2-5 `codepage` read option (fixes #912 h/t @makcbrain)
- `xlsx` utility `--codepage` override option
- HTML support some common entities (fixes #914 h/t @razvanioan)
This commit is contained in:
SheetJS 2017-12-09 02:17:25 -05:00
parent eff7d153e8
commit c9cab8078c
26 changed files with 189 additions and 83 deletions

5
.gitattributes vendored
View File

@ -3,3 +3,8 @@ bits/*.js text eol=lf
test.js text eol=lf
xlsx*.js text eol=lf
*.flow.js text eol=lf
docbits/* linguist-documentation
dist/* linguist-generated=true
xlsx.js linguist-generated=true
xlsxworker.js linguist-generated=true

View File

@ -92,7 +92,6 @@ nexe: xlsx.exe ## Build nexe standalone executable
xlsx.exe: bin/xlsx.njs xlsx.js
tail -n+2 $< | sed 's#\.\./#./xlsx#g' > nexe.js
nexe -i nexe.js -o $@
head nexe.js
rm nexe.js
.PHONY: pkg

View File

@ -1493,6 +1493,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
|`codepage` | 1252 | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
@ -1526,6 +1527,8 @@ The exported `read` and `readFile` functions accept an options argument:
XLSM and XLSB store the VBA CFB object in `xl/vbaProject.bin`. BIFF8 XLS mixes
the VBA entries alongside the core Workbook entry, so the library generates a
new XLSB-compatible blob from the XLS CFB container.
- `codepage` is applied to BIFF2 - BIFF5 files without `CodePage` records and to
CSV files without BOM in `type:"binary"`. BIFF8 XLS always defaults to 1200.
- Currently only XOR encryption is supported. Unsupported error will be thrown
for files employing other encryption methods.
- WTF is mainly for development. By default, the parser will suppress read

View File

@ -44,6 +44,8 @@ program
.option('-F, --field-sep <sep>', 'CSV field separator', ",")
.option('-R, --row-sep <sep>', 'CSV row separator', "\n")
.option('-n, --sheet-rows <num>', 'Number of rows to process (0=all rows)')
.option('--codepage <cp>', 'default to specified codepage when ambiguous')
.option('--req <module>', 'require module before processing')
.option('--sst', 'generate shared string table for XLS* formats')
.option('--compress', 'use compression when writing XLSX/M/B and ODS')
.option('--read', 'read but do not generate output')
@ -91,6 +93,10 @@ if(!fs.existsSync(filename)) {
process.exit(2);
}
if(program.req) program.req.split(",").forEach(function(r) {
require((fs.existsSync(r) || fs.existsSync(r + '.js')) ? require('path').resolve(r) : r);
});
var opts = {}, wb/*:?Workbook*/;
if(program.listSheets) opts.bookSheets = true;
if(program.sheetRows) opts.sheetRows = program.sheetRows;
@ -128,6 +134,7 @@ if(program.all) {
wopts.bookVBA = true;
}
if(program.sparse) opts.dense = false; else opts.dense = true;
if(program.codepage) opts.codepage = +program.codepage;
if(program.dev) {
opts.WTF = true;

View File

@ -1 +1 @@
XLSX.version = '0.11.12';
XLSX.version = '0.11.13';

View File

@ -163,9 +163,17 @@ var matchtag = (function() {
};
})();
function htmldecode(str/*:string*/)/*:string*/ {
return str.trim().replace(/\s+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"").replace(/&nbsp;/g, " ");
}
/* htmldecode(str): reduce an HTML fragment to plain text.
   Steps, in order: trim, collapse whitespace runs to one space, turn <br> tags
   into newlines, drop every other tag, then decode a small set of named
   entities.  Returns the decoded string. */
var htmldecode = (function() {
	/* [entity name, replacement] pairs compiled once into global regexes.
	   Non-ASCII replacements are written as \u escapes so the table survives
	   re-encoding of this file (middot is U+00B7).
	   `amp` must stay last: decoding it earlier would turn "&amp;lt;" into "<"
	   instead of the literal text "&lt;". */
	var entities = [
		['nbsp', ' '], ['middot', '\u00B7'],
		['quot', '"'], ['apos', "'"], ['gt', '>'], ['lt', '<'], ['amp', '&']
	].map(function(x) { return [new RegExp('&' + x[0] + ';', "g"), x[1]]; });
	return function htmldecode(str/*:string*/)/*:string*/ {
		var o = str.trim().replace(/\s+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"");
		for(var i = 0; i < entities.length; ++i) o = o.replace(entities[i][0], entities[i][1]);
		return o;
	};
})();
var vtregex = (function(){ var vt_cache = {};
return function vt_regex(bt) {

View File

@ -828,6 +828,7 @@ var PRN = (function() {
default: throw new Error("Unrecognized type " + opts.type);
}
if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3));
else if((opts.type == 'binary' || opts.type == 'buffer') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str));
if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts);
return prn_to_sheet_str(str, opts);
}

View File

@ -200,6 +200,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
/* explicit override for some broken writers */
opts.codepage = 1200;
set_cp(1200);
var seen_codepage = false;
while(blob.l < blob.length - 1) {
var s = blob.l;
var RecordType = blob.read_shift(2);
@ -247,8 +248,8 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
case 0x8000: val = 10000; break;
case 0x8001: val = 1252; break;
}
opts.codepage = val;
set_cp(val);
set_cp(opts.codepage = val);
seen_codepage = true;
break;
case 'RRTabId': opts.rrtabid = val; break;
case 'WinProtect': opts.winlocked = val; break;
@ -346,6 +347,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
cell_valid = true;
out = ((options.dense ? [] : {})/*:any*/);
if(opts.biff < 8 && !seen_codepage) { seen_codepage = true; set_cp(opts.codepage = options.codepage || 1252); }
if(opts.biff < 5) {
if(cur_sheet === "") cur_sheet = "Sheet1";
range = {s:{r:0,c:0},e:{r:0,c:0}};

View File

@ -12,6 +12,22 @@ $ npm install -g phantomjs
$ phantomjs phantomjs.js
```
## Chrome Automation
This was tested in puppeteer 0.9.0 (Chromium revision 494755) and `chromeless`:
```bash
$ npm install puppeteer
$ node puppeteer.js
$ npm install -g chromeless
$ node chromeless.js
```
Since the main process is node, the read and write features should be placed in
the webpage. The `dist` versions are suitable for web pages.
## wkhtmltopdf
This was tested in wkhtmltopdf 0.12.4, installed using the official binaries:
@ -20,18 +36,6 @@ This was tested in wkhtmltopdf 0.12.4, installed using the official binaries:
$ wkhtmltopdf --javascript-delay 20000 http://oss.sheetjs.com/js-xlsx/tests/ test.pdf
```
## Puppeteer
This was tested in puppeteer 0.9.0 and Chromium revision 494755:
```bash
$ npm install puppeteer
$ node puppeteer.js
```
Since the main process is node, the read and write features should be placed in
the webpage. The `dist` versions are suitable for web pages.
## SlimerJS
This was tested in SlimerJS 0.10.3 and FF 52.0, installed using `brew` on OSX:

View File

@ -0,0 +1,9 @@
/* Drive the browser test page through chromeless: open TEST, wait TIME
   milliseconds, take a screenshot and print the value it resolves to. */
const { Chromeless } = require('chromeless');
const TEST = 'http://localhost:8000', TIME = 30 * 1000;
(async() => {
	const browser = new Chromeless();
	try {
		const pth = await browser.goto(TEST).wait(TIME).screenshot();
		console.log(pth);
	} finally {
		/* always close the session, even when navigation or the screenshot fails */
		await browser.end();
	}
})().catch(e=>{
	console.error(e);
	/* report the failure to the caller instead of exiting 0 */
	process.exitCode = 1;
});

26
dist/xlsx.core.min.js generated vendored

File diff suppressed because one or more lines are too long

2
dist/xlsx.core.min.map generated vendored

File diff suppressed because one or more lines are too long

24
dist/xlsx.full.min.js generated vendored

File diff suppressed because one or more lines are too long

2
dist/xlsx.full.min.map generated vendored

File diff suppressed because one or more lines are too long

23
dist/xlsx.js generated vendored
View File

@ -4,7 +4,7 @@
/*global global, exports, module, require:false, process:false, Buffer:false */
var XLSX = {};
(function make_xlsx(XLSX){
XLSX.version = '0.11.12';
XLSX.version = '0.11.13';
var current_codepage = 1200;
/*global cptable:true */
if(typeof module !== "undefined" && typeof require !== 'undefined') {
@ -2113,9 +2113,17 @@ var matchtag = (function() {
};
})();
function htmldecode(str) {
return str.trim().replace(/\s+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"").replace(/&nbsp;/g, " ");
}
/* htmldecode(str): reduce an HTML fragment to plain text.
   Steps, in order: trim, collapse whitespace runs to one space, turn <br> tags
   into newlines, drop every other tag, then decode a small set of named
   entities.  Returns the decoded string. */
var htmldecode = (function() {
	/* [entity name, replacement] pairs compiled once into global regexes.
	   Non-ASCII replacements are written as \u escapes so the table survives
	   re-encoding of this file (middot is U+00B7).
	   `amp` must stay last: decoding it earlier would turn "&amp;lt;" into "<"
	   instead of the literal text "&lt;". */
	var entities = [
		['nbsp', ' '], ['middot', '\u00B7'],
		['quot', '"'], ['apos', "'"], ['gt', '>'], ['lt', '<'], ['amp', '&']
	].map(function(x) { return [new RegExp('&' + x[0] + ';', "g"), x[1]]; });
	return function htmldecode(str) {
		var o = str.trim().replace(/\s+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"");
		for(var i = 0; i < entities.length; ++i) o = o.replace(entities[i][0], entities[i][1]);
		return o;
	};
})();
var vtregex = (function(){ var vt_cache = {};
return function vt_regex(bt) {
@ -6248,6 +6256,7 @@ var PRN = (function() {
default: throw new Error("Unrecognized type " + opts.type);
}
if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3));
else if((opts.type == 'binary' || opts.type == 'buffer') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str));
if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts);
return prn_to_sheet_str(str, opts);
}
@ -15029,6 +15038,7 @@ function parse_workbook(blob, options) {
/* explicit override for some broken writers */
opts.codepage = 1200;
set_cp(1200);
var seen_codepage = false;
while(blob.l < blob.length - 1) {
var s = blob.l;
var RecordType = blob.read_shift(2);
@ -15075,8 +15085,8 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
case 0x8000: val = 10000; break;
case 0x8001: val = 1252; break;
}
opts.codepage = val;
set_cp(val);
set_cp(opts.codepage = val);
seen_codepage = true;
break;
case 'RRTabId': opts.rrtabid = val; break;
case 'WinProtect': opts.winlocked = val; break;
@ -15174,6 +15184,7 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
cell_valid = true;
out = ((options.dense ? [] : {}));
if(opts.biff < 8 && !seen_codepage) { seen_codepage = true; set_cp(opts.codepage = options.codepage || 1252); }
if(opts.biff < 5) {
if(cur_sheet === "") cur_sheet = "Sheet1";
range = {s:{r:0,c:0},e:{r:0,c:0}};

24
dist/xlsx.min.js generated vendored

File diff suppressed because one or more lines are too long

2
dist/xlsx.min.map generated vendored

File diff suppressed because one or more lines are too long

View File

@ -6,6 +6,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
|`codepage` | 1252 | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
@ -39,6 +40,8 @@ The exported `read` and `readFile` functions accept an options argument:
XLSM and XLSB store the VBA CFB object in `xl/vbaProject.bin`. BIFF8 XLS mixes
the VBA entries alongside the core Workbook entry, so the library generates a
new XLSB-compatible blob from the XLS CFB container.
- `codepage` is applied to BIFF2 - BIFF5 files without `CodePage` records and to
CSV files without BOM in `type:"binary"`. BIFF8 XLS always defaults to 1200.
- Currently only XOR encryption is supported. Unsupported error will be thrown
for files employing other encryption methods.
- WTF is mainly for development. By default, the parser will suppress read

View File

@ -30,6 +30,7 @@ digraph G {
wk3 [label="WK3/4"];
wqb [label="WQ*\nWB*"];
qpw [label="QPW"];
eth [label="ETH"];
}
subgraph WORKBOOK {
@ -75,5 +76,7 @@ digraph G {
csf -> dbf
html -> csf
csf -> html
csf -> eth
eth -> csf
}
}

Binary file not shown.

Before

Width:  |  Height:  |  Size: 184 KiB

After

Width:  |  Height:  |  Size: 196 KiB

View File

@ -1364,6 +1364,7 @@ The exported `read` and `readFile` functions accept an options argument:
| :---------- | ------: | :--------------------------------------------------- |
|`type` | | Input data encoding (see Input Type below) |
|`raw` | false | If true, plain text parsing will not parse values ** |
|`codepage` | 1252 | If specified, use code page when appropriate ** |
|`cellFormula`| true | Save formulae to the .f field |
|`cellHTML` | true | Parse rich text and save HTML to the `.h` field |
|`cellNF` | false | Save number format string to the `.z` field |
@ -1397,6 +1398,8 @@ The exported `read` and `readFile` functions accept an options argument:
XLSM and XLSB store the VBA CFB object in `xl/vbaProject.bin`. BIFF8 XLS mixes
the VBA entries alongside the core Workbook entry, so the library generates a
new XLSB-compatible blob from the XLS CFB container.
- `codepage` is applied to BIFF2 - BIFF5 files without `CodePage` records and to
CSV files without BOM in `type:"binary"`. BIFF8 XLS always defaults to 1200.
- Currently only XOR encryption is supported. Unsupported error will be thrown
for files employing other encryption methods.
- WTF is mainly for development. By default, the parser will suppress read

View File

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.11.12",
"version": "0.11.13",
"author": "sheetjs",
"description": "SheetJS Spreadsheet data parser and writer",
"keywords": [

17
test.js
View File

@ -31,7 +31,7 @@ if(typeof process != 'undefined' && ((process||{}).env)) {
if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
}
var exp = ex.map(function(x){ return x + ".pending"; });
function test_file(x){ return ex.indexOf(x.substr(-5))>=0||exp.indexOf(x.substr(-13))>=0 || ex.indexOf(x.substr(-4))>=0||exp.indexOf(x.substr(-12))>=0; }
/* test_file(x): true when file name `x` ends in a selected extension.
   Only 4- and 5-character suffixes are compared against `ex`, and the matching
   12- and 13-character ".pending" suffixes against `exp`. */
function test_file(x){
	var tail_in = function(list, len) { return list.indexOf(x.slice(-len)) !== -1; };
	if(tail_in(ex, 5) || tail_in(exp, 13)) return true;
	return tail_in(ex, 4) || tail_in(exp, 12);
}
var files = browser ? [] : (fs.existsSync('tests.lst') ? fs.readFileSync('tests.lst', 'utf-8').split("\n").map(function(x) { return x.trim(); }) : fs.readdirSync('test_files')).filter(test_file);
var fileA = browser ? [] : (fs.existsSync('tests/testA.lst') ? fs.readFileSync('tests/testA.lst', 'utf-8').split("\n").map(function(x) { return x.trim(); }) : []).filter(test_file);
@ -1686,7 +1686,7 @@ var html_bstr = make_html_str(1), html_str = make_html_str(0);
var csv_bstr = make_csv_str(1), csv_str = make_csv_str(0);
describe('csv', function() {
describe('CSV', function() {
describe('input', function(){
var b = "1,2,3,\nTRUE,FALSE,,sheetjs\nfoo,bar,2/19/14,0.3\n,,,\nbaz,,qux,\n";
it('should generate date numbers by default', function() {
@ -1743,6 +1743,13 @@ describe('csv', function() {
assert.equal(get_cell(sheet, "C1").t, 's');
assert.equal(get_cell(sheet, "C1").v, '100');
});
/* Byte 0xD3 maps to a different character in each code page below.  Expected
   values are \u escapes so the table survives re-encoding of this file:
   1251 U+0423, 1252/1254 U+00D3, 1253 U+03A3, 1255 U+05C3, 1256 U+0633,
   10000 (Mac Roman) U+201D. */
if(!browser || typeof cptable !== 'undefined') it('should honor codepage for binary strings', function() {
	var data = "abc,def\nghi,j\xD3l";
	[[1251, '\u0423'],[1252, '\u00D3'], [1253, '\u03A3'], [1254, '\u00D3'], [1255, '\u05C3'], [1256, '\u0633'], [10000, '\u201D']].forEach(function(m) {
		var ws = X.read(data, {type:"binary", codepage:m[0]}).Sheets.Sheet1;
		assert.equal(get_cell(ws, "B2").v, "j" + m[1] + "l");
	});
});
});
describe('output', function(){
var data, ws;
@ -1868,6 +1875,12 @@ describe('HTML', function() {
assert.equal(get_cell(wb.Sheets.Sheet1, "A1").v, "foo\nbar");
});
});
/* Named entities in table cells must come back decoded; the middot is written
   as \u00B7 so the expectation survives re-encoding of this file. */
if(domtest) it('should handle entities', function() {
	var html = "<table><tr><td>A&amp;B</td><td>A&middot;B</td></tr></table>";
	var ws = X.utils.table_to_sheet(get_dom_element(html));
	assert.equal(get_cell(ws, "A1").v, "A&B");
	assert.equal(get_cell(ws, "B1").v, "A\u00B7B");
});
});
describe('js -> file -> js', function() {

View File

@ -31,7 +31,7 @@ if(typeof process != 'undefined' && ((process||{}).env)) {
if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
}
var exp = ex.map(function(x){ return x + ".pending"; });
function test_file(x){ return ex.indexOf(x.substr(-5))>=0||exp.indexOf(x.substr(-13))>=0 || ex.indexOf(x.substr(-4))>=0||exp.indexOf(x.substr(-12))>=0; }
/* test_file(x): true when file name `x` ends in a selected extension.
   Only 4- and 5-character suffixes are compared against `ex`, and the matching
   12- and 13-character ".pending" suffixes against `exp`. */
function test_file(x){
	var tail_in = function(list, len) { return list.indexOf(x.slice(-len)) !== -1; };
	if(tail_in(ex, 5) || tail_in(exp, 13)) return true;
	return tail_in(ex, 4) || tail_in(exp, 12);
}
var files = browser ? [] : (fs.existsSync('tests.lst') ? fs.readFileSync('tests.lst', 'utf-8').split("\n").map(function(x) { return x.trim(); }) : fs.readdirSync('test_files')).filter(test_file);
var fileA = browser ? [] : (fs.existsSync('tests/testA.lst') ? fs.readFileSync('tests/testA.lst', 'utf-8').split("\n").map(function(x) { return x.trim(); }) : []).filter(test_file);
@ -623,7 +623,7 @@ describe('output formats', function() {
["csv", true, true],
["txt", true, true],
["sylk", false, true],
["eth", true, true],
["eth", false, true],
["html", true, true],
["dif", false, true],
["dbf", false, false],
@ -1686,7 +1686,7 @@ var html_bstr = make_html_str(1), html_str = make_html_str(0);
var csv_bstr = make_csv_str(1), csv_str = make_csv_str(0);
describe('csv', function() {
describe('CSV', function() {
describe('input', function(){
var b = "1,2,3,\nTRUE,FALSE,,sheetjs\nfoo,bar,2/19/14,0.3\n,,,\nbaz,,qux,\n";
it('should generate date numbers by default', function() {
@ -1743,6 +1743,13 @@ describe('csv', function() {
assert.equal(get_cell(sheet, "C1").t, 's');
assert.equal(get_cell(sheet, "C1").v, '100');
});
/* Byte 0xD3 maps to a different character in each code page below.  Expected
   values are \u escapes so the table survives re-encoding of this file:
   1251 U+0423, 1252/1254 U+00D3, 1253 U+03A3, 1255 U+05C3, 1256 U+0633,
   10000 (Mac Roman) U+201D. */
if(!browser || typeof cptable !== 'undefined') it('should honor codepage for binary strings', function() {
	var data = "abc,def\nghi,j\xD3l";
	[[1251, '\u0423'],[1252, '\u00D3'], [1253, '\u03A3'], [1254, '\u00D3'], [1255, '\u05C3'], [1256, '\u0633'], [10000, '\u201D']].forEach(function(m) {
		var ws = X.read(data, {type:"binary", codepage:m[0]}).Sheets.Sheet1;
		assert.equal(get_cell(ws, "B2").v, "j" + m[1] + "l");
	});
});
});
describe('output', function(){
var data, ws;
@ -1868,6 +1875,12 @@ describe('HTML', function() {
assert.equal(get_cell(wb.Sheets.Sheet1, "A1").v, "foo\nbar");
});
});
/* Named entities in table cells must come back decoded; the middot is written
   as \u00B7 so the expectation survives re-encoding of this file. */
if(domtest) it('should handle entities', function() {
	var html = "<table><tr><td>A&amp;B</td><td>A&middot;B</td></tr></table>";
	var ws = X.utils.table_to_sheet(get_dom_element(html));
	assert.equal(get_cell(ws, "A1").v, "A&B");
	assert.equal(get_cell(ws, "B1").v, "A\u00B7B");
});
});
describe('js -> file -> js', function() {

View File

@ -4,7 +4,7 @@
/*global global, exports, module, require:false, process:false, Buffer:false */
var XLSX = {};
(function make_xlsx(XLSX){
XLSX.version = '0.11.12';
XLSX.version = '0.11.13';
var current_codepage = 1200;
/*:: declare var cptable:any; */
/*global cptable:true */
@ -2185,9 +2185,17 @@ var matchtag = (function() {
};
})();
function htmldecode(str/*:string*/)/*:string*/ {
return str.trim().replace(/\s+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"").replace(/&nbsp;/g, " ");
}
/* htmldecode(str): reduce an HTML fragment to plain text.
   Steps, in order: trim, collapse whitespace runs to one space, turn <br> tags
   into newlines, drop every other tag, then decode a small set of named
   entities.  Returns the decoded string. */
var htmldecode = (function() {
	/* [entity name, replacement] pairs compiled once into global regexes.
	   Non-ASCII replacements are written as \u escapes so the table survives
	   re-encoding of this file (middot is U+00B7).
	   `amp` must stay last: decoding it earlier would turn "&amp;lt;" into "<"
	   instead of the literal text "&lt;". */
	var entities = [
		['nbsp', ' '], ['middot', '\u00B7'],
		['quot', '"'], ['apos', "'"], ['gt', '>'], ['lt', '<'], ['amp', '&']
	].map(function(x) { return [new RegExp('&' + x[0] + ';', "g"), x[1]]; });
	return function htmldecode(str/*:string*/)/*:string*/ {
		var o = str.trim().replace(/\s+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"");
		for(var i = 0; i < entities.length; ++i) o = o.replace(entities[i][0], entities[i][1]);
		return o;
	};
})();
var vtregex = (function(){ var vt_cache = {};
return function vt_regex(bt) {
@ -6333,6 +6341,7 @@ var PRN = (function() {
default: throw new Error("Unrecognized type " + opts.type);
}
if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3));
else if((opts.type == 'binary' || opts.type == 'buffer') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str));
if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts);
return prn_to_sheet_str(str, opts);
}
@ -15128,6 +15137,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
/* explicit override for some broken writers */
opts.codepage = 1200;
set_cp(1200);
var seen_codepage = false;
while(blob.l < blob.length - 1) {
var s = blob.l;
var RecordType = blob.read_shift(2);
@ -15175,8 +15185,8 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
case 0x8000: val = 10000; break;
case 0x8001: val = 1252; break;
}
opts.codepage = val;
set_cp(val);
set_cp(opts.codepage = val);
seen_codepage = true;
break;
case 'RRTabId': opts.rrtabid = val; break;
case 'WinProtect': opts.winlocked = val; break;
@ -15274,6 +15284,7 @@ function parse_workbook(blob, options/*:ParseOpts*/)/*:Workbook*/ {
cell_valid = true;
out = ((options.dense ? [] : {})/*:any*/);
if(opts.biff < 8 && !seen_codepage) { seen_codepage = true; set_cp(opts.codepage = options.codepage || 1252); }
if(opts.biff < 5) {
if(cur_sheet === "") cur_sheet = "Sheet1";
range = {s:{r:0,c:0},e:{r:0,c:0}};

23
xlsx.js generated
View File

@ -4,7 +4,7 @@
/*global global, exports, module, require:false, process:false, Buffer:false */
var XLSX = {};
(function make_xlsx(XLSX){
XLSX.version = '0.11.12';
XLSX.version = '0.11.13';
var current_codepage = 1200;
/*global cptable:true */
if(typeof module !== "undefined" && typeof require !== 'undefined') {
@ -2113,9 +2113,17 @@ var matchtag = (function() {
};
})();
function htmldecode(str) {
return str.trim().replace(/\s+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"").replace(/&nbsp;/g, " ");
}
/* htmldecode(str): reduce an HTML fragment to plain text.
   Steps, in order: trim, collapse whitespace runs to one space, turn <br> tags
   into newlines, drop every other tag, then decode a small set of named
   entities.  Returns the decoded string. */
var htmldecode = (function() {
	/* [entity name, replacement] pairs compiled once into global regexes.
	   Non-ASCII replacements are written as \u escapes so the table survives
	   re-encoding of this file (middot is U+00B7).
	   `amp` must stay last: decoding it earlier would turn "&amp;lt;" into "<"
	   instead of the literal text "&lt;". */
	var entities = [
		['nbsp', ' '], ['middot', '\u00B7'],
		['quot', '"'], ['apos', "'"], ['gt', '>'], ['lt', '<'], ['amp', '&']
	].map(function(x) { return [new RegExp('&' + x[0] + ';', "g"), x[1]]; });
	return function htmldecode(str) {
		var o = str.trim().replace(/\s+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"");
		for(var i = 0; i < entities.length; ++i) o = o.replace(entities[i][0], entities[i][1]);
		return o;
	};
})();
var vtregex = (function(){ var vt_cache = {};
return function vt_regex(bt) {
@ -6248,6 +6256,7 @@ var PRN = (function() {
default: throw new Error("Unrecognized type " + opts.type);
}
if(bytes[0] == 0xEF && bytes[1] == 0xBB && bytes[2] == 0xBF) str = utf8read(str.slice(3));
else if((opts.type == 'binary' || opts.type == 'buffer') && typeof cptable !== 'undefined' && opts.codepage) str = cptable.utils.decode(opts.codepage, cptable.utils.encode(1252,str));
if(str.slice(0,19) == "socialcalc:version:") return ETH.to_sheet(opts.type == 'string' ? str : utf8read(str), opts);
return prn_to_sheet_str(str, opts);
}
@ -15029,6 +15038,7 @@ function parse_workbook(blob, options) {
/* explicit override for some broken writers */
opts.codepage = 1200;
set_cp(1200);
var seen_codepage = false;
while(blob.l < blob.length - 1) {
var s = blob.l;
var RecordType = blob.read_shift(2);
@ -15075,8 +15085,8 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
case 0x8000: val = 10000; break;
case 0x8001: val = 1252; break;
}
opts.codepage = val;
set_cp(val);
set_cp(opts.codepage = val);
seen_codepage = true;
break;
case 'RRTabId': opts.rrtabid = val; break;
case 'WinProtect': opts.winlocked = val; break;
@ -15174,6 +15184,7 @@ wb.opts.Date1904 = Workbook.WBProps.date1904 = val; break;
cell_valid = true;
out = ((options.dense ? [] : {}));
if(opts.biff < 8 && !seen_codepage) { seen_codepage = true; set_cp(opts.codepage = options.codepage || 1252); }
if(opts.biff < 5) {
if(cur_sheet === "") cur_sheet = "Sheet1";
range = {s:{r:0,c:0},e:{r:0,c:0}};