HTML string parse multiple tables [ci skip]

This commit is contained in:
reviewher 2021-11-05 20:34:54 -07:00
parent b5c697e5ed
commit 3a26260c84
2 changed files with 18 additions and 1 deletions

View File

@ -59,7 +59,12 @@ var HTML_ = (function() {
return ws;
}
function html_to_book(str/*:string*/, opts)/*:Workbook*/ {
return sheet_to_workbook(html_to_sheet(str, opts), opts);
var mtch = str.match(/<table.*?>[\s\S]*?<\/table>/gi);
if(!mtch || mtch.length == 0) throw new Error("Invalid HTML: could not find <table>");
if(mtch.length == 1) return sheet_to_workbook(html_to_sheet(mtch[0], opts), opts);
var wb = utils.book_new();
mtch.forEach(function(s, idx) { utils.book_append_sheet(wb, html_to_sheet(s, opts), "Sheet" + (idx+1)); });
return wb;
}
function make_html_row(ws/*:Worksheet*/, r/*:Range*/, R/*:number*/, o/*:Sheet2HTMLOpts*/)/*:string*/ {
var M/*:Array<Range>*/ = (ws['!merges'] ||[]);

12
test.js
View File

@ -2160,6 +2160,18 @@ describe('HTML', function() {
var wb = X.read(table, {type:"string"});
assert.equal(get_cell(wb.Sheets.Sheet1, "A1").v, "foo\nbar");
});
it.skip('should generate multi-sheet workbooks', function() {
var table = "";
for(var i = 0; i < 4; ++i) table += "<table><tr><td>" + X.utils.encode_col(i) + "</td><td>" + i + "</td></tr></table>";
table += table; table += table;
var wb = X.read(table, {type: "string"});
assert.equal(wb.SheetNames.length, 4);
assert.equal(wb.SheetNames[1], "Sheet2");
for(var j = 0; j < 4; ++j) {
assert.equal(get_cell(wb.Sheets.Sheet + (j+1), "A1"), X.utils.encode_col(j));
assert.equal(get_cell(wb.Sheets.Sheet + (j+1), "B1"), j);
}
});
});
(domtest ? describe : describe.skip)('input DOM', function() {
it('should interpret values by default', function() { plaintext_test(X.utils.table_to_book(get_dom_element(html_str)), false); });