2017-05-17 04:23:36 +00:00
/* note: browser DOM element cannot see mso- style attrs, must parse */
2017-04-16 04:32:13 +00:00
var HTML _ = ( function ( ) {
/**
 * Parse the first <table> found in an HTML string into a worksheet object.
 *
 * Options read from `_opts`:
 *   dense     - store cells as ws[R][C] (array of arrays) instead of ws["A1"];
 *               falls back to the DENSE default when not set by the caller
 *   sheetRows - stop after this many rows
 *   raw       - keep every non-empty cell as a string (no type guessing)
 *   cellDates - keep dates as type 'd' instead of converting to numbers
 *   dateNF    - number format assigned to detected dates
 *
 * A `t="s"` attribute on a cell tag forces the cell to stay a string.
 *
 * @param str   HTML source text
 * @param _opts optional parsing options (see above)
 * @returns worksheet with '!ref' set and '!merges' set when spans were found
 * @throws Error when the text contains no <table tag
 */
function html_to_sheet(str/*:string*/, _opts)/*:Worksheet*/ {
	var opts = _opts || {};
	if(DENSE != null && opts.dense == null) opts.dense = DENSE;
	var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/);
	/* [\s\S] instead of `.` so comments that span several lines are removed as well;
	   otherwise a <table> inside a multi-line comment would be parsed */
	str = str.replace(/<!--[\s\S]*?-->/g, "");
	var mtch/*:any*/ = str.match(/<table/i);
	if(!mtch) throw new Error("Invalid HTML: could not find <table>");
	var mtch2/*:any*/ = str.match(/<\/table/i);
	var i/*:number*/ = mtch.index, j/*:number*/ = mtch2 && mtch2.index || str.length;
	/* NOTE(review): the capture group is kept as-is; split_regex presumably relies on it -- confirm */
	var rows = split_regex(str.slice(i, j), /(:?<tr[^>]*>)/i, "<tr>");
	var R = -1, C = 0, RS = 0, CS = 0;
	/* start with an inverted range so the first stored cell initialises both corners */
	var range/*:Range*/ = {s:{r:10000000, c:10000000}, e:{r:0, c:0}};
	var merges/*:Array<Range>*/ = [];
	for(i = 0; i < rows.length; ++i) {
		var row = rows[i].trim();
		var hd = row.slice(0, 3).toLowerCase();
		/* a <tr> token opens a new row; honour the sheetRows limit */
		if(hd == "<tr") { ++R; if(opts.sheetRows && opts.sheetRows <= R) { --R; break; } C = 0; continue; }
		if(hd != "<td" && hd != "<th") continue;
		var cells = row.split(/<\/t[dh]>/i);
		for(j = 0; j < cells.length; ++j) {
			var cell = cells[j].trim();
			if(!cell.match(/<t[dh]/i)) continue;
			var m = cell, cc = 0;
			/* TODO: parse styles etc */
			/* strip leading tags to reach the cell text */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc + 1);
			/* skip columns covered by a merge that started in an earlier row; restart the scan after every jump */
			for(var midx = 0; midx < merges.length; ++midx) {
				var _merge/*:Range*/ = merges[midx];
				if(_merge.s.c == C && _merge.s.r < R && R <= _merge.e.r) { C = _merge.e.c + 1; midx = -1; }
			}
			var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
			CS = tag.colspan ? +tag.colspan : 1;
			if((RS = +tag.rowspan) > 1 || CS > 1) merges.push({s:{r:R, c:C}, e:{r:R + (RS || 1) - 1, c:C + CS - 1}});
			var _t/*:string*/ = tag.t || "";
			/* TODO: generate stub cells */
			if(!m.length) { C += CS; continue; }
			m = htmldecode(m);
			if(range.s.r > R) range.s.r = R; if(range.e.r < R) range.e.r = R;
			if(range.s.c > C) range.s.c = C; if(range.e.c < C) range.e.c = C;
			/* content that decodes to nothing still occupies its column(s) */
			if(!m.length) { C += CS; continue; }
			var o/*:Cell*/ = {t:'s', v:m};
			if(opts.raw || !m.trim().length || _t == 's') {}
			else if(m === 'TRUE') o = {t:'b', v:true};
			else if(m === 'FALSE') o = {t:'b', v:false};
			else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)};
			else if(!isNaN(fuzzydate(m).getDate())) {
				o = ({t:'d', v:parseDate(m)}/*:any*/);
				if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);
				o.z = opts.dateNF || SSF._table[14];
			}
			if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
			else ws[encode_cell({r:R, c:C})] = o;
			C += CS;
		}
	}
	ws['!ref'] = encode_range(range);
	if(merges.length) ws["!merges"] = merges;
	return ws;
}
2017-04-16 04:32:13 +00:00
/* Parse an HTML string into a worksheet and wrap that worksheet in a workbook. */
function html_to_book(str/*:string*/, opts)/*:Workbook*/ {
	var sheet = html_to_sheet(str, opts);
	return sheet_to_workbook(sheet, opts);
}
2017-05-16 17:45:35 +00:00
/**
 * Render one worksheet row as a "<tr>...</tr>" string.
 *
 * @param ws worksheet (dense or sparse, selected by o.dense)
 * @param r  range whose column bounds (r.s.c .. r.e.c) are rendered
 * @param R  zero-based row index to render
 * @param o  options: `dense`, `editable` (wrap content in a contenteditable span),
 *           `id` (prefix for each cell id, defaults to "sjs")
 * @returns the row markup; cells covered by a merge but not its top-left corner are omitted
 */
function make_html_row(ws/*:Worksheet*/, r/*:Range*/, R/*:number*/, o/*:Sheet2HTMLOpts*/)/*:string*/ {
	var M/*:Array<Range>*/ = (ws['!merges'] || []);
	var oo/*:Array<string>*/ = [];
	for(var C = r.s.c; C <= r.e.c; ++C) {
		var RS = 0, CS = 0;
		for(var j = 0; j < M.length; ++j) {
			/* ignore merges that do not contain (R,C) */
			if(M[j].s.r > R || M[j].s.c > C) continue;
			if(M[j].e.r < R || M[j].e.c < C) continue;
			/* inside a merge but not at its top-left corner: emit nothing for this cell */
			if(M[j].s.r < R || M[j].s.c < C) { RS = -1; break; }
			RS = M[j].e.r - M[j].s.r + 1; CS = M[j].e.c - M[j].s.c + 1; break;
		}
		if(RS < 0) continue;
		var coord = encode_cell({r:R, c:C});
		var cell = o.dense ? (ws[R] || [])[C] : ws[coord];
		/* TODO: html entities */
		/* prefer cell.h, then the cached formatted text, formatting on demand */
		var w = (cell && cell.v != null) && (cell.h || escapehtml(cell.w || (format_cell(cell), cell.w) || "")) || "";
		var sp = ({}/*:any*/);
		if(RS > 1) sp.rowspan = RS;
		if(CS > 1) sp.colspan = CS;
		sp.t = cell && cell.t || 'z';
		if(o.editable) w = '<span contenteditable="true">' + w + '</span>';
		sp.id = (o.id || "sjs") + "-" + coord;
		if(sp.t != "z") {
			/* only emit the value attribute when there is a value to emit */
			if(cell.v != null) sp.v = cell.v;
			if(cell.z != null) sp.z = cell.z;
		}
		oo.push(writextag('td', w, sp));
	}
	return "<tr>" + oo.join("") + "</tr>";
}
2018-02-03 20:46:32 +00:00
/**
 * Build the opening <table> tag.
 *
 * @param ws worksheet (unused here, kept for the existing call signature)
 * @param R  range (unused here, kept for the existing call signature)
 * @param o  options; when `o.id` is truthy it becomes the table's id attribute
 * @returns '<table>' or '<table id="...">'
 */
function make_html_preamble(ws/*:Worksheet*/, R/*:Range*/, o/*:Sheet2HTMLOpts*/)/*:string*/ {
	var id_attr = (o && o.id) ? ' id="' + o.id + '"' : "";
	return '<table' + id_attr + '>';
}
var _BEGIN = '<html><head><meta charset="utf-8"/><title>SheetJS Table Export</title></head><body>' ;
var _END = '</body></html>' ;
2018-01-23 09:07:51 +00:00
/**
 * Render a worksheet as an HTML document containing a single <table>.
 *
 * @param ws   worksheet; its '!ref' gives the rows and columns to render
 * @param opts optional settings: `header` / `footer` replace the default document
 *             prefix / suffix, and the remaining keys are passed on to the
 *             preamble and row writers
 * @returns the HTML text
 */
function sheet_to_html(ws/*:Worksheet*/, opts/*:?Sheet2HTMLOpts*//*, wb:?Workbook*/)/*:string*/ {
	/* work on a copy: `dense` is derived from the worksheet below and must not
	   leak back into the caller's options object */
	var o = ({}/*:any*/);
	if(opts) for(var key in opts) o[key] = opts[key];
	var header = o.header != null ? o.header : _BEGIN;
	var footer = o.footer != null ? o.footer : _END;
	var out/*:Array<string>*/ = [header];
	var r = decode_range(ws['!ref']);
	/* a worksheet stored as an array is in dense (row-major) form */
	o.dense = Array.isArray(ws);
	out.push(make_html_preamble(ws, r, o));
	for(var R = r.s.r; R <= r.e.r; ++R) out.push(make_html_row(ws, r, R, o));
	out.push("</table>" + footer);
	return out.join("");
}
2017-04-16 04:32:13 +00:00
return {
to _workbook : html _to _book ,
to _sheet : html _to _sheet ,
2017-04-16 07:31:21 +00:00
_row : make _html _row ,
2017-05-16 17:45:35 +00:00
BEGIN : _BEGIN ,
END : _END ,
2017-06-10 01:47:42 +00:00
_preamble : make _html _preamble ,
2017-04-16 04:32:13 +00:00
from _sheet : sheet _to _html
} ;
} ) ( ) ;
2017-03-29 19:14:15 +00:00
2017-04-08 06:55:35 +00:00
/**
 * Build a worksheet from a DOM table element.
 *
 * Options read from `_opts`:
 *   dense     - store cells as ws[R][C] instead of ws["A1"]; falls back to the
 *               DENSE default only when the caller did not set it
 *   sheetRows - maximum number of rows to read
 *   display   - when truthy, rows and cells whose display is 'none' are skipped;
 *               otherwise hidden rows are kept and flagged in ws['!rows']
 *   raw       - keep every non-empty cell as a string (no type guessing)
 *   cellDates - keep dates as type 'd' instead of converting to numbers
 *   dateNF    - number format assigned to detected dates
 *
 * Cell attributes honoured: `v` (value used instead of innerHTML), `z` (number
 * format), `t` (type hint; "s" forces a string), `rowspan`, `colspan`.
 *
 * @param table element exposing getElementsByTagName('tr')
 * @param _opts optional parsing options (see above)
 * @returns worksheet with '!ref', plus '!merges', '!rows', '!fullref' when applicable
 */
function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
	var opts = _opts || {};
	/* same precedence as html_to_sheet: an explicit opts.dense wins over the default */
	if(DENSE != null && opts.dense == null) opts.dense = DENSE;
	var ws/*:Worksheet*/ = opts.dense ? ([]/*:any*/) : ({}/*:any*/);
	var rows/*:HTMLCollection<HTMLTableRowElement>*/ = table.getElementsByTagName('tr');
	var sheetRows = opts.sheetRows || 10000000;
	var range/*:Range*/ = {s:{r:0, c:0}, e:{r:0, c:0}};
	var merges/*:Array<Range>*/ = [], midx = 0;
	var rowinfo/*:Array<RowInfo>*/ = [];
	/* _R/_C index DOM rows/cells; R/C index worksheet rows/columns */
	var _R = 0, R = 0, _C = 0, C = 0, RS = 0, CS = 0;
	for(; _R < rows.length && R < sheetRows; ++_R) {
		var row/*:HTMLTableRowElement*/ = rows[_R];
		if(is_dom_element_hidden(row)) {
			if(opts.display) continue;
			rowinfo[R] = {hidden: true};
		}
		var elts/*:HTMLCollection<HTMLTableCellElement>*/ = (row.children/*:any*/);
		for(_C = C = 0; _C < elts.length; ++_C) {
			var elt/*:HTMLTableCellElement*/ = elts[_C];
			if(opts.display && is_dom_element_hidden(elt)) continue;
			var v/*:string*/ = elt.hasAttribute('v') ? elt.getAttribute('v') : htmldecode(elt.innerHTML);
			var z/*:string*/ = elt.getAttribute('z');
			/* skip columns already covered by a recorded merge; restart the scan after every jump */
			for(midx = 0; midx < merges.length; ++midx) {
				var m/*:Range*/ = merges[midx];
				if(m.s.c == C && m.s.r <= R && R <= m.e.r) { C = m.e.c + 1; midx = -1; }
			}
			/* TODO: figure out how to extract nonstandard mso- style */
			CS = +elt.getAttribute("colspan") || 1;
			RS = +elt.getAttribute("rowspan") || 1;
			/* a span of 1 is an ordinary cell, not a merge */
			if(RS > 1 || CS > 1) merges.push({s:{r:R, c:C}, e:{r:R + RS - 1, c:C + CS - 1}});
			var o/*:Cell*/ = {t:'s', v:v};
			var _t/*:string*/ = elt.getAttribute("t") || "";
			if(v != null) {
				if(v.length == 0) o.t = _t || 'z';
				else if(opts.raw || v.trim().length == 0 || _t == "s") {}
				else if(v === 'TRUE') o = {t:'b', v:true};
				else if(v === 'FALSE') o = {t:'b', v:false};
				else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
				else if(!isNaN(fuzzydate(v).getDate())) {
					o = ({t:'d', v:parseDate(v)}/*:any*/);
					if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);
					o.z = opts.dateNF || SSF._table[14];
				}
			}
			if(o.z === undefined && z != null) o.z = z;
			if(opts.dense) { if(!ws[R]) ws[R] = []; ws[R][C] = o; }
			else ws[encode_cell({c:C, r:R})] = o;
			if(range.e.c < C) range.e.c = C;
			C += CS;
		}
		++R;
	}
	if(merges.length) ws['!merges'] = merges;
	if(rowinfo.length) ws['!rows'] = rowinfo;
	/* a table without rows must not produce an end row of -1 */
	range.e.r = R > 0 ? R - 1 : 0;
	ws['!ref'] = encode_range(range);
	if(R >= sheetRows) ws['!fullref'] = encode_range((range.e.r = rows.length - _R + R - 1, range)); // We can count the real number of rows to parse but we don't to improve the performance
	return ws;
}
/* Parse a DOM table into a worksheet and wrap that worksheet in a workbook. */
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
	var sheet = parse_dom_table(table, opts);
	return sheet_to_workbook(sheet, opts);
}
2018-05-27 07:27:47 +00:00
/* Report whether an element's `display` resolves to 'none'. */
function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ {
	var computed/*:?function*/ = get_get_computed_style_function(element);
	var display/*:string*/ = computed ? computed(element).getPropertyValue('display') : '';
	// Fallback for cases when getComputedStyle is not available (e.g. an old browser or some Node.js environments) or doesn't work (e.g. if the element is not inserted to a document)
	if(!display) display = element.style.display;
	return display === 'none';
}
/* global getComputedStyle */
/**
 * Find a getComputedStyle implementation usable for `element`.
 *
 * @param element node whose owning window is preferred as the source
 * @returns the getComputedStyle function of the element's window, else the
 *          global getComputedStyle, else null when neither exists
 */
function get_get_computed_style_function(element/*:HTMLElement*/)/*:?function*/ {
	// The proper getComputedStyle implementation is the one defined in the element window
	// (ownerDocument may be missing, e.g. for detached or non-browser nodes, so guard each step)
	var doc = element.ownerDocument;
	var view = doc && doc.defaultView;
	if(view && typeof view.getComputedStyle === 'function') return view.getComputedStyle;
	// If it is not available, try to get one from the global namespace
	if(typeof getComputedStyle === 'function') return getComputedStyle;
	return null;
}