2017-05-17 04:23:36 +00:00
/* note: browser DOM element cannot see mso- style attrs, must parse */
2022-03-20 01:54:41 +00:00
/*
 * Parse the first <table> found in an HTML string into a worksheet.
 *
 * str   - HTML source text
 * _opts - optional settings; the keys read here are dense, sheetRows, raw,
 *         UTC, cellDates, dateNF and cellText
 *
 * Returns the worksheet. "!ref" is always assigned; "!merges" is assigned
 * only when at least one cell has a rowspan or colspan greater than 1.
 * Throws an Error when the text contains no "<table" tag.
 */
function html_to_sheet(str/*:string*/, _opts)/*:Worksheet*/ {
	var opts = _opts || {};
	var dense = (opts.dense != null) ? opts.dense : DENSE;
	var ws/*:Worksheet*/ = ({}/*:any*/);
	if(dense) ws["!data"] = [];

	/* presumably drops everything between the HTML comment delimiters -- confirm against str_remove_ng */
	str = str_remove_ng(str, "<!--", "-->");
	var mtch/*:any*/ = str.match(/<table/i);
	if(!mtch) throw new Error("Invalid HTML: could not find <table>");
	var mtch2/*:any*/ = str.match(/<\/table/i);
	/* only the text between the first <table and the first </table (or end of input) is scanned */
	var i/*:number*/ = mtch.index, j/*:number*/ = mtch2 && mtch2.index || str.length;
	/* NOTE(review): the group is written "(:?" (optional literal colon inside a capturing
	 * group), not "(?:"; left untouched because split_regex is not visible here and the
	 * loop below expects the "<tr ...>" separators to appear as their own entries */
	var rows = split_regex(str.slice(i, j), /(:?<tr[^<>]*>)/i, "<tr>");

	var R = -1, C = 0, RS = 0, CS = 0;
	/* start with an inverted range so the first stored cell shrinks it into place */
	var range/*:Range*/ = {s:{r:10000000, c:10000000}, e:{r:0, c:0}};
	var merges/*:Array<Range>*/ = [];
	for(i = 0; i < rows.length; ++i) {
		var row = rows[i].trim();
		var hd = row.slice(0, 3).toLowerCase();
		if(hd == "<tr") {
			/* a row opener: advance the row counter and stop once sheetRows rows were read */
			++R;
			if(opts.sheetRows && opts.sheetRows <= R) { --R; break; }
			C = 0;
			continue;
		}
		if(hd != "<td" && hd != "<th") continue;
		var cells = row.split(/<\/t[dh]>/i);
		for(j = 0; j < cells.length; ++j) {
			var cell = cells[j].trim();
			if(!cell.match(/<t[dh]/i)) continue;
			var m = cell, cc = 0;
			/* TODO: parse styles etc */
			/* peel off leading tags until the text starts with something other than "<" */
			while(m.charAt(0) == "<" && (cc = m.indexOf(">")) > -1) m = m.slice(cc + 1);
			/* skip columns covered by a merge that started in an earlier row; restart the
			 * scan after every jump because the new column may be covered by another merge */
			for(var midx = 0; midx < merges.length; ++midx) {
				var _merge/*:Range*/ = merges[midx];
				if(_merge.s.c == C && _merge.s.r < R && R <= _merge.e.r) { C = _merge.e.c + 1; midx = -1; }
			}
			var tag = parsexmltag(cell.slice(0, cell.indexOf(">")));
			CS = tag.colspan ? +tag.colspan : 1;
			if((RS = +tag.rowspan) > 1 || CS > 1) merges.push({s:{r:R, c:C}, e:{r:R + (RS || 1) - 1, c:C + CS - 1}});
			var _t/*:string*/ = tag.t || tag["data-t"] || "";
			/* TODO: generate stub cells */
			if(!m.length) { C += CS; continue; }
			m = htmldecode(m);
			if(range.s.r > R) range.s.r = R;
			if(range.e.r < R) range.e.r = R;
			if(range.s.c > C) range.s.c = C;
			if(range.e.c < C) range.e.c = C;
			/* the text may have become empty after decoding */
			if(!m.length) { C += CS; continue; }
			var o/*:Cell*/ = {t:'s', v:m};
			if(opts.raw || !m.trim().length || _t == 's') { /* keep the text as a string cell */ }
			else if(m === 'TRUE') o = {t:'b', v:true};
			else if(m === 'FALSE') o = {t:'b', v:false};
			else if(!isNaN(fuzzynum(m))) o = {t:'n', v:fuzzynum(m)};
			else if(!isNaN(fuzzydate(m).getDate())) {
				o = ({t:'d', v:parseDate(m)}/*:any*/);
				if(opts.UTC === false) o.v = utc_to_local(o.v);
				if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);
				o.z = opts.dateNF || table_fmt[14];
			} else if(m.charCodeAt(0) == 35 /* # */ && RBErr[m] != null) {
				o.t = 'e'; o.w = m; o.v = RBErr[m];
			}
			/* the option lives on opts; the cell object itself never carries a cellText key,
			 * so testing the cell made cellText:false a no-op */
			if(opts.cellText !== false) o.w = m;
			if(dense) {
				if(!ws["!data"][R]) ws["!data"][R] = [];
				ws["!data"][R][C] = o;
			}
			else ws[encode_cell({r:R, c:C})] = o;
			C += CS;
		}
	}
	ws['!ref'] = encode_range(range);
	if(merges.length) ws["!merges"] = merges;
	return ws;
}
/*
 * Render one worksheet row as a "<tr>...</tr>" string.
 *
 * ws - worksheet (dense when ws["!data"] is present, otherwise keyed by address)
 * r  - range whose start/end columns bound the cells that are emitted
 * R  - zero-based row index to render
 * o  - options; `editable` and `id` are read here
 *
 * Cells covered by a merge but not at its top-left corner are skipped; the
 * top-left cell carries the rowspan/colspan attributes.
 */
function make_html_row(ws/*:Worksheet*/, r/*:Range*/, R/*:number*/, o/*:Sheet2HTMLOpts*/)/*:string*/ {
	var M/*:Array<Range>*/ = (ws['!merges'] || []);
	var oo/*:Array<string>*/ = [];
	var dense = ws["!data"] != null;
	for(var C = r.s.c; C <= r.e.c; ++C) {
		var RS = 0, CS = 0;
		for(var j = 0; j < M.length; ++j) {
			/* ignore merges that do not contain (R, C) */
			if(M[j].s.r > R || M[j].s.c > C) continue;
			if(M[j].e.r < R || M[j].e.c < C) continue;
			/* inside a merge but not its top-left corner: emit nothing for this cell */
			if(M[j].s.r < R || M[j].s.c < C) { RS = -1; break; }
			RS = M[j].e.r - M[j].s.r + 1;
			CS = M[j].e.c - M[j].s.c + 1;
			break;
		}
		if(RS < 0) continue;
		var coord = encode_col(C) + encode_row(R);
		var cell = dense ? (ws["!data"][R] || [])[C] : ws[coord];
		/* non-finite numbers are rendered as error cells (code 0x24 for NaN, 0x07 otherwise) */
		if(cell && cell.t == 'n' && cell.v != null && !isFinite(cell.v)) {
			if(isNaN(cell.v)) cell = ({t:'e', v:0x24, w:BErr[0x24]});
			else cell = ({t:'e', v:0x07, w:BErr[0x07]});
		}
		/* TODO: html entities */
		/* cell.h is used verbatim; otherwise the formatted text is escaped
		 * (format_cell is called for its side effect of filling cell.w) */
		var w = (cell && cell.v != null) && (cell.h || escapehtml(cell.w || (format_cell(cell), cell.w) || "")) || "";
		var sp = ({}/*:any*/);
		if(RS > 1) sp.rowspan = RS;
		if(CS > 1) sp.colspan = CS;
		if(o.editable) w = '<span contenteditable="true">' + w + '</span>';
		else if(cell) {
			sp["data-t"] = cell && cell.t || 'z';
			// note: data-v is unaffected by the timezone interpretation
			if(cell.v != null) sp["data-v"] = escapehtml(cell.v instanceof Date ? cell.v.toISOString() : cell.v);
			/* escaped like data-v and data-f: number formats frequently contain double
			 * quotes, which would otherwise terminate the attribute value early */
			if(cell.z != null) sp["data-z"] = escapehtml(cell.z);
			if(cell.f != null) sp["data-f"] = escapehtml(cell.f);
			/* only links whose target does not start with "#" become anchors */
			if(cell.l && (cell.l.Target || "#").charAt(0) != "#") w = '<a href="' + escapehtml(cell.l.Target) + '">' + w + '</a>';
		}
		sp.id = (o.id || "sjs") + "-" + coord;
		oo.push(writextag('td', w, sp));
	}
	return "<tr>" + oo.join("") + "</tr>";
}
2017-04-16 04:32:13 +00:00
2022-03-20 01:54:41 +00:00
/* Default document wrapper placed around an exported table:
 * HTML_BEGIN is emitted before the <table>, HTML_END after the closing </table>. */
var HTML_BEGIN = '<html><head><meta charset="utf-8"/><title>SheetJS Table Export</title></head><body>',
	HTML_END = '</body></html>';
/*
 * Build a workbook from every <table> matched in an HTML string.
 *
 * A single table goes through sheet_to_workbook; several tables are appended
 * to a new workbook as "Sheet1", "Sheet2", ... in match order. In both cases
 * the resulting workbook is tagged with bookType "html".
 * Throws an Error when no table is matched.
 */
function html_to_workbook(str/*:string*/, opts)/*:Workbook*/ {
	var tables = str_match_xml_ig(str, "table");
	if(!tables || tables.length == 0) throw new Error("Invalid HTML: could not find <table>");

	var book;
	if(tables.length == 1) {
		book = sheet_to_workbook(html_to_sheet(tables[0], opts), opts);
	} else {
		book = book_new();
		tables.forEach(function(fragment, n) {
			var sheet = html_to_sheet(fragment, opts);
			book_append_sheet(book, sheet, "Sheet" + (n + 1));
		});
	}
	book.bookType = "html";
	return book;
}
/*
 * Produce the opening <table> tag for an exported sheet.
 *
 * ws, R - accepted for signature compatibility; not read here
 * o     - options; when o.id is truthy it becomes the table's id attribute
 */
function make_html_preamble(ws/*:Worksheet*/, R/*:Range*/, o/*:Sheet2HTMLOpts*/)/*:string*/ {
	var idAttr = (o && o.id) ? ' id="' + o.id + '"' : "";
	return '<table' + idAttr + '>';
}
/*
 * Serialize a worksheet as an HTML document string.
 *
 * ws   - worksheet to render
 * opts - optional settings; `header` and `footer` replace the default
 *        document wrapper when they are not null/undefined, and the whole
 *        object is forwarded to the preamble and row writers
 *
 * Rows are only emitted when the worksheet has a "!ref"; otherwise the
 * output is an empty table inside the wrapper.
 */
function sheet_to_html(ws/*:Worksheet*/, opts/*:?Sheet2HTMLOpts*//*, wb:?Workbook*/)/*:string*/ {
	var o = opts || {};
	var header = HTML_BEGIN, footer = HTML_END;
	if(o.header != null) header = o.header;
	if(o.footer != null) footer = o.footer;

	var r = decode_range(ws['!ref'] || "A1");
	var pieces/*:Array<string>*/ = [header, make_html_preamble(ws, r, o)];
	if(ws["!ref"]) {
		for(var R = r.s.r; R <= r.e.r; ++R) {
			pieces.push(make_html_row(ws, r, R, o));
		}
	}
	pieces.push("</table>" + footer);
	return pieces.join("");
}
2017-03-29 19:14:15 +00:00
2020-07-31 03:38:44 +00:00
/*
 * Add the cells of a DOM table to an existing worksheet.
 *
 * ws    - worksheet to extend (dense when ws["!data"] is present); mutated and returned
 * table - element exposing a `rows` collection
 * _opts - optional settings; the keys read here are origin, sheetRows, display,
 *         raw, UTC, cellDates and dateNF
 *
 * origin may be a row number, an address string or a {r, c} object; a row of
 * -1 places the table on the row after the end of the existing "!ref" (or on
 * the first row when the worksheet has no "!ref" yet).
 *
 * Throws (a string) when `table` has no `rows` collection.
 */
function sheet_add_dom(ws/*:Worksheet*/, table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
	var rows/*:HTMLCollection<HTMLTableRowElement>*/ = table.rows;
	if(!rows) {
		/* not an HTML TABLE */
		throw "Unsupported origin when " + table.tagName + " is not a TABLE";
	}
	var opts = _opts || {};
	var dense = ws["!data"] != null;
	var or_R = 0, or_C = 0;
	if(opts.origin != null) {
		if(typeof opts.origin == 'number') or_R = opts.origin;
		else {
			var _origin/*:CellAddress*/ = typeof opts.origin == "string" ? decode_cell(opts.origin) : opts.origin;
			or_R = _origin.r; or_C = _origin.c;
		}
	}
	/* "append" on a worksheet without a range: there is no last row to follow, so start
	 * at row 0 rather than writing cells to row -1 */
	if(or_R == -1 && !ws["!ref"]) or_R = 0;

	var sheetRows = Math.min(opts.sheetRows || 10000000, rows.length);
	var range/*:Range*/ = {s:{r:0, c:0}, e:{r:or_R, c:or_C}};
	if(ws["!ref"]) {
		var _range/*:Range*/ = decode_range(ws["!ref"]);
		range.s.r = Math.min(range.s.r, _range.s.r);
		range.s.c = Math.min(range.s.c, _range.s.c);
		range.e.r = Math.max(range.e.r, _range.e.r);
		range.e.c = Math.max(range.e.c, _range.e.c);
		if(or_R == -1) range.e.r = or_R = _range.e.r + 1;
	}
	var merges/*:Array<Range>*/ = [], midx = 0;
	var rowinfo/*:Array<RowInfo>*/ = ws["!rows"] || (ws["!rows"] = []);
	/* _R/_C index the DOM collections; R/C index the emitted rows/columns before the origin offset */
	var _R = 0, R = 0, _C = 0, C = 0, RS = 0, CS = 0;
	if(!ws["!cols"]) ws['!cols'] = [];
	for(; _R < rows.length && R < sheetRows; ++_R) {
		var row/*:HTMLTableRowElement*/ = rows[_R];
		if(is_dom_element_hidden(row)) {
			if(opts.display) continue;
			/* row metadata is indexed by worksheet row, so the origin offset applies here
			 * exactly as it does to the cell addresses below */
			rowinfo[R + or_R] = {hidden: true};
		}
		var elts/*:HTMLCollection<HTMLTableCellElement>*/ = (row.cells);
		for(_C = C = 0; _C < elts.length; ++_C) {
			var elt/*:HTMLTableCellElement*/ = elts[_C];
			if(opts.display && is_dom_element_hidden(elt)) continue;
			/* explicit data-v / v attributes win over the rendered markup */
			var v/*:?string*/ = elt.hasAttribute('data-v') ? elt.getAttribute('data-v') : elt.hasAttribute('v') ? elt.getAttribute('v') : htmldecode(elt.innerHTML);
			var z/*:?string*/ = elt.getAttribute('data-z') || elt.getAttribute('z');
			var f/*:?string*/ = elt.hasAttribute('data-f') ? elt.getAttribute('data-f') : elt.hasAttribute('f') ? elt.getAttribute('f') : null;
			/* skip columns covered by a merge that started in an earlier row; restart the
			 * scan after every jump because the new column may be covered by another merge */
			for(midx = 0; midx < merges.length; ++midx) {
				var m/*:Range*/ = merges[midx];
				if(m.s.c == C + or_C && m.s.r < R + or_R && R + or_R <= m.e.r) { C = m.e.c + 1 - or_C; midx = -1; }
			}
			/* TODO: figure out how to extract nonstandard mso- style */
			CS = +elt.getAttribute("colspan") || 1;
			if(((RS = (+elt.getAttribute("rowspan") || 1))) > 1 || CS > 1) merges.push({s:{r:R + or_R, c:C + or_C}, e:{r:R + or_R + (RS || 1) - 1, c:C + or_C + (CS || 1) - 1}});
			var o/*:Cell*/ = {t:'s', v:v};
			var _t/*:string*/ = elt.getAttribute("data-t") || elt.getAttribute("t") || "";
			if(v != null) {
				if(v.length == 0) o.t = _t || 'z';
				else if(opts.raw || v.trim().length == 0 || _t == "s") { /* keep the text as a string cell */ }
				else if(_t == "e" && BErr[+v]) o = {t:'e', v:+v, w:BErr[+v]};
				else if(v === 'TRUE') o = {t:'b', v:true};
				else if(v === 'FALSE') o = {t:'b', v:false};
				else if(!isNaN(fuzzynum(v))) o = {t:'n', v:fuzzynum(v)};
				else if(!isNaN(fuzzydate(v).getDate())) {
					o = ({t:'d', v:parseDate(v)}/*:any*/);
					if(opts.UTC) o.v = local_to_utc(o.v);
					if(!opts.cellDates) o = ({t:'n', v:datenum(o.v)}/*:any*/);
					o.z = opts.dateNF || table_fmt[14];
				} else if(v.charCodeAt(0) == 35 /* # */ && RBErr[v] != null) o = ({t:'e', v:RBErr[v], w:v});
			}
			if(o.z === undefined && z != null) o.z = z;
			/* The first link is used.  Links are assumed to be fully specified.
			 * TODO: The right way to process relative links is to make a new <a> */
			var l = "", Aelts = elt.getElementsByTagName("A");
			if(Aelts && Aelts.length) for(var Aelti = 0; Aelti < Aelts.length; ++Aelti) if(Aelts[Aelti].hasAttribute("href")) {
				l = Aelts[Aelti].getAttribute("href");
				if(l.charAt(0) != "#") break;
			}
			/* fragment-only and javascript: targets are never stored as hyperlinks */
			if(l && l.charAt(0) != "#" && l.slice(0, 11).toLowerCase() != 'javascript:') o.l = ({Target: l});
			if(f != null) o.f = f;
			if(dense) {
				if(!ws["!data"][R + or_R]) ws["!data"][R + or_R] = [];
				ws["!data"][R + or_R][C + or_C] = o;
			}
			else ws[encode_cell({c:C + or_C, r:R + or_R})] = o;
			if(range.e.c < C + or_C) range.e.c = C + or_C;
			C += CS;
		}
		++R;
	}
	if(merges.length) ws['!merges'] = (ws["!merges"] || []).concat(merges);
	range.e.r = Math.max(range.e.r, R - 1 + or_R);
	ws['!ref'] = encode_range(range);
	if(R >= sheetRows) {
		/* "!fullref" extends the end row by the DOM rows that were not visited; the
		 * remaining rows are counted without inspecting them, to keep this cheap */
		range.e.r = rows.length - _R + R - 1 + or_R;
		ws['!fullref'] = encode_range(range);
	}
	return ws;
}
2020-07-31 03:38:44 +00:00
/*
 * Convert a DOM table into a brand-new worksheet.
 *
 * The worksheet starts empty (with a "!data" array when the `dense` option is
 * truthy) and is populated by sheet_add_dom, which receives the caller's
 * options object untouched.
 */
function parse_dom_table(table/*:HTMLElement*/, _opts/*:?any*/)/*:Worksheet*/ {
	var sheet/*:Worksheet*/ = ({}/*:any*/);
	var wantDense = (_opts || {}).dense;
	if(wantDense) sheet["!data"] = [];
	return sheet_add_dom(sheet, table, _opts);
}
2017-03-29 19:14:15 +00:00
/*
 * Convert a DOM table into a single-sheet workbook: the table is parsed into
 * a worksheet, which is then wrapped by sheet_to_workbook with the same options.
 */
function table_to_book(table/*:HTMLElement*/, opts/*:?any*/)/*:Workbook*/ {
	var sheet = parse_dom_table(table, opts);
	/* TODO: define a bookType (e.g. "dom") for workbooks built this way */
	return sheet_to_workbook(sheet, opts);
}
2018-05-27 07:27:47 +00:00
/*
 * Report whether an element's `display` resolves to "none".
 *
 * The computed style is consulted first when a getComputedStyle function can
 * be found; if that yields nothing, the element's inline style is used.
 */
function is_dom_element_hidden(element/*:HTMLElement*/)/*:boolean*/ {
	var styleOf/*:?function*/ = get_get_computed_style_function(element);
	var shown/*:string*/ = styleOf ? styleOf(element).getPropertyValue('display') : '';
	if(!shown) shown = element.style && element.style.display;
	return shown === 'none';
}
/* global getComputedStyle */
/*
 * Locate a getComputedStyle implementation for an element.
 *
 * element - the element whose style will be queried
 *
 * Returns the function found on the element's own window when available,
 * otherwise the global getComputedStyle, otherwise null.
 */
function get_get_computed_style_function(element/*:HTMLElement*/)/*:?function*/ {
	// The proper getComputedStyle implementation is the one defined in the element window.
	// ownerDocument may be missing (e.g. non-browser element objects), so guard each step
	// instead of throwing before the global fallback can be tried.
	var doc = element.ownerDocument;
	var view = doc && doc.defaultView;
	if(view && typeof view.getComputedStyle === 'function') return view.getComputedStyle;
	// If it is not available, try to get one from the global namespace
	if(typeof getComputedStyle === 'function') return getComputedStyle;
	return null;
}