forked from sheetjs/sheetjs
bug: Remove white spaces due to html tags (#1622)
This commit is contained in:
parent
4c956ff9de
commit
19ebdd9477
@ -178,7 +178,19 @@ var htmldecode/*:{(s:string):string}*/ = (function() {
|
||||
['quot', '"'], ['apos', "'"], ['gt', '>'], ['lt', '<'], ['amp', '&']
|
||||
].map(function(x/*:[string, string]*/) { return [new RegExp('&' + x[0] + ';', "g"), x[1]]; });
|
||||
return function htmldecode(str/*:string*/)/*:string*/ {
|
||||
var o = str.replace(/^[\t\n\r ]+/, "").replace(/[\t\n\r ]+$/,"").replace(/[\t\n\r ]+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"");
|
||||
var o = str
|
||||
// Remove new lines and spaces from start of content
|
||||
.replace(/^[\t\n\r ]+/, "")
|
||||
// Remove new lines and spaces from end of content
|
||||
.replace(/[\t\n\r ]+$/,"")
|
||||
// Remove any whitespace characters immediately after and before HTML tags
|
||||
.replace(/>\s+/g,">").replace(/\s+</g,"<")
|
||||
// Replace remaining new lines and spaces with space
|
||||
.replace(/[\t\n\r ]+/g, " ")
|
||||
// Replace <br> tags with new lines
|
||||
.replace(/<\s*[bB][rR]\s*\/?>/g,"\n")
|
||||
// Strip HTML elements
|
||||
.replace(/<[^>]*>/g,"");
|
||||
for(var i = 0; i < entities.length; ++i) o = o.replace(entities[i][0], entities[i][1]);
|
||||
return o;
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user