bug: Remove white spaces due to html tags (#1622)

This commit is contained in:
sauravhiremath 2019-10-05 22:20:31 +05:30 committed by SheetJSDev
parent 4c956ff9de
commit 19ebdd9477

@ -178,7 +178,19 @@ var htmldecode/*:{(s:string):string}*/ = (function() {
['quot', '"'], ['apos', "'"], ['gt', '>'], ['lt', '<'], ['amp', '&']
].map(function(x/*:[string, string]*/) { return [new RegExp('&' + x[0] + ';', "g"), x[1]]; });
return function htmldecode(str/*:string*/)/*:string*/ {
var o = str.replace(/^[\t\n\r ]+/, "").replace(/[\t\n\r ]+$/,"").replace(/[\t\n\r ]+/g, " ").replace(/<\s*[bB][rR]\s*\/?>/g,"\n").replace(/<[^>]*>/g,"");
var o = str
// Remove new lines and spaces from start of content
.replace(/^[\t\n\r ]+/, "")
// Remove new lines and spaces from end of content
.replace(/[\t\n\r ]+$/,"")
// Added line which removes any white space characters after and before html tags
.replace(/>\s+/g,">").replace(/\s+</g,"<")
// Replace remaining new lines and spaces with space
.replace(/[\t\n\r ]+/g, " ")
// Replace <br> tags with new lines
.replace(/<\s*[bB][rR]\s*\/?>/g,"\n")
// Strip HTML elements
.replace(/<[^>]*>/g,"");
for(var i = 0; i < entities.length; ++i) o = o.replace(entities[i][0], entities[i][1]);
return o;
};