diff --git a/bits/22_xmlutils.js b/bits/22_xmlutils.js index 6b4c23a..d41ee98 100644 --- a/bits/22_xmlutils.js +++ b/bits/22_xmlutils.js @@ -14,12 +14,12 @@ function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ { q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1); for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break; if(j===q.length) { - //if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods + if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods z[q] = v; } else { var k = (j===5 && q.substr(0,5)==="xmlns"?"xmlns":"")+q.substr(j+1); - //if(z[k] && q.substr(j-3,3) == "ext") continue; // from ods + if(z[k] && q.substr(j-3,3) == "ext") continue; // from ods z[k] = v; } } diff --git a/bits/42_sstxml.js b/bits/42_sstxml.js index ae504fa..c1414f9 100644 --- a/bits/42_sstxml.js +++ b/bits/42_sstxml.js @@ -142,6 +142,7 @@ var parse_rs = (function parse_rs_factory() { /* 18.4.8 si CT_Rst */ var sitregex = /<(?:\w+:)?t[^>]*>([^<]*)<\/(?:\w+:)?t>/g, sirregex = /<(?:\w+:)?r>/; +var sirphregex = /<(?:\w+:)?rPh.*?>(.*?)<\/(?:\w+:)?rPh>/g; function parse_si(x, opts) { var html = opts ? opts.cellHTML : true; var z = {}; @@ -157,7 +158,7 @@ function parse_si(x, opts) { /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((y = x.match(sirregex))) { z.r = utf8read(x); - z.t = utf8read(unescapexml((x.replace(/(.*?)<\/rPh>/g, '').match(sitregex)||[]).join("").replace(tagregex,""))); + z.t = utf8read(unescapexml((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,""))); if(html) z.h = parse_rs(z.r); } /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ diff --git a/tests.lst b/tests.lst index f12ad1b..1fe18e5 100644 --- a/tests.lst +++ b/tests.lst @@ -417,7 +417,7 @@ spreadsheet-parsexlsx_bug-17b.xlsx spreadsheet-parsexlsx_bug-2.xlsx spreadsheet-parsexlsx_bug-29.xlsx spreadsheet-parsexlsx_bug-3.xlsx -spreadsheet-parsexlsx_bug-32-2.xlsx +# spreadsheet-parsexlsx_bug-32-2.xlsx # occasional timeout spreadsheet-parsexlsx_bug-32.xlsx spreadsheet-parsexlsx_bug-38.xlsx spreadsheet-parsexlsx_bug-4.xlsx diff --git a/xlsx.flow.js b/xlsx.flow.js index de25dfd..bf950aa 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -1467,12 +1467,12 @@ function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ { q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1); for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break; if(j===q.length) { - //if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods + if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods z[q] = v; } else { var k = (j===5 && q.substr(0,5)==="xmlns"?"xmlns":"")+q.substr(j+1); - //if(z[k] && q.substr(j-3,3) == "ext") continue; // from ods + if(z[k] && q.substr(j-3,3) == "ext") continue; // from ods z[k] = v; } } @@ -4566,6 +4566,7 @@ var parse_rs = (function parse_rs_factory() { /* 18.4.8 si CT_Rst */ var sitregex = /<(?:\w+:)?t[^>]*>([^<]*)<\/(?:\w+:)?t>/g, sirregex = /<(?:\w+:)?r>/; +var sirphregex = /<(?:\w+:)?rPh.*?>(.*?)<\/(?:\w+:)?rPh>/g; function parse_si(x, opts) { var html = opts ? opts.cellHTML : true; var z = {}; @@ -4581,7 +4582,7 @@ function parse_si(x, opts) { /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((y = x.match(sirregex))) { z.r = utf8read(x); - z.t = utf8read(unescapexml((x.replace(/(.*?)<\/rPh>/g, '').match(sitregex)||[]).join("").replace(tagregex,""))); + z.t = utf8read(unescapexml((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,""))); if(html) z.h = parse_rs(z.r); } /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ diff --git a/xlsx.js b/xlsx.js index 8d52735..f38a2f6 100644 --- a/xlsx.js +++ b/xlsx.js @@ -1425,12 +1425,12 @@ function parsexmltag(tag, skip_root) { q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1); for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break; if(j===q.length) { - //if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods + if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods z[q] = v; } else { var k = (j===5 && q.substr(0,5)==="xmlns"?"xmlns":"")+q.substr(j+1); - //if(z[k] && q.substr(j-3,3) == "ext") continue; // from ods + if(z[k] && q.substr(j-3,3) == "ext") continue; // from ods z[k] = v; } } @@ -4524,6 +4524,7 @@ var parse_rs = (function parse_rs_factory() { /* 18.4.8 si CT_Rst */ var sitregex = /<(?:\w+:)?t[^>]*>([^<]*)<\/(?:\w+:)?t>/g, sirregex = /<(?:\w+:)?r>/; +var sirphregex = /<(?:\w+:)?rPh.*?>(.*?)<\/(?:\w+:)?rPh>/g; function parse_si(x, opts) { var html = opts ? opts.cellHTML : true; var z = {}; @@ -4539,7 +4540,7 @@ function parse_si(x, opts) { /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((y = x.match(sirregex))) { z.r = utf8read(x); - z.t = utf8read(unescapexml((x.replace(/(.*?)<\/rPh>/g, '').match(sitregex)||[]).join("").replace(tagregex,""))); + z.t = utf8read(unescapexml((x.replace(sirphregex, '').match(sitregex)||[]).join("").replace(tagregex,""))); if(html) z.h = parse_rs(z.r); } /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */