From d4829476f733670de3ff1d2fd0355e723e1edfd1 Mon Sep 17 00:00:00 2001 From: Takashi Fujita Date: Thu, 30 Oct 2014 14:59:08 +0900 Subject: [PATCH] fix parse_si for Japanese excel files MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit fixes #141 h/t @tgfjt . Original message: using #parse_si 'Phonetic Properties' items for Japanese excel files, the Phonetic items(KATAKANA) represents pronunciation hints of String(KANJI). ```xml <si><r><t>漢字(japanese KANJI)</t></r><rPh><t>フリガナ(this is KATAKANA for pronunciation:漢字)</t></rPh></si> ``` It expected to get '漢字' but got '漢字フリガナ'. so replace these strings, because this can be caused only by `<rPh>`. --- bits/42_sstxml.js | 2 +- xlsx.flow.js | 2 +- xlsx.js | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/bits/42_sstxml.js b/bits/42_sstxml.js index cc8ba48..ae504fa 100644 --- a/bits/42_sstxml.js +++ b/bits/42_sstxml.js @@ -157,7 +157,7 @@ function parse_si(x, opts) { /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((y = x.match(sirregex))) { z.r = utf8read(x); - z.t = utf8read(unescapexml((x.match(sitregex)||[]).join("").replace(tagregex,""))); + z.t = utf8read(unescapexml((x.replace(/(.*?)<\/rPh>/g, '').match(sitregex)||[]).join("").replace(tagregex,""))); if(html) z.h = parse_rs(z.r); } /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ diff --git a/xlsx.flow.js b/xlsx.flow.js index ca03b88..498a3ff 100644 --- a/xlsx.flow.js +++ b/xlsx.flow.js @@ -4581,7 +4581,7 @@ function parse_si(x, opts) { /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((y = x.match(sirregex))) { z.r = utf8read(x); - z.t = utf8read(unescapexml((x.match(sitregex)||[]).join("").replace(tagregex,""))); + z.t = utf8read(unescapexml((x.replace(/(.*?)<\/rPh>/g, '').match(sitregex)||[]).join("").replace(tagregex,""))); if(html) z.h = parse_rs(z.r); } /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ diff --git a/xlsx.js b/xlsx.js index 3900205..9174665 100644 --- a/xlsx.js +++ b/xlsx.js @@ -4539,7 +4539,7 
@@ function parse_si(x, opts) { /* 18.4.4 r CT_RElt (Rich Text Run) */ else if((y = x.match(sirregex))) { z.r = utf8read(x); - z.t = utf8read(unescapexml((x.match(sitregex)||[]).join("").replace(tagregex,""))); + z.t = utf8read(unescapexml((x.replace(/(.*?)<\/rPh>/g, '').match(sitregex)||[]).join("").replace(tagregex,""))); if(html) z.h = parse_rs(z.r); } /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */