From 59d9d9086bd98f026cd3c5a0dbc1079217620ce2 Mon Sep 17 00:00:00 2001 From: kinwah Date: Wed, 15 Jan 2014 15:26:00 +0800 Subject: [PATCH 1/5] Support for parsing Comments Comments parts listed in the [Content Types] are parsed. Sheets's relationships are parsed. Comments parts are correlated to their corresponding sheets parts. Comments's contents are added to the ref'ed cells. Rich text styling properties are currently ignored. For example: { "!ref": "A1:B3", "A1": { "v": 1, "t": "n" }, "B1": { "v": "one", "t": "s", "r": "one", "c": [ { "a": "Yegor Kozlov", "t": [ "Yegor Kozlov:", "\r\nfirst cell" ] } ] } } --- test.js | 11 +++++ xlsx.js | 150 ++++++++++++++++++++++++++++++++++++++++++++++++-------- 2 files changed, 141 insertions(+), 20 deletions(-) diff --git a/test.js b/test.js index 88e1f72..d4b0438 100644 --- a/test.js +++ b/test.js @@ -39,3 +39,14 @@ describe('should parse test files', function() { }); }); }); + +describe('should have comment as part of cell\'s properties', function(){ + it('Parse comments.xml and insert into cell',function(){ + var wb = XLSX.readFile('./test_files/SimpleWithComments.xlsx'); + var sheetName = 'Sheet1'; + var ws = wb.Sheets[sheetName]; + assert.equal(ws.B1.c.length, 1,"must have 1 comment"); + assert.equal(ws.B1.c[0].t.length, 2,"must have 2 texts"); + assert.equal(ws.B1.c[0].a, 'Yegor Kozlov',"must have the same author"); + }); +}); diff --git a/xlsx.js b/xlsx.js index 3b2a05d..e747da0 100644 --- a/xlsx.js +++ b/xlsx.js @@ -588,6 +588,7 @@ var ct2type = { "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml": "strs", "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml":"styles", "application/vnd.openxmlformats-officedocument.theme+xml":"themes", + "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml": "comments", "foo": "bar" }; @@ -821,7 +822,7 @@ var ctext = {}; function parseCT(data) { if(!data || !data.match) return data; var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [], - coreprops: [], extprops: [], strs:[], xmlns: "" }; + coreprops: [], extprops: [], strs:[], comments: [], xmlns: "" }; (data.match(/<[^>]*>/g)||[]).forEach(function(x) { var y = parsexmltag(x); switch(y[0]) { @@ -1026,6 +1027,104 @@ function parseStyles(data) { return styles; } +/* 9.3.2 OPC Relationships Markup */ +function parseRels(data, currentFilePath) { + if (!data) return data; + if (currentFilePath.charAt(0) !== '/') { + currentFilePath = '/'+currentFilePath; + } + var rels = {}; + + var resolveRelativePathIntoAbsolute = function (to) { +    var toksFrom = currentFilePath.split('/'); + toksFrom.pop(); // folder path +    var toksTo = to.split('/'); +    var reversed = []; +    while (toksTo.length !== 0) { +        var tokTo = toksTo.shift(); +        if (tokTo === '..') { +            toksFrom.pop(); +        } else if (tokTo !== '.') { +            toksFrom.push(tokTo); +        } +    } +    return toksFrom.join('/'); + } + + data.match(/<[^>]*>/g).forEach(function(x) { + var y = parsexmltag(x); + /* 9.3.2.2 OPC_Relationships */ + if (y[0] === '/)) { + throw new Error('Not a valid comments xml'); + } + var authors = []; + var commentList = []; + data.match(/([^\u2603]*)<\/authors>/m)[1].split('').forEach(function(x) { + if(x === "" || x.trim() === "") return; + authors.push(x.match(/]*>(.*)/)[1]); + }); + data.match(/([^\u2603]*)<\/commentList>/m)[1].split('').forEach(function(x, index) { + if(x === "" || x.trim() === "") return; + var y = parsexmltag(x.match(/]*>/)[0]); + var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid, texts:[] }; + x.match(/([^\u2603]*)<\/text>/m)[1].split('').forEach(function(r) { + if(r === "" || r.trim() === "") return; + /* 18.4.12 t ST_Xstring */ + var ct = r.match(matchtag('t')); + comment.texts.push(utf8read(unescapexml(ct[1]))); + // TODO: parse rich text format + }); + commentList.push(comment); + }); + return commentList; +} + +function parseCommentsAddToSheets(zip, dirComments, sheets, sheetRels) { + for(var i = 0; i != dirComments.length; ++i) { + var canonicalpath=dirComments[i]; + var comments=parseComments(getdata(getzipfile(zip, canonicalpath.replace(/^\//,'')))); + // find the sheets targeted by these comments + var sheetNames = Object.keys(sheets); + for(var j = 0; j != sheetNames.length; ++j) { + var sheetName = sheetNames[j]; + var rels = sheetRels[sheetName]; + if (rels) { + var rel = rels[canonicalpath]; + if (rel) { + insertCommentsIntoSheet(sheetName, sheets[sheetName], comments); + } + } + } + } +} + +function insertCommentsIntoSheet(sheetName, sheet, comments) { + comments.forEach(function(comment) { + var cell = sheet[comment.ref]; + if (!cell) { + cell = {}; + sheet[comment.ref] = cell; + } + + if (!cell.c) { + cell.c = []; + } + cell.c.push({a: comment.author, t: comment.texts}); + }); +} + function getdata(data) { if(!data) return null; if(data.data) return data.data; @@ -1058,26 +1157,37 @@ function parseZip(zip) { var deps = {}; if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,'')))); var sheets = {}, i=0; + var sheetRels = {}; if(!props.Worksheets) { - /* Google Docs doesn't generate the appropriate metadata, so we impute: */ - var wbsheets = wb.Sheets; - props.Worksheets = wbsheets.length; - props.SheetNames = []; - for(var j = 0; j != wbsheets.length; ++j) { - props.SheetNames[j] = wbsheets[j].name; - } - for(i = 0; i != props.Worksheets; ++i) { - try { /* TODO: remove these guards */ - sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml'))); - } catch(e) {} - } - } - else { - for(i = 0; i != props.Worksheets; ++i) { - try { - sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,'')))); - } catch(e) {} - } + /* Google Docs doesn't generate the appropriate metadata, so we impute: */ + var wbsheets = wb.Sheets; + props.Worksheets = wbsheets.length; + props.SheetNames = []; + for(var j = 0; j != wbsheets.length; ++j) { + props.SheetNames[j] = wbsheets[j].name; + } + for(i = 0; i != props.Worksheets; ++i) { + try { /* TODO: remove these guards */ + var path = 'xl/worksheets/sheet' + (i+1) + '.xml'; + var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); + sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path))); + sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); + } catch(e) {} + } + } + else { + for(i = 0; i != props.Worksheets; ++i) { + try { + var path = dir.sheets[i].replace(/^\//,''); + var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); + sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path))); + sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); + } catch(e) {} + } + } + + if(dir.comments) { + parseCommentsAddToSheets(zip, dir.comments, sheets, sheetRels); } return { Directory: dir, From 80a843ca3bf0401c06d46dbe30375a8e3033238a Mon Sep 17 00:00:00 2001 From: kinwah Date: Thu, 16 Jan 2014 11:42:25 +0800 Subject: [PATCH 2/5] Add parsing comment codes into bits/70_xlsx.js Update test xlsx file to apachepoi_SimpleWithComments.xlsx --- bits/70_xlsx.js | 150 +++++++++++++++++++++++++++++++++++++++++------- test.js | 2 +- 2 files changed, 131 insertions(+), 21 deletions(-) diff --git a/bits/70_xlsx.js b/bits/70_xlsx.js index bd95026..b950d8d 100644 --- a/bits/70_xlsx.js +++ b/bits/70_xlsx.js @@ -8,6 +8,7 @@ var ct2type = { "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml": "strs", "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml":"styles", "application/vnd.openxmlformats-officedocument.theme+xml":"themes", + "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml": "comments", "foo": "bar" }; @@ -241,7 +242,7 @@ var ctext = {}; function parseCT(data) { if(!data || !data.match) return data; var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [], - coreprops: [], extprops: [], strs:[], xmlns: "" }; + coreprops: [], extprops: [], strs:[], comments: [], xmlns: "" }; (data.match(/<[^>]*>/g)||[]).forEach(function(x) { var y = parsexmltag(x); switch(y[0]) { @@ -446,6 +447,104 @@ function parseStyles(data) { return styles; } +/* 9.3.2 OPC Relationships Markup */ +function parseRels(data, currentFilePath) { + if (!data) return data; + if (currentFilePath.charAt(0) !== '/') { + currentFilePath = '/'+currentFilePath; + } + var rels = {}; + + var resolveRelativePathIntoAbsolute = function (to) { +    var toksFrom = currentFilePath.split('/'); + toksFrom.pop(); // folder path +    var toksTo = to.split('/'); +    var reversed = []; +    while (toksTo.length !== 0) { +        var tokTo = toksTo.shift(); +        if (tokTo === '..') { +            toksFrom.pop(); +        } else if (tokTo !== '.') { +            toksFrom.push(tokTo); +        } +    } +    return toksFrom.join('/'); + } + + data.match(/<[^>]*>/g).forEach(function(x) { + var y = parsexmltag(x); + /* 9.3.2.2 OPC_Relationships */ + if (y[0] === '/)) { + throw new Error('Not a valid comments xml'); + } + var authors = []; + var commentList = []; + data.match(/([^\u2603]*)<\/authors>/m)[1].split('').forEach(function(x) { + if(x === "" || x.trim() === "") return; + authors.push(x.match(/]*>(.*)/)[1]); + }); + data.match(/([^\u2603]*)<\/commentList>/m)[1].split('').forEach(function(x, index) { + if(x === "" || x.trim() === "") return; + var y = parsexmltag(x.match(/]*>/)[0]); + var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid, texts:[] }; + x.match(/([^\u2603]*)<\/text>/m)[1].split('').forEach(function(r) { + if(r === "" || r.trim() === "") return; + /* 18.4.12 t ST_Xstring */ + var ct = r.match(matchtag('t')); + comment.texts.push(utf8read(unescapexml(ct[1]))); + // TODO: parse rich text format + }); + commentList.push(comment); + }); + return commentList; +} + +function parseCommentsAddToSheets(zip, dirComments, sheets, sheetRels) { + for(var i = 0; i != dirComments.length; ++i) { + var canonicalpath=dirComments[i]; + var comments=parseComments(getdata(getzipfile(zip, canonicalpath.replace(/^\//,'')))); + // find the sheets targeted by these comments + var sheetNames = Object.keys(sheets); + for(var j = 0; j != sheetNames.length; ++j) { + var sheetName = sheetNames[j]; + var rels = sheetRels[sheetName]; + if (rels) { + var rel = rels[canonicalpath]; + if (rel) { + insertCommentsIntoSheet(sheetName, sheets[sheetName], comments); + } + } + } + } +} + +function insertCommentsIntoSheet(sheetName, sheet, comments) { + comments.forEach(function(comment) { + var cell = sheet[comment.ref]; + if (!cell) { + cell = {}; + sheet[comment.ref] = cell; + } + + if (!cell.c) { + cell.c = []; + } + cell.c.push({a: comment.author, t: comment.texts}); + }); +} + function getdata(data) { if(!data) return null; if(data.data) return data.data; @@ -478,26 +577,37 @@ function parseZip(zip) { var deps = {}; if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,'')))); var sheets = {}, i=0; + var sheetRels = {}; if(!props.Worksheets) { - /* Google Docs doesn't generate the appropriate metadata, so we impute: */ - var wbsheets = wb.Sheets; - props.Worksheets = wbsheets.length; - props.SheetNames = []; - for(var j = 0; j != wbsheets.length; ++j) { - props.SheetNames[j] = wbsheets[j].name; - } - for(i = 0; i != props.Worksheets; ++i) { - try { /* TODO: remove these guards */ - sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml'))); - } catch(e) {} - } - } - else { - for(i = 0; i != props.Worksheets; ++i) { - try { - sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,'')))); - } catch(e) {} - } + /* Google Docs doesn't generate the appropriate metadata, so we impute: */ + var wbsheets = wb.Sheets; + props.Worksheets = wbsheets.length; + props.SheetNames = []; + for(var j = 0; j != wbsheets.length; ++j) { + props.SheetNames[j] = wbsheets[j].name; + } + for(i = 0; i != props.Worksheets; ++i) { + try { /* TODO: remove these guards */ + var path = 'xl/worksheets/sheet' + (i+1) + '.xml'; + var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); + sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path))); + sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); + } catch(e) {} + } + } + else { + for(i = 0; i != props.Worksheets; ++i) { + try { + var path = dir.sheets[i].replace(/^\//,''); + var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); + sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path))); + sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); + } catch(e) {} + } + } + + if(dir.comments) { + parseCommentsAddToSheets(zip, dir.comments, sheets, sheetRels); } return { Directory: dir, diff --git a/test.js b/test.js index d4b0438..98395a6 100644 --- a/test.js +++ b/test.js @@ -42,7 +42,7 @@ describe('should parse test files', function() { describe('should have comment as part of cell\'s properties', function(){ it('Parse comments.xml and insert into cell',function(){ - var wb = XLSX.readFile('./test_files/SimpleWithComments.xlsx'); + var wb = XLSX.readFile('./test_files/apachepoi_SimpleWithComments.xlsx'); var sheetName = 'Sheet1'; var ws = wb.Sheets[sheetName]; assert.equal(ws.B1.c.length, 1,"must have 1 comment"); From 36f7080a68134d63c3b892b2140735d801bcbc98 Mon Sep 17 00:00:00 2001 From: Hugues Malphettes Date: Sat, 18 Jan 2014 16:29:51 +0800 Subject: [PATCH 3/5] Comments maybe contain an empty text tag --- bits/70_xlsx.js | 4 +++- xlsx.js | 4 +++- 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/bits/70_xlsx.js b/bits/70_xlsx.js index b950d8d..3941e1c 100644 --- a/bits/70_xlsx.js +++ b/bits/70_xlsx.js @@ -499,7 +499,9 @@ function parseComments(data) { if(x === "" || x.trim() === "") return; var y = parsexmltag(x.match(/]*>/)[0]); var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid, texts:[] }; - x.match(/([^\u2603]*)<\/text>/m)[1].split('').forEach(function(r) { + var textMatch = x.match(/([^\u2603]*)<\/text>/m); + if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag. + textMatch[1].split('').forEach(function(r) { if(r === "" || r.trim() === "") return; /* 18.4.12 t ST_Xstring */ var ct = r.match(matchtag('t')); diff --git a/xlsx.js b/xlsx.js index e747da0..ac664aa 100644 --- a/xlsx.js +++ b/xlsx.js @@ -1079,7 +1079,9 @@ function parseComments(data) { if(x === "" || x.trim() === "") return; var y = parsexmltag(x.match(/]*>/)[0]); var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid, texts:[] }; - x.match(/([^\u2603]*)<\/text>/m)[1].split('').forEach(function(r) { + var textMatch = x.match(/([^\u2603]*)<\/text>/m); + if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag. + textMatch[1].split('').forEach(function(r) { if(r === "" || r.trim() === "") return; /* 18.4.12 t ST_Xstring */ var ct = r.match(matchtag('t')); From 5d43dffc79361448f8aa8a05df8c1322afc58cd5 Mon Sep 17 00:00:00 2001 From: Hugues Malphettes Date: Sat, 18 Jan 2014 21:45:49 +0800 Subject: [PATCH 4/5] Share the code for parsing the rich text and use it to parse the comments. --- bits/65_sst.js | 221 ++++++++++++++++++++++----------------------- bits/70_xlsx.js | 15 ++- test.js | 13 ++- xlsx.js | 236 ++++++++++++++++++++++++------------------------ 4 files changed, 241 insertions(+), 244 deletions(-) diff --git a/bits/65_sst.js b/bits/65_sst.js index 0ce03dd..85c0c3e 100644 --- a/bits/65_sst.js +++ b/bits/65_sst.js @@ -1,131 +1,130 @@ -/* 18.4 Shared String Table */ -var parse_sst = (function(){ +/* Parse a list of tags */ +var parse_rs = (function() { var tregex = matchtag("t"), rpregex = matchtag("rPr"); - /* Parse a list of tags */ - var parse_rs = (function() { - /* 18.4.7 rPr CT_RPrElt */ - var parse_rpr = function(rpr, intro, outro) { - var font = {}; - (rpr.match(/<[^>]*>/g)||[]).forEach(function(x) { - var y = parsexmltag(x); - switch(y[0]) { - /* 18.8.12 condense CT_BooleanProperty */ - /* ** not required . */ - case ']*>/g)||[]).forEach(function(x) { + var y = parsexmltag(x); + switch(y[0]) { + /* 18.8.12 condense CT_BooleanProperty */ + /* ** not required . */ + case '': font.strike = 1; break; - case '': break; + /* 18.4.10 strike CT_BooleanProperty */ + case '': font.strike = 1; break; + case '': break; - /* 18.4.13 u CT_UnderlineProperty */ - case '': font.u = 1; break; - case '': break; + /* 18.4.13 u CT_UnderlineProperty */ + case '': font.u = 1; break; + case '': break; - /* 18.8.2 b */ - case '': font.b = 1; break; - case '': break; + /* 18.8.2 b */ + case '': font.b = 1; break; + case '': break; - /* 18.8.26 i */ - case '': font.i = 1; break; - case '': break; + /* 18.8.26 i */ + case '': font.i = 1; break; + case '': break; - /* 18.3.1.15 color CT_Color TODO: tint, theme, auto, indexed */ - case ''); - outro.push(""); - }; - - /* 18.4.4 r CT_RElt */ - function parse_r(r) { - var terms = [[],"",[]]; - /* 18.4.12 t ST_Xstring */ - var t = r.match(tregex); - if(!isval(t)) return ""; - terms[1] = t[1]; - - var rpr = r.match(rpregex); - if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]); - return terms[0].join("") + terms[1].replace(/\r\n/g,'
') + terms[2].join(""); - } - return function(rs) { - return rs.replace(//g,"").split(/<\/r>/).map(parse_r).join(""); - }; - })(); - - /* 18.4.8 si CT_Rst */ - var parse_si = function(x) { - var z = {}; - if(!x) return z; - var y; - /* 18.4.12 t ST_Xstring (Plaintext String) */ - if(x[1] === 't') { - z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,""))); - z.raw = x; - z.r = z.t; - } - /* 18.4.4 r CT_RElt (Rich Text Run) */ - else if((y = x.match(//))) { - z.raw = x; - /* TODO: properly parse (note: no other valid child can have body text) */ - z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); - z.r = parse_rs(x); - } - /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ - /* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */ - return z; + default: + if(y[0][2] !== '/') throw 'Unrecognized rich format ' + y[0]; + } + }); + /* TODO: These should be generated styles, not inline */ + var style = []; + if(font.b) style.push("font-weight: bold;"); + if(font.i) style.push("font-style: italic;"); + intro.push(''); + outro.push(""); }; + /* 18.4.4 r CT_RElt */ + function parse_r(r) { + var terms = [[],"",[]]; + /* 18.4.12 t ST_Xstring */ + var t = r.match(tregex); + if(!isval(t)) return ""; + terms[1] = t[1]; + var rpr = r.match(rpregex); + if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]); + return terms[0].join("") + terms[1].replace(/\r\n/g,'
') + terms[2].join(""); + } + return function(rs) { + return rs.replace(//g,"").split(/<\/r>/).map(parse_r).join(""); + }; +})(); + +/* 18.4.8 si CT_Rst */ +var parse_si = function(x) { + var z = {}; + if(!x) return z; + var y; + /* 18.4.12 t ST_Xstring (Plaintext String) */ + if(x[1] === 't') { + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,""))); + z.raw = x; + z.r = z.t; + } + /* 18.4.4 r CT_RElt (Rich Text Run) */ + else if((y = x.match(//))) { + z.raw = x; + /* TODO: properly parse (note: no other valid child can have body text) */ + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); + z.r = parse_rs(x); + } + /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ + /* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */ + return z; +}; + +/* 18.4 Shared String Table */ +var parse_sst = (function(){ return function(data) { var s = []; /* 18.4.9 sst CT_Sst */ diff --git a/bits/70_xlsx.js b/bits/70_xlsx.js index 3941e1c..fdbefca 100644 --- a/bits/70_xlsx.js +++ b/bits/70_xlsx.js @@ -498,16 +498,13 @@ function parseComments(data) { data.match(/([^\u2603]*)<\/commentList>/m)[1].split('').forEach(function(x, index) { if(x === "" || x.trim() === "") return; var y = parsexmltag(x.match(/]*>/)[0]); - var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid, texts:[] }; + var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid }; var textMatch = x.match(/([^\u2603]*)<\/text>/m); if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag. - textMatch[1].split('').forEach(function(r) { - if(r === "" || r.trim() === "") return; - /* 18.4.12 t ST_Xstring */ - var ct = r.match(matchtag('t')); - comment.texts.push(utf8read(unescapexml(ct[1]))); - // TODO: parse rich text format - }); + var rt = parse_si(textMatch[1]); + comment.raw = rt.raw; + comment.t = rt.t; + comment.r = rt.r; commentList.push(comment); }); return commentList; @@ -543,7 +540,7 @@ function insertCommentsIntoSheet(sheetName, sheet, comments) { if (!cell.c) { cell.c = []; } - cell.c.push({a: comment.author, t: comment.texts}); + cell.c.push({a: comment.author, t: comment.t, raw: comment.raw, r: comment.r}); }); } diff --git a/test.js b/test.js index 98395a6..dfd4088 100644 --- a/test.js +++ b/test.js @@ -41,12 +41,17 @@ describe('should parse test files', function() { }); describe('should have comment as part of cell\'s properties', function(){ - it('Parse comments.xml and insert into cell',function(){ + var ws; + before(function() { + XLSX = require('./xlsx'); var wb = XLSX.readFile('./test_files/apachepoi_SimpleWithComments.xlsx'); var sheetName = 'Sheet1'; - var ws = wb.Sheets[sheetName]; + ws = wb.Sheets[sheetName]; + }); + it('Parse comments.xml and insert into cell',function(){ assert.equal(ws.B1.c.length, 1,"must have 1 comment"); - assert.equal(ws.B1.c[0].t.length, 2,"must have 2 texts"); - assert.equal(ws.B1.c[0].a, 'Yegor Kozlov',"must have the same author"); + assert.equal(ws.B1.c[0].t, "Yegor Kozlov:\r\nfirst cell", "must have the concatenated texts"); + assert.equal(ws.B1.c[0].r, 'Yegor Kozlov:
first cell
', "must have the html representation"); + assert.equal(ws.B1.c[0].a, "Yegor Kozlov","must have the same author"); }); }); diff --git a/xlsx.js b/xlsx.js index ac664aa..d346cf1 100644 --- a/xlsx.js +++ b/xlsx.js @@ -439,134 +439,133 @@ function parseVector(data) { } function isval(x) { return typeof x !== "undefined" && x !== null; } -/* 18.4 Shared String Table */ -var parse_sst = (function(){ +/* Parse a list of tags */ +var parse_rs = (function() { var tregex = matchtag("t"), rpregex = matchtag("rPr"); - /* Parse a list of tags */ - var parse_rs = (function() { - /* 18.4.7 rPr CT_RPrElt */ - var parse_rpr = function(rpr, intro, outro) { - var font = {}; - (rpr.match(/<[^>]*>/g)||[]).forEach(function(x) { - var y = parsexmltag(x); - switch(y[0]) { - /* 18.8.12 condense CT_BooleanProperty */ - /* ** not required . */ - case ']*>/g)||[]).forEach(function(x) { + var y = parsexmltag(x); + switch(y[0]) { + /* 18.8.12 condense CT_BooleanProperty */ + /* ** not required . */ + case '': font.strike = 1; break; - case '': break; + /* 18.4.10 strike CT_BooleanProperty */ + case '': font.strike = 1; break; + case '': break; - /* 18.4.13 u CT_UnderlineProperty */ - case '': font.u = 1; break; - case '': break; + /* 18.4.13 u CT_UnderlineProperty */ + case '': font.u = 1; break; + case '': break; - /* 18.8.2 b */ - case '': font.b = 1; break; - case '': break; + /* 18.8.2 b */ + case '': font.b = 1; break; + case '': break; - /* 18.8.26 i */ - case '': font.i = 1; break; - case '': break; + /* 18.8.26 i */ + case '': font.i = 1; break; + case '': break; - /* 18.3.1.15 color CT_Color TODO: tint, theme, auto, indexed */ - case ''); - outro.push(""); - }; - - /* 18.4.4 r CT_RElt */ - function parse_r(r) { - var terms = [[],"",[]]; - /* 18.4.12 t ST_Xstring */ - var t = r.match(tregex); - if(!isval(t)) return ""; - terms[1] = t[1]; - - var rpr = r.match(rpregex); - if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]); - return terms[0].join("") + terms[1].replace(/\r\n/g,'
') + terms[2].join(""); - } - return function(rs) { - return rs.replace(//g,"").split(/<\/r>/).map(parse_r).join(""); - }; - })(); - - /* 18.4.8 si CT_Rst */ - var parse_si = function(x) { - var z = {}; - if(!x) return z; - var y; - /* 18.4.12 t ST_Xstring (Plaintext String) */ - if(x[1] === 't') { - z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,""))); - z.raw = x; - z.r = z.t; - } - /* 18.4.4 r CT_RElt (Rich Text Run) */ - else if((y = x.match(//))) { - z.raw = x; - /* TODO: properly parse (note: no other valid child can have body text) */ - z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); - z.r = parse_rs(x); - } - /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ - /* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */ - return z; + default: + if(y[0][2] !== '/') throw 'Unrecognized rich format ' + y[0]; + } + }); + /* TODO: These should be generated styles, not inline */ + var style = []; + if(font.b) style.push("font-weight: bold;"); + if(font.i) style.push("font-style: italic;"); + intro.push(''); + outro.push(""); }; + /* 18.4.4 r CT_RElt */ + function parse_r(r) { + var terms = [[],"",[]]; + /* 18.4.12 t ST_Xstring */ + var t = r.match(tregex); + if(!isval(t)) return ""; + terms[1] = t[1]; + var rpr = r.match(rpregex); + if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]); + return terms[0].join("") + terms[1].replace(/\r\n/g,'
') + terms[2].join(""); + } + return function(rs) { + return rs.replace(//g,"").split(/<\/r>/).map(parse_r).join(""); + }; +})(); + +/* 18.4.8 si CT_Rst */ +var parse_si = function(x) { + var z = {}; + if(!x) return z; + var y; + /* 18.4.12 t ST_Xstring (Plaintext String) */ + if(x[1] === 't') { + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,""))); + z.raw = x; + z.r = z.t; + } + /* 18.4.4 r CT_RElt (Rich Text Run) */ + else if((y = x.match(//))) { + z.raw = x; + /* TODO: properly parse (note: no other valid child can have body text) */ + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); + z.r = parse_rs(x); + } + /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ + /* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */ + return z; +}; + +/* 18.4 Shared String Table */ +var parse_sst = (function(){ return function(data) { var s = []; /* 18.4.9 sst CT_Sst */ @@ -1078,16 +1077,13 @@ function parseComments(data) { data.match(/([^\u2603]*)<\/commentList>/m)[1].split('').forEach(function(x, index) { if(x === "" || x.trim() === "") return; var y = parsexmltag(x.match(/]*>/)[0]); - var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid, texts:[] }; + var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid }; var textMatch = x.match(/([^\u2603]*)<\/text>/m); if (!textMatch || !textMatch[1]) return; // a comment may contain an empty text tag. - textMatch[1].split('').forEach(function(r) { - if(r === "" || r.trim() === "") return; - /* 18.4.12 t ST_Xstring */ - var ct = r.match(matchtag('t')); - comment.texts.push(utf8read(unescapexml(ct[1]))); - // TODO: parse rich text format - }); + var rt = parse_si(textMatch[1]); + comment.raw = rt.raw; + comment.t = rt.t; + comment.r = rt.r; commentList.push(comment); }); return commentList; @@ -1123,7 +1119,7 @@ function insertCommentsIntoSheet(sheetName, sheet, comments) { if (!cell.c) { cell.c = []; } - cell.c.push({a: comment.author, t: comment.texts}); + cell.c.push({a: comment.author, t: comment.t, raw: comment.raw, r: comment.r}); }); } From 9efa4e2aae6fb29a7477dfec884e4498bd9c2f39 Mon Sep 17 00:00:00 2001 From: Hugues Malphettes Date: Tue, 21 Jan 2014 06:05:00 +0700 Subject: [PATCH 5/5] Update the range of the sheet when creating a cell for the content of its comments --- bits/70_xlsx.js | 8 ++++++++ xlsx.js | 8 ++++++++ 2 files changed, 16 insertions(+) diff --git a/bits/70_xlsx.js b/bits/70_xlsx.js index fdbefca..21032b5 100644 --- a/bits/70_xlsx.js +++ b/bits/70_xlsx.js @@ -535,6 +535,14 @@ function insertCommentsIntoSheet(sheetName, sheet, comments) { if (!cell) { cell = {}; sheet[comment.ref] = cell; + var range = decode_range(sheet["!ref"]); + var thisCell = decode_cell(comment.ref); + if(range.s.r > thisCell.r) range.s.r = thisCell.r; + if(range.e.r < thisCell.r) range.e.r = thisCell.r; + if(range.s.c > thisCell.c) range.s.c = thisCell.c; + if(range.e.c < thisCell.c) range.e.c = thisCell.c; + var encoded = encode_range(range); + if (encoded !== sheet["!ref"]) sheet["!ref"] = encoded; } if (!cell.c) { diff --git a/xlsx.js b/xlsx.js index d346cf1..10e0979 100644 --- a/xlsx.js +++ b/xlsx.js @@ -1114,6 +1114,14 @@ function insertCommentsIntoSheet(sheetName, sheet, comments) { if (!cell) { cell = {}; sheet[comment.ref] = cell; + var range = decode_range(sheet["!ref"]); + var thisCell = decode_cell(comment.ref); + if(range.s.r > thisCell.r) range.s.r = thisCell.r; + if(range.e.r < thisCell.r) range.e.r = thisCell.r; + if(range.s.c > thisCell.c) range.s.c = thisCell.c; + if(range.e.c < thisCell.c) range.e.c = thisCell.c; + var encoded = encode_range(range); + if (encoded !== sheet["!ref"]) sheet["!ref"] = encoded; } if (!cell.c) {