From 80a843ca3bf0401c06d46dbe30375a8e3033238a Mon Sep 17 00:00:00 2001 From: kinwah Date: Thu, 16 Jan 2014 11:42:25 +0800 Subject: [PATCH] Add parsing comment codes into bits/70_xlsx.js Update test xlsx file to apachepoi_SimpleWithComments.xlsx --- bits/70_xlsx.js | 150 +++++++++++++++++++++++++++++++++++++++++------- test.js | 2 +- 2 files changed, 131 insertions(+), 21 deletions(-) diff --git a/bits/70_xlsx.js b/bits/70_xlsx.js index bd95026..b950d8d 100644 --- a/bits/70_xlsx.js +++ b/bits/70_xlsx.js @@ -8,6 +8,7 @@ var ct2type = { "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml": "strs", "application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml":"styles", "application/vnd.openxmlformats-officedocument.theme+xml":"themes", + "application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml": "comments", "foo": "bar" }; @@ -241,7 +242,7 @@ var ctext = {}; function parseCT(data) { if(!data || !data.match) return data; var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [], - coreprops: [], extprops: [], strs:[], xmlns: "" }; + coreprops: [], extprops: [], strs:[], comments: [], xmlns: "" }; (data.match(/<[^>]*>/g)||[]).forEach(function(x) { var y = parsexmltag(x); switch(y[0]) { @@ -446,6 +447,104 @@ function parseStyles(data) { return styles; } +/* 9.3.2 OPC Relationships Markup */ +function parseRels(data, currentFilePath) { + if (!data) return data; + if (currentFilePath.charAt(0) !== '/') { + currentFilePath = '/'+currentFilePath; + } + var rels = {}; + + var resolveRelativePathIntoAbsolute = function (to) { +    var toksFrom = currentFilePath.split('/'); + toksFrom.pop(); // folder path +    var toksTo = to.split('/'); +    var reversed = []; +    while (toksTo.length !== 0) { +        var tokTo = toksTo.shift(); +        if (tokTo === '..') { +            toksFrom.pop(); +        } else if (tokTo !== '.') { +            toksFrom.push(tokTo); +        } +    } +    return toksFrom.join('/'); + } + + data.match(/<[^>]*>/g).forEach(function(x) { + var y = parsexmltag(x); + /* 9.3.2.2 OPC_Relationships */ + if (y[0] === '/)) { + throw new Error('Not a valid comments xml'); + } + var authors = []; + var commentList = []; + data.match(/([^\u2603]*)<\/authors>/m)[1].split('').forEach(function(x) { + if(x === "" || x.trim() === "") return; + authors.push(x.match(/]*>(.*)/)[1]); + }); + data.match(/([^\u2603]*)<\/commentList>/m)[1].split('').forEach(function(x, index) { + if(x === "" || x.trim() === "") return; + var y = parsexmltag(x.match(/]*>/)[0]); + var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid, texts:[] }; + x.match(/([^\u2603]*)<\/text>/m)[1].split('').forEach(function(r) { + if(r === "" || r.trim() === "") return; + /* 18.4.12 t ST_Xstring */ + var ct = r.match(matchtag('t')); + comment.texts.push(utf8read(unescapexml(ct[1]))); + // TODO: parse rich text format + }); + commentList.push(comment); + }); + return commentList; +} + +function parseCommentsAddToSheets(zip, dirComments, sheets, sheetRels) { + for(var i = 0; i != dirComments.length; ++i) { + var canonicalpath=dirComments[i]; + var comments=parseComments(getdata(getzipfile(zip, canonicalpath.replace(/^\//,'')))); + // find the sheets targeted by these comments + var sheetNames = Object.keys(sheets); + for(var j = 0; j != sheetNames.length; ++j) { + var sheetName = sheetNames[j]; + var rels = sheetRels[sheetName]; + if (rels) { + var rel = rels[canonicalpath]; + if (rel) { + insertCommentsIntoSheet(sheetName, sheets[sheetName], comments); + } + } + } + } +} + +function insertCommentsIntoSheet(sheetName, sheet, comments) { + comments.forEach(function(comment) { + var cell = sheet[comment.ref]; + if (!cell) { + cell = {}; + sheet[comment.ref] = cell; + } + + if (!cell.c) { + cell.c = []; + } + cell.c.push({a: comment.author, t: comment.texts}); + }); +} + function getdata(data) { if(!data) return null; if(data.data) return data.data; @@ -478,26 +577,37 @@ function parseZip(zip) { var deps = {}; if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,'')))); var sheets = {}, i=0; + var sheetRels = {}; if(!props.Worksheets) { - /* Google Docs doesn't generate the appropriate metadata, so we impute: */ - var wbsheets = wb.Sheets; - props.Worksheets = wbsheets.length; - props.SheetNames = []; - for(var j = 0; j != wbsheets.length; ++j) { - props.SheetNames[j] = wbsheets[j].name; - } - for(i = 0; i != props.Worksheets; ++i) { - try { /* TODO: remove these guards */ - sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml'))); - } catch(e) {} - } - } - else { - for(i = 0; i != props.Worksheets; ++i) { - try { - sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,'')))); - } catch(e) {} - } + /* Google Docs doesn't generate the appropriate metadata, so we impute: */ + var wbsheets = wb.Sheets; + props.Worksheets = wbsheets.length; + props.SheetNames = []; + for(var j = 0; j != wbsheets.length; ++j) { + props.SheetNames[j] = wbsheets[j].name; + } + for(i = 0; i != props.Worksheets; ++i) { + try { /* TODO: remove these guards */ + var path = 'xl/worksheets/sheet' + (i+1) + '.xml'; + var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); + sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path))); + sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); + } catch(e) {} + } + } + else { + for(i = 0; i != props.Worksheets; ++i) { + try { + var path = dir.sheets[i].replace(/^\//,''); + var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels"); + sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path))); + sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path); + } catch(e) {} + } + } + + if(dir.comments) { + parseCommentsAddToSheets(zip, dir.comments, sheets, sheetRels); } return { Directory: dir, diff --git a/test.js b/test.js index d4b0438..98395a6 100644 --- a/test.js +++ b/test.js @@ -42,7 +42,7 @@ describe('should parse test files', function() { describe('should have comment as part of cell\'s properties', function(){ it('Parse comments.xml and insert into cell',function(){ - var wb = XLSX.readFile('./test_files/SimpleWithComments.xlsx'); + var wb = XLSX.readFile('./test_files/apachepoi_SimpleWithComments.xlsx'); var sheetName = 'Sheet1'; var ws = wb.Sheets[sheetName]; assert.equal(ws.B1.c.length, 1,"must have 1 comment");