Support for parsing Comments

Comments parts listed in the [Content Types] are parsed. Sheets's relationships are parsed. Comments parts are correlated to their corresponding sheets parts. Comments's contents are added to the ref'ed cells. Rich text styling properties are currently ignored. For example: { "!ref": "A1:B3", "A1": { "v": 1, "t": "n" }, "B1": { "v": "one", "t": "s", "r": "one", "c": [ { "a": "Yegor Kozlov", "t": [ "Yegor Kozlov:", "\r\nfirst cell" ] } ] } }
2014-01-15 15:26:00 +08:00 · 2014-01-15 15:26:00 +08:00 · 59d9d9086b
commit 59d9d9086b
parent e298cc8dd3
2 changed files with 141 additions and 20 deletions
--- a/test.js
+++ b/test.js
@ -39,3 +39,14 @@ describe('should parse test files', function() {
 		});
 	});
 });
+
+describe('should have comment as part of cell\'s properties', function(){
+	it('Parse comments.xml and insert into cell',function(){
+		var wb = XLSX.readFile('./test_files/SimpleWithComments.xlsx');
+		var sheetName = 'Sheet1';
+		var ws = wb.Sheets[sheetName];
+		assert.equal(ws.B1.c.length, 1,"must have 1 comment");
+		assert.equal(ws.B1.c[0].t.length, 2,"must have 2 texts");
+		assert.equal(ws.B1.c[0].a, 'Yegor Kozlov',"must have the same author");
+	});
+});
--- a/xlsx.js
+++ b/xlsx.js
@ -588,6 +588,7 @@ var ct2type = {
 	"application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml": "strs",
 	"application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml":"styles",
 	"application/vnd.openxmlformats-officedocument.theme+xml":"themes",
+	"application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml": "comments",
 	"foo": "bar"
 };

@ -821,7 +822,7 @@ var ctext = {};
 function parseCT(data) {
 	if(!data || !data.match) return data;
 	var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [],
-		coreprops: [], extprops: [], strs:[], xmlns: "" };
+		coreprops: [], extprops: [], strs:[], comments: [], xmlns: "" };
 	(data.match(/<[^>]*>/g)||[]).forEach(function(x) {
 		var y = parsexmltag(x);
 		switch(y[0]) {
@ -1026,6 +1027,104 @@ function parseStyles(data) {
 	return styles;
 }

+/* 9.3.2 OPC Relationships Markup */
+function parseRels(data, currentFilePath) {
+	if (!data) return data;
+	if (currentFilePath.charAt(0) !== '/') {
+		currentFilePath = '/'+currentFilePath;
+	}
+	var rels = {};
+
+	var resolveRelativePathIntoAbsolute = function (to) {
+	    var toksFrom = currentFilePath.split('/');
+	 	toksFrom.pop(); // folder path
+	    var toksTo = to.split('/');
+	    var reversed = [];
+	    while (toksTo.length !== 0) {
+	        var tokTo = toksTo.shift();
+	        if (tokTo === '..') {
+	            toksFrom.pop();
+	        } else if (tokTo !== '.') {
+	            toksFrom.push(tokTo);
+	        }
+	    }
+	    return toksFrom.join('/');
+	}
+
+	data.match(/<[^>]*>/g).forEach(function(x) {
+		var y = parsexmltag(x);
+		/* 9.3.2.2 OPC_Relationships */
+		if (y[0] === '<Relationship') {
+			var rel = {}; rel.Type = y.Type; rel.Target = y.Target; rel.Id = y.Id; rel.TargetMode = y.TargetMode;
+			var canonictarget = resolveRelativePathIntoAbsolute(y.Target);
+			rels[canonictarget] = rel;
+		}
+	});
+
+	return rels;
+}
+
+/* 18.7.3 CT_Comment */
+function parseComments(data) {
+	if(data.match(/<comments *\/>/)) {
+		throw new Error('Not a valid comments xml');
+	}
+	var authors = [];
+	var commentList = [];
+	data.match(/<authors>([^\u2603]*)<\/authors>/m)[1].split('</author>').forEach(function(x) {
+		if(x === "" || x.trim() === "") return;
+		authors.push(x.match(/<author[^>]*>(.*)/)[1]);
+	});
+	data.match(/<commentList>([^\u2603]*)<\/commentList>/m)[1].split('</comment>').forEach(function(x, index) {
+		if(x === "" || x.trim() === "") return;
+		var y = parsexmltag(x.match(/<comment[^>]*>/)[0]);
+		var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid, texts:[] };
+		x.match(/<text>([^\u2603]*)<\/text>/m)[1].split('</r>').forEach(function(r) {
+			if(r === "" || r.trim() === "") return;
+			/* 18.4.12 t ST_Xstring */
+			var ct = r.match(matchtag('t'));
+			comment.texts.push(utf8read(unescapexml(ct[1])));
+			// TODO: parse rich text format
+		});
+		commentList.push(comment);
+	});
+	return commentList;
+}
+
+function parseCommentsAddToSheets(zip, dirComments, sheets, sheetRels) {
+	for(var i = 0; i != dirComments.length; ++i) {
+		var canonicalpath=dirComments[i];
+		var comments=parseComments(getdata(getzipfile(zip, canonicalpath.replace(/^\//,''))));
+		// find the sheets targeted by these comments
+		var sheetNames = Object.keys(sheets);
+		for(var j = 0; j != sheetNames.length; ++j) {
+			var sheetName = sheetNames[j];
+			var rels = sheetRels[sheetName];
+			if (rels) {
+				var rel = rels[canonicalpath];
+				if (rel) {
+					insertCommentsIntoSheet(sheetName, sheets[sheetName], comments);
+				}
+			}
+		}
+	}	
+}
+
+function insertCommentsIntoSheet(sheetName, sheet, comments) {
+	comments.forEach(function(comment) {
+		var cell = sheet[comment.ref];
+		if (!cell) {
+			cell = {};
+			sheet[comment.ref] = cell;
+		} 
+
+		if (!cell.c) {
+			cell.c = [];
+		}
+		cell.c.push({a: comment.author, t: comment.texts});
+	});
+}
+
 function getdata(data) {
 	if(!data) return null; 
 	if(data.data) return data.data;
@ -1058,26 +1157,37 @@ function parseZip(zip) {
 	var deps = {};
 	if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
 	var sheets = {}, i=0;
+	var sheetRels = {};	
 	if(!props.Worksheets) {
-		/* Google Docs doesn't generate the appropriate metadata, so we impute: */
-		var wbsheets = wb.Sheets;
-		props.Worksheets = wbsheets.length;
-		props.SheetNames = [];
-		for(var j = 0; j != wbsheets.length; ++j) {
-			props.SheetNames[j] = wbsheets[j].name;
-		}
-		for(i = 0; i != props.Worksheets; ++i) {
-			try { /* TODO: remove these guards */ 
-			sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml')));
-			} catch(e) {}
-		}
-	}
-	else {
-		for(i = 0; i != props.Worksheets; ++i) {
-			try { 
-			sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,''))));
-			} catch(e) {}
-		}
+        /* Google Docs doesn't generate the appropriate metadata, so we impute: */
+        var wbsheets = wb.Sheets;
+        props.Worksheets = wbsheets.length;
+        props.SheetNames = [];
+        for(var j = 0; j != wbsheets.length; ++j) {
+                props.SheetNames[j] = wbsheets[j].name;
+        }
+        for(i = 0; i != props.Worksheets; ++i) {
+                try { /* TODO: remove these guards */
+	                var path = 'xl/worksheets/sheet' + (i+1) + '.xml';
+	                var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels");
+	                sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path)));
+	                sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path);
+                } catch(e) {}
+        }
+    }
+    else {
+        for(i = 0; i != props.Worksheets; ++i) {
+            try {
+            	var path = dir.sheets[i].replace(/^\//,'');
+				var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels");
+            	sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path)));
+            	sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path);
+            } catch(e) {}
+        }
+    }
+
+	if(dir.comments) {
+		parseCommentsAddToSheets(zip, dir.comments, sheets, sheetRels);
 	}
 	return {
 		Directory: dir,