From 59d9d9086bd98f026cd3c5a0dbc1079217620ce2 Mon Sep 17 00:00:00 2001
From: kinwah <kinwah.lai@odd-e.com>
Date: Wed, 15 Jan 2014 15:26:00 +0800
Subject: [PATCH] Support for parsing Comments

Comments parts listed in [Content_Types].xml are parsed.
Sheets' relationships are parsed.
Comments parts are correlated to their corresponding sheet parts.
Comments' contents are added to the referenced cells.
Rich text styling properties are currently ignored.

For example:
{
  "!ref": "A1:B3",
  "A1": {
    "v": 1,
    "t": "n"
  },
  "B1": {
    "v": "one",
    "t": "s",
    "r": "one",
    "c": [
      { "a": "Yegor Kozlov",
       "t": [ "Yegor Kozlov:",
              "\r\nfirst cell" ]
      }
    ]
  }
}
---
 test.js |  11 +++++
 xlsx.js | 150 ++++++++++++++++++++++++++++++++++++++++++++++++--------
 2 files changed, 141 insertions(+), 20 deletions(-)
diff --git a/test.js b/test.js
index 88e1f72..d4b0438 100644
--- a/test.js
+++ b/test.js
@@ -39,3 +39,14 @@ describe('should parse test files', function() {
 		});
 	});
 });
+
+describe('should have comment as part of cell\'s properties', function(){ // checks that parsed comments end up on the commented cell
+	it('Parse comments.xml and insert into cell',function(){
+		var wb = XLSX.readFile('./test_files/SimpleWithComments.xlsx'); // fixture workbook; the assertions below expect a comment on B1
+		var sheetName = 'Sheet1';
+		var ws = wb.Sheets[sheetName];
+		assert.equal(ws.B1.c.length, 1,"must have 1 comment"); // c: array of comments attached to the cell
+		assert.equal(ws.B1.c[0].t.length, 2,"must have 2 texts"); // t: array of the comment's text strings
+		assert.equal(ws.B1.c[0].a, 'Yegor Kozlov',"must have the same author"); // a: author name of the comment
+	});
+});
diff --git a/xlsx.js b/xlsx.js
index 3b2a05d..e747da0 100644
--- a/xlsx.js
+++ b/xlsx.js
@@ -588,6 +588,7 @@ var ct2type = {
 	"application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml": "strs",
 	"application/vnd.openxmlformats-officedocument.spreadsheetml.styles+xml":"styles",
 	"application/vnd.openxmlformats-officedocument.theme+xml":"themes",
+	"application/vnd.openxmlformats-officedocument.spreadsheetml.comments+xml": "comments",
 	"foo": "bar"
 };
 
@@ -821,7 +822,7 @@ var ctext = {};
 function parseCT(data) {
 	if(!data || !data.match) return data;
 	var ct = { workbooks: [], sheets: [], calcchains: [], themes: [], styles: [],
-		coreprops: [], extprops: [], strs:[], xmlns: "" };
+		coreprops: [], extprops: [], strs:[], comments: [], xmlns: "" };
 	(data.match(/<[^>]*>/g)||[]).forEach(function(x) {
 		var y = parsexmltag(x);
 		switch(y[0]) {
@@ -1026,6 +1027,104 @@ function parseStyles(data) {
 	return styles;
 }
 
+/* 9.3.2 OPC Relationships Markup */
+/* Parses a .rels part into a map from each relationship's absolute target path to {Type, Target, Id, TargetMode}. */
+/* data: XML text of the part (falsy input is returned unchanged); currentFilePath: path of the part owning the relationships. */
+function parseRels(data, currentFilePath) {
+	if (!data) return data;
+	if (currentFilePath.charAt(0) !== '/') currentFilePath = '/'+currentFilePath;
+	var rels = {};
+
+	/* Resolves a Target against the folder of currentFilePath, collapsing '.' and '..' segments. */
+	var resolveRelativePathIntoAbsolute = function (to) {
+		var isAbsolute = to.charAt(0) === '/'; // a leading '/' means the target is already rooted at the package
+		var toksFrom = isAbsolute ? [''] : currentFilePath.split('/');
+		if (!isAbsolute) toksFrom.pop(); // drop the file name, keeping the folder path
+		var toksTo = (isAbsolute ? to.slice(1) : to).split('/');
+		while (toksTo.length !== 0) {
+			var tokTo = toksTo.shift();
+			if (tokTo === '..') {
+				toksFrom.pop();
+			} else if (tokTo !== '.') {
+				toksFrom.push(tokTo);
+			}
+		}
+		return toksFrom.join('/');
+	};
+
+	/* match() returns null when the part contains no tags, hence the empty-array fallback */
+	(data.match(/<[^>]*>/g)||[]).forEach(function(x) {
+		var y = parsexmltag(x);
+		/* 9.3.2.2 OPC_Relationships; a Relationship without a Target cannot be resolved and is skipped */
+		if (y[0] === '<Relationship' && y.Target) {
+			var rel = {}; rel.Type = y.Type; rel.Target = y.Target; rel.Id = y.Id; rel.TargetMode = y.TargetMode;
+			rels[resolveRelativePathIntoAbsolute(y.Target)] = rel;
+		}
+	});
+	return rels;
+}
+
+/* 18.7.3 CT_Comment */
+/* Parses a comments part into an array of {author, ref, guid, texts}; absent data or absent authors/commentList/text elements yield empty results, a self-closing comments element throws. */
+function parseComments(data) {
+	if(!data) return [];
+	if(data.match(/<comments *\/>/)) throw new Error('Not a valid comments xml');
+	var authors = [], commentList = [];
+	/* inner markup of the <name>...</name> element of xml, or "" when it is absent; [\s\S] also spans newlines */
+	var inner = function(xml, name) { var m = xml.match(new RegExp('<'+name+'>([\\s\\S]*)<\\/'+name+'>')); return m ? m[1] : ""; };
+	inner(data, 'authors').split('</author>').forEach(function(x) {
+		var a = x.match(/<author[^>]*>(.*)/);
+		if(a) authors.push(utf8read(unescapexml(a[1]))); // names are XML-escaped like the comment text, so decode them the same way
+	});
+	inner(data, 'commentList').split('</comment>').forEach(function(x) {
+		var tag = x.match(/<comment[^>]*>/);
+		if(!tag) return; // e.g. the whitespace left over after the last </comment>
+		var y = parsexmltag(tag[0]);
+		var comment = { author: y.authorId && authors[y.authorId] ? authors[y.authorId] : undefined, ref: y.ref, guid: y.guid, texts:[] };
+		inner(x, 'text').split('</r>').forEach(function(r) {
+			var ct = r.match(matchtag('t')); /* 18.4.12 t ST_Xstring */
+			if(ct) comment.texts.push(utf8read(unescapexml(ct[1]))); // fragments without a <t> carry no text
+			// TODO: parse rich text format
+		});
+		commentList.push(comment);
+	});
+	return commentList;
+}
+
+/* Parses every comments part listed in dirComments and attaches its comments to each sheet whose relationships target that part. */
+function parseCommentsAddToSheets(zip, dirComments, sheets, sheetRels) {
+	var sheetNames = Object.keys(sheets); // loop-invariant, so computed once
+	for(var i = 0; i !== dirComments.length; ++i) {
+		var canonicalpath=dirComments[i];
+		var data=getdata(getzipfile(zip, canonicalpath.replace(/^\//,'')));
+		if(!data) continue; // getdata yields null when the entry has no data; skip instead of aborting the whole parse
+		var comments=parseComments(data);
+		// find the sheets targeted by these comments: sheetRels is keyed by sheet name, each entry by target path
+		for(var j = 0; j !== sheetNames.length; ++j) {
+			var sheetName = sheetNames[j];
+			var rels = sheetRels[sheetName];
+			if (rels && rels[canonicalpath]) {
+				insertCommentsIntoSheet(sheetName, sheets[sheetName], comments);
+			}
+		}
+	}
+}
+
+function insertCommentsIntoSheet(sheetName, sheet, comments) { // appends each comment to the c array of the cell at comment.ref; sheetName is not used in the body
+	comments.forEach(function(comment) {
+		var cell = sheet[comment.ref]; // cells are looked up by the comment's ref value
+		if (!cell) { // nothing stored at that ref yet: create an empty cell to carry the comment
+			cell = {};
+			sheet[comment.ref] = cell;
+		} 
+
+		if (!cell.c) { // first comment on this cell
+			cell.c = [];
+		}
+		cell.c.push({a: comment.author, t: comment.texts}); // a: comment.author, t: comment.texts (array of text strings)
+	});
+}
+
 function getdata(data) {
 	if(!data) return null; 
 	if(data.data) return data.data;
@@ -1058,26 +1157,37 @@ function parseZip(zip) {
 	var deps = {};
 	if(dir.calcchain) deps=parseDeps(getdata(getzipfile(zip, dir.calcchain.replace(/^\//,''))));
 	var sheets = {}, i=0;
+	var sheetRels = {};	
 	if(!props.Worksheets) {
-		/* Google Docs doesn't generate the appropriate metadata, so we impute: */
-		var wbsheets = wb.Sheets;
-		props.Worksheets = wbsheets.length;
-		props.SheetNames = [];
-		for(var j = 0; j != wbsheets.length; ++j) {
-			props.SheetNames[j] = wbsheets[j].name;
-		}
-		for(i = 0; i != props.Worksheets; ++i) {
-			try { /* TODO: remove these guards */ 
-			sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, 'xl/worksheets/sheet' + (i+1) + '.xml')));
-			} catch(e) {}
-		}
-	}
-	else {
-		for(i = 0; i != props.Worksheets; ++i) {
-			try { 
-			sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, dir.sheets[i].replace(/^\//,''))));
-			} catch(e) {}
-		}
+        /* Google Docs doesn't generate the appropriate metadata, so we impute: */
+        var wbsheets = wb.Sheets;
+        props.Worksheets = wbsheets.length;
+        props.SheetNames = [];
+        for(var j = 0; j != wbsheets.length; ++j) {
+                props.SheetNames[j] = wbsheets[j].name;
+        }
+        for(i = 0; i != props.Worksheets; ++i) {
+                try { /* TODO: remove these guards */
+	                var path = 'xl/worksheets/sheet' + (i+1) + '.xml';
+	                var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels");
+	                sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path)));
+	                sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path);
+                } catch(e) {}
+        }
+    }
+    else {
+        for(i = 0; i != props.Worksheets; ++i) {
+            try {
+            	var path = dir.sheets[i].replace(/^\//,'');
+				var relsPath = path.replace(/^(.*)(\/)([^\/]*)$/, "$1/_rels/$3.rels");
+            	sheets[props.SheetNames[i]]=parseSheet(getdata(getzipfile(zip, path)));
+            	sheetRels[props.SheetNames[i]]=parseRels(getdata(getzipfile(zip, relsPath)), path);
+            } catch(e) {}
+        }
+    }
+
+	if(dir.comments) {
+		parseCommentsAddToSheets(zip, dir.comments, sheets, sheetRels);
 	}
 	return {
 		Directory: dir,