From 55ba84d56107395f72eda265283ddcf8f66becd7 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Wed, 1 May 2013 00:55:54 -0400 Subject: [PATCH] version bump 0.2.7-b: formatted string support Mixed formatting stored in the shared string table. Ultimately there will be a function to convert to HTML, but in the interim this extracts plaintext and populates the right fields --- bits/70_xlsx.js | 36 ++++++++++++++++++++++++++++++------ bits/90_utils.js | 2 +- package.json | 2 +- xlsx.js | 38 +++++++++++++++++++++++++++++++------- 4 files changed, 63 insertions(+), 15 deletions(-) diff --git a/bits/70_xlsx.js b/bits/70_xlsx.js index 97bcd2b..7fd7ae9 100644 --- a/bits/70_xlsx.js +++ b/bits/70_xlsx.js @@ -119,8 +119,8 @@ function parsexmltag(tag) { function parsexmlbool(value, tag) { switch(value) { - case '0': case 0: case 'false': case 'FALSE': return false; - case '1': case 1: case 'true': case 'TRUE': return true; + case '0': case 0: case 'false': case 'FALSE': return false; + case '1': case 1: case 'true': case 'TRUE': return true; default: throw "bad boolean value " + value + " in "+(tag||"?"); } } @@ -140,7 +140,7 @@ function parseSheet(data) { var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} }; var q = ["v","f"]; - + var sidx = 0; /* 18.3.1.80 sheetData CT_SheetData ? */ if(!data.match(//)) data.match(/([^\u2603]*)<\/sheetData>/m)[1].split("").forEach(function(x) { @@ -167,7 +167,11 @@ function parseSheet(data) { else p.t = (cell.t ? cell.t : "n"); // default is "n" in schema switch(p.t) { case 'n': p.v = parseFloat(p.v); break; - case 's': p.v = strs[parseInt(p.v, 10)].t; break; + case 's': { + sidx = parseInt(p.v, 10); + p.v = strs[sidx].t; + p.r = strs[sidx].r; + } break; case 'str': if(p.v) p.v = utf8read(p.v); break; // normal string case 'inlineStr': p.t = 'str'; p.v = unescapexml(d.match(matchtag('t'))[1]); @@ -237,13 +241,33 @@ var utf8read = function(orig) { return out; }; +/* 18.4.8 si CT_Rst */ +function parse_si(x) { + var z = {}; + if(!x) return z; + var y; + /* 18.4.12 t ST_Xstring plaintext string */ + if((y = x.match(/^]*>([^\u2603]*)<\/t>$/m))) { + z.t = utf8read(unescapexml(y[1])); + z.r = x; + } + /* 18.4.4 r CT_RElt Rich Text Run */ + else if((y = x.match(//))) { + z.r = x; + /* TODO: properly parse (note: no other valid child can have body text) */ + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); + } + /* TODO: handle rPh and phoneticPr */ + return z; +} + /* 18.4 Shared String Table */ function parseStrs(data) { var s = []; + /* 18.4.9 sst CT_Sst */ var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m")); if(sst) { - s = sst[2].replace(//g,"").split(/<\/si>/).map(function(x) { var z = {}; - var y=x.match(/<(.*)>([\s\S]*)<\/.*/); if(y) z[y[1].split(" ")[0]]=utf8read(unescapexml(y[2])); return z;}); + s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si); sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; } diff --git a/bits/90_utils.js b/bits/90_utils.js index 9064ea0..c25f93f 100644 --- a/bits/90_utils.js +++ b/bits/90_utils.js @@ -66,7 +66,7 @@ function sheet_to_csv(sheet) { var stringify = function stringify(val) { switch(val.t){ case 'n': return String(val.v); - case 's': case 'str': + case 's': case 'str': if(typeof val.v === 'undefined') return ""; return JSON.stringify(val.v); case 'b': return val.v ? "TRUE" : "FALSE"; diff --git a/package.json b/package.json index 172a685..f1be0fe 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "xlsx", - "version": "0.2.7", + "version": "0.2.7-b", "author": "Niggler", "description": "(one day) a full-featured XLSX parser and writer. For now, primitive parser", "keywords": [ diff --git a/xlsx.js b/xlsx.js index 520ac6c..467199b 100644 --- a/xlsx.js +++ b/xlsx.js @@ -343,8 +343,8 @@ function parsexmltag(tag) { function parsexmlbool(value, tag) { switch(value) { - case '0': case 0: case 'false': case 'FALSE': return false; - case '1': case 1: case 'true': case 'TRUE': return true; + case '0': case 0: case 'false': case 'FALSE': return false; + case '1': case 1: case 'true': case 'TRUE': return true; default: throw "bad boolean value " + value + " in "+(tag||"?"); } } @@ -364,7 +364,7 @@ function parseSheet(data) { var refguess = {s: {r:1000000, c:1000000}, e: {r:0, c:0} }; var q = ["v","f"]; - + var sidx = 0; /* 18.3.1.80 sheetData CT_SheetData ? */ if(!data.match(//)) data.match(/([^\u2603]*)<\/sheetData>/m)[1].split("").forEach(function(x) { @@ -391,7 +391,11 @@ function parseSheet(data) { else p.t = (cell.t ? cell.t : "n"); // default is "n" in schema switch(p.t) { case 'n': p.v = parseFloat(p.v); break; - case 's': p.v = strs[parseInt(p.v, 10)].t; break; + case 's': { + sidx = parseInt(p.v, 10); + p.v = strs[sidx].t; + p.r = strs[sidx].r; + } break; case 'str': if(p.v) p.v = utf8read(p.v); break; // normal string case 'inlineStr': p.t = 'str'; p.v = unescapexml(d.match(matchtag('t'))[1]); @@ -461,13 +465,33 @@ var utf8read = function(orig) { return out; }; +/* 18.4.8 si CT_Rst */ +function parse_si(x) { + var z = {}; + if(!x) return z; + var y; + /* 18.4.12 t ST_Xstring plaintext string */ + if((y = x.match(/^]*>([^\u2603]*)<\/t>$/m))) { + z.t = utf8read(unescapexml(y[1])); + z.r = x; + } + /* 18.4.4 r CT_RElt Rich Text Run */ + else if((y = x.match(//))) { + z.r = x; + /* TODO: properly parse (note: no other valid child can have body text) */ + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); + } + /* TODO: handle rPh and phoneticPr */ + return z; +} + /* 18.4 Shared String Table */ function parseStrs(data) { var s = []; + /* 18.4.9 sst CT_Sst */ var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m")); if(sst) { - s = sst[2].replace(//g,"").split(/<\/si>/).map(function(x) { var z = {}; - var y=x.match(/<(.*)>([\s\S]*)<\/.*/); if(y) z[y[1].split(" ")[0]]=utf8read(unescapexml(y[2])); return z;}); + s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si); sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; } @@ -893,7 +917,7 @@ function sheet_to_csv(sheet) { var stringify = function stringify(val) { switch(val.t){ case 'n': return String(val.v); - case 's': case 'str': + case 's': case 'str': if(typeof val.v === 'undefined') return ""; return JSON.stringify(val.v); case 'b': return val.v ? "TRUE" : "FALSE";