version bump 0.2.7-c: basic html from sst

Very basic rendering.  Will ultimately be a separate component (to share code
with the more general formatting case)
This commit is contained in:
SheetJS 2013-05-02 12:59:27 -04:00
parent 55ba84d561
commit 81f89d6f4e
7 changed files with 419 additions and 207 deletions

1
bits/50_xlsxheader.js Normal file

@ -0,0 +1 @@
var XLSX = (function(){

66
bits/51_xlsxutils.js Normal file

@ -0,0 +1,66 @@
function parsexmltag(tag) {
var words = tag.split(/\s+/);
var z = {'0': words[0]};
if(words.length === 1) return z;
tag.match(/(\w+)="([^"]*)"/g).map(
function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; });
return z;
}
var encodings = {
'"': '"',
''': "'",
'>': '>',
'&lt;': '<',
'&amp;': '&'
};
// TODO: CP remap (need to read file version to determine OS)
function unescapexml(text){
var s = text + '';
for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]);
return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));});
}
function parsexmlbool(value, tag) {
switch(value) {
case '0': case 0: case 'false': case 'FALSE': return false;
case '1': case 1: case 'true': case 'TRUE': return true;
default: throw "bad boolean value " + value + " in "+(tag||"?");
}
}
var utf8read = function(orig) {
var out = "", i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
while (i < orig.length) {
c = orig.charCodeAt(i++);
if (c < 128) out += _chr(c);
else {
c2 = orig.charCodeAt(i++);
if (c>191 && c<224) out += _chr((c & 31) << 6 | c2 & 63);
else {
c3 = orig.charCodeAt(i++);
out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
}
}
}
return out;
};
// matches <foo>...</foo> extracts content
function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)</'+f+'>',(g||"")+"m");}
function parseVector(data) {
var h = parsexmltag(data);
var matches = data.match(new RegExp("<vt:" + h.baseType + ">(.*?)</vt:" + h.baseType + ">", 'g'));
if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size;
var res = [];
matches.forEach(function(x) {
var v = x.replace(/<[/]?vt:variant>/g,"").match(/<vt:([^>]*)>(.*)</);
res.push({v:v[2], t:v[1]});
});
return res;
}
function isval(x) { return typeof x !== "undefined" && x !== null; }

139
bits/65_sst.js Normal file

@ -0,0 +1,139 @@
/* 18.4 Shared String Table */
var parse_sst = (function(){
var tregex = matchtag("t"), rpregex = matchtag("rPr");
/* Parse a list of <r> tags */
var parse_rs = (function() {
/* 18.4.7 rPr CT_RPrElt */
var parse_rpr = function(rpr, intro, outro) {
var font = {};
rpr.match(/<[^>]*>/g).forEach(function(x) {
var y = parsexmltag(x);
switch(y[0]) {
/* 18.8.12 condense CT_BooleanProperty */
/* ** not required . */
case '<condense': break;
/* 18.8.17 extend CT_BooleanProperty */
/* ** not required . */
case '<extend': break;
/* 18.8.36 shadow CT_BooleanProperty */
/* ** not required . */
case '<shadow': break;
/* 18.4.1 charset CT_IntProperty TODO */
case '<charset': break;
/* 18.4.2 outline CT_BooleanProperty TODO */
case '<outline': break;
/* 18.4.5 rFont CT_FontName */
case '<rFont': font.name = y.val; break;
/* 18.4.11 sz CT_FontSize */
case '<sz': font.sz = y.val; break;
/* 18.4.10 strike CT_BooleanProperty */
case '<strike':
if(!y.val) break;
/* falls through */
case '<strike/>': font.strike = 1; break;
case '</strike>': break;
/* 18.4.13 u CT_UnderlineProperty */
case '<u':
if(!y.val) break;
/* falls through */
case '<u/>': font.u = 1; break;
case '</u>': break;
/* 18.8.2 b */
case '<b':
if(!y.val) break;
/* falls through */
case '<b/>': font.b = 1; break;
case '</b>': break;
/* 18.8.26 i */
case '<i':
if(!y.val) break;
/* falls through */
case '<i/>': font.i = 1; break;
case '</i>': break;
/* 18.3.1.15 color CT_Color TODO: tint, theme, auto, indexed */
case '<color':
if(y.rgb) font.color = y.rgb.substr(2,6);
break;
/* 18.8.18 family ST_FontFamily */
case '<family': font.family = y.val; break;
/* 18.4.14 vertAlign CT_VerticalAlignFontProperty TODO */
case '<vertAlign': break;
/* 18.8.35 scheme CT_FontScheme TODO */
case '<scheme': break;
default:
if(y[0][2] !== '/') throw 'Unrecognized rich format ' + y[0];
}
});
/* TODO: These should be generated styles, not inline */
var style = [];
if(font.b) style.push("font-weight: bold;");
if(font.i) style.push("font-style: italic;");
intro.push('<span style="' + style.join("") + '">');
outro.push("</span>");
};
/* 18.4.4 r CT_RElt */
function parse_r(r) {
var terms = [[],"",[]];
/* 18.4.12 t ST_Xstring */
var t = r.match(tregex);
if(!isval(t)) return "";
terms[1] = t[1];
var rpr = r.match(rpregex);
if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]);
return terms[0].join("") + terms[1].replace(/\r\n/g,'<br/>') + terms[2].join("");
}
return function(rs) {
return rs.replace(/<r>/g,"").split(/<\/r>/).map(parse_r).join("");
};
})();
/* 18.4.8 si CT_Rst */
var parse_si = function(x) {
var z = {};
if(!x) return z;
var y;
/* 18.4.12 t ST_Xstring (Plaintext String) */
if(x[1] === 't') {
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,"")));
z.raw = x;
z.r = z.t;
}
/* 18.4.4 r CT_RElt (Rich Text Run) */
else if((y = x.match(/<r>/))) {
z.raw = x;
/* TODO: properly parse (note: no other valid child can have body text) */
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,"")));
z.r = parse_rs(x);
}
/* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */
/* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */
return z;
};
return function(data) {
var s = [];
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(sst) {
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si);
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
};
})();

@ -1,5 +1,4 @@
var XLSX = (function(){
var ct2type = {
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml": "workbooks",
"application/vnd.openxmlformats-package.core-properties+xml": "coreprops",
@ -93,38 +92,6 @@ var CustomWBViewDef = {
var XMLNS_CT = 'http://schemas.openxmlformats.org/package/2006/content-types';
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
var encodings = {
'&quot;': '"',
'&apos;': "'",
'&gt;': '>',
'&lt;': '<',
'&amp;': '&'
};
// TODO: CP remap (need to read file version to determine OS)
function unescapexml(text){
var s = text + '';
for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]);
return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));});
}
function parsexmltag(tag) {
var words = tag.split(/\s+/);
var z = {'0': words[0]};
if(words.length === 1) return z;
tag.match(/(\w+)="([^"]*)"/g).map(
function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; });
return z;
}
function parsexmlbool(value, tag) {
switch(value) {
case '0': case 0: case 'false': case 'FALSE': return false;
case '1': case 1: case 'true': case 'TRUE': return true;
default: throw "bad boolean value " + value + " in "+(tag||"?");
}
}
var strs = {}; // shared strings
var styles = {}; // shared styles
var _ssfopts = {}; // spreadsheet formatting options
@ -207,73 +174,6 @@ function parseSheet(data) {
return s;
}
// matches <foo>...</foo> extracts content
function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)</'+f+'>',(g||"")+"m");}
function parseVector(data) {
var h = parsexmltag(data);
var matches = data.match(new RegExp("<vt:" + h.baseType + ">(.*?)</vt:" + h.baseType + ">", 'g'));
if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size;
var res = [];
matches.forEach(function(x) {
var v = x.replace(/<[/]?vt:variant>/g,"").match(/<vt:([^>]*)>(.*)</);
res.push({v:v[2], t:v[1]});
});
return res;
}
var utf8read = function(orig) {
var out = "", i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
while (i < orig.length) {
c = orig.charCodeAt(i++);
if (c < 128) out += _chr(c);
else {
c2 = orig.charCodeAt(i++);
if (c>191 && c<224) out += _chr((c & 31) << 6 | c2 & 63);
else {
c3 = orig.charCodeAt(i++);
out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
}
}
}
return out;
};
/* 18.4.8 si CT_Rst */
function parse_si(x) {
var z = {};
if(!x) return z;
var y;
/* 18.4.12 t ST_Xstring plaintext string */
if((y = x.match(/^<t[^>]*>([^\u2603]*)<\/t>$/m))) {
z.t = utf8read(unescapexml(y[1]));
z.r = x;
}
/* 18.4.4 r CT_RElt Rich Text Run */
else if((y = x.match(/<r>/))) {
z.r = x;
/* TODO: properly parse (note: no other valid child can have body text) */
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,"")));
}
/* TODO: handle rPh and phoneticPr */
return z;
}
/* 18.4 Shared String Table */
function parseStrs(data) {
var s = [];
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("<sst ([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(sst) {
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si);
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
}
function parseProps(data) {
var p = { Company:'' }, q = {};
var strings = ["Application", "DocSecurity", "Company", "AppVersion"];
@ -549,7 +449,7 @@ function parseZip(zip) {
var dir = parseCT((zip.files['[Content_Types].xml']||{}).data);
strs = {};
if(dir.sst) strs=parseStrs(zip.files[dir.sst.replace(/^\//,'')].data);
if(dir.sst) strs=parse_sst(zip.files[dir.sst.replace(/^\//,'')].data);
styles = {};
if(dir.style) styles = parseStyles(zip.files[dir.style.replace(/^\//,'')].data);
@ -621,7 +521,3 @@ function readFileSync(data, options) {
this.read = readSync;
this.readFile = readFileSync;
this.parseZip = parseZip;
return this;
})();

4
bits/89_xlsxfooter.js Normal file

@ -0,0 +1,4 @@
return this;
})();

@ -1,6 +1,6 @@
{
"name": "xlsx",
"version": "0.2.7-b",
"version": "0.2.7-c",
"author": "Niggler",
"description": "(one day) a full-featured XLSX parser and writer. For now, primitive parser",
"keywords": [

308
xlsx.js

@ -222,8 +222,213 @@ SSF.format = format;
return SSF;
})();
var XLSX = (function(){
function parsexmltag(tag) {
var words = tag.split(/\s+/);
var z = {'0': words[0]};
if(words.length === 1) return z;
tag.match(/(\w+)="([^"]*)"/g).map(
function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; });
return z;
}
var encodings = {
'&quot;': '"',
'&apos;': "'",
'&gt;': '>',
'&lt;': '<',
'&amp;': '&'
};
// TODO: CP remap (need to read file version to determine OS)
function unescapexml(text){
var s = text + '';
for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]);
return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));});
}
function parsexmlbool(value, tag) {
switch(value) {
case '0': case 0: case 'false': case 'FALSE': return false;
case '1': case 1: case 'true': case 'TRUE': return true;
default: throw "bad boolean value " + value + " in "+(tag||"?");
}
}
var utf8read = function(orig) {
var out = "", i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
while (i < orig.length) {
c = orig.charCodeAt(i++);
if (c < 128) out += _chr(c);
else {
c2 = orig.charCodeAt(i++);
if (c>191 && c<224) out += _chr((c & 31) << 6 | c2 & 63);
else {
c3 = orig.charCodeAt(i++);
out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
}
}
}
return out;
};
// matches <foo>...</foo> extracts content
function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)</'+f+'>',(g||"")+"m");}
function parseVector(data) {
var h = parsexmltag(data);
var matches = data.match(new RegExp("<vt:" + h.baseType + ">(.*?)</vt:" + h.baseType + ">", 'g'));
if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size;
var res = [];
matches.forEach(function(x) {
var v = x.replace(/<[/]?vt:variant>/g,"").match(/<vt:([^>]*)>(.*)</);
res.push({v:v[2], t:v[1]});
});
return res;
}
function isval(x) { return typeof x !== "undefined" && x !== null; }
/* 18.4 Shared String Table */
var parse_sst = (function(){
var tregex = matchtag("t"), rpregex = matchtag("rPr");
/* Parse a list of <r> tags */
var parse_rs = (function() {
/* 18.4.7 rPr CT_RPrElt */
var parse_rpr = function(rpr, intro, outro) {
var font = {};
rpr.match(/<[^>]*>/g).forEach(function(x) {
var y = parsexmltag(x);
switch(y[0]) {
/* 18.8.12 condense CT_BooleanProperty */
/* ** not required . */
case '<condense': break;
/* 18.8.17 extend CT_BooleanProperty */
/* ** not required . */
case '<extend': break;
/* 18.8.36 shadow CT_BooleanProperty */
/* ** not required . */
case '<shadow': break;
/* 18.4.1 charset CT_IntProperty TODO */
case '<charset': break;
/* 18.4.2 outline CT_BooleanProperty TODO */
case '<outline': break;
/* 18.4.5 rFont CT_FontName */
case '<rFont': font.name = y.val; break;
/* 18.4.11 sz CT_FontSize */
case '<sz': font.sz = y.val; break;
/* 18.4.10 strike CT_BooleanProperty */
case '<strike':
if(!y.val) break;
/* falls through */
case '<strike/>': font.strike = 1; break;
case '</strike>': break;
/* 18.4.13 u CT_UnderlineProperty */
case '<u':
if(!y.val) break;
/* falls through */
case '<u/>': font.u = 1; break;
case '</u>': break;
/* 18.8.2 b */
case '<b':
if(!y.val) break;
/* falls through */
case '<b/>': font.b = 1; break;
case '</b>': break;
/* 18.8.26 i */
case '<i':
if(!y.val) break;
/* falls through */
case '<i/>': font.i = 1; break;
case '</i>': break;
/* 18.3.1.15 color CT_Color TODO: tint, theme, auto, indexed */
case '<color':
if(y.rgb) font.color = y.rgb.substr(2,6);
break;
/* 18.8.18 family ST_FontFamily */
case '<family': font.family = y.val; break;
/* 18.4.14 vertAlign CT_VerticalAlignFontProperty TODO */
case '<vertAlign': break;
/* 18.8.35 scheme CT_FontScheme TODO */
case '<scheme': break;
default:
if(y[0][2] !== '/') throw 'Unrecognized rich format ' + y[0];
}
});
/* TODO: These should be generated styles, not inline */
var style = [];
if(font.b) style.push("font-weight: bold;");
if(font.i) style.push("font-style: italic;");
intro.push('<span style="' + style.join("") + '">');
outro.push("</span>");
};
/* 18.4.4 r CT_RElt */
function parse_r(r) {
var terms = [[],"",[]];
/* 18.4.12 t ST_Xstring */
var t = r.match(tregex);
if(!isval(t)) return "";
terms[1] = t[1];
var rpr = r.match(rpregex);
if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]);
return terms[0].join("") + terms[1].replace(/\r\n/g,'<br/>') + terms[2].join("");
}
return function(rs) {
return rs.replace(/<r>/g,"").split(/<\/r>/).map(parse_r).join("");
};
})();
/* 18.4.8 si CT_Rst */
var parse_si = function(x) {
var z = {};
if(!x) return z;
var y;
/* 18.4.12 t ST_Xstring (Plaintext String) */
if(x[1] === 't') {
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,"")));
z.raw = x;
z.r = z.t;
}
/* 18.4.4 r CT_RElt (Rich Text Run) */
else if((y = x.match(/<r>/))) {
z.raw = x;
/* TODO: properly parse (note: no other valid child can have body text) */
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,"")));
z.r = parse_rs(x);
}
/* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */
/* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */
return z;
};
return function(data) {
var s = [];
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(sst) {
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si);
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
};
})();
var ct2type = {
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml": "workbooks",
"application/vnd.openxmlformats-package.core-properties+xml": "coreprops",
@ -317,38 +522,6 @@ var CustomWBViewDef = {
var XMLNS_CT = 'http://schemas.openxmlformats.org/package/2006/content-types';
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
var encodings = {
'&quot;': '"',
'&apos;': "'",
'&gt;': '>',
'&lt;': '<',
'&amp;': '&'
};
// TODO: CP remap (need to read file version to determine OS)
function unescapexml(text){
var s = text + '';
for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]);
return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));});
}
function parsexmltag(tag) {
var words = tag.split(/\s+/);
var z = {'0': words[0]};
if(words.length === 1) return z;
tag.match(/(\w+)="([^"]*)"/g).map(
function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; });
return z;
}
function parsexmlbool(value, tag) {
switch(value) {
case '0': case 0: case 'false': case 'FALSE': return false;
case '1': case 1: case 'true': case 'TRUE': return true;
default: throw "bad boolean value " + value + " in "+(tag||"?");
}
}
var strs = {}; // shared strings
var styles = {}; // shared styles
var _ssfopts = {}; // spreadsheet formatting options
@ -431,73 +604,6 @@ function parseSheet(data) {
return s;
}
// matches <foo>...</foo> extracts content
function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)</'+f+'>',(g||"")+"m");}
function parseVector(data) {
var h = parsexmltag(data);
var matches = data.match(new RegExp("<vt:" + h.baseType + ">(.*?)</vt:" + h.baseType + ">", 'g'));
if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size;
var res = [];
matches.forEach(function(x) {
var v = x.replace(/<[/]?vt:variant>/g,"").match(/<vt:([^>]*)>(.*)</);
res.push({v:v[2], t:v[1]});
});
return res;
}
var utf8read = function(orig) {
var out = "", i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
while (i < orig.length) {
c = orig.charCodeAt(i++);
if (c < 128) out += _chr(c);
else {
c2 = orig.charCodeAt(i++);
if (c>191 && c<224) out += _chr((c & 31) << 6 | c2 & 63);
else {
c3 = orig.charCodeAt(i++);
out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
}
}
}
return out;
};
/* 18.4.8 si CT_Rst */
function parse_si(x) {
var z = {};
if(!x) return z;
var y;
/* 18.4.12 t ST_Xstring plaintext string */
if((y = x.match(/^<t[^>]*>([^\u2603]*)<\/t>$/m))) {
z.t = utf8read(unescapexml(y[1]));
z.r = x;
}
/* 18.4.4 r CT_RElt Rich Text Run */
else if((y = x.match(/<r>/))) {
z.r = x;
/* TODO: properly parse (note: no other valid child can have body text) */
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,"")));
}
/* TODO: handle rPh and phoneticPr */
return z;
}
/* 18.4 Shared String Table */
function parseStrs(data) {
var s = [];
/* 18.4.9 sst CT_Sst */
var sst = data.match(new RegExp("<sst ([^>]*)>([\\s\\S]*)<\/sst>","m"));
if(sst) {
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si);
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
}
return s;
}
function parseProps(data) {
var p = { Company:'' }, q = {};
var strings = ["Application", "DocSecurity", "Company", "AppVersion"];
@ -773,7 +879,7 @@ function parseZip(zip) {
var dir = parseCT((zip.files['[Content_Types].xml']||{}).data);
strs = {};
if(dir.sst) strs=parseStrs(zip.files[dir.sst.replace(/^\//,'')].data);
if(dir.sst) strs=parse_sst(zip.files[dir.sst.replace(/^\//,'')].data);
styles = {};
if(dir.style) styles = parseStyles(zip.files[dir.style.replace(/^\//,'')].data);