forked from sheetjs/sheetjs
version bump 0.2.7-c: basic html from sst
Very basic rendering. Will ultimately be a separate component (to share code with the more general formatting case)
This commit is contained in:
parent
55ba84d561
commit
81f89d6f4e
1
bits/50_xlsxheader.js
Normal file
1
bits/50_xlsxheader.js
Normal file
@ -0,0 +1 @@
|
||||
var XLSX = (function(){
|
66
bits/51_xlsxutils.js
Normal file
66
bits/51_xlsxutils.js
Normal file
@ -0,0 +1,66 @@
|
||||
/*
 * parsexmltag(tag): turn the text of one XML tag into a plain object.
 *
 * tag     string holding a tag (or just its attribute list)
 * returns object whose key '0' is the first whitespace-delimited word of
 *         `tag` (e.g. '<sst' or '<b/>') and whose other keys are the
 *         attributes written as name="value".  Only the \w+ part of an
 *         attribute name is kept, so xml:space="preserve" is stored under
 *         the key 'space'.  Single-quoted attributes are not recognised.
 */
function parsexmltag(tag) {
	var words = tag.split(/\s+/);
	var z = {'0': words[0]};
	if(words.length === 1) return z;
	/* match() yields null when no word looks like name="value" (e.g. `<b />`) */
	var attrs = tag.match(/(\w+)="([^"]*)"/g) || [];
	attrs.forEach(function(x) {
		var y = x.match(/(\w+)="([^"]*)"/);
		z[y[1]] = y[2];
	});
	return z;
}
|
||||
|
||||
/* The five predefined XML entities and the characters they stand for */
var encodings = {
	'&quot;': '"',
	'&apos;': "'",
	'&gt;': '>',
	'&lt;': '<',
	'&amp;': '&'
};

// TODO: CP remap (need to read file version to determine OS)
/*
 * unescapexml(text): decode XML text content.
 *
 * text    any value; it is coerced to a string first
 * returns the string with the predefined entities replaced by their
 *         characters and every _xHHHH_ escape (exactly four hex digits)
 *         replaced by the character with that code.
 *
 * Entities are replaced in one pass, so "&amp;lt;" becomes "&lt;" and is
 * never decoded a second time.
 */
function unescapexml(text){
	var s = text + '';
	s = s.replace(/&(?:quot|apos|gt|lt|amp);/g, function(m) { return encodings[m]; });
	/* require four digits: a bare "_x_" is ordinary text, not an escape */
	return s.replace(/_x([0-9a-fA-F]{4})_/g, function(m,c) { return _chr(parseInt(c,16)); });
}
|
||||
|
||||
/*
 * parsexmlbool(value, tag): interpret an XML boolean.
 *
 * value   '0', 0, 'false' or 'FALSE' give false;
 *         '1', 1, 'true' or 'TRUE' give true (compared strictly)
 * tag     optional label used only in the error text
 * throws  a string describing the offending value for anything else
 */
function parsexmlbool(value, tag) {
	var off = ['0', 0, 'false', 'FALSE'];
	var on = ['1', 1, 'true', 'TRUE'];
	if(off.indexOf(value) !== -1) return false;
	if(on.indexOf(value) !== -1) return true;
	throw "bad boolean value " + value + " in "+(tag||"?");
}
|
||||
|
||||
/*
 * utf8read(orig): decode UTF-8 held in a string.
 *
 * orig    string whose character codes are treated as UTF-8 bytes
 * returns the decoded string
 *
 * One-, two-, three- and four-byte sequences are handled; a four-byte
 * sequence (lead byte >= 0xF0) is emitted as a UTF-16 surrogate pair.
 * Input is not validated: continuation bytes are masked with 63 whatever
 * their value, and bytes missing at the end of the input count as 0.
 */
var utf8read = function(orig) {
	var out = "", i = 0, c = 0, c2 = 0, c3 = 0, c4 = 0, w = 0;
	while (i < orig.length) {
		c = orig.charCodeAt(i++);
		if (c < 128) { out += _chr(c); continue; }
		c2 = orig.charCodeAt(i++);
		if (c > 191 && c < 224) { out += _chr((c & 31) << 6 | c2 & 63); continue; }
		c3 = orig.charCodeAt(i++);
		if (c < 240) { out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63); continue; }
		/* four bytes: code point above U+FFFF, split into high/low surrogates */
		c4 = orig.charCodeAt(i++);
		w = ((c & 7) << 18 | (c2 & 63) << 12 | (c3 & 63) << 6 | c4 & 63) - 65536;
		out += _chr(0xD800 + ((w >>> 10) & 1023)) + _chr(0xDC00 + (w & 1023));
	}
	return out;
};
|
||||
|
||||
// matches <foo>...</foo> extracts content
/*
 * matchtag(f, g): build a RegExp for the element named `f`.
 *
 * f       element name; inserted into the pattern as-is (not escaped)
 * g       optional extra RegExp flags (e.g. "g"); "m" is always appended
 * returns RegExp matching <f> or <f xml:space="preserve">, then any
 *         characters (newlines included), then </f>.  Capture group 1 is
 *         the content.  The content match is greedy, so it runs to the
 *         last </f> in the searched string.
 */
function matchtag(f,g) {
	return new RegExp('<'+f+'(?: xml:space="preserve")?>([\\s\\S]*)</'+f+'>',(g||"")+"m");
}
|
||||
|
||||
/*
 * parseVector(data): read the items of a <vt:vector> element.
 *
 * data    string starting with the vector's opening tag; its `baseType`
 *         and `size` attributes are read with parsexmltag
 * returns array with one {v: <text>, t: <vt type name>} object for each
 *         <vt:baseType>...</vt:baseType> item (a wrapping <vt:variant> is
 *         stripped before the type name is read)
 * throws  a string when the number of items found differs from `size`
 */
function parseVector(data) {
	var h = parsexmltag(data);

	/* match() returns null when the vector has no items */
	var matches = data.match(new RegExp("<vt:" + h.baseType + ">(.*?)</vt:" + h.baseType + ">", 'g')) || [];
	if(matches.length !== Number(h.size)) throw "unexpected vector length " + matches.length + " != " + h.size;
	return matches.map(function(x) {
		var v = x.replace(/<[/]?vt:variant>/g,"").match(/<vt:([^>]*)>(.*)</);
		return {v:v[2], t:v[1]};
	});
}
|
||||
|
||||
/* isval(x): true unless x is undefined or null (0, "", false and NaN all count as values) */
function isval(x) {
	return x != null;
}
|
139
bits/65_sst.js
Normal file
139
bits/65_sst.js
Normal file
@ -0,0 +1,139 @@
|
||||
/* 18.4 Shared String Table */
/*
 * parse_sst(data): parse the XML of a shared string table.
 *
 * data    string containing an <sst ...>...</sst> element
 * returns array with one object per piece obtained by splitting the table
 *         body on </si> (the text after the final </si> yields a trailing
 *         empty object).  Each non-empty entry carries:
 *           raw  the XML inside the <si>
 *           t    plain text (tags stripped, entities unescaped, UTF-8 decoded)
 *           r    the text again for a plain <t> item, or a very basic HTML
 *                rendering (one <span> per formatted run) for rich text
 *         The array also gets Count / Unique from the `count` /
 *         `uniqueCount` attributes of <sst>.  An empty array is returned
 *         when no <sst> element is found.
 * throws  a string for an unrecognised opening tag inside <rPr>
 */
var parse_sst = (function(){
	var tregex = matchtag("t"), rpregex = matchtag("rPr");

	/* Parse a list of <r> tags */
	var parse_rs = (function() {
		/* CT_BooleanProperty: the property is on unless val says otherwise */
		var rpr_bool = function(y) {
			return !isval(y.val) || (y.val !== '0' && y.val !== 'false' && y.val !== 'FALSE');
		};

		/* 18.4.7 rPr CT_RPrElt */
		/* Pushes the opening markup for the run onto `intro` and the closing markup onto `outro` */
		var parse_rpr = function(rpr, intro, outro) {
			var font = {};
			/* match() returns null for an empty <rPr></rPr> */
			(rpr.match(/<[^>]*>/g) || []).forEach(function(x) {
				var y = parsexmltag(x);
				/* strip a trailing "/>" or ">" so <b/>, <b> and <b val="1"/> share one case */
				switch(y[0].replace(/\/?>$/, "")) {
					/* 18.8.12 condense CT_BooleanProperty */
					/* ** not required . */
					case '<condense': break;
					/* 18.8.17 extend CT_BooleanProperty */
					/* ** not required . */
					case '<extend': break;
					/* 18.8.36 shadow CT_BooleanProperty */
					/* ** not required . */
					case '<shadow': break;

					/* 18.4.1 charset CT_IntProperty TODO */
					case '<charset': break;

					/* 18.4.2 outline CT_BooleanProperty TODO */
					case '<outline': break;

					/* 18.4.5 rFont CT_FontName */
					case '<rFont': font.name = y.val; break;

					/* 18.4.11 sz CT_FontSize */
					case '<sz': font.sz = y.val; break;

					/* 18.4.10 strike CT_BooleanProperty */
					case '<strike': if(rpr_bool(y)) font.strike = 1; break;

					/* 18.4.13 u CT_UnderlineProperty (val names a style; "none" turns it off) */
					case '<u': if(y.val !== 'none') font.u = 1; break;

					/* 18.8.2 b */
					case '<b': if(rpr_bool(y)) font.b = 1; break;

					/* 18.8.26 i */
					case '<i': if(rpr_bool(y)) font.i = 1; break;

					/* 18.3.1.15 color CT_Color TODO: tint, theme, auto, indexed */
					case '<color':
						/* skip the first two characters of rgb, keep the next six */
						if(y.rgb) font.color = y.rgb.slice(2,8);
						break;

					/* 18.8.18 family ST_FontFamily */
					case '<family': font.family = y.val; break;

					/* 18.4.14 vertAlign CT_VerticalAlignFontProperty TODO */
					case '<vertAlign': break;

					/* 18.8.35 scheme CT_FontScheme TODO */
					case '<scheme': break;

					default:
						/* closing tags (second character "/") carry no information */
						if(y[0].charAt(1) !== '/') throw 'Unrecognized rich format ' + y[0];
				}
			});
			/* TODO: These should be generated styles, not inline */
			var style = [];
			if(font.b) style.push("font-weight: bold;");
			if(font.i) style.push("font-style: italic;");
			intro.push('<span style="' + style.join("") + '">');
			outro.push("</span>");
		};

		/* 18.4.4 r CT_RElt */
		/* Returns the markup for one run, or "" when the piece holds no <t> */
		function parse_r(r) {
			var terms = [[],"",[]];
			/* 18.4.12 t ST_Xstring */
			var t = r.match(tregex);
			if(!isval(t)) return "";
			terms[1] = t[1];

			var rpr = r.match(rpregex);
			if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]);
			return terms[0].join("") + terms[1].replace(/\r\n/g,'<br/>') + terms[2].join("");
		}

		return function(rs) {
			return rs.replace(/<r>/g,"").split(/<\/r>/).map(parse_r).join("");
		};
	})();

	/* 18.4.8 si CT_Rst */
	var parse_si = function(x) {
		var z = {};
		if(!x) return z;
		/* 18.4.12 t ST_Xstring (Plaintext String) */
		if(x[1] === 't') {
			z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,"")));
			z.raw = x;
			z.r = z.t;
		}
		/* 18.4.4 r CT_RElt (Rich Text Run) */
		else if(x.match(/<r>/)) {
			z.raw = x;
			/* TODO: properly parse (note: no other valid child can have body text) */
			z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,"")));
			/* decode the rendering as well so that r and t use the same encoding */
			z.r = utf8read(parse_rs(x));
		}
		/* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */
		/* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */
		return z;
	};

	return function(data) {
		var s = [];
		/* 18.4.9 sst CT_Sst */
		var sst = data.match(/<sst([^>]*)>([\s\S]*)<\/sst>/m);
		if(sst) {
			s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si);
			sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
		}
		return s;
	};
})();
|
106
bits/70_xlsx.js
106
bits/70_xlsx.js
@ -1,5 +1,4 @@
|
||||
|
||||
var XLSX = (function(){
|
||||
var ct2type = {
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml": "workbooks",
|
||||
"application/vnd.openxmlformats-package.core-properties+xml": "coreprops",
|
||||
@ -93,38 +92,6 @@ var CustomWBViewDef = {
|
||||
var XMLNS_CT = 'http://schemas.openxmlformats.org/package/2006/content-types';
|
||||
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
|
||||
|
||||
var encodings = {
|
||||
'"': '"',
|
||||
''': "'",
|
||||
'>': '>',
|
||||
'<': '<',
|
||||
'&': '&'
|
||||
};
|
||||
|
||||
// TODO: CP remap (need to read file version to determine OS)
|
||||
function unescapexml(text){
|
||||
var s = text + '';
|
||||
for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]);
|
||||
return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));});
|
||||
}
|
||||
|
||||
function parsexmltag(tag) {
|
||||
var words = tag.split(/\s+/);
|
||||
var z = {'0': words[0]};
|
||||
if(words.length === 1) return z;
|
||||
tag.match(/(\w+)="([^"]*)"/g).map(
|
||||
function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; });
|
||||
return z;
|
||||
}
|
||||
|
||||
function parsexmlbool(value, tag) {
|
||||
switch(value) {
|
||||
case '0': case 0: case 'false': case 'FALSE': return false;
|
||||
case '1': case 1: case 'true': case 'TRUE': return true;
|
||||
default: throw "bad boolean value " + value + " in "+(tag||"?");
|
||||
}
|
||||
}
|
||||
|
||||
var strs = {}; // shared strings
|
||||
var styles = {}; // shared styles
|
||||
var _ssfopts = {}; // spreadsheet formatting options
|
||||
@ -207,73 +174,6 @@ function parseSheet(data) {
|
||||
return s;
|
||||
}
|
||||
|
||||
// matches <foo>...</foo> extracts content
|
||||
function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)</'+f+'>',(g||"")+"m");}
|
||||
|
||||
function parseVector(data) {
|
||||
var h = parsexmltag(data);
|
||||
|
||||
var matches = data.match(new RegExp("<vt:" + h.baseType + ">(.*?)</vt:" + h.baseType + ">", 'g'));
|
||||
if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size;
|
||||
var res = [];
|
||||
matches.forEach(function(x) {
|
||||
var v = x.replace(/<[/]?vt:variant>/g,"").match(/<vt:([^>]*)>(.*)</);
|
||||
res.push({v:v[2], t:v[1]});
|
||||
});
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
var utf8read = function(orig) {
|
||||
var out = "", i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
|
||||
while (i < orig.length) {
|
||||
c = orig.charCodeAt(i++);
|
||||
if (c < 128) out += _chr(c);
|
||||
else {
|
||||
c2 = orig.charCodeAt(i++);
|
||||
if (c>191 && c<224) out += _chr((c & 31) << 6 | c2 & 63);
|
||||
else {
|
||||
c3 = orig.charCodeAt(i++);
|
||||
out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
};
|
||||
|
||||
/* 18.4.8 si CT_Rst */
|
||||
function parse_si(x) {
|
||||
var z = {};
|
||||
if(!x) return z;
|
||||
var y;
|
||||
/* 18.4.12 t ST_Xstring plaintext string */
|
||||
if((y = x.match(/^<t[^>]*>([^\u2603]*)<\/t>$/m))) {
|
||||
z.t = utf8read(unescapexml(y[1]));
|
||||
z.r = x;
|
||||
}
|
||||
/* 18.4.4 r CT_RElt Rich Text Run */
|
||||
else if((y = x.match(/<r>/))) {
|
||||
z.r = x;
|
||||
/* TODO: properly parse (note: no other valid child can have body text) */
|
||||
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,"")));
|
||||
}
|
||||
/* TODO: handle rPh and phoneticPr */
|
||||
return z;
|
||||
}
|
||||
|
||||
/* 18.4 Shared String Table */
|
||||
function parseStrs(data) {
|
||||
var s = [];
|
||||
/* 18.4.9 sst CT_Sst */
|
||||
var sst = data.match(new RegExp("<sst ([^>]*)>([\\s\\S]*)<\/sst>","m"));
|
||||
if(sst) {
|
||||
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si);
|
||||
|
||||
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
function parseProps(data) {
|
||||
var p = { Company:'' }, q = {};
|
||||
var strings = ["Application", "DocSecurity", "Company", "AppVersion"];
|
||||
@ -549,7 +449,7 @@ function parseZip(zip) {
|
||||
var dir = parseCT((zip.files['[Content_Types].xml']||{}).data);
|
||||
|
||||
strs = {};
|
||||
if(dir.sst) strs=parseStrs(zip.files[dir.sst.replace(/^\//,'')].data);
|
||||
if(dir.sst) strs=parse_sst(zip.files[dir.sst.replace(/^\//,'')].data);
|
||||
|
||||
styles = {};
|
||||
if(dir.style) styles = parseStyles(zip.files[dir.style.replace(/^\//,'')].data);
|
||||
@ -621,7 +521,3 @@ function readFileSync(data, options) {
|
||||
this.read = readSync;
|
||||
this.readFile = readFileSync;
|
||||
this.parseZip = parseZip;
|
||||
return this;
|
||||
|
||||
})();
|
||||
|
||||
|
4
bits/89_xlsxfooter.js
Normal file
4
bits/89_xlsxfooter.js
Normal file
@ -0,0 +1,4 @@
|
||||
return this;
|
||||
|
||||
})();
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "xlsx",
|
||||
"version": "0.2.7-b",
|
||||
"version": "0.2.7-c",
|
||||
"author": "Niggler",
|
||||
"description": "(one day) a full-featured XLSX parser and writer. For now, primitive parser",
|
||||
"keywords": [
|
||||
|
308
xlsx.js
308
xlsx.js
@ -222,8 +222,213 @@ SSF.format = format;
|
||||
|
||||
return SSF;
|
||||
})();
|
||||
|
||||
var XLSX = (function(){
|
||||
function parsexmltag(tag) {
|
||||
var words = tag.split(/\s+/);
|
||||
var z = {'0': words[0]};
|
||||
if(words.length === 1) return z;
|
||||
tag.match(/(\w+)="([^"]*)"/g).map(
|
||||
function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; });
|
||||
return z;
|
||||
}
|
||||
|
||||
var encodings = {
|
||||
'"': '"',
|
||||
''': "'",
|
||||
'>': '>',
|
||||
'<': '<',
|
||||
'&': '&'
|
||||
};
|
||||
|
||||
// TODO: CP remap (need to read file version to determine OS)
|
||||
function unescapexml(text){
|
||||
var s = text + '';
|
||||
for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]);
|
||||
return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));});
|
||||
}
|
||||
|
||||
function parsexmlbool(value, tag) {
|
||||
switch(value) {
|
||||
case '0': case 0: case 'false': case 'FALSE': return false;
|
||||
case '1': case 1: case 'true': case 'TRUE': return true;
|
||||
default: throw "bad boolean value " + value + " in "+(tag||"?");
|
||||
}
|
||||
}
|
||||
|
||||
var utf8read = function(orig) {
|
||||
var out = "", i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
|
||||
while (i < orig.length) {
|
||||
c = orig.charCodeAt(i++);
|
||||
if (c < 128) out += _chr(c);
|
||||
else {
|
||||
c2 = orig.charCodeAt(i++);
|
||||
if (c>191 && c<224) out += _chr((c & 31) << 6 | c2 & 63);
|
||||
else {
|
||||
c3 = orig.charCodeAt(i++);
|
||||
out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
};
|
||||
|
||||
// matches <foo>...</foo> extracts content
|
||||
function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)</'+f+'>',(g||"")+"m");}
|
||||
|
||||
function parseVector(data) {
|
||||
var h = parsexmltag(data);
|
||||
|
||||
var matches = data.match(new RegExp("<vt:" + h.baseType + ">(.*?)</vt:" + h.baseType + ">", 'g'));
|
||||
if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size;
|
||||
var res = [];
|
||||
matches.forEach(function(x) {
|
||||
var v = x.replace(/<[/]?vt:variant>/g,"").match(/<vt:([^>]*)>(.*)</);
|
||||
res.push({v:v[2], t:v[1]});
|
||||
});
|
||||
return res;
|
||||
}
|
||||
|
||||
function isval(x) { return typeof x !== "undefined" && x !== null; }
|
||||
/* 18.4 Shared String Table */
|
||||
var parse_sst = (function(){
|
||||
var tregex = matchtag("t"), rpregex = matchtag("rPr");
|
||||
/* Parse a list of <r> tags */
|
||||
var parse_rs = (function() {
|
||||
/* 18.4.7 rPr CT_RPrElt */
|
||||
var parse_rpr = function(rpr, intro, outro) {
|
||||
var font = {};
|
||||
rpr.match(/<[^>]*>/g).forEach(function(x) {
|
||||
var y = parsexmltag(x);
|
||||
switch(y[0]) {
|
||||
/* 18.8.12 condense CT_BooleanProperty */
|
||||
/* ** not required . */
|
||||
case '<condense': break;
|
||||
/* 18.8.17 extend CT_BooleanProperty */
|
||||
/* ** not required . */
|
||||
case '<extend': break;
|
||||
/* 18.8.36 shadow CT_BooleanProperty */
|
||||
/* ** not required . */
|
||||
case '<shadow': break;
|
||||
|
||||
/* 18.4.1 charset CT_IntProperty TODO */
|
||||
case '<charset': break;
|
||||
|
||||
/* 18.4.2 outline CT_BooleanProperty TODO */
|
||||
case '<outline': break;
|
||||
|
||||
/* 18.4.5 rFont CT_FontName */
|
||||
case '<rFont': font.name = y.val; break;
|
||||
|
||||
/* 18.4.11 sz CT_FontSize */
|
||||
case '<sz': font.sz = y.val; break;
|
||||
|
||||
/* 18.4.10 strike CT_BooleanProperty */
|
||||
case '<strike':
|
||||
if(!y.val) break;
|
||||
/* falls through */
|
||||
case '<strike/>': font.strike = 1; break;
|
||||
case '</strike>': break;
|
||||
|
||||
/* 18.4.13 u CT_UnderlineProperty */
|
||||
case '<u':
|
||||
if(!y.val) break;
|
||||
/* falls through */
|
||||
case '<u/>': font.u = 1; break;
|
||||
case '</u>': break;
|
||||
|
||||
/* 18.8.2 b */
|
||||
case '<b':
|
||||
if(!y.val) break;
|
||||
/* falls through */
|
||||
case '<b/>': font.b = 1; break;
|
||||
case '</b>': break;
|
||||
|
||||
/* 18.8.26 i */
|
||||
case '<i':
|
||||
if(!y.val) break;
|
||||
/* falls through */
|
||||
case '<i/>': font.i = 1; break;
|
||||
case '</i>': break;
|
||||
|
||||
/* 18.3.1.15 color CT_Color TODO: tint, theme, auto, indexed */
|
||||
case '<color':
|
||||
if(y.rgb) font.color = y.rgb.substr(2,6);
|
||||
break;
|
||||
|
||||
/* 18.8.18 family ST_FontFamily */
|
||||
case '<family': font.family = y.val; break;
|
||||
|
||||
/* 18.4.14 vertAlign CT_VerticalAlignFontProperty TODO */
|
||||
case '<vertAlign': break;
|
||||
|
||||
/* 18.8.35 scheme CT_FontScheme TODO */
|
||||
case '<scheme': break;
|
||||
|
||||
default:
|
||||
if(y[0][2] !== '/') throw 'Unrecognized rich format ' + y[0];
|
||||
}
|
||||
});
|
||||
/* TODO: These should be generated styles, not inline */
|
||||
var style = [];
|
||||
if(font.b) style.push("font-weight: bold;");
|
||||
if(font.i) style.push("font-style: italic;");
|
||||
intro.push('<span style="' + style.join("") + '">');
|
||||
outro.push("</span>");
|
||||
};
|
||||
|
||||
/* 18.4.4 r CT_RElt */
|
||||
function parse_r(r) {
|
||||
var terms = [[],"",[]];
|
||||
/* 18.4.12 t ST_Xstring */
|
||||
var t = r.match(tregex);
|
||||
if(!isval(t)) return "";
|
||||
terms[1] = t[1];
|
||||
|
||||
var rpr = r.match(rpregex);
|
||||
if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]);
|
||||
return terms[0].join("") + terms[1].replace(/\r\n/g,'<br/>') + terms[2].join("");
|
||||
}
|
||||
return function(rs) {
|
||||
return rs.replace(/<r>/g,"").split(/<\/r>/).map(parse_r).join("");
|
||||
};
|
||||
})();
|
||||
|
||||
/* 18.4.8 si CT_Rst */
|
||||
var parse_si = function(x) {
|
||||
var z = {};
|
||||
if(!x) return z;
|
||||
var y;
|
||||
/* 18.4.12 t ST_Xstring (Plaintext String) */
|
||||
if(x[1] === 't') {
|
||||
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,"")));
|
||||
z.raw = x;
|
||||
z.r = z.t;
|
||||
}
|
||||
/* 18.4.4 r CT_RElt (Rich Text Run) */
|
||||
else if((y = x.match(/<r>/))) {
|
||||
z.raw = x;
|
||||
/* TODO: properly parse (note: no other valid child can have body text) */
|
||||
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,"")));
|
||||
z.r = parse_rs(x);
|
||||
}
|
||||
/* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */
|
||||
/* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */
|
||||
return z;
|
||||
};
|
||||
|
||||
|
||||
return function(data) {
|
||||
var s = [];
|
||||
/* 18.4.9 sst CT_Sst */
|
||||
var sst = data.match(new RegExp("<sst([^>]*)>([\\s\\S]*)<\/sst>","m"));
|
||||
if(sst) {
|
||||
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si);
|
||||
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
|
||||
}
|
||||
return s;
|
||||
};
|
||||
})();
|
||||
|
||||
var ct2type = {
|
||||
"application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml": "workbooks",
|
||||
"application/vnd.openxmlformats-package.core-properties+xml": "coreprops",
|
||||
@ -317,38 +522,6 @@ var CustomWBViewDef = {
|
||||
var XMLNS_CT = 'http://schemas.openxmlformats.org/package/2006/content-types';
|
||||
var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main';
|
||||
|
||||
var encodings = {
|
||||
'"': '"',
|
||||
''': "'",
|
||||
'>': '>',
|
||||
'<': '<',
|
||||
'&': '&'
|
||||
};
|
||||
|
||||
// TODO: CP remap (need to read file version to determine OS)
|
||||
function unescapexml(text){
|
||||
var s = text + '';
|
||||
for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]);
|
||||
return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));});
|
||||
}
|
||||
|
||||
function parsexmltag(tag) {
|
||||
var words = tag.split(/\s+/);
|
||||
var z = {'0': words[0]};
|
||||
if(words.length === 1) return z;
|
||||
tag.match(/(\w+)="([^"]*)"/g).map(
|
||||
function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; });
|
||||
return z;
|
||||
}
|
||||
|
||||
function parsexmlbool(value, tag) {
|
||||
switch(value) {
|
||||
case '0': case 0: case 'false': case 'FALSE': return false;
|
||||
case '1': case 1: case 'true': case 'TRUE': return true;
|
||||
default: throw "bad boolean value " + value + " in "+(tag||"?");
|
||||
}
|
||||
}
|
||||
|
||||
var strs = {}; // shared strings
|
||||
var styles = {}; // shared styles
|
||||
var _ssfopts = {}; // spreadsheet formatting options
|
||||
@ -431,73 +604,6 @@ function parseSheet(data) {
|
||||
return s;
|
||||
}
|
||||
|
||||
// matches <foo>...</foo> extracts content
|
||||
function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)</'+f+'>',(g||"")+"m");}
|
||||
|
||||
function parseVector(data) {
|
||||
var h = parsexmltag(data);
|
||||
|
||||
var matches = data.match(new RegExp("<vt:" + h.baseType + ">(.*?)</vt:" + h.baseType + ">", 'g'));
|
||||
if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size;
|
||||
var res = [];
|
||||
matches.forEach(function(x) {
|
||||
var v = x.replace(/<[/]?vt:variant>/g,"").match(/<vt:([^>]*)>(.*)</);
|
||||
res.push({v:v[2], t:v[1]});
|
||||
});
|
||||
return res;
|
||||
}
|
||||
|
||||
|
||||
var utf8read = function(orig) {
|
||||
var out = "", i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0;
|
||||
while (i < orig.length) {
|
||||
c = orig.charCodeAt(i++);
|
||||
if (c < 128) out += _chr(c);
|
||||
else {
|
||||
c2 = orig.charCodeAt(i++);
|
||||
if (c>191 && c<224) out += _chr((c & 31) << 6 | c2 & 63);
|
||||
else {
|
||||
c3 = orig.charCodeAt(i++);
|
||||
out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63);
|
||||
}
|
||||
}
|
||||
}
|
||||
return out;
|
||||
};
|
||||
|
||||
/* 18.4.8 si CT_Rst */
|
||||
function parse_si(x) {
|
||||
var z = {};
|
||||
if(!x) return z;
|
||||
var y;
|
||||
/* 18.4.12 t ST_Xstring plaintext string */
|
||||
if((y = x.match(/^<t[^>]*>([^\u2603]*)<\/t>$/m))) {
|
||||
z.t = utf8read(unescapexml(y[1]));
|
||||
z.r = x;
|
||||
}
|
||||
/* 18.4.4 r CT_RElt Rich Text Run */
|
||||
else if((y = x.match(/<r>/))) {
|
||||
z.r = x;
|
||||
/* TODO: properly parse (note: no other valid child can have body text) */
|
||||
z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,"")));
|
||||
}
|
||||
/* TODO: handle rPh and phoneticPr */
|
||||
return z;
|
||||
}
|
||||
|
||||
/* 18.4 Shared String Table */
|
||||
function parseStrs(data) {
|
||||
var s = [];
|
||||
/* 18.4.9 sst CT_Sst */
|
||||
var sst = data.match(new RegExp("<sst ([^>]*)>([\\s\\S]*)<\/sst>","m"));
|
||||
if(sst) {
|
||||
s = sst[2].replace(/<si>/g,"").split(/<\/si>/).map(parse_si);
|
||||
|
||||
sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount;
|
||||
}
|
||||
return s;
|
||||
}
|
||||
|
||||
function parseProps(data) {
|
||||
var p = { Company:'' }, q = {};
|
||||
var strings = ["Application", "DocSecurity", "Company", "AppVersion"];
|
||||
@ -773,7 +879,7 @@ function parseZip(zip) {
|
||||
var dir = parseCT((zip.files['[Content_Types].xml']||{}).data);
|
||||
|
||||
strs = {};
|
||||
if(dir.sst) strs=parseStrs(zip.files[dir.sst.replace(/^\//,'')].data);
|
||||
if(dir.sst) strs=parse_sst(zip.files[dir.sst.replace(/^\//,'')].data);
|
||||
|
||||
styles = {};
|
||||
if(dir.style) styles = parseStyles(zip.files[dir.style.replace(/^\//,'')].data);
|
||||
|
Loading…
Reference in New Issue
Block a user