diff --git a/bits/50_xlsxheader.js b/bits/50_xlsxheader.js new file mode 100644 index 0000000..fafc9aa --- /dev/null +++ b/bits/50_xlsxheader.js @@ -0,0 +1 @@ +var XLSX = (function(){ diff --git a/bits/51_xlsxutils.js b/bits/51_xlsxutils.js new file mode 100644 index 0000000..6660a4f --- /dev/null +++ b/bits/51_xlsxutils.js @@ -0,0 +1,66 @@ +function parsexmltag(tag) { + var words = tag.split(/\s+/); + var z = {'0': words[0]}; + if(words.length === 1) return z; + tag.match(/(\w+)="([^"]*)"/g).map( + function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; }); + return z; +} + +var encodings = { + '"': '"', + ''': "'", + '>': '>', + '<': '<', + '&': '&' +}; + +// TODO: CP remap (need to read file version to determine OS) +function unescapexml(text){ + var s = text + ''; + for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]); + return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));}); +} + +function parsexmlbool(value, tag) { + switch(value) { + case '0': case 0: case 'false': case 'FALSE': return false; + case '1': case 1: case 'true': case 'TRUE': return true; + default: throw "bad boolean value " + value + " in "+(tag||"?"); + } +} + +var utf8read = function(orig) { + var out = "", i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0; + while (i < orig.length) { + c = orig.charCodeAt(i++); + if (c < 128) out += _chr(c); + else { + c2 = orig.charCodeAt(i++); + if (c>191 && c<224) out += _chr((c & 31) << 6 | c2 & 63); + else { + c3 = orig.charCodeAt(i++); + out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63); + } + } + } + return out; +}; + +// matches ... extracts content +function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)',(g||"")+"m");} + +function parseVector(data) { + var h = parsexmltag(data); + + var matches = data.match(new RegExp("(.*?)", 'g')); + if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size; + var res = []; + matches.forEach(function(x) { + var v = x.replace(/<[/]?vt:variant>/g,"").match(/]*)>(.*) tags */ + var parse_rs = (function() { + /* 18.4.7 rPr CT_RPrElt */ + var parse_rpr = function(rpr, intro, outro) { + var font = {}; + rpr.match(/<[^>]*>/g).forEach(function(x) { + var y = parsexmltag(x); + switch(y[0]) { + /* 18.8.12 condense CT_BooleanProperty */ + /* ** not required . */ + case '': font.strike = 1; break; + case '': break; + + /* 18.4.13 u CT_UnderlineProperty */ + case '': font.u = 1; break; + case '': break; + + /* 18.8.2 b */ + case '': font.b = 1; break; + case '': break; + + /* 18.8.26 i */ + case '': font.i = 1; break; + case '': break; + + /* 18.3.1.15 color CT_Color TODO: tint, theme, auto, indexed */ + case ''); + outro.push(""); + }; + + /* 18.4.4 r CT_RElt */ + function parse_r(r) { + var terms = [[],"",[]]; + /* 18.4.12 t ST_Xstring */ + var t = r.match(tregex); + if(!isval(t)) return ""; + terms[1] = t[1]; + + var rpr = r.match(rpregex); + if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]); + return terms[0].join("") + terms[1].replace(/\r\n/g,'
') + terms[2].join(""); + } + return function(rs) { + return rs.replace(//g,"").split(/<\/r>/).map(parse_r).join(""); + }; + })(); + + /* 18.4.8 si CT_Rst */ + var parse_si = function(x) { + var z = {}; + if(!x) return z; + var y; + /* 18.4.12 t ST_Xstring (Plaintext String) */ + if(x[1] === 't') { + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,""))); + z.raw = x; + z.r = z.t; + } + /* 18.4.4 r CT_RElt (Rich Text Run) */ + else if((y = x.match(//))) { + z.raw = x; + /* TODO: properly parse (note: no other valid child can have body text) */ + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); + z.r = parse_rs(x); + } + /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ + /* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */ + return z; + }; + + + return function(data) { + var s = []; + /* 18.4.9 sst CT_Sst */ + var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m")); + if(sst) { + s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si); + sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; + } + return s; + }; +})(); diff --git a/bits/70_xlsx.js b/bits/70_xlsx.js index 7fd7ae9..7dd0cee 100644 --- a/bits/70_xlsx.js +++ b/bits/70_xlsx.js @@ -1,5 +1,4 @@ -var XLSX = (function(){ var ct2type = { "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml": "workbooks", "application/vnd.openxmlformats-package.core-properties+xml": "coreprops", @@ -93,38 +92,6 @@ var CustomWBViewDef = { var XMLNS_CT = 'http://schemas.openxmlformats.org/package/2006/content-types'; var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; -var encodings = { - '"': '"', - ''': "'", - '>': '>', - '<': '<', - '&': '&' -}; - -// TODO: CP remap (need to read file version to determine OS) -function unescapexml(text){ - var s = text + ''; - for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]); - return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));}); -} - -function parsexmltag(tag) { - var words = tag.split(/\s+/); - var z = {'0': words[0]}; - if(words.length === 1) return z; - tag.match(/(\w+)="([^"]*)"/g).map( - function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; }); - return z; -} - -function parsexmlbool(value, tag) { - switch(value) { - case '0': case 0: case 'false': case 'FALSE': return false; - case '1': case 1: case 'true': case 'TRUE': return true; - default: throw "bad boolean value " + value + " in "+(tag||"?"); - } -} - var strs = {}; // shared strings var styles = {}; // shared styles var _ssfopts = {}; // spreadsheet formatting options @@ -207,73 +174,6 @@ function parseSheet(data) { return s; } -// matches ... extracts content -function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)',(g||"")+"m");} - -function parseVector(data) { - var h = parsexmltag(data); - - var matches = data.match(new RegExp("(.*?)", 'g')); - if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size; - var res = []; - matches.forEach(function(x) { - var v = x.replace(/<[/]?vt:variant>/g,"").match(/]*)>(.*)191 && c<224) out += _chr((c & 31) << 6 | c2 & 63); - else { - c3 = orig.charCodeAt(i++); - out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63); - } - } - } - return out; -}; - -/* 18.4.8 si CT_Rst */ -function parse_si(x) { - var z = {}; - if(!x) return z; - var y; - /* 18.4.12 t ST_Xstring plaintext string */ - if((y = x.match(/^]*>([^\u2603]*)<\/t>$/m))) { - z.t = utf8read(unescapexml(y[1])); - z.r = x; - } - /* 18.4.4 r CT_RElt Rich Text Run */ - else if((y = x.match(//))) { - z.r = x; - /* TODO: properly parse (note: no other valid child can have body text) */ - z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); - } - /* TODO: handle rPh and phoneticPr */ - return z; -} - -/* 18.4 Shared String Table */ -function parseStrs(data) { - var s = []; - /* 18.4.9 sst CT_Sst */ - var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m")); - if(sst) { - s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si); - - sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; - } - return s; -} - function parseProps(data) { var p = { Company:'' }, q = {}; var strings = ["Application", "DocSecurity", "Company", "AppVersion"]; @@ -549,7 +449,7 @@ function parseZip(zip) { var dir = parseCT((zip.files['[Content_Types].xml']||{}).data); strs = {}; - if(dir.sst) strs=parseStrs(zip.files[dir.sst.replace(/^\//,'')].data); + if(dir.sst) strs=parse_sst(zip.files[dir.sst.replace(/^\//,'')].data); styles = {}; if(dir.style) styles = parseStyles(zip.files[dir.style.replace(/^\//,'')].data); @@ -621,7 +521,3 @@ function readFileSync(data, options) { this.read = readSync; this.readFile = readFileSync; this.parseZip = parseZip; -return this; - -})(); - diff --git a/bits/89_xlsxfooter.js b/bits/89_xlsxfooter.js new file mode 100644 index 0000000..90b30b2 --- /dev/null +++ b/bits/89_xlsxfooter.js @@ -0,0 +1,4 @@ +return this; + +})(); + diff --git a/package.json b/package.json index f1be0fe..256c050 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "xlsx", - "version": "0.2.7-b", + "version": "0.2.7-c", "author": "Niggler", "description": "(one day) a full-featured XLSX parser and writer. For now, primitive parser", "keywords": [ diff --git a/xlsx.js b/xlsx.js index 467199b..8d36ae0 100644 --- a/xlsx.js +++ b/xlsx.js @@ -222,8 +222,213 @@ SSF.format = format; return SSF; })(); - var XLSX = (function(){ +function parsexmltag(tag) { + var words = tag.split(/\s+/); + var z = {'0': words[0]}; + if(words.length === 1) return z; + tag.match(/(\w+)="([^"]*)"/g).map( + function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; }); + return z; +} + +var encodings = { + '"': '"', + ''': "'", + '>': '>', + '<': '<', + '&': '&' +}; + +// TODO: CP remap (need to read file version to determine OS) +function unescapexml(text){ + var s = text + ''; + for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]); + return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));}); +} + +function parsexmlbool(value, tag) { + switch(value) { + case '0': case 0: case 'false': case 'FALSE': return false; + case '1': case 1: case 'true': case 'TRUE': return true; + default: throw "bad boolean value " + value + " in "+(tag||"?"); + } +} + +var utf8read = function(orig) { + var out = "", i = 0, c = 0, c1 = 0, c2 = 0, c3 = 0; + while (i < orig.length) { + c = orig.charCodeAt(i++); + if (c < 128) out += _chr(c); + else { + c2 = orig.charCodeAt(i++); + if (c>191 && c<224) out += _chr((c & 31) << 6 | c2 & 63); + else { + c3 = orig.charCodeAt(i++); + out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63); + } + } + } + return out; +}; + +// matches ... extracts content +function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)',(g||"")+"m");} + +function parseVector(data) { + var h = parsexmltag(data); + + var matches = data.match(new RegExp("(.*?)", 'g')); + if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size; + var res = []; + matches.forEach(function(x) { + var v = x.replace(/<[/]?vt:variant>/g,"").match(/]*)>(.*) tags */ + var parse_rs = (function() { + /* 18.4.7 rPr CT_RPrElt */ + var parse_rpr = function(rpr, intro, outro) { + var font = {}; + rpr.match(/<[^>]*>/g).forEach(function(x) { + var y = parsexmltag(x); + switch(y[0]) { + /* 18.8.12 condense CT_BooleanProperty */ + /* ** not required . */ + case '': font.strike = 1; break; + case '': break; + + /* 18.4.13 u CT_UnderlineProperty */ + case '': font.u = 1; break; + case '': break; + + /* 18.8.2 b */ + case '': font.b = 1; break; + case '': break; + + /* 18.8.26 i */ + case '': font.i = 1; break; + case '': break; + + /* 18.3.1.15 color CT_Color TODO: tint, theme, auto, indexed */ + case ''); + outro.push(""); + }; + + /* 18.4.4 r CT_RElt */ + function parse_r(r) { + var terms = [[],"",[]]; + /* 18.4.12 t ST_Xstring */ + var t = r.match(tregex); + if(!isval(t)) return ""; + terms[1] = t[1]; + + var rpr = r.match(rpregex); + if(isval(rpr)) parse_rpr(rpr[1], terms[0], terms[2]); + return terms[0].join("") + terms[1].replace(/\r\n/g,'
') + terms[2].join(""); + } + return function(rs) { + return rs.replace(//g,"").split(/<\/r>/).map(parse_r).join(""); + }; + })(); + + /* 18.4.8 si CT_Rst */ + var parse_si = function(x) { + var z = {}; + if(!x) return z; + var y; + /* 18.4.12 t ST_Xstring (Plaintext String) */ + if(x[1] === 't') { + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/g,""))); + z.raw = x; + z.r = z.t; + } + /* 18.4.4 r CT_RElt (Rich Text Run) */ + else if((y = x.match(//))) { + z.raw = x; + /* TODO: properly parse (note: no other valid child can have body text) */ + z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); + z.r = parse_rs(x); + } + /* 18.4.3 phoneticPr CT_PhoneticPr (TODO: needed for Asian support) */ + /* 18.4.6 rPh CT_PhoneticRun (TODO: needed for Asian support) */ + return z; + }; + + + return function(data) { + var s = []; + /* 18.4.9 sst CT_Sst */ + var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m")); + if(sst) { + s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si); + sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; + } + return s; + }; +})(); + var ct2type = { "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml": "workbooks", "application/vnd.openxmlformats-package.core-properties+xml": "coreprops", @@ -317,38 +522,6 @@ var CustomWBViewDef = { var XMLNS_CT = 'http://schemas.openxmlformats.org/package/2006/content-types'; var XMLNS_WB = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'; -var encodings = { - '"': '"', - ''': "'", - '>': '>', - '<': '<', - '&': '&' -}; - -// TODO: CP remap (need to read file version to determine OS) -function unescapexml(text){ - var s = text + ''; - for(var y in encodings) s = s.replace(new RegExp(y,'g'), encodings[y]); - return s.replace(/_x([0-9a-fA-F]*)_/g,function(m,c) {return _chr(parseInt(c,16));}); -} - -function parsexmltag(tag) { - var words = tag.split(/\s+/); - var z = {'0': words[0]}; - if(words.length === 1) return z; - tag.match(/(\w+)="([^"]*)"/g).map( - function(x){var y=x.match(/(\w+)="([^"]*)"/); z[y[1]] = y[2]; }); - return z; -} - -function parsexmlbool(value, tag) { - switch(value) { - case '0': case 0: case 'false': case 'FALSE': return false; - case '1': case 1: case 'true': case 'TRUE': return true; - default: throw "bad boolean value " + value + " in "+(tag||"?"); - } -} - var strs = {}; // shared strings var styles = {}; // shared styles var _ssfopts = {}; // spreadsheet formatting options @@ -431,73 +604,6 @@ function parseSheet(data) { return s; } -// matches ... extracts content -function matchtag(f,g) {return new RegExp('<'+f+'(?: xml:space="preserve")?>([^\u2603]*)',(g||"")+"m");} - -function parseVector(data) { - var h = parsexmltag(data); - - var matches = data.match(new RegExp("(.*?)", 'g')); - if(matches.length != h.size) throw "unexpected vector length " + matches.length + " != " + h.size; - var res = []; - matches.forEach(function(x) { - var v = x.replace(/<[/]?vt:variant>/g,"").match(/]*)>(.*)191 && c<224) out += _chr((c & 31) << 6 | c2 & 63); - else { - c3 = orig.charCodeAt(i++); - out += _chr((c & 15) << 12 | (c2 & 63) << 6 | c3 & 63); - } - } - } - return out; -}; - -/* 18.4.8 si CT_Rst */ -function parse_si(x) { - var z = {}; - if(!x) return z; - var y; - /* 18.4.12 t ST_Xstring plaintext string */ - if((y = x.match(/^]*>([^\u2603]*)<\/t>$/m))) { - z.t = utf8read(unescapexml(y[1])); - z.r = x; - } - /* 18.4.4 r CT_RElt Rich Text Run */ - else if((y = x.match(//))) { - z.r = x; - /* TODO: properly parse (note: no other valid child can have body text) */ - z.t = utf8read(unescapexml(x.replace(/<[^>]*>/gm,""))); - } - /* TODO: handle rPh and phoneticPr */ - return z; -} - -/* 18.4 Shared String Table */ -function parseStrs(data) { - var s = []; - /* 18.4.9 sst CT_Sst */ - var sst = data.match(new RegExp("]*)>([\\s\\S]*)<\/sst>","m")); - if(sst) { - s = sst[2].replace(//g,"").split(/<\/si>/).map(parse_si); - - sst = parsexmltag(sst[1]); s.Count = sst.count; s.Unique = sst.uniqueCount; - } - return s; -} - function parseProps(data) { var p = { Company:'' }, q = {}; var strings = ["Application", "DocSecurity", "Company", "AppVersion"]; @@ -773,7 +879,7 @@ function parseZip(zip) { var dir = parseCT((zip.files['[Content_Types].xml']||{}).data); strs = {}; - if(dir.sst) strs=parseStrs(zip.files[dir.sst.replace(/^\//,'')].data); + if(dir.sst) strs=parse_sst(zip.files[dir.sst.replace(/^\//,'')].data); styles = {}; if(dir.style) styles = parseStyles(zip.files[dir.style.replace(/^\//,'')].data);