sheetjs-clone/bits/22_xmlutils.js
SheetJS a846f7184d utility improvements
- sheet_to_csv strip option (fixes #182 h/t @davidworkman9)
- sheet_to_json dateNF option (fixes #134 h/t @rotemtam)
- file type detection expanded to 4 byte magic number
2017-03-22 03:50:11 -04:00

189 lines
7.6 KiB
JavaScript

var attregexg=/([^\s?>\/]+)=((?:")([^"]*)(?:")|(?:')([^']*)(?:'))/g;
var tagregex=/<[^>]*>/g;
var nsregex=/<\w*:/, nsregex2 = /<(\/?)\w+:/;
function parsexmltag(tag/*:string*/, skip_root/*:?boolean*/)/*:any*/ {
var z = ({}/*:any*/);
var eq = 0, c = 0;
for(; eq !== tag.length; ++eq) if((c = tag.charCodeAt(eq)) === 32 || c === 10 || c === 13) break;
if(!skip_root) z[0] = tag.substr(0, eq);
if(eq === tag.length) return z;
var m = tag.match(attregexg), j=0, v="", i=0, q="", cc="";
if(m) for(i = 0; i != m.length; ++i) {
cc = m[i];
for(c=0; c != cc.length; ++c) if(cc.charCodeAt(c) === 61) break;
q = cc.substr(0,c); v = cc.substring(c+2, cc.length-1);
for(j=0;j!=q.length;++j) if(q.charCodeAt(j) === 58) break;
if(j===q.length) {
if(q.indexOf("_") > 0) q = q.substr(0, q.indexOf("_")); // from ods
z[q] = v;
}
else {
var k = (j===5 && q.substr(0,5)==="xmlns"?"xmlns":"")+q.substr(j+1);
if(z[k] && q.substr(j-3,3) == "ext") continue; // from ods
z[k] = v;
}
}
return z;
}
function strip_ns(x/*:string*/)/*:string*/ { return x.replace(nsregex2, "<$1"); }
var encodings = {
'&quot;': '"',
'&apos;': "'",
'&gt;': '>',
'&lt;': '<',
'&amp;': '&'
};
var rencoding = evert(encodings);
var rencstr = "&<>'\"".split("");
// TODO: CP remap (need to read file version to determine OS)
var unescapexml/*:StringConv*/ = (function() {
/* 22.4.2.4 bstr (Basic String) */
var encregex = /&(?:quot|apos|gt|lt|amp|#x?([\da-fA-F]+));/g, coderegex = /_x([\da-fA-F]{4})_/g;
return function unescapexml(text/*:string*/)/*:string*/ {
var s = text + '';
return s.replace(encregex, function($$, $1) { return encodings[$$]||String.fromCharCode(parseInt($1,$$.indexOf("x")>-1?16:10))||$$; }).replace(coderegex,function(m,c) {return String.fromCharCode(parseInt(c,16));});
};
})();
var decregex=/[&<>'"]/g, charegex = /[\u0000-\u0008\u000b-\u001f]/g;
function escapexml(text/*:string*/, xml/*:?boolean*/)/*:string*/{
var s = text + '';
return s.replace(decregex, function(y) { return rencoding[y]; }).replace(charegex,function(s) { return "_x" + ("000"+s.charCodeAt(0).toString(16)).slice(-4) + "_";});
}
function escapexmltag(text/*:string*/)/*:string*/{ return escapexml(text).replace(/ /g,"_x0020_"); }
/* TODO: handle codepages */
var xlml_fixstr/*:StringConv*/ = (function() {
var entregex = /&#(\d+);/g;
function entrepl($$/*:string*/,$1/*:string*/)/*:string*/ { return String.fromCharCode(parseInt($1,10)); }
return function xlml_fixstr(str/*:string*/)/*:string*/ { return str.replace(entregex,entrepl); };
})();
function parsexmlbool(value/*:any*/, tag/*:?string*/)/*:boolean*/ {
switch(value) {
case '1': case 'true': case 'TRUE': return true;
/* case '0': case 'false': case 'FALSE':*/
default: return false;
}
}
var utf8read/*:StringConv*/ = function utf8reada(orig) {
var out = "", i = 0, c = 0, d = 0, e = 0, f = 0, w = 0;
while (i < orig.length) {
c = orig.charCodeAt(i++);
if (c < 128) { out += String.fromCharCode(c); continue; }
d = orig.charCodeAt(i++);
if (c>191 && c<224) { out += String.fromCharCode(((c & 31) << 6) | (d & 63)); continue; }
e = orig.charCodeAt(i++);
if (c < 240) { out += String.fromCharCode(((c & 15) << 12) | ((d & 63) << 6) | (e & 63)); continue; }
f = orig.charCodeAt(i++);
w = (((c & 7) << 18) | ((d & 63) << 12) | ((e & 63) << 6) | (f & 63))-65536;
out += String.fromCharCode(0xD800 + ((w>>>10)&1023));
out += String.fromCharCode(0xDC00 + (w&1023));
}
return out;
};
if(has_buf) {
var utf8readb = function utf8readb(data) {
var out = new Buffer(2*data.length), w, i, j = 1, k = 0, ww=0, c;
for(i = 0; i < data.length; i+=j) {
j = 1;
if((c=data.charCodeAt(i)) < 128) w = c;
else if(c < 224) { w = (c&31)*64+(data.charCodeAt(i+1)&63); j=2; }
else if(c < 240) { w=(c&15)*4096+(data.charCodeAt(i+1)&63)*64+(data.charCodeAt(i+2)&63); j=3; }
else { j = 4;
w = (c & 7)*262144+(data.charCodeAt(i+1)&63)*4096+(data.charCodeAt(i+2)&63)*64+(data.charCodeAt(i+3)&63);
w -= 65536; ww = 0xD800 + ((w>>>10)&1023); w = 0xDC00 + (w&1023);
}
if(ww !== 0) { out[k++] = ww&255; out[k++] = ww>>>8; ww = 0; }
out[k++] = w%256; out[k++] = w>>>8;
}
out.length = k;
return out.toString('ucs2');
};
var corpus = "foo bar baz\u00e2\u0098\u0083\u00f0\u009f\u008d\u00a3";
if(utf8read(corpus) == utf8readb(corpus)) utf8read = utf8readb;
// $FlowIgnore
var utf8readc = function utf8readc(data) { return Buffer(data, 'binary').toString('utf8'); };
if(utf8read(corpus) == utf8readc(corpus)) utf8read = utf8readc;
}
// matches <foo>...</foo> extracts content
var matchtag = (function() {
var mtcache/*:{[k:string]:RegExp}*/ = ({}/*:any*/);
return function matchtag(f,g/*:?string*/)/*:RegExp*/ {
var t = f+"|"+(g||"");
if(mtcache[t]) return mtcache[t];
return (mtcache[t] = new RegExp('<(?:\\w+:)?'+f+'(?: xml:space="preserve")?(?:[^>]*)>([^\u2603]*)</(?:\\w+:)?'+f+'>',((g||"")/*:any*/)));
};
})();
var vtregex = (function(){ var vt_cache = {};
return function vt_regex(bt) {
if(vt_cache[bt] !== undefined) return vt_cache[bt];
return (vt_cache[bt] = new RegExp("<(?:vt:)?" + bt + ">(.*?)</(?:vt:)?" + bt + ">", 'g') );
};})();
var vtvregex = /<\/?(:?vt:)?variant>/g, vtmregex = /<(:?vt:)?([^>]*)>(.*)</;
function parseVector(data) {
var h = parsexmltag(data);
var matches = data.match(vtregex(h.baseType))||[];
if(matches.length != h.size) throw new Error("unexpected vector length " + matches.length + " != " + h.size);
var res = [];
matches.forEach(function(x) {
var v = x.replace(vtvregex,"").match(vtmregex);
res.push({v:v[2], t:v[1]});
});
return res;
}
var wtregex = /(^\s|\s$|\n)/;
function writetag(f,g) {return '<' + f + (g.match(wtregex)?' xml:space="preserve"' : "") + '>' + g + '</' + f + '>';}
function wxt_helper(h)/*:string*/ { return keys(h).map(function(k) { return " " + k + '="' + h[k] + '"';}).join(""); }
function writextag(f,g,h) { return '<' + f + (isval(h) /*:: && h */? wxt_helper(h) : "") + (isval(g) /*:: && g */? (g.match(wtregex)?' xml:space="preserve"' : "") + '>' + g + '</' + f : "/") + '>';}
function write_w3cdtf(d/*:Date*/, t/*:?boolean*/)/*:string*/ { try { return d.toISOString().replace(/\.\d*/,""); } catch(e) { if(t) throw e; } return ""; }
function write_vt(s) {
switch(typeof s) {
case 'string': return writextag('vt:lpwstr', s);
case 'number': return writextag((s|0)==s?'vt:i4':'vt:r8', String(s));
case 'boolean': return writextag('vt:bool',s?'true':'false');
}
if(s instanceof Date) return writextag('vt:filetime', write_w3cdtf(s));
throw new Error("Unable to serialize " + s);
}
var XML_HEADER = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\r\n';
var XMLNS = ({
'dc': 'http://purl.org/dc/elements/1.1/',
'dcterms': 'http://purl.org/dc/terms/',
'dcmitype': 'http://purl.org/dc/dcmitype/',
'mx': 'http://schemas.microsoft.com/office/mac/excel/2008/main',
'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
'sjs': 'http://schemas.openxmlformats.org/package/2006/sheetjs/core-properties',
'vt': 'http://schemas.openxmlformats.org/officeDocument/2006/docPropsVTypes',
'xsi': 'http://www.w3.org/2001/XMLSchema-instance',
'xsd': 'http://www.w3.org/2001/XMLSchema'
}/*:any*/);
XMLNS.main = [
'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
'http://purl.oclc.org/ooxml/spreadsheetml/main',
'http://schemas.microsoft.com/office/excel/2006/main',
'http://schemas.microsoft.com/office/excel/2006/2'
];
var XLMLNS = ({
'o': 'urn:schemas-microsoft-com:office:office',
'x': 'urn:schemas-microsoft-com:office:excel',
'ss': 'urn:schemas-microsoft-com:office:spreadsheet',
'dt': 'uuid:C2F41010-65B3-11d1-A29F-00AA00C14882',
'html': 'http://www.w3.org/TR/REC-html40'
}/*:any*/);