js-cfb/bits/84_mht.js
2021-10-09 17:49:35 -04:00

207 lines
6.7 KiB
JavaScript

/* Lookup table: file extension (no leading dot) -> MIME content type */
var ContentTypeMap = ({
	/* markup */
	htm: "text/html",
	xml: "text/xml",

	/* images */
	gif: "image/gif",
	jpg: "image/jpeg",
	png: "image/png",

	/* office-specific payloads */
	mso: "application/x-mso",
	thmx: "application/vnd.ms-officetheme",

	/* generic binary */
	sh33tj5: "application/octet-stream"
}/*:any*/);
/**
 * Determine the MIME content type for an entry.
 *
 * Lookup order:
 *   1. an explicit `fi.ctype`
 *   2. the extension of `fi.name` (text after the last ".")
 *   3. the last segment of `fp` (text after the last "." or "\")
 *   4. "application/octet-stream"
 *
 * @param fi entry whose `ctype` and `name` fields are consulted
 * @param fp optional path used as a fallback when `fi.name` gives no match
 * @returns content type string
 */
function get_content_type(fi/*:CFBEntry*/, fp/*:string*/)/*:string*/ {
	if(fi.ctype) return fi.ctype;

	/* own-property test so names like "x.constructor" do not resolve to Object.prototype members */
	var has = Object.prototype.hasOwnProperty;

	var ext = fi.name || "", m = ext.match(/\.([^\.]+)$/);
	if(m && has.call(ContentTypeMap, m[1]) && ContentTypeMap[m[1]]) return ContentTypeMap[m[1]];

	if(fp) {
		/* the quantifier belongs inside the group, otherwise only the final character is captured */
		m = (ext = fp).match(/[\.\\]([^\.\\]+)$/);
		if(m && has.call(ContentTypeMap, m[1]) && ContentTypeMap[m[1]]) return ContentTypeMap[m[1]];
	}

	return "application/octet-stream";
}
/*
 * Base64-encode a binary string and emit it in chunks of at most 76 characters.
 * Every chunk is separated by CRLF and the result always ends with a CRLF.
 * TODO: intertwine encoding
 */
function write_base64_76(bstr/*:string*/)/*:string*/ {
	var encoded = Base64.encode(bstr);
	var chunks/*:Array<string>*/ = [];
	var offset = 0;
	while(offset < encoded.length) {
		chunks.push(encoded.slice(offset, offset + 76));
		offset += 76;
	}
	var body = chunks.join("\r\n");
	return body + "\r\n";
}
/*
Rules for QP:
- escape =## applies for all non-display characters and literal "="
- space or tab at end of line must be encoded
- \r\n newlines can be preserved, but bare \r and \n must be escaped
- lines must not exceed 76 characters, use soft breaks =\r\n
TODO: Some files from word appear to write line extensions with bare equals:
```
<table class=3DMsoTableGrid border=3D1 cellspacing=3D0 cellpadding=3D0 width=
="70%"
```
*/
/**
 * Encode a binary string (one character per byte) as Quoted-Printable.
 *
 * - control characters, "~" through \xFF and literal "=" become =XX (uppercase hex)
 * - a space or tab at the end of a line becomes =20 / =09
 * - CRLF pairs are preserved; a bare CR becomes =0D and a bare LF becomes =0A
 * - output lines are at most 76 characters including the soft-break "="
 *
 * @param text binary string to encode
 * @returns encoded text with CRLF line separators (no trailing CRLF is added)
 */
function write_quoted_printable(text/*:string*/)/*:string*/ {
	var encoded = text.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7E-\xFF=]/g, function(c) {
		var w = c.charCodeAt(0).toString(16).toUpperCase();
		return "=" + (w.length == 1 ? "0" + w : w);
	});

	/* trailing whitespace must be escaped; "$" with the m flag matches before CR and LF */
	encoded = encoded.replace(/ $/mg, "=20").replace(/\t$/mg, "=09");

	/* single pass: "\r\n" is tried first so only bare CR / bare LF are escaped */
	encoded = encoded.replace(/\r\n|\r|\n/g, function(nl) {
		if(nl == "\r\n") return nl;
		return nl == "\r" ? "=0D" : "=0A";
	});

	var o/*:Array<string>*/ = [], split = encoded.split("\r\n");
	for(var si = 0; si < split.length; ++si) {
		var str = split[si];
		if(str.length == 0) { o.push(""); continue; }
		for(var i = 0; i < str.length;) {
			/* the remainder fits on one line: no soft break needed */
			if(str.length - i <= 76) { o.push(str.slice(i)); break; }
			/* reserve one column for the soft-break "=" */
			var end = 75;
			/* never cut through an =XX escape */
			if(str.charAt(i + end - 1) == "=") end -= 1;
			else if(str.charAt(i + end - 2) == "=") end -= 2;
			o.push(str.slice(i, i + end) + "=");
			i += end;
		}
	}

	return o.join("\r\n");
}
/**
 * Decode Quoted-Printable content that has already been split into lines.
 *
 * Lines ending in "=" are soft breaks and are merged with the following line;
 * every =XX escape is then replaced by the character with that hex code.
 * The decoded lines are re-joined with CRLF and passed through `s2a`.
 *
 * @param data encoded lines (without line terminators)
 * @returns result of `s2a` applied to the decoded text
 */
function parse_quoted_printable(data/*:Array<string>*/)/*:RawBytes*/ {
	var o = [];

	/* unify long lines */
	for(var di = 0; di < data.length; ++di) {
		var line = data[di];
		while(line.charAt(line.length - 1) == "=") {
			line = line.slice(0, line.length - 1);
			/* soft break on the last line: nothing to append (avoids reading past the array) */
			if(di + 1 >= data.length) break;
			line += data[++di];
		}
		o.push(line);
	}

	/* decode */
	for(var oi = 0; oi < o.length; ++oi) o[oi] = o[oi].replace(/[=][0-9A-Fa-f]{2}/g, function($$) { return String.fromCharCode(parseInt($$.slice(1), 16)); });
	return s2a(o.join("\r\n"));
}
/**
 * Parse one MIME part and add it to the container with `cfb_add`.
 *
 * Headers are read from at most the first ten lines, stopping at the first
 * empty or whitespace-only line. Content-Location, Content-Type and
 * Content-Transfer-Encoding are recognised (case-insensitive names).
 * The line after the header scan is skipped and the rest is the payload.
 *
 * @param cfb container receiving the new entry
 * @param data lines of the part
 * @param root prefix whose length is removed from the Content-Location value
 * @throws Error when the transfer encoding is neither base64 nor quoted-printable
 */
function parse_mime(cfb/*:CFBContainer*/, data/*:Array<string>*/, root/*:string*/)/*:void*/ {
	var loc = "", encoding = "", mediatype = "", payload;

	/* header scan */
	var idx = 0;
	while(idx < 10) {
		var header = data[idx];
		if(!header || header.match(/^\s*$/)) break;
		var kv = header.match(/^(.*?):\s*([^\s].*)$/);
		if(kv) {
			var key = kv[1].toLowerCase();
			if(key === "content-location") loc = kv[2].trim();
			else if(key === "content-type") mediatype = kv[2].trim();
			else if(key === "content-transfer-encoding") encoding = kv[2].trim();
		}
		++idx;
	}

	/* payload starts one line after the position where the scan stopped */
	var body_start = idx + 1;
	var scheme = encoding.toLowerCase();
	if(scheme === 'base64') {
		payload = s2a(Base64.decode(data.slice(body_start).join("")));
	} else if(scheme === 'quoted-printable') {
		payload = parse_quoted_printable(data.slice(body_start));
	} else {
		throw new Error("Unsupported Content-Transfer-Encoding " + encoding);
	}

	var entry = cfb_add(cfb, loc.slice(root.length), payload, {unsafe: true});
	if(mediatype) entry.ctype = mediatype;
}
/**
 * Parse a MIME multipart document into a container object.
 *
 * The input must start with "mime-version:" (case-insensitive). The boundary
 * is read from the second line, and every section between boundary lines is
 * handed to `parse_mime`.
 *
 * @param file raw document
 * @param options optional settings; `options.root` seeds the common path prefix
 * @returns object with `FileIndex` and `FullPaths`, initialised by `init_cfb`
 * @throws Error on an unrecognised header or when no boundary is found
 */
function parse_mad(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ {
	var magic = a2s(file.slice(0,13)).toLowerCase();
	if(magic != "mime-version:") throw new Error("Unsupported MAD header");
	var root = (options && options.root || "");

	// $FlowIgnore
	var text = (has_buf && Buffer.isBuffer(file)) ? file.toString("binary") : a2s(file);
	var data = text.split("\r\n");

	/* if root is not specified, scan for the common prefix */
	for(var li = 0; li < data.length; ++li) {
		var loc = data[li];
		if(!/^Content-Location:/i.test(loc)) continue;
		loc = loc.slice(loc.indexOf("file"));
		if(!root) root = loc.slice(0, loc.lastIndexOf("/") + 1);
		/* drop trailing path segments until root is a prefix of this location */
		while(loc.slice(0, root.length) != root && root.length > 0) {
			root = root.slice(0, root.length - 1);
			root = root.slice(0, root.lastIndexOf("/") + 1);
		}
	}

	var mboundary = (data[1] || "").match(/boundary="(.*?)"/);
	if(!mboundary) throw new Error("MAD cannot find boundary");
	var boundary = "--" + (mboundary[1] || "");
	var terminator = boundary + "--";

	var FileIndex/*:CFBFileIndex*/ = [], FullPaths/*:Array<string>*/ = [];
	var o = {
		FileIndex: FileIndex,
		FullPaths: FullPaths
	};
	init_cfb(o);

	/* each part runs from one boundary line up to (not including) the next */
	var prev = -1;
	for(var bi = 0; bi < data.length; ++bi) {
		var line = data[bi];
		if(line === boundary || line === terminator) {
			if(prev >= 0) parse_mime(o, data.slice(prev, bi), root);
			prev = bi;
		}
	}
	return o;
}
/**
 * Serialise a container as a MIME multipart/related document.
 *
 * Entries with no size or content, and the "\u0001Sh33tJ5" entry, are skipped.
 * Each remaining entry becomes one part, encoded as quoted-printable when at
 * least 4/5 of its first 1024 characters are printable ASCII (0x20-0x7F),
 * otherwise as base64.
 *
 * @param cfb container; `FullPaths[0]` is the root prefix stripped from every path
 * @param options optional settings: `boundary` (default "SheetJS") and
 *                `root` (Content-Location prefix, default "file:///C:/SheetJS/")
 * @returns the document text with CRLF line separators
 */
function write_mad(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:string*/ {
	var opts = options || {};
	var boundary = opts.boundary || "SheetJS";
	/* delimiter lines carry two extra leading dashes compared to the declared boundary */
	boundary = '------=' + boundary;

	var out = [
		'MIME-Version: 1.0',
		'Content-Type: multipart/related; boundary="' + boundary.slice(2) + '"',
		'',
		'',
		''
	];

	var root = cfb.FullPaths[0], fp = root, fi = cfb.FileIndex[0];
	for(var i = 1; i < cfb.FullPaths.length; ++i) {
		fp = cfb.FullPaths[i].slice(root.length);
		fi = cfb.FileIndex[i];
		if(!fi.size || !fi.content || fp == "\u0001Sh33tJ5") continue;

		/* Normalize filename */
		fp = fp.replace(/[\x00-\x08\x0B\x0C\x0E-\x1F\x7E-\xFF]/g, function(c) {
			return "_x" + c.charCodeAt(0).toString(16) + "_";
		}).replace(/[\u0080-\uFFFF]/g, function(u) {
			return "_u" + u.charCodeAt(0).toString(16) + "_";
		});

		/* Extract content as binary string */
		var ca = fi.content;
		// $FlowIgnore
		var cstr = has_buf && Buffer.isBuffer(ca) ? ca.toString("binary") : a2s(ca);

		/* 4/5 of first 1024 chars ascii -> quoted printable, else base64 */
		var dispcnt = 0, L = Math.min(1024, cstr.length), cc = 0;
		/* strictly fewer than L: the sample must match the window the threshold is computed from */
		for(var csl = 0; csl < L; ++csl) if((cc=cstr.charCodeAt(csl)) >= 0x20 && cc < 0x80) ++dispcnt;
		var qp = dispcnt >= L * 4 / 5;

		out.push(boundary);
		out.push('Content-Location: ' + (opts.root || 'file:///C:/SheetJS/') + fp);
		out.push('Content-Transfer-Encoding: ' + (qp ? 'quoted-printable' : 'base64'));
		out.push('Content-Type: ' + get_content_type(fi, fp));
		out.push('');

		out.push(qp ? write_quoted_printable(cstr) : write_base64_76(cstr));
	}
	out.push(boundary + '--\r\n');
	return out.join("\r\n");
}