diff --git a/.gitignore b/.gitignore index 296e217..ad682f7 100644 --- a/.gitignore +++ b/.gitignore @@ -27,6 +27,7 @@ test_files_pres *.[eE][tT][hH] *.[zZ][iI][pP] *.[mM][sS][iIgG] +*.[mM][hH][tT] *.123 *.htm *.html diff --git a/.travis.yml b/.travis.yml index 1ca55c3..b5b4997 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,5 +1,8 @@ language: node_js +dist: xenial node_js: + - "14" + - "13" - "12" - "11" - "10" @@ -13,12 +16,15 @@ node_js: - "0.10" - "0.8" before_install: - - "npm install -g npm@4.3.0" + - "npm config set strict-ssl false" + - "./misc/node_version.sh" - "npm install -g mocha@2.x voc" - "npm install blanket" - - "npm install xlsjs crc-32" + - "npm install word crc-32" - "npm install coveralls mocha-lcov-reporter" before_script: - "make init" +install: + - npm install after_success: - "make coveralls-spin" diff --git a/CHANGELOG.md b/CHANGELOG.md index c65a3a3..2ff5950 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,11 @@ This log is intended to keep track of backwards-incompatible changes, including but not limited to API changes and file location changes. Minor behavioral changes may not be included if they are not expected to break existing code. 
+## 1.2.0 (2020-07-09) + +* Support for MAD file format (MIME aggregate document) +* Spun off the CLI tool to the `cfb-cli` module + ## 1.1.0 (2018-09-04) * Support for ZIP file format diff --git a/Makefile b/Makefile index 4b778de..068b4b0 100644 --- a/Makefile +++ b/Makefile @@ -4,7 +4,7 @@ FMT=xls doc ppt misc full REQS= ADDONS= AUXTARGETS=xlscfb.js -CMDS=bin/cfb.njs +CMDS=packages/cfb-cli/bin/cfb.njs HTMLLINT=index.html ULIB=$(shell echo $(LIB) | tr a-z A-Z) diff --git a/README.md b/README.md index 8255369..3967f4a 100644 --- a/README.md +++ b/README.md @@ -44,28 +44,8 @@ var data = workbook.content; ## Command-Line Utility Usage -It is preferable to install the library globally with npm: - -```bash -$ npm install -g cfb -``` - -The global installation adds a command `cfb` which can work with files: - -- `cfb file [names...]` extracts the contents of the file. If additional names - are supplied, only the listed files will be extracted. - -- `cfb -l file` lists the contained files (following `unzip -l` "short format") - -- `cfb -r file` attempts to repair by reading and re-writing the file. - This fixes some issues with files generated by non-standard tools. - -- `cfb -c file [files...]` creates a new file containing the listed files. - The default root entry name is `Root Entry`. - -- `cfb -a file [files...]` adds the listed files to the original file. - -- `cfb -d file [files...]` deletes the listed files from the original file. +The [`cfb-cli`](https://www.npmjs.com/package/cfb-cli) module ships with a CLI +tool for manipulating and inspecting supported files. ## JS API @@ -113,10 +93,11 @@ name, if there are no slashes) and returns an entry object or null if not found. 
`opts.fileType` controls the output file type: -| `fileType` | output | -|:-------------------|:--------------| -| `'cfb'` (default) | CFB container | -| `'zip'` | ZIP file | +| `fileType` | output | +|:-------------------|:------------------------| +| `'cfb'` (default) | CFB container | +| `'zip'` | ZIP file | +| `'mad'` | MIME aggregate document | `opts.compression` enables DEFLATE compression for ZIP file type. @@ -157,6 +138,7 @@ interface CFBEntry { content: Buffer | number[] | Uint8Array; /** Raw Content */ ct?: Date; /** Creation Time */ mt?: Date; /** Modification Time */ + ctype?: string; /** Content-Type (for MAD) */ } ``` @@ -172,4 +154,6 @@ granted by the Apache 2.0 License are reserved by the Original Author. - `MS-CFB`: Compound File Binary File Format - ZIP `APPNOTE.TXT`: .ZIP File Format Specification - RFC1951: https://www.ietf.org/rfc/rfc1951.txt + - RFC2045: https://www.ietf.org/rfc/rfc2045.txt + - RFC2557: https://www.ietf.org/rfc/rfc2557.txt diff --git a/bits/05_buf.js b/bits/05_buf.js index d0a0fd3..217e5ac 100644 --- a/bits/05_buf.js +++ b/bits/05_buf.js @@ -24,7 +24,7 @@ function new_unsafe_buf(len/*:number*/) { /* jshint +W056 */ } -var s2a = function s2a(s/*:string*/)/*:any*/ { +var s2a = function s2a(s/*:string*/)/*:RawBytes*/ { if(has_buf) return Buffer_from(s, "binary"); return s.split("").map(function(x/*:string*/)/*:number*/{ return x.charCodeAt(0) & 0xff; }); }; diff --git a/bits/31_version.js b/bits/31_version.js index 96a8b06..79633f6 100644 --- a/bits/31_version.js +++ b/bits/31_version.js @@ -1 +1 @@ -exports.version = '1.1.4'; +exports.version = '1.2.0'; diff --git a/bits/40_parse.js b/bits/40_parse.js index 633e2b3..3631e24 100644 --- a/bits/40_parse.js +++ b/bits/40_parse.js @@ -1,5 +1,6 @@ function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ { if(file[0] == 0x50 && file[1] == 0x4b) return parse_zip(file, options); +if((file[0] | 0x20) == 0x6d && (file[1]|0x20) == 0x69) return parse_mad(file, options); 
if(file.length < 512) throw new Error("CFB file size " + file.length + " < 512"); var mver = 3; var ssz = 512; diff --git a/bits/60_writehead.js b/bits/60_writehead.js index 47e5bc8..1ba952e 100644 --- a/bits/60_writehead.js +++ b/bits/60_writehead.js @@ -1,4 +1,9 @@ -function _write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes*/ { +function _write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes|string*/ { var _opts = options || {}; + /* MAD is order-sensitive, skip rebuild and sort */ + if(_opts.fileType == 'mad') return write_mad(cfb, _opts); rebuild_cfb(cfb); - if(_opts.fileType == 'zip') return write_zip(cfb, _opts); + switch(_opts.fileType) { + case 'zip': return write_zip(cfb, _opts); + //case 'mad': return write_mad(cfb, _opts); + } diff --git a/bits/77_writeutils.js b/bits/77_writeutils.js index 9527cda..3ebb9d2 100644 --- a/bits/77_writeutils.js +++ b/bits/77_writeutils.js @@ -13,10 +13,13 @@ function a2s(o/*:RawBytes*/)/*:string*/ { function write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes|string*/ { var o = _write(cfb, options); - switch(options && options.type) { + switch(options && options.type || "buffer") { case "file": get_fs(); fs.writeFileSync(options.filename, (o/*:any*/)); return o; - case "binary": return a2s(o); - case "base64": return Base64.encode(a2s(o)); + case "binary": return typeof o == "string" ? o : a2s(o); + case "base64": return Base64.encode(typeof o == "string" ? o : a2s(o)); + case "buffer": if(has_buf) return Buffer.isBuffer(o) ? o : Buffer_from(o); + /* falls through */ + case "array": return typeof o == "string" ? 
s2a(o) : o; } return o; } diff --git a/bits/84_mht.js b/bits/84_mht.js new file mode 100644 index 0000000..8edb6db --- /dev/null +++ b/bits/84_mht.js @@ -0,0 +1,206 @@ +var ContentTypeMap = ({ + "htm": "text/html", + "xml": "text/xml", + + "gif": "image/gif", + "jpg": "image/jpeg", + "png": "image/png", + + "mso": "application/x-mso", + "thmx": "application/vnd.ms-officetheme", + "sh33tj5": "application/octet-stream" +}/*:any*/); + +function get_content_type(fi/*:CFBEntry*/, fp/*:string*/)/*:string*/ { + if(fi.ctype) return fi.ctype; + + var ext = fi.name || "", m = ext.match(/\.([^\.]+)$/); + if(m && ContentTypeMap[m[1]]) return ContentTypeMap[m[1]]; + + if(fp) { + m = (ext = fp).match(/[\.\\]([^\.\\]+)$/); + if(m && ContentTypeMap[m[1]]) return ContentTypeMap[m[1]]; + } + + return "application/octet-stream"; +} + +/* 76 character chunks TODO: intertwine encoding */ +function write_base64_76(bstr/*:string*/)/*:string*/ { + var data = Base64.encode(bstr); + var o = []; + for(var i = 0; i < data.length; i+= 76) o.push(data.slice(i, i+76)); + return o.join("\r\n") + "\r\n"; +} + +/* +Rules for QP: + - escape =## applies for all non-display characters and literal "=" + - space or tab at end of line must be encoded + - \r\n newlines can be preserved, but bare \r and \n must be escaped + - lines must not exceed 76 characters, use soft breaks =\r\n + +TODO: Some files from word appear to write line extensions with bare equals: + +``` +