diff --git a/.flowconfig b/.flowconfig index 99333f7..191e085 100644 --- a/.flowconfig +++ b/.flowconfig @@ -1,6 +1,8 @@ [ignore] .*/node_modules/.* .*/dist/.* +.*/test_files/.* +.*/test_files_pres/.* .*/test.js .*/bits/.* @@ -11,7 +13,6 @@ .*/demo/browser.js .*/shim.js -.*/odsbits/.* .*/xlscfb.js .*/cfb.js .*/jszip.js diff --git a/.gitignore b/.gitignore index bfe1a57..5f8f8ce 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,27 @@ node_modules +package-lock.json +*.tgz misc/coverage.html prof.js v8.log test_files test_files_pres -*.xls +*.[tT][xX][tT] +*.[cC][sS][vV] +*.[dD][iIbB][fF] +*.[pP][rR][nN] +*.[sS][lL][kK] +*.socialcalc +*.[xX][lL][sSwWcC] +*.[xX][lL][sS][xXmMbB] +*.[oO][dD][sS] +*.[fF][oO][dD][sS] +*.[xX][mM][lL] +*.[uU][oO][sS] +*.[wW][kKqQbB][S1234567890] +*.[qQ][pP][wW] +*.123 +*.htm +*.html *.sheetjs +*.exe diff --git a/.travis.yml b/.travis.yml index 171ea58..6871faf 100644 --- a/.travis.yml +++ b/.travis.yml @@ -13,7 +13,7 @@ before_install: - "npm install -g npm@4.3.0" - "npm install -g mocha@2.x voc" - "npm install blanket" - - "npm install xlsjs" + - "npm install xlsjs crc-32" - "npm install coveralls mocha-lcov-reporter" before_script: - "make init" diff --git a/Makefile b/Makefile index 946e096..bc69d3e 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ DEPS=$(sort $(wildcard bits/*.js)) TARGET=$(LIB).js FLOWTARGET=$(LIB).flow.js FLOWTGTS=$(TARGET) $(AUXTARGETS) -UGLIFYOPTS=--support-ie8 +UGLIFYOPTS=--support-ie8 -m CLOSURE=/usr/local/lib/node_modules/google-closure-compiler/compiler.jar ## Main Targets @@ -48,7 +48,7 @@ init: ## Initial setup for development dist: dist-deps $(TARGET) ## Prepare JS files for distribution cp $(TARGET) dist/ cp LICENSE dist/ - uglifyjs $(UGLIFYOPTS) $(TARGET) -o dist/$(LIB).min.js --source-map dist/$(LIB).min.map --preamble "$$(head -n 1 bits/00_header.js)" + uglifyjs $(TARGET) $(UGLIFYOPTS) -o dist/$(LIB).min.js --source-map dist/$(LIB).min.map --preamble "$$(head -n 1 bits/00_header.js)" misc/strip_sourcemap.sh dist/$(LIB).min.js .PHONY: dist-deps @@ -102,7 +102,7 @@ tslint: $(TARGET) ## Run typescript checks .PHONY: flow flow: lint ## Run flow checker - @flow check --all --show-all-errors + @flow check --all --show-all-errors --include-warnings .PHONY: cov cov: misc/coverage.html ## Run coverage test diff --git a/README.md b/README.md index d7481d5..541512b 100644 --- a/README.md +++ b/README.md @@ -1,29 +1,34 @@ # Compound File Binary Format -This is a Pure-JS implementation of MS-CFB: Compound File Binary File Format, a -format used in many Microsoft file types (such as XLS and DOC) +Pure-JS implementation of MS-CFB: Compound File Binary File Format, a container +format used in many Microsoft file types (XLS, DOC, VBA blobs in XLSX and XLSB) -# Utility Installation and Usage +[![Build Status](https://travis-ci.org/SheetJS/js-cfb.svg?branch=master)](https://travis-ci.org/SheetJS/js-cfb) +[![Coverage Status](http://img.shields.io/coveralls/SheetJS/js-cfb/master.svg)](https://coveralls.io/r/SheetJS/js-cfb?branch=master) +[![Dependencies Status](https://david-dm.org/sheetjs/js-cfb/status.svg)](https://david-dm.org/sheetjs/js-cfb) +[![NPM Downloads](https://img.shields.io/npm/dt/cfb.svg)](https://npmjs.org/package/cfb) +[![ghit.me](https://ghit.me/badge.svg?repo=sheetjs/js-xlsx)](https://ghit.me/repo/sheetjs/js-xlsx) +[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/js-cfb?pixel)](https://github.com/SheetJS/js-cfb) -With [npm](https://www.npmjs.org/package/cfb): - -```bash -$ npm install -g cfb -$ cfb path/to/CFB/file -``` - -The command will extract the storages and streams in the container, generating -files that line up with the tree-based structure of the storage. Metadata such -as the red-black tree are discarded. The `-l` option displays a manifest. - -# Library Installation and Usage +## Installation In the browser: ```html - + ``` +With [npm](https://www.npmjs.org/package/cfb): + +```bash +$ npm install cfb +``` + +The `xlscfb.js` file is designed to be embedded in [js-xlsx](http://git.io/xlsx) + + +## Library Usage + In node: ```js @@ -34,36 +39,72 @@ For example, to get the Workbook content from an XLS file: ```js var cfb = CFB.read(filename, {type: 'file'}); -var workbook = cfb.find('Workbook'); +var workbook = CFB.find(cfb, 'Workbook'); var data = workbook.content; ``` -The `xlscfb.js` file is designed to be embedded in [js-xlsx](http://git.io/xlsx) -# API +## Command-Line Utility Usage -Typescript definitions are maintained in `types/index.d.ts`. +It is preferable to install the library globally with npm: + +```bash +$ npm install -g cfb +``` + +The global installation adds a command `cfb` which can work with existing files: + +- `cfb file` will extract the contents of the file to the current directory. + It will make the corresponding subdirectories. +- `cfb --list-files file` will show a listing of the contained files. + The format follows the `unzip -l` "short format". +- `cfb --repair file` will attempt to repair by reading and re-writing the file. + This fixes some issues with files generated by non-standard tools. + + +## JS API + +TypeScript definitions are maintained in `types/index.d.ts`. The CFB object exposes the following methods and properties: `CFB.parse(blob)` takes a nodejs Buffer or an array of bytes and returns an parsed representation of the data. -`CFB.read(blob, options)` wraps `parse`. `options.type` controls the behavior: +`CFB.read(blob, opts)` wraps `parse`. `opts.type` controls the behavior: -- `file`: `blob` should be a file name -- `base64`: `blob` should be a base64 string -- `binary`: `blob` should be a binary string +- `file`: `blob` is interpreted as a file name that will be read +- `base64`: `blob` is interpreted as base64 string +- `binary`: `blob` is interpreted as binary string +- default: `blob` is interpreted as nodejs buffer or array of bytes `CFB.find(cfb, path)` performs a case-insensitive match for the path (or file name, if there are no slashes) and returns an entry object or null if not found. +`CFB.write(cfb, opts)` generates a file based on the container. `opts.type` +controls the behavior: + +- `base64`: returns a base64 string +- `binary`: returns a binary string +- default: returns a nodejs buffer or array of bytes + +`CFB.writeFile(cfb, filename, opts)` creates a file with the specified name. + + +## Utility Functions + +The utility functions are available in the `CFB.utils` object. Functions that +accept a `name` argument strictly deal with absolute file names: + +- `.cfb_new(?opts)` creates a new container object. +- `.cfb_add(cfb, name, ?content, ?opts)` adds a new file to the `cfb`. +- `.cfb_del(cfb, name)` deletes the specified file +- `.cfb_mov(cfb, old_name, new_name)` moves the old file to new path and name + + ## Container Object Description -The object returned by `parse` and `read` can be found in the source (`rval`). -It has the following properties and methods: - -- `.find(path)` is equivalent to `CFB.find(cfb, path)` and should not be used. +The objects returned by `parse` and `read` have the following properties: - `.FullPaths` is an array of the names of all of the streams (files) and storages (directories) in the container. The paths are properly prefixed from @@ -77,32 +118,37 @@ It has the following properties and methods: - `.raw` contains the raw header and sectors + ## Entry Object Description The entry objects are available from `FullPathDir` and `FileIndex` elements of -the container object. +the container object: -- `.name` is the (case sensitive) internal name -- `.type` is the type as defined in "Object Type" in [MS-CFB] 2.6.1: - `2 (stream)` for files, `1 (storage)` for dirs, `5 (root)` for root) -- `.content` is a Buffer/Array with the raw content -- `.ct`/`.mt` are the creation and modification time (if provided in file) - -# License - -This implementation is covered under Apache 2.0 license. It complies with the -[Open Specifications Promise](http://www.microsoft.com/openspecifications/) - -[![Build Status](https://travis-ci.org/SheetJS/js-cfb.svg?branch=master)](https://travis-ci.org/SheetJS/js-cfb) - -[![Coverage Status](http://img.shields.io/coveralls/SheetJS/js-cfb/master.svg)](https://coveralls.io/r/SheetJS/js-cfb?branch=master) - -[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/js-cfb?pixel)](https://github.com/SheetJS/js-cfb) - -[![NPM Downloads](https://img.shields.io/npm/dt/cfb.svg)](https://npmjs.org/package/cfb) - -[![Dependencies Status](https://david-dm.org/sheetjs/js-cfb/status.svg)](https://david-dm.org/sheetjs/js-cfb) - -[![ghit.me](https://ghit.me/badge.svg?repo=sheetjs/js-cfb)](https://ghit.me/repo/sheetjs/js-cfb) +```typescript +interface CFBEntry { + name: string; /** Case-sensitive internal name */ + type: number; /** 1 = dir, 2 = file, 5 = root ; see [MS-CFB] 2.6.1 */ + content: Buffer | number[] | Uint8Array; /** Raw Content */ + ct?: Date; /** Creation Time */ + mt?: Date; /** Modification Time */ +} +``` + + +## License + +Please consult the attached LICENSE file for details. All rights not explicitly +granted by the Apache 2.0 License are reserved by the Original Author. + + +## References + + +
+ OSP-covered Specifications (click to show) + + - [MS-CFB]: Compound File Binary File Format + +
diff --git a/bin/cfb.njs b/bin/cfb.njs index ccc2c81..ca7e778 100755 --- a/bin/cfb.njs +++ b/bin/cfb.njs @@ -3,13 +3,16 @@ /* eslint-env node */ /* vim: set ts=2 ft=javascript: */ var X = require('../'); -var fs = require('fs'), program = require('commander'); +var fs = require('fs'); +var program = require('commander'); +var PRINTJ = require("printj"); program .version(X.version) .usage('[options] ') .option('-q, --quiet', 'process but do not report') .option('-l, --list-files', 'list files') .option('-d, --dump', 'dump internal representation but do not extract') + .option('-r, --repair', 'attempt to repair and garbage-collect archive') .option('--dev', 'development mode') .option('--read', 'read but do not print out contents'); @@ -33,26 +36,30 @@ if(program.dump) { console.log(cfb.FullPathDir); process.exit(0); } -if(program.listFiles) { - var PRINTJ = require("printj"), sprintf = PRINTJ.sprintf; +if(program.repair) { + X.writeFile(cfb, program.args[0]); + process.exit(0); +} +var sprintf = PRINTJ.sprintf; +function fix_string(x/*:string*/)/*:string*/ { return x.replace(/[\u0000-\u001f]/, function($$) { return sprintf("\\u%04X", $$.charCodeAt(0)); }); } +if(program.listFiles) { var format_date = function(date/*:Date*/)/*:string*/ { return sprintf("%02u-%02u-%02u %02u:%02u", date.getUTCMonth()+1, date.getUTCDate(), date.getUTCFullYear()%100, date.getUTCHours(), date.getUTCMinutes()); }; var basetime = new Date(1980,0,1); - var cnt = 0; - var rootsize = 0, filesize = 0; + var cnt = 0, rootsize = 0, filesize = 0; console.log(" Length Date Time Name"); console.log(" -------- ---- ---- ----"); - cfb.FileIndex.forEach(function(file, i) { + cfb.FileIndex.forEach(function(file, i/*:number*/) { switch(file.type) { case 5: basetime = file.ct || file.mt || basetime; rootsize = file.size; break; case 2: - console.log(sprintf("%9lu %s %s", file.size, format_date(basetime), cfb.FullPaths[i])); + console.log(sprintf("%9lu %s %s", file.size, format_date(basetime), fix_string(cfb.FullPaths[i]))); filesize += file.size; ++cnt; } @@ -64,10 +71,10 @@ if(program.listFiles) { } for(var i=0; i!==cfb.FullPaths.length; ++i) { if(cfb.FullPaths[i].slice(-1) === "/") { - console.error("mkdir " + cfb.FullPaths[i]); + console.error("mkdir " + fix_string(cfb.FullPaths[i])); fs.mkdirSync(cfb.FullPaths[i]); } else { - console.error("writing " + cfb.FullPaths[i]); + console.error("write " + fix_string(cfb.FullPaths[i])); fs.writeFileSync(cfb.FullPaths[i], /*::new Buffer((*/cfb.FileIndex[i].content/*:: :any))*/); } } diff --git a/bits/00_header.js b/bits/00_header.js index e6f26f7..20d4aac 100644 --- a/bits/00_header.js +++ b/bits/00_header.js @@ -1,4 +1,6 @@ /* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ /*jshint eqnull:true */ +/*exported CFB */ +/*global module, require:false, process:false, Buffer:false, Uint8Array:false */ diff --git a/bits/04_base64.js b/bits/04_base64.js new file mode 100644 index 0000000..ea09b3f --- /dev/null +++ b/bits/04_base64.js @@ -0,0 +1,46 @@ +var Base64 = (function make_b64(){ + var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; + return { + encode: function(input/*:string*/)/*:string*/ { + var o = ""; + var c1/*:number*/, c2/*:number*/, c3/*:number*/; + var e1/*:number*/, e2/*:number*/, e3/*:number*/, e4/*:number*/; + for(var i = 0; i < input.length; ) { + c1 = input.charCodeAt(i++); + e1 = (c1 >> 2); + + c2 = input.charCodeAt(i++); + e2 = ((c1 & 3) << 4) | (c2 >> 4); + + c3 = input.charCodeAt(i++); + e3 = ((c2 & 15) << 2) | (c3 >> 6); + e4 = (c3 & 63); + if (isNaN(c2)) { e3 = e4 = 64; } + else if (isNaN(c3)) { e4 = 64; } + o += map.charAt(e1) + map.charAt(e2) + map.charAt(e3) + map.charAt(e4); + } + return o; + }, + decode: function b64_decode(input/*:string*/)/*:string*/ { + var o = ""; + var c1/*:number*/, c2/*:number*/, c3/*:number*/; + var e1/*:number*/, e2/*:number*/, e3/*:number*/, e4/*:number*/; + input = input.replace(/[^\w\+\/\=]/g, ""); + for(var i = 0; i < input.length;) { + e1 = map.indexOf(input.charAt(i++)); + e2 = map.indexOf(input.charAt(i++)); + c1 = (e1 << 2) | (e2 >> 4); + o += String.fromCharCode(c1); + + e3 = map.indexOf(input.charAt(i++)); + c2 = ((e2 & 15) << 4) | (e3 >> 2); + if (e3 !== 64) { o += String.fromCharCode(c2); } + + e4 = map.indexOf(input.charAt(i++)); + c3 = ((e3 & 3) << 6) | e4; + if (e4 !== 64) { o += String.fromCharCode(c3); } + } + return o; + } + }; +})(); diff --git a/bits/05_buf.js b/bits/05_buf.js new file mode 100644 index 0000000..01a3eba --- /dev/null +++ b/bits/05_buf.js @@ -0,0 +1,15 @@ +var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' && typeof process.versions !== 'undefined' && process.versions.node); + +function new_raw_buf(len/*:number*/) { + /* jshint -W056 */ + // $FlowIgnore + return new (has_buf ? Buffer : Array)(len); + /* jshint +W056 */ +} + +var s2a = function s2a(s/*:string*/) { + if(has_buf) return new Buffer(s, "binary"); + return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); +}; + +var chr0 = /\u0000/g, chr1 = /[\u0001-\u0006]/; diff --git a/bits/08_blob.js b/bits/08_blob.js index c909c3d..60aaf76 100644 --- a/bits/08_blob.js +++ b/bits/08_blob.js @@ -1,37 +1,6 @@ -var Base64 = (function(){ - var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; - return { - decode: function(input/*:string*/)/*:string*/ { - var o = ""; - var c1/*:number*/, c2/*:number*/, c3/*:number*/; - var e1/*:number*/, e2/*:number*/, e3/*:number*/, e4/*:number*/; - input = input.replace(/[^\w\+\/\=]/g, ""); - for(var i = 0; i < input.length;) { - e1 = map.indexOf(input.charAt(i++)); - e2 = map.indexOf(input.charAt(i++)); - c1 = (e1 << 2) | (e2 >> 4); - o += String.fromCharCode(c1); - - e3 = map.indexOf(input.charAt(i++)); - c2 = ((e2 & 15) << 4) | (e3 >> 2); - if (e3 !== 64) { o += String.fromCharCode(c2); } - - e4 = map.indexOf(input.charAt(i++)); - c3 = ((e3 & 3) << 6) | e4; - if (e4 !== 64) { o += String.fromCharCode(c3); } - } - return o; - } - }; -})(); - -var chr0 = /\u0000/g, chr1 = /[\u0001-\u0006]/; - -var s2a = function _s2a(s/*:string*/)/*:RawBytes*/ { return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); }; -var _s2a = s2a; var __toBuffer = function(bufs/*:Array >*/)/*:RawBytes*/ { var x = []; for(var i = 0; i < bufs[0].length; ++i) { x.push.apply(x, bufs[0][i]); } return x; }; var ___toBuffer = __toBuffer; -var __utf16le = function(b/*:RawBytes|CFBlob*/,s/*:number*/,e/*:number*/)/*:string*/ { var ss/*:Array*/=[]; for(var i=s; i*/=[]; for(var i=s; i*/=[]; for(var i=s; i*/)/*:RawBytes*/ { var bconcat = __bconcat; -if(typeof Buffer !== "undefined") { +if(has_buf/*:: && typeof Buffer !== 'undefined'*/) { __utf16le = function(b/*:RawBytes|CFBlob*/,s/*:number*/,e/*:number*/)/*:string*/ { if(!Buffer.isBuffer(b)/*:: || !(b instanceof Buffer)*/) return ___utf16le(b,s,e); - return b.toString('utf16le',s,e).replace(chr0,'').replace(chr1,'!'); + return b.toString('utf16le',s,e).replace(chr0,'')/*.replace(chr1,'!')*/; }; __hexlify = function(b/*:RawBytes|CFBlob*/,s/*:number*/,l/*:number*/)/*:string*/ { return Buffer.isBuffer(b)/*:: && b instanceof Buffer*/ ? b.toString('hex',s,s+l) : ___hexlify(b,s,l); }; __toBuffer = function(bufs/*:Array>*/)/*:RawBytes*/ { return (bufs[0].length > 0 && Buffer.isBuffer(bufs[0][0])) ? Buffer.concat((bufs[0]/*:any*/)) : ___toBuffer(bufs);}; @@ -72,7 +41,38 @@ function ReadShift(size/*:number*/, t/*:?string*/)/*:number|string*/ { case 4: oI = __readInt32LE(this, this.l); break; case 16: type = 2; oS = __hexlify(this, this.l, size); } - this.l+=size; if(type === 0) return oI; return oS; + this.l += size; if(type === 0) return oI; return oS; +} + +var __writeUInt32LE = function(b/*:RawBytes|CFBlob*/, val/*:number*/, idx/*:number*/)/*:void*/ { b[idx] = (val & 0xFF); b[idx+1] = ((val >>> 8) & 0xFF); b[idx+2] = ((val >>> 16) & 0xFF); b[idx+3] = ((val >>> 24) & 0xFF); }; +var __writeInt32LE = function(b/*:RawBytes|CFBlob*/, val/*:number*/, idx/*:number*/)/*:void*/ { b[idx] = (val & 0xFF); b[idx+1] = ((val >> 8) & 0xFF); b[idx+2] = ((val >> 16) & 0xFF); b[idx+3] = ((val >> 24) & 0xFF); }; + +function WriteShift(t/*:number*/, val/*:string|number*/, f/*:?string*/)/*:any*/ { + var size = 0, i = 0; + switch(f) { + case "hex": for(; i < t; ++i) { + /*:: if(typeof val !== "string") throw new Error("unreachable"); */ + this[this.l++] = parseInt(val.slice(2*i, 2*i+2), 16)||0; + } return this; + case "utf16le": + /*:: if(typeof val !== "string") throw new Error("unreachable"); */ + var end/*:number*/ = this.l + t; + for(i = 0; i < Math.min(val.length, t); ++i) { + var cc = val.charCodeAt(i); + this[this.l++] = cc & 0xff; + this[this.l++] = cc >> 8; + } + while(this.l < end) this[this.l++] = 0; + return this; + } + /*:: if(typeof val !== "number") throw new Error("unreachable"); */ + switch(t) { + case 1: size = 1; this[this.l] = val&0xFF; break; + case 2: size = 2; this[this.l] = val&0xFF; val >>>= 8; this[this.l+1] = val&0xFF; break; + case 4: size = 4; __writeUInt32LE(this, val, this.l); break; + case -4: size = 4; __writeInt32LE(this, val, this.l); break; + } + this.l += size; return this; } function CheckField(hexstr/*:string*/, fld/*:string*/)/*:void*/ { @@ -85,5 +85,12 @@ function prep_blob(blob/*:CFBlob*/, pos/*:number*/)/*:void*/ { blob.l = pos; blob.read_shift = /*::(*/ReadShift/*:: :any)*/; blob.chk = CheckField; + blob.write_shift = WriteShift; +} + +function new_buf(sz/*:number*/)/*:any*/ { + var o/*:CFBlob*/ = (new_raw_buf(sz)/*:any*/); + prep_blob(o, 0); + return o; } diff --git a/bits/31_version.js b/bits/31_version.js index c011f7f..026922b 100644 --- a/bits/31_version.js +++ b/bits/31_version.js @@ -1 +1 @@ -exports.version = '0.12.1'; +exports.version = '0.13.0'; diff --git a/bits/33_sort.js b/bits/33_sort.js new file mode 100644 index 0000000..6c75763 --- /dev/null +++ b/bits/33_sort.js @@ -0,0 +1,9 @@ +/* [MS-CFB] 2.6.4 */ +function namecmp(l/*:string*/, r/*:string*/)/*:number*/ { + var L = l.split("/"), R = r.split("/"); + for(var i = 0, c = 0, Z = Math.min(L.length, R.length); i < Z; ++i) { + if((c = L[i].length - R[i].length)) return c; + if(L[i] != R[i]) return L[i] < R[i] ? -1 : 1; + } + return L.length - R.length; +} diff --git a/bits/35_path.js b/bits/35_path.js new file mode 100644 index 0000000..f100a09 --- /dev/null +++ b/bits/35_path.js @@ -0,0 +1,11 @@ +function dirname(p/*:string*/)/*:string*/ { + if(p.charAt(p.length - 1) == "/") return (p.slice(0,-1).indexOf("/") === -1) ? p : dirname(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(0, c+1); +} + +function filename(p/*:string*/)/*:string*/ { + if(p.charAt(p.length - 1) == "/") return filename(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(c+1); +} diff --git a/bits/40_parse.js b/bits/40_parse.js index 98eff26..961c10e 100644 --- a/bits/40_parse.js +++ b/bits/40_parse.js @@ -1,11 +1,11 @@ function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ { -var mver = 3; // major version -var ssz = 512; // sector size +var mver = 3; +var ssz = 512; var nmfs = 0; // number of mini FAT sectors -var ndfs = 0; // number of DIFAT sectors -var dir_start = 0; // first directory sector location -var minifat_start = 0; // first mini FAT sector location -var difat_start = 0; // first mini FAT sector location +var difat_sec_cnt = 0; +var dir_start = 0; +var minifat_start = 0; +var difat_start = 0; var fat_addrs/*:Array*/ = []; // locations of FAT sectors @@ -29,11 +29,10 @@ var header/*:RawBytes*/ = file.slice(0,ssz); check_shifts(blob, mver); // Number of Directory Sectors -var nds/*:number*/ = blob.read_shift(4, 'i'); -if(mver === 3 && nds !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + nds); +var dir_cnt/*:number*/ = blob.read_shift(4, 'i'); +if(mver === 3 && dir_cnt !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + dir_cnt); // Number of FAT Sectors -//var nfs = blob.read_shift(4, 'i'); blob.l += 4; // First Directory Sector Location @@ -55,7 +54,7 @@ nmfs = blob.read_shift(4, 'i'); difat_start = blob.read_shift(4, 'i'); // Number of DIFAT Sectors -ndfs = blob.read_shift(4, 'i'); +difat_sec_cnt = blob.read_shift(4, 'i'); // Grab FAT Sector Locations for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ @@ -67,7 +66,7 @@ for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ /** Break the file up into sectors */ var sectors/*:Array*/ = sectorify(file, ssz); -sleuth_fat(difat_start, ndfs, sectors, ssz, fat_addrs); +sleuth_fat(difat_start, difat_sec_cnt, sectors, ssz, fat_addrs); /** Chains */ var sector_list/*:SectorList*/ = make_sector_list(sectors, dir_start, fat_addrs, ssz); @@ -83,18 +82,16 @@ var files/*:CFBFiles*/ = {}, Paths/*:Array*/ = [], FileIndex/*:CFBFileIn read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex); build_full_paths(FileIndex, FullPathDir, FullPaths, Paths); +Paths.shift(); -var root_name/*:string*/ = Paths.shift(); - -/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ -var find_path = make_find_path(FullPaths, Paths, FileIndex, files, root_name); - -return { - raw: {header: header, sectors: sectors}, +var o = { FileIndex: FileIndex, FullPaths: FullPaths, - FullPathDir: FullPathDir, - find: find_path + FullPathDir: FullPathDir }; + +// $FlowIgnore +if(options && options.raw) o.raw = {header: header, sectors: sectors}; +return o; } // parse diff --git a/bits/44_findpath.js b/bits/44_findpath.js deleted file mode 100644 index 910ade5..0000000 --- a/bits/44_findpath.js +++ /dev/null @@ -1,17 +0,0 @@ -/* [MS-CFB] 2.6.4 */ -function make_find_path(FullPaths/*:Array*/, Paths/*:Array*/, FileIndex/*:CFBFileIndex*/, files/*:CFBFiles*/, root_name/*:string*/)/*:CFBFindPath*/ { - var UCFullPaths/*:Array*/ = []; - var UCPaths/*:Array*/ = [], i = 0; - for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - return function find_path(path/*:string*/)/*:?CFBEntry*/ { - var k/*:boolean*/ = false; - if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; } - else k = path.indexOf("/") !== -1; - var UCPath/*:string*/ = path.toUpperCase().replace(chr0,'').replace(chr1,'!'); - var w/*:number*/ = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); - if(w === -1) return null; - return k === true ? FileIndex[w] : files[Paths[w]]; - }; -} - diff --git a/bits/45_readfat.js b/bits/45_readfat.js index 8866a40..3be6784 100644 --- a/bits/45_readfat.js +++ b/bits/45_readfat.js @@ -1,5 +1,5 @@ /** Chase down the rest of the DIFAT chain to build a comprehensive list - DIFAT chains by storing the next sector number as the last 32 bytes */ + DIFAT chains by storing the next sector number as the last 32 bits */ function sleuth_fat(idx/*:number*/, cnt/*:number*/, sectors/*:Array*/, ssz/*:number*/, fat_addrs)/*:void*/ { var q/*:number*/ = ENDOFCHAIN; if(idx === ENDOFCHAIN) { @@ -17,7 +17,6 @@ function sleuth_fat(idx/*:number*/, cnt/*:number*/, sectors/*:Array*/, /** Follow the linked list of sectors for a given starting point */ function get_sector_list(sectors/*:Array*/, start/*:number*/, fat_addrs/*:Array*/, ssz/*:number*/, chkd/*:?Array*/)/*:SectorEntry*/ { - var sl = sectors.length; var buf/*:Array*/ = [], buf_chain/*:Array*/ = []; if(!chkd) chkd = []; var modulus = ssz - 1, j = 0, jj = 0; diff --git a/bits/49_readutils.js b/bits/49_readutils.js index 49d55e6..42db2e5 100644 --- a/bits/49_readutils.js +++ b/bits/49_readutils.js @@ -1,12 +1,12 @@ var fs/*:: = require('fs'); */; -function readFileSync(filename/*:string*/, options/*:CFBReadOpts*/) { +function read_file(filename/*:string*/, options/*:CFBReadOpts*/) { if(fs == null) fs = require('fs'); return parse(fs.readFileSync(filename), options); } -function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { +function read(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { switch(options && options.type || "base64") { - case "file": /*:: if(typeof blob !== 'string') throw "Must pass a filename when type='file'"; */return readFileSync(blob, options); + case "file": /*:: if(typeof blob !== 'string') throw "Must pass a filename when type='file'"; */return read_file(blob, options); case "base64": /*:: if(typeof blob !== 'string') throw "Must pass a base64-encoded binary string when type='file'"; */return parse(s2a(Base64.decode(blob)), options); case "binary": /*:: if(typeof blob !== 'string') throw "Must pass a binary string when type='file'"; */return parse(s2a(blob), options); } diff --git a/bits/50_init.js b/bits/50_init.js new file mode 100644 index 0000000..828cc66 --- /dev/null +++ b/bits/50_init.js @@ -0,0 +1,12 @@ +function init_cfb(cfb/*:CFBContainer*/, opts/*:?any*/)/*:void*/ { + var o = opts || {}, root = o.root || "Root Entry"; + if(!cfb.FullPaths) cfb.FullPaths = []; + if(!cfb.FileIndex) cfb.FileIndex = []; + if(cfb.FullPaths.length !== cfb.FileIndex.length) throw new Error("inconsistent CFB structure"); + if(cfb.FullPaths.length === 0) { + cfb.FullPaths[0] = root + "/"; + cfb.FileIndex[0] = ({ name: root, type: 5 }/*:any*/); + } + if(o.CLSID) cfb.FileIndex[0].clsid = o.CLSID; + seed_cfb(cfb); +} diff --git a/bits/51_seed.js b/bits/51_seed.js new file mode 100644 index 0000000..4799dd3 --- /dev/null +++ b/bits/51_seed.js @@ -0,0 +1,8 @@ +function seed_cfb(cfb/*:CFBContainer*/)/*:void*/ { + var nm = "\u0001Sh33tJ5"; + if(CFB.find(cfb, "/" + nm)) return; + var p = new_buf(4); p[0] = 55; p[1] = p[3] = 50; p[2] = 54; + cfb.FileIndex.push(({ name: nm, type: 2, content:p, size:4, L:69, R:69, C:69 }/*:any*/)); + cfb.FullPaths.push(cfb.FullPaths[0] + nm); + rebuild_cfb(cfb); +} diff --git a/bits/54_rebuild.js b/bits/54_rebuild.js new file mode 100644 index 0000000..125900a --- /dev/null +++ b/bits/54_rebuild.js @@ -0,0 +1,2 @@ +function rebuild_cfb(cfb/*:CFBContainer*/, f/*:?boolean*/)/*:void*/ { + init_cfb(cfb); diff --git a/bits/55_check.js b/bits/55_check.js new file mode 100644 index 0000000..c6a5e08 --- /dev/null +++ b/bits/55_check.js @@ -0,0 +1,18 @@ + var gc = false, s = false; + for(var i = cfb.FullPaths.length - 1; i >= 0; --i) { + var _file = cfb.FileIndex[i]; + switch(_file.type) { + case 0: + if(s) gc = true; + else { cfb.FileIndex.pop(); cfb.FullPaths.pop(); } + break; + case 1: case 2: case 5: + s = true; + if(isNaN(_file.R * _file.L * _file.C)) gc = true; + if(_file.R > -1 && _file.L > -1 && _file.R == _file.L) gc = true; + break; + default: gc = true; break; + } + } + if(!gc && !f) return; + diff --git a/bits/56_dirtree.js b/bits/56_dirtree.js new file mode 100644 index 0000000..c14462b --- /dev/null +++ b/bits/56_dirtree.js @@ -0,0 +1,19 @@ + var now = new Date(), j = 0; + var data/*:Array<[string, CFBEntry]>*/ = []; + for(i = 0; i < cfb.FullPaths.length; ++i) { + if(cfb.FileIndex[i].type === 0) continue; + data.push([cfb.FullPaths[i], cfb.FileIndex[i]]); + } + for(i = 0; i < data.length; ++i) { + var dad = dirname(data[i][0]); + s = false; + for(j = 0; j < data.length; ++j) if(data[j][0] === dad) s = true; + if(!s) data.push([dad, ({ + name: filename(dad).replace("/",""), + type: 1, + clsid: HEADER_CLSID, + ct: now, mt: now, + content: null + }/*:any*/)]); + } + diff --git a/bits/57_resort.js b/bits/57_resort.js new file mode 100644 index 0000000..f47ffe4 --- /dev/null +++ b/bits/57_resort.js @@ -0,0 +1,3 @@ + data.sort(function(x,y) { return namecmp(x[0], y[0]); }); + cfb.FullPaths = []; cfb.FileIndex = []; + for(i = 0; i < data.length; ++i) { cfb.FullPaths[i] = data[i][0]; cfb.FileIndex[i] = data[i][1]; } diff --git a/bits/58_btree.js b/bits/58_btree.js new file mode 100644 index 0000000..b809474 --- /dev/null +++ b/bits/58_btree.js @@ -0,0 +1,25 @@ + for(i = 0; i < data.length; ++i) { + var elt = cfb.FileIndex[i]; + var nm = cfb.FullPaths[i]; + + elt.name = filename(nm).replace("/",""); + elt.L = elt.R = elt.C = -(elt.color = 1); + elt.size = elt.content ? elt.content.length : 0; + elt.start = 0; + elt.clsid = (elt.clsid || HEADER_CLSID); + if(i === 0) { + elt.C = data.length > 1 ? 1 : -1; + elt.size = 0; + elt.type = 5; + } else if(nm.slice(-1) == "/") { + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==nm) break; + elt.C = j >= data.length ? -1 : j; + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==dirname(nm)) break; + elt.R = j >= data.length ? -1 : j; + elt.type = 1; + } else { + if(dirname(cfb.FullPaths[i+1]||"") == dirname(nm)) elt.R = i + 1; + elt.type = 2; + } + } + diff --git a/bits/59_rebuild.js b/bits/59_rebuild.js new file mode 100644 index 0000000..7e9861c --- /dev/null +++ b/bits/59_rebuild.js @@ -0,0 +1,2 @@ +} + diff --git a/bits/60_writehead.js b/bits/60_writehead.js new file mode 100644 index 0000000..e5d0f99 --- /dev/null +++ b/bits/60_writehead.js @@ -0,0 +1,2 @@ +function _write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes*/ { + rebuild_cfb(cfb); diff --git a/bits/61_layout.js b/bits/61_layout.js new file mode 100644 index 0000000..c9bd3dd --- /dev/null +++ b/bits/61_layout.js @@ -0,0 +1,23 @@ + var L = (function(cfb/*:CFBContainer*/)/*:Array*/{ + var mini_size = 0, fat_size = 0; + for(var i = 0; i < cfb.FileIndex.length; ++i) { + var file = cfb.FileIndex[i]; + if(!file.content) continue; + /*:: if(file.content == null) throw new Error("unreachable"); */ + var flen = file.content.length; + if(flen === 0){} + else if(flen < 0x1000) mini_size += (flen + 0x3F) >> 6; + else fat_size += (flen + 0x01FF) >> 9; + } + var dir_cnt = (cfb.FullPaths.length +3) >> 2; + var mini_cnt = (mini_size + 7) >> 3; + var mfat_cnt = (mini_size + 0x7F) >> 7; + var fat_base = mini_cnt + fat_size + dir_cnt + mfat_cnt; + var fat_cnt = (fat_base + 0x7F) >> 7; + var difat_cnt = fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + while(((fat_base + fat_cnt + difat_cnt + 0x7F) >> 7) > fat_cnt) difat_cnt = ++fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + var L = [1, difat_cnt, fat_cnt, mfat_cnt, dir_cnt, fat_size, mini_size, 0]; + cfb.FileIndex[0].size = mini_size << 6; + L[7] = (cfb.FileIndex[0].start=L[0]+L[1]+L[2]+L[3]+L[4]+L[5])+((L[6]+7) >> 3); + return L; + })(cfb); diff --git a/bits/62_alloc.js b/bits/62_alloc.js new file mode 100644 index 0000000..b776192 --- /dev/null +++ b/bits/62_alloc.js @@ -0,0 +1,2 @@ + var o = new_buf(L[7] << 9); + var i = 0, T = 0; diff --git a/bits/63_header.js b/bits/63_header.js new file mode 100644 index 0000000..ddd8e67 --- /dev/null +++ b/bits/63_header.js @@ -0,0 +1,20 @@ + { + for(i = 0; i < 8; ++i) o.write_shift(1, HEADER_SIG[i]); + for(i = 0; i < 8; ++i) o.write_shift(2, 0); + o.write_shift(2, 0x003E); + o.write_shift(2, 0x0003); + o.write_shift(2, 0xFFFE); + o.write_shift(2, 0x0009); + o.write_shift(2, 0x0006); + for(i = 0; i < 3; ++i) o.write_shift(2, 0); + o.write_shift(4, 0); + o.write_shift(4, L[2]); + o.write_shift(4, L[0] + L[1] + L[2] + L[3] - 1); + o.write_shift(4, 0); + o.write_shift(4, 1<<12); + o.write_shift(4, L[3] ? L[0] + L[1] + L[2] - 1: ENDOFCHAIN); + o.write_shift(4, L[3]); + o.write_shift(-4, L[1] ? L[0] - 1: ENDOFCHAIN); + o.write_shift(4, L[1]); + for(i = 0; i < 109; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + } diff --git a/bits/64_difat.js b/bits/64_difat.js new file mode 100644 index 0000000..ed2cbf2 --- /dev/null +++ b/bits/64_difat.js @@ -0,0 +1,6 @@ + if(L[1]) { + for(T = 0; T < L[1]; ++T) { + for(; i < 236 + T * 127; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + o.write_shift(-4, T === L[1] - 1 ? ENDOFCHAIN : T + 1); + } + } diff --git a/bits/65_fat.js b/bits/65_fat.js new file mode 100644 index 0000000..9799d22 --- /dev/null +++ b/bits/65_fat.js @@ -0,0 +1,33 @@ + var chainit = function(w/*:number*/)/*:void*/ { + for(T += w; i> 9); + } + chainit((L[6] + 7) >> 3); + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + T = i = 0; + for(j = 0; j < cfb.FileIndex.length; ++j) { + file = cfb.FileIndex[j]; + if(!file.content) continue; + /*:: if(file.content == null) throw new Error("unreachable"); */ + flen = file.content.length; + if(!flen || flen >= 0x1000) continue; + file.start = T; + chainit((flen + 0x3F) >> 6); + } + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); diff --git a/bits/66_dir.js b/bits/66_dir.js new file mode 100644 index 0000000..17b1c6d --- /dev/null +++ b/bits/66_dir.js @@ -0,0 +1,26 @@ + for(i = 0; i < L[4]<<2; ++i) { + var nm = cfb.FullPaths[i]; + if(!nm || nm.length === 0) { + for(j = 0; j < 17; ++j) o.write_shift(4, 0); + for(j = 0; j < 3; ++j) o.write_shift(4, -1); + for(j = 0; j < 12; ++j) o.write_shift(4, 0); + continue; + } + file = cfb.FileIndex[i]; + if(i === 0) file.start = file.size ? file.start - 1 : ENDOFCHAIN; + flen = 2*(file.name.length+1); + o.write_shift(64, file.name, "utf16le"); + o.write_shift(2, flen); + o.write_shift(1, file.type); + o.write_shift(1, file.color); + o.write_shift(-4, file.L); + o.write_shift(-4, file.R); + o.write_shift(-4, file.C); + if(!file.clsid) for(j = 0; j < 4; ++j) o.write_shift(4, 0); + else o.write_shift(16, file.clsid, "hex"); + o.write_shift(4, file.state || 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, file.start); + o.write_shift(4, file.size); o.write_shift(4, 0); + } diff --git a/bits/67_stream.js b/bits/67_stream.js new file mode 100644 index 0000000..cdb26ce --- /dev/null +++ b/bits/67_stream.js @@ -0,0 +1,9 @@ + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; + /*:: if(!file.content) throw new Error("unreachable"); */ + if(file.size >= 0x1000) { + o.l = (file.start+1) << 9; + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x1FF; ++j) o.write_shift(1, 0); + } + } diff --git a/bits/68_mini.js b/bits/68_mini.js new file mode 100644 index 0000000..faf920c --- /dev/null +++ b/bits/68_mini.js @@ -0,0 +1,9 @@ + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; + /*:: if(!file.content) throw new Error("unreachable"); */ + if(file.size > 0 && file.size < 0x1000) { + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x3F; ++j) o.write_shift(1, 0); + } + } + diff --git a/bits/69_writefoot.js b/bits/69_writefoot.js new file mode 100644 index 0000000..b780080 --- /dev/null +++ b/bits/69_writefoot.js @@ -0,0 +1,2 @@ + return o; +} diff --git a/bits/70_find.js b/bits/70_find.js index 0e362ea..93c6d9a 100644 --- a/bits/70_find.js +++ b/bits/70_find.js @@ -1,3 +1,19 @@ +/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ { - return cfb.find(path); + //return cfb.find(path); + var UCFullPaths/*:Array*/ = cfb.FullPaths.map(function(x) { return x.toUpperCase(); }); + var UCPaths/*:Array*/ = UCFullPaths.map(function(x) { var y = x.split("/"); return y[y.length - (x.slice(-1) == "/" ? 2 : 1)]; }); + var k/*:boolean*/ = false; + if(path.charCodeAt(0) === 47 /* "/" */) { k = true; path = UCFullPaths[0].slice(0, -1) + path; } + else k = path.indexOf("/") !== -1; + var UCPath/*:string*/ = path.toUpperCase(); + var w/*:number*/ = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); + if(w !== -1) return cfb.FileIndex[w]; + + UCPath = UCPath.replace(chr0,'').replace(chr1,'!'); + for(w = 0; w < UCFullPaths.length; ++w) { + if(UCFullPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + if(UCPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + } + return null; } diff --git a/bits/75_consts.js b/bits/75_consts.js index 6b7429c..41e877c 100644 --- a/bits/75_consts.js +++ b/bits/75_consts.js @@ -5,6 +5,7 @@ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ var ENDOFCHAIN = -2; /* 2.2 Compound File Header */ var HEADER_SIGNATURE = 'd0cf11e0a1b11ae1'; +var HEADER_SIG = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; var HEADER_CLSID = '00000000000000000000000000000000'; var consts = { /* 2.1 Compund File Sector Numbers and Types */ diff --git a/bits/77_writeutils.js b/bits/77_writeutils.js new file mode 100644 index 0000000..2b8382b --- /dev/null +++ b/bits/77_writeutils.js @@ -0,0 +1,21 @@ +function write_file(cfb/*:CFBContainer*/, filename/*:string*/, options/*:CFBWriteOpts*/)/*:void*/ { + var o = _write(cfb, options); + /*:: if(typeof Buffer == 'undefined' || !Buffer.isBuffer(o) || !(o instanceof Buffer)) throw new Error("unreachable"); */ + fs.writeFileSync(filename, o); +} + +function a2s(o/*:RawBytes*/)/*:string*/ { + var out = new Array(o.length); + for(var i = 0; i < o.length; ++i) out[i] = String.fromCharCode(o[i]); + return out.join(""); +} + +function write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes|string*/ { + var o = _write(cfb, options); + switch(options && options.type) { + case "file": fs.writeFileSync(options.filename, (o/*:any*/)); return o; + case "binary": return a2s(o); + case "base64": return Base64.encode(a2s(o)); + } + return o; +} diff --git a/bits/85_api.js b/bits/85_api.js new file mode 100644 index 0000000..8f449c0 --- /dev/null +++ b/bits/85_api.js @@ -0,0 +1,48 @@ +function cfb_new(opts/*:?any*/)/*:CFBContainer*/ { + var o/*:CFBContainer*/ = ({}/*:any*/); + init_cfb(o, opts); + return o; +} + +function cfb_add(cfb/*:CFBContainer*/, name/*:string*/, content/*:?RawBytes*/, opts/*:?any*/)/*:CFBEntry*/ { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(!file) { + file = ({name: filename(name)}/*:any*/); + cfb.FileIndex.push(file); + cfb.FullPaths.push(name); + CFB.utils.cfb_gc(cfb); + } + /*:: if(!file) throw new Error("unreachable"); */ + file.content = (content/*:any*/); + file.size = content ? content.length : 0; + if(opts) { + if(opts.CLSID) file.clsid = opts.CLSID; + } + return file; +} + +function cfb_del(cfb/*:CFBContainer*/, name/*:string*/)/*:boolean*/ { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex.splice(j, 1); + cfb.FullPaths.splice(j, 1); + return true; + } + return false; +} + +function cfb_mov(cfb/*:CFBContainer*/, old_name/*:string*/, new_name/*:string*/)/*:boolean*/ { + init_cfb(cfb); + var file = CFB.find(cfb, old_name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex[j].name = filename(new_name); + cfb.FullPaths[j] = new_name; + return true; + } + return false; +} + +function cfb_gc(cfb/*:CFBContainer*/)/*:void*/ { rebuild_cfb(cfb, true); } + diff --git a/bits/78_cfbexports.js b/bits/88_cfbexports.js similarity index 52% rename from bits/78_cfbexports.js rename to bits/88_cfbexports.js index b6e442b..5ce2829 100644 --- a/bits/78_cfbexports.js +++ b/bits/88_cfbexports.js @@ -1,7 +1,14 @@ exports.find = find; -exports.read = readSync; +exports.read = read; exports.parse = parse; +exports.write = write; +exports.writeFile = write_file; exports.utils = { + cfb_new: cfb_new, + cfb_add: cfb_add, + cfb_del: cfb_del, + cfb_mov: cfb_mov, + cfb_gc: cfb_gc, ReadShift: ReadShift, CheckField: CheckField, prep_blob: prep_blob, diff --git a/bits/79_cfbfooter.js b/bits/89_cfbfooter.js similarity index 100% rename from bits/79_cfbfooter.js rename to bits/89_cfbfooter.js diff --git a/cfb.flow.js b/cfb.flow.js index f91a4c4..c4e9a01 100644 --- a/cfb.flow.js +++ b/cfb.flow.js @@ -1,11 +1,33 @@ /* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ /*jshint eqnull:true */ +/*exported CFB */ +/*global module, require:false, process:false, Buffer:false, Uint8Array:false */ -var Base64 = (function(){ +var Base64 = (function make_b64(){ var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; return { - decode: function(input/*:string*/)/*:string*/ { + encode: function(input/*:string*/)/*:string*/ { + var o = ""; + var c1/*:number*/, c2/*:number*/, c3/*:number*/; + var e1/*:number*/, e2/*:number*/, e3/*:number*/, e4/*:number*/; + for(var i = 0; i < input.length; ) { + c1 = input.charCodeAt(i++); + e1 = (c1 >> 2); + + c2 = input.charCodeAt(i++); + e2 = ((c1 & 3) << 4) | (c2 >> 4); + + c3 = input.charCodeAt(i++); + e3 = ((c2 & 15) << 2) | (c3 >> 6); + e4 = (c3 & 63); + if (isNaN(c2)) { e3 = e4 = 64; } + else if (isNaN(c3)) { e4 = 64; } + o += map.charAt(e1) + map.charAt(e2) + map.charAt(e3) + map.charAt(e4); + } + return o; + }, + decode: function b64_decode(input/*:string*/)/*:string*/ { var o = ""; var c1/*:number*/, c2/*:number*/, c3/*:number*/; var e1/*:number*/, e2/*:number*/, e3/*:number*/, e4/*:number*/; @@ -28,14 +50,24 @@ var Base64 = (function(){ } }; })(); +var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' && typeof process.versions !== 'undefined' && process.versions.node); + +function new_raw_buf(len/*:number*/) { + /* jshint -W056 */ + // $FlowIgnore + return new (has_buf ? Buffer : Array)(len); + /* jshint +W056 */ +} + +var s2a = function s2a(s/*:string*/) { + if(has_buf) return new Buffer(s, "binary"); + return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); +}; var chr0 = /\u0000/g, chr1 = /[\u0001-\u0006]/; - -var s2a = function _s2a(s/*:string*/)/*:RawBytes*/ { return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); }; -var _s2a = s2a; var __toBuffer = function(bufs/*:Array >*/)/*:RawBytes*/ { var x = []; for(var i = 0; i < bufs[0].length; ++i) { x.push.apply(x, bufs[0][i]); } return x; }; var ___toBuffer = __toBuffer; -var __utf16le = function(b/*:RawBytes|CFBlob*/,s/*:number*/,e/*:number*/)/*:string*/ { var ss/*:Array*/=[]; for(var i=s; i*/=[]; for(var i=s; i*/=[]; for(var i=s; i*/)/*:RawBytes*/ { var bconcat = __bconcat; -if(typeof Buffer !== "undefined") { +if(has_buf/*:: && typeof Buffer !== 'undefined'*/) { __utf16le = function(b/*:RawBytes|CFBlob*/,s/*:number*/,e/*:number*/)/*:string*/ { if(!Buffer.isBuffer(b)/*:: || !(b instanceof Buffer)*/) return ___utf16le(b,s,e); - return b.toString('utf16le',s,e).replace(chr0,'').replace(chr1,'!'); + return b.toString('utf16le',s,e).replace(chr0,'')/*.replace(chr1,'!')*/; }; __hexlify = function(b/*:RawBytes|CFBlob*/,s/*:number*/,l/*:number*/)/*:string*/ { return Buffer.isBuffer(b)/*:: && b instanceof Buffer*/ ? b.toString('hex',s,s+l) : ___hexlify(b,s,l); }; __toBuffer = function(bufs/*:Array>*/)/*:RawBytes*/ { return (bufs[0].length > 0 && Buffer.isBuffer(bufs[0][0])) ? Buffer.concat((bufs[0]/*:any*/)) : ___toBuffer(bufs);}; @@ -76,7 +108,38 @@ function ReadShift(size/*:number*/, t/*:?string*/)/*:number|string*/ { case 4: oI = __readInt32LE(this, this.l); break; case 16: type = 2; oS = __hexlify(this, this.l, size); } - this.l+=size; if(type === 0) return oI; return oS; + this.l += size; if(type === 0) return oI; return oS; +} + +var __writeUInt32LE = function(b/*:RawBytes|CFBlob*/, val/*:number*/, idx/*:number*/)/*:void*/ { b[idx] = (val & 0xFF); b[idx+1] = ((val >>> 8) & 0xFF); b[idx+2] = ((val >>> 16) & 0xFF); b[idx+3] = ((val >>> 24) & 0xFF); }; +var __writeInt32LE = function(b/*:RawBytes|CFBlob*/, val/*:number*/, idx/*:number*/)/*:void*/ { b[idx] = (val & 0xFF); b[idx+1] = ((val >> 8) & 0xFF); b[idx+2] = ((val >> 16) & 0xFF); b[idx+3] = ((val >> 24) & 0xFF); }; + +function WriteShift(t/*:number*/, val/*:string|number*/, f/*:?string*/)/*:any*/ { + var size = 0, i = 0; + switch(f) { + case "hex": for(; i < t; ++i) { + /*:: if(typeof val !== "string") throw new Error("unreachable"); */ + this[this.l++] = parseInt(val.slice(2*i, 2*i+2), 16)||0; + } return this; + case "utf16le": + /*:: if(typeof val !== "string") throw new Error("unreachable"); */ + var end/*:number*/ = this.l + t; + for(i = 0; i < Math.min(val.length, t); ++i) { + var cc = val.charCodeAt(i); + this[this.l++] = cc & 0xff; + this[this.l++] = cc >> 8; + } + while(this.l < end) this[this.l++] = 0; + return this; + } + /*:: if(typeof val !== "number") throw new Error("unreachable"); */ + switch(t) { + case 1: size = 1; this[this.l] = val&0xFF; break; + case 2: size = 2; this[this.l] = val&0xFF; val >>>= 8; this[this.l+1] = val&0xFF; break; + case 4: size = 4; __writeUInt32LE(this, val, this.l); break; + case -4: size = 4; __writeInt32LE(this, val, this.l); break; + } + this.l += size; return this; } function CheckField(hexstr/*:string*/, fld/*:string*/)/*:void*/ { @@ -89,6 +152,13 @@ function prep_blob(blob/*:CFBlob*/, pos/*:number*/)/*:void*/ { blob.l = pos; blob.read_shift = /*::(*/ReadShift/*:: :any)*/; blob.chk = CheckField; + blob.write_shift = WriteShift; +} + +function new_buf(sz/*:number*/)/*:any*/ { + var o/*:CFBlob*/ = (new_raw_buf(sz)/*:any*/); + prep_blob(o, 0); + return o; } /*:: @@ -109,15 +179,35 @@ type CFBFiles = {[n:string]:CFBEntry}; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/; -exports.version = '0.12.1'; +exports.version = '0.13.0'; +/* [MS-CFB] 2.6.4 */ +function namecmp(l/*:string*/, r/*:string*/)/*:number*/ { + var L = l.split("/"), R = r.split("/"); + for(var i = 0, c = 0, Z = Math.min(L.length, R.length); i < Z; ++i) { + if((c = L[i].length - R[i].length)) return c; + if(L[i] != R[i]) return L[i] < R[i] ? -1 : 1; + } + return L.length - R.length; +} +function dirname(p/*:string*/)/*:string*/ { + if(p.charAt(p.length - 1) == "/") return (p.slice(0,-1).indexOf("/") === -1) ? p : dirname(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(0, c+1); +} + +function filename(p/*:string*/)/*:string*/ { + if(p.charAt(p.length - 1) == "/") return filename(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(c+1); +} function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ { -var mver = 3; // major version -var ssz = 512; // sector size +var mver = 3; +var ssz = 512; var nmfs = 0; // number of mini FAT sectors -var ndfs = 0; // number of DIFAT sectors -var dir_start = 0; // first directory sector location -var minifat_start = 0; // first mini FAT sector location -var difat_start = 0; // first mini FAT sector location +var difat_sec_cnt = 0; +var dir_start = 0; +var minifat_start = 0; +var difat_start = 0; var fat_addrs/*:Array*/ = []; // locations of FAT sectors @@ -141,11 +231,10 @@ var header/*:RawBytes*/ = file.slice(0,ssz); check_shifts(blob, mver); // Number of Directory Sectors -var nds/*:number*/ = blob.read_shift(4, 'i'); -if(mver === 3 && nds !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + nds); +var dir_cnt/*:number*/ = blob.read_shift(4, 'i'); +if(mver === 3 && dir_cnt !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + dir_cnt); // Number of FAT Sectors -//var nfs = blob.read_shift(4, 'i'); blob.l += 4; // First Directory Sector Location @@ -167,7 +256,7 @@ nmfs = blob.read_shift(4, 'i'); difat_start = blob.read_shift(4, 'i'); // Number of DIFAT Sectors -ndfs = blob.read_shift(4, 'i'); +difat_sec_cnt = blob.read_shift(4, 'i'); // Grab FAT Sector Locations for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ @@ -179,7 +268,7 @@ for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ /** Break the file up into sectors */ var sectors/*:Array*/ = sectorify(file, ssz); -sleuth_fat(difat_start, ndfs, sectors, ssz, fat_addrs); +sleuth_fat(difat_start, difat_sec_cnt, sectors, ssz, fat_addrs); /** Chains */ var sector_list/*:SectorList*/ = make_sector_list(sectors, dir_start, fat_addrs, ssz); @@ -195,19 +284,17 @@ var files/*:CFBFiles*/ = {}, Paths/*:Array*/ = [], FileIndex/*:CFBFileIn read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex); build_full_paths(FileIndex, FullPathDir, FullPaths, Paths); +Paths.shift(); -var root_name/*:string*/ = Paths.shift(); - -/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ -var find_path = make_find_path(FullPaths, Paths, FileIndex, files, root_name); - -return { - raw: {header: header, sectors: sectors}, +var o = { FileIndex: FileIndex, FullPaths: FullPaths, - FullPathDir: FullPathDir, - find: find_path + FullPathDir: FullPathDir }; + +// $FlowIgnore +if(options && options.raw) o.raw = {header: header, sectors: sectors}; +return o; } // parse /* [MS-CFB] 2.2 Compound File Header -- read up to major version */ @@ -294,25 +381,8 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra } } -/* [MS-CFB] 2.6.4 */ -function make_find_path(FullPaths/*:Array*/, Paths/*:Array*/, FileIndex/*:CFBFileIndex*/, files/*:CFBFiles*/, root_name/*:string*/)/*:CFBFindPath*/ { - var UCFullPaths/*:Array*/ = []; - var UCPaths/*:Array*/ = [], i = 0; - for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - return function find_path(path/*:string*/)/*:?CFBEntry*/ { - var k/*:boolean*/ = false; - if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; } - else k = path.indexOf("/") !== -1; - var UCPath/*:string*/ = path.toUpperCase().replace(chr0,'').replace(chr1,'!'); - var w/*:number*/ = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); - if(w === -1) return null; - return k === true ? FileIndex[w] : files[Paths[w]]; - }; -} - /** Chase down the rest of the DIFAT chain to build a comprehensive list - DIFAT chains by storing the next sector number as the last 32 bytes */ + DIFAT chains by storing the next sector number as the last 32 bits */ function sleuth_fat(idx/*:number*/, cnt/*:number*/, sectors/*:Array*/, ssz/*:number*/, fat_addrs)/*:void*/ { var q/*:number*/ = ENDOFCHAIN; if(idx === ENDOFCHAIN) { @@ -330,7 +400,6 @@ function sleuth_fat(idx/*:number*/, cnt/*:number*/, sectors/*:Array*/, /** Follow the linked list of sectors for a given starting point */ function get_sector_list(sectors/*:Array*/, start/*:number*/, fat_addrs/*:Array*/, ssz/*:number*/, chkd/*:?Array*/)/*:SectorEntry*/ { - var sl = sectors.length; var buf/*:Array*/ = [], buf_chain/*:Array*/ = []; if(!chkd) chkd = []; var modulus = ssz - 1, j = 0, jj = 0; @@ -429,22 +498,259 @@ function read_date(blob/*:RawBytes|CFBlob*/, offset/*:number*/)/*:Date*/ { } var fs/*:: = require('fs'); */; -function readFileSync(filename/*:string*/, options/*:CFBReadOpts*/) { +function read_file(filename/*:string*/, options/*:CFBReadOpts*/) { if(fs == null) fs = require('fs'); return parse(fs.readFileSync(filename), options); } -function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { +function read(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { switch(options && options.type || "base64") { - case "file": /*:: if(typeof blob !== 'string') throw "Must pass a filename when type='file'"; */return readFileSync(blob, options); + case "file": /*:: if(typeof blob !== 'string') throw "Must pass a filename when type='file'"; */return read_file(blob, options); case "base64": /*:: if(typeof blob !== 'string') throw "Must pass a base64-encoded binary string when type='file'"; */return parse(s2a(Base64.decode(blob)), options); case "binary": /*:: if(typeof blob !== 'string') throw "Must pass a binary string when type='file'"; */return parse(s2a(blob), options); } return parse(/*::typeof blob == 'string' ? new Buffer(blob, 'utf-8') : */blob, options); } +function init_cfb(cfb/*:CFBContainer*/, opts/*:?any*/)/*:void*/ { + var o = opts || {}, root = o.root || "Root Entry"; + if(!cfb.FullPaths) cfb.FullPaths = []; + if(!cfb.FileIndex) cfb.FileIndex = []; + if(cfb.FullPaths.length !== cfb.FileIndex.length) throw new Error("inconsistent CFB structure"); + if(cfb.FullPaths.length === 0) { + cfb.FullPaths[0] = root + "/"; + cfb.FileIndex[0] = ({ name: root, type: 5 }/*:any*/); + } + if(o.CLSID) cfb.FileIndex[0].clsid = o.CLSID; + seed_cfb(cfb); +} +function seed_cfb(cfb/*:CFBContainer*/)/*:void*/ { + var nm = "\u0001Sh33tJ5"; + if(CFB.find(cfb, "/" + nm)) return; + var p = new_buf(4); p[0] = 55; p[1] = p[3] = 50; p[2] = 54; + cfb.FileIndex.push(({ name: nm, type: 2, content:p, size:4, L:69, R:69, C:69 }/*:any*/)); + cfb.FullPaths.push(cfb.FullPaths[0] + nm); + rebuild_cfb(cfb); +} +function rebuild_cfb(cfb/*:CFBContainer*/, f/*:?boolean*/)/*:void*/ { + init_cfb(cfb); + var gc = false, s = false; + for(var i = cfb.FullPaths.length - 1; i >= 0; --i) { + var _file = cfb.FileIndex[i]; + switch(_file.type) { + case 0: + if(s) gc = true; + else { cfb.FileIndex.pop(); cfb.FullPaths.pop(); } + break; + case 1: case 2: case 5: + s = true; + if(isNaN(_file.R * _file.L * _file.C)) gc = true; + if(_file.R > -1 && _file.L > -1 && _file.R == _file.L) gc = true; + break; + default: gc = true; break; + } + } + if(!gc && !f) return; + + var now = new Date(), j = 0; + var data/*:Array<[string, CFBEntry]>*/ = []; + for(i = 0; i < cfb.FullPaths.length; ++i) { + if(cfb.FileIndex[i].type === 0) continue; + data.push([cfb.FullPaths[i], cfb.FileIndex[i]]); + } + for(i = 0; i < data.length; ++i) { + var dad = dirname(data[i][0]); + s = false; + for(j = 0; j < data.length; ++j) if(data[j][0] === dad) s = true; + if(!s) data.push([dad, ({ + name: filename(dad).replace("/",""), + type: 1, + clsid: HEADER_CLSID, + ct: now, mt: now, + content: null + }/*:any*/)]); + } + + data.sort(function(x,y) { return namecmp(x[0], y[0]); }); + cfb.FullPaths = []; cfb.FileIndex = []; + for(i = 0; i < data.length; ++i) { cfb.FullPaths[i] = data[i][0]; cfb.FileIndex[i] = data[i][1]; } + for(i = 0; i < data.length; ++i) { + var elt = cfb.FileIndex[i]; + var nm = cfb.FullPaths[i]; + + elt.name = filename(nm).replace("/",""); + elt.L = elt.R = elt.C = -(elt.color = 1); + elt.size = elt.content ? elt.content.length : 0; + elt.start = 0; + elt.clsid = (elt.clsid || HEADER_CLSID); + if(i === 0) { + elt.C = data.length > 1 ? 1 : -1; + elt.size = 0; + elt.type = 5; + } else if(nm.slice(-1) == "/") { + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==nm) break; + elt.C = j >= data.length ? -1 : j; + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==dirname(nm)) break; + elt.R = j >= data.length ? -1 : j; + elt.type = 1; + } else { + if(dirname(cfb.FullPaths[i+1]||"") == dirname(nm)) elt.R = i + 1; + elt.type = 2; + } + } + +} + +function _write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes*/ { + rebuild_cfb(cfb); + var L = (function(cfb/*:CFBContainer*/)/*:Array*/{ + var mini_size = 0, fat_size = 0; + for(var i = 0; i < cfb.FileIndex.length; ++i) { + var file = cfb.FileIndex[i]; + if(!file.content) continue; + /*:: if(file.content == null) throw new Error("unreachable"); */ + var flen = file.content.length; + if(flen === 0){} + else if(flen < 0x1000) mini_size += (flen + 0x3F) >> 6; + else fat_size += (flen + 0x01FF) >> 9; + } + var dir_cnt = (cfb.FullPaths.length +3) >> 2; + var mini_cnt = (mini_size + 7) >> 3; + var mfat_cnt = (mini_size + 0x7F) >> 7; + var fat_base = mini_cnt + fat_size + dir_cnt + mfat_cnt; + var fat_cnt = (fat_base + 0x7F) >> 7; + var difat_cnt = fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + while(((fat_base + fat_cnt + difat_cnt + 0x7F) >> 7) > fat_cnt) difat_cnt = ++fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + var L = [1, difat_cnt, fat_cnt, mfat_cnt, dir_cnt, fat_size, mini_size, 0]; + cfb.FileIndex[0].size = mini_size << 6; + L[7] = (cfb.FileIndex[0].start=L[0]+L[1]+L[2]+L[3]+L[4]+L[5])+((L[6]+7) >> 3); + return L; + })(cfb); + var o = new_buf(L[7] << 9); + var i = 0, T = 0; + { + for(i = 0; i < 8; ++i) o.write_shift(1, HEADER_SIG[i]); + for(i = 0; i < 8; ++i) o.write_shift(2, 0); + o.write_shift(2, 0x003E); + o.write_shift(2, 0x0003); + o.write_shift(2, 0xFFFE); + o.write_shift(2, 0x0009); + o.write_shift(2, 0x0006); + for(i = 0; i < 3; ++i) o.write_shift(2, 0); + o.write_shift(4, 0); + o.write_shift(4, L[2]); + o.write_shift(4, L[0] + L[1] + L[2] + L[3] - 1); + o.write_shift(4, 0); + o.write_shift(4, 1<<12); + o.write_shift(4, L[3] ? L[0] + L[1] + L[2] - 1: ENDOFCHAIN); + o.write_shift(4, L[3]); + o.write_shift(-4, L[1] ? L[0] - 1: ENDOFCHAIN); + o.write_shift(4, L[1]); + for(i = 0; i < 109; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + } + if(L[1]) { + for(T = 0; T < L[1]; ++T) { + for(; i < 236 + T * 127; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + o.write_shift(-4, T === L[1] - 1 ? ENDOFCHAIN : T + 1); + } + } + var chainit = function(w/*:number*/)/*:void*/ { + for(T += w; i> 9); + } + chainit((L[6] + 7) >> 3); + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + T = i = 0; + for(j = 0; j < cfb.FileIndex.length; ++j) { + file = cfb.FileIndex[j]; + if(!file.content) continue; + /*:: if(file.content == null) throw new Error("unreachable"); */ + flen = file.content.length; + if(!flen || flen >= 0x1000) continue; + file.start = T; + chainit((flen + 0x3F) >> 6); + } + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + for(i = 0; i < L[4]<<2; ++i) { + var nm = cfb.FullPaths[i]; + if(!nm || nm.length === 0) { + for(j = 0; j < 17; ++j) o.write_shift(4, 0); + for(j = 0; j < 3; ++j) o.write_shift(4, -1); + for(j = 0; j < 12; ++j) o.write_shift(4, 0); + continue; + } + file = cfb.FileIndex[i]; + if(i === 0) file.start = file.size ? file.start - 1 : ENDOFCHAIN; + flen = 2*(file.name.length+1); + o.write_shift(64, file.name, "utf16le"); + o.write_shift(2, flen); + o.write_shift(1, file.type); + o.write_shift(1, file.color); + o.write_shift(-4, file.L); + o.write_shift(-4, file.R); + o.write_shift(-4, file.C); + if(!file.clsid) for(j = 0; j < 4; ++j) o.write_shift(4, 0); + else o.write_shift(16, file.clsid, "hex"); + o.write_shift(4, file.state || 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, file.start); + o.write_shift(4, file.size); o.write_shift(4, 0); + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; + /*:: if(!file.content) throw new Error("unreachable"); */ + if(file.size >= 0x1000) { + o.l = (file.start+1) << 9; + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x1FF; ++j) o.write_shift(1, 0); + } + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; + /*:: if(!file.content) throw new Error("unreachable"); */ + if(file.size > 0 && file.size < 0x1000) { + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x3F; ++j) o.write_shift(1, 0); + } + } + + return o; +} +/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ { - return cfb.find(path); + //return cfb.find(path); + var UCFullPaths/*:Array*/ = cfb.FullPaths.map(function(x) { return x.toUpperCase(); }); + var UCPaths/*:Array*/ = UCFullPaths.map(function(x) { var y = x.split("/"); return y[y.length - (x.slice(-1) == "/" ? 2 : 1)]; }); + var k/*:boolean*/ = false; + if(path.charCodeAt(0) === 47 /* "/" */) { k = true; path = UCFullPaths[0].slice(0, -1) + path; } + else k = path.indexOf("/") !== -1; + var UCPath/*:string*/ = path.toUpperCase(); + var w/*:number*/ = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); + if(w !== -1) return cfb.FileIndex[w]; + + UCPath = UCPath.replace(chr0,'').replace(chr1,'!'); + for(w = 0; w < UCFullPaths.length; ++w) { + if(UCFullPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + if(UCPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + } + return null; } /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ @@ -453,6 +759,7 @@ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ var ENDOFCHAIN = -2; /* 2.2 Compound File Header */ var HEADER_SIGNATURE = 'd0cf11e0a1b11ae1'; +var HEADER_SIG = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; var HEADER_CLSID = '00000000000000000000000000000000'; var consts = { /* 2.1 Compund File Sector Numbers and Types */ @@ -471,10 +778,86 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +function write_file(cfb/*:CFBContainer*/, filename/*:string*/, options/*:CFBWriteOpts*/)/*:void*/ { + var o = _write(cfb, options); + /*:: if(typeof Buffer == 'undefined' || !Buffer.isBuffer(o) || !(o instanceof Buffer)) throw new Error("unreachable"); */ + fs.writeFileSync(filename, o); +} + +function a2s(o/*:RawBytes*/)/*:string*/ { + var out = new Array(o.length); + for(var i = 0; i < o.length; ++i) out[i] = String.fromCharCode(o[i]); + return out.join(""); +} + +function write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes|string*/ { + var o = _write(cfb, options); + switch(options && options.type) { + case "file": fs.writeFileSync(options.filename, (o/*:any*/)); return o; + case "binary": return a2s(o); + case "base64": return Base64.encode(a2s(o)); + } + return o; +} +function cfb_new(opts/*:?any*/)/*:CFBContainer*/ { + var o/*:CFBContainer*/ = ({}/*:any*/); + init_cfb(o, opts); + return o; +} + +function cfb_add(cfb/*:CFBContainer*/, name/*:string*/, content/*:?RawBytes*/, opts/*:?any*/)/*:CFBEntry*/ { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(!file) { + file = ({name: filename(name)}/*:any*/); + cfb.FileIndex.push(file); + cfb.FullPaths.push(name); + CFB.utils.cfb_gc(cfb); + } + /*:: if(!file) throw new Error("unreachable"); */ + file.content = (content/*:any*/); + file.size = content ? content.length : 0; + if(opts) { + if(opts.CLSID) file.clsid = opts.CLSID; + } + return file; +} + +function cfb_del(cfb/*:CFBContainer*/, name/*:string*/)/*:boolean*/ { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex.splice(j, 1); + cfb.FullPaths.splice(j, 1); + return true; + } + return false; +} + +function cfb_mov(cfb/*:CFBContainer*/, old_name/*:string*/, new_name/*:string*/)/*:boolean*/ { + init_cfb(cfb); + var file = CFB.find(cfb, old_name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex[j].name = filename(new_name); + cfb.FullPaths[j] = new_name; + return true; + } + return false; +} + +function cfb_gc(cfb/*:CFBContainer*/)/*:void*/ { rebuild_cfb(cfb, true); } + exports.find = find; -exports.read = readSync; +exports.read = read; exports.parse = parse; +exports.write = write; +exports.writeFile = write_file; exports.utils = { + cfb_new: cfb_new, + cfb_add: cfb_add, + cfb_del: cfb_del, + cfb_mov: cfb_mov, + cfb_gc: cfb_gc, ReadShift: ReadShift, CheckField: CheckField, prep_blob: prep_blob, diff --git a/cfb.js b/cfb.js index e728a62..85629d5 100644 --- a/cfb.js +++ b/cfb.js @@ -1,11 +1,33 @@ /* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ /*jshint eqnull:true */ +/*exported CFB */ +/*global module, require:false, process:false, Buffer:false, Uint8Array:false */ -var Base64 = (function(){ +var Base64 = (function make_b64(){ var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; return { - decode: function(input) { + encode: function(input) { + var o = ""; + var c1, c2, c3; + var e1, e2, e3, e4; + for(var i = 0; i < input.length; ) { + c1 = input.charCodeAt(i++); + e1 = (c1 >> 2); + + c2 = input.charCodeAt(i++); + e2 = ((c1 & 3) << 4) | (c2 >> 4); + + c3 = input.charCodeAt(i++); + e3 = ((c2 & 15) << 2) | (c3 >> 6); + e4 = (c3 & 63); + if (isNaN(c2)) { e3 = e4 = 64; } + else if (isNaN(c3)) { e4 = 64; } + o += map.charAt(e1) + map.charAt(e2) + map.charAt(e3) + map.charAt(e4); + } + return o; + }, + decode: function b64_decode(input) { var o = ""; var c1, c2, c3; var e1, e2, e3, e4; @@ -28,14 +50,24 @@ var Base64 = (function(){ } }; })(); +var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' && typeof process.versions !== 'undefined' && process.versions.node); + +function new_raw_buf(len) { + /* jshint -W056 */ + // $FlowIgnore + return new (has_buf ? Buffer : Array)(len); + /* jshint +W056 */ +} + +var s2a = function s2a(s) { + if(has_buf) return new Buffer(s, "binary"); + return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); +}; var chr0 = /\u0000/g, chr1 = /[\u0001-\u0006]/; - -var s2a = function _s2a(s) { return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); }; -var _s2a = s2a; var __toBuffer = function(bufs) { var x = []; for(var i = 0; i < bufs[0].length; ++i) { x.push.apply(x, bufs[0][i]); } return x; }; var ___toBuffer = __toBuffer; -var __utf16le = function(b,s,e) { var ss=[]; for(var i=s; i 0 && Buffer.isBuffer(bufs[0][0])) ? Buffer.concat((bufs[0])) : ___toBuffer(bufs);}; @@ -76,7 +108,35 @@ function ReadShift(size, t) { case 4: oI = __readInt32LE(this, this.l); break; case 16: type = 2; oS = __hexlify(this, this.l, size); } - this.l+=size; if(type === 0) return oI; return oS; + this.l += size; if(type === 0) return oI; return oS; +} + +var __writeUInt32LE = function(b, val, idx) { b[idx] = (val & 0xFF); b[idx+1] = ((val >>> 8) & 0xFF); b[idx+2] = ((val >>> 16) & 0xFF); b[idx+3] = ((val >>> 24) & 0xFF); }; +var __writeInt32LE = function(b, val, idx) { b[idx] = (val & 0xFF); b[idx+1] = ((val >> 8) & 0xFF); b[idx+2] = ((val >> 16) & 0xFF); b[idx+3] = ((val >> 24) & 0xFF); }; + +function WriteShift(t, val, f) { + var size = 0, i = 0; + switch(f) { + case "hex": for(; i < t; ++i) { +this[this.l++] = parseInt(val.slice(2*i, 2*i+2), 16)||0; + } return this; + case "utf16le": +var end = this.l + t; + for(i = 0; i < Math.min(val.length, t); ++i) { + var cc = val.charCodeAt(i); + this[this.l++] = cc & 0xff; + this[this.l++] = cc >> 8; + } + while(this.l < end) this[this.l++] = 0; + return this; + } +switch(t) { + case 1: size = 1; this[this.l] = val&0xFF; break; + case 2: size = 2; this[this.l] = val&0xFF; val >>>= 8; this[this.l+1] = val&0xFF; break; + case 4: size = 4; __writeUInt32LE(this, val, this.l); break; + case -4: size = 4; __writeInt32LE(this, val, this.l); break; + } + this.l += size; return this; } function CheckField(hexstr, fld) { @@ -89,20 +149,47 @@ function prep_blob(blob, pos) { blob.l = pos; blob.read_shift = ReadShift; blob.chk = CheckField; + blob.write_shift = WriteShift; +} + +function new_buf(sz) { + var o = (new_raw_buf(sz)); + prep_blob(o, 0); + return o; } /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports = {}; -exports.version = '0.12.1'; +exports.version = '0.13.0'; +/* [MS-CFB] 2.6.4 */ +function namecmp(l, r) { + var L = l.split("/"), R = r.split("/"); + for(var i = 0, c = 0, Z = Math.min(L.length, R.length); i < Z; ++i) { + if((c = L[i].length - R[i].length)) return c; + if(L[i] != R[i]) return L[i] < R[i] ? -1 : 1; + } + return L.length - R.length; +} +function dirname(p) { + if(p.charAt(p.length - 1) == "/") return (p.slice(0,-1).indexOf("/") === -1) ? p : dirname(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(0, c+1); +} + +function filename(p) { + if(p.charAt(p.length - 1) == "/") return filename(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(c+1); +} function parse(file, options) { -var mver = 3; // major version -var ssz = 512; // sector size +var mver = 3; +var ssz = 512; var nmfs = 0; // number of mini FAT sectors -var ndfs = 0; // number of DIFAT sectors -var dir_start = 0; // first directory sector location -var minifat_start = 0; // first mini FAT sector location -var difat_start = 0; // first mini FAT sector location +var difat_sec_cnt = 0; +var dir_start = 0; +var minifat_start = 0; +var difat_start = 0; var fat_addrs = []; // locations of FAT sectors @@ -126,11 +213,10 @@ var header = file.slice(0,ssz); check_shifts(blob, mver); // Number of Directory Sectors -var nds = blob.read_shift(4, 'i'); -if(mver === 3 && nds !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + nds); +var dir_cnt = blob.read_shift(4, 'i'); +if(mver === 3 && dir_cnt !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + dir_cnt); // Number of FAT Sectors -//var nfs = blob.read_shift(4, 'i'); blob.l += 4; // First Directory Sector Location @@ -152,7 +238,7 @@ nmfs = blob.read_shift(4, 'i'); difat_start = blob.read_shift(4, 'i'); // Number of DIFAT Sectors -ndfs = blob.read_shift(4, 'i'); +difat_sec_cnt = blob.read_shift(4, 'i'); // Grab FAT Sector Locations for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ @@ -164,7 +250,7 @@ for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ /** Break the file up into sectors */ var sectors = sectorify(file, ssz); -sleuth_fat(difat_start, ndfs, sectors, ssz, fat_addrs); +sleuth_fat(difat_start, difat_sec_cnt, sectors, ssz, fat_addrs); /** Chains */ var sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz); @@ -180,19 +266,17 @@ var files = {}, Paths = [], FileIndex = [], FullPaths = [], FullPathDir = {}; read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex); build_full_paths(FileIndex, FullPathDir, FullPaths, Paths); +Paths.shift(); -var root_name = Paths.shift(); - -/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ -var find_path = make_find_path(FullPaths, Paths, FileIndex, files, root_name); - -return { - raw: {header: header, sectors: sectors}, +var o = { FileIndex: FileIndex, FullPaths: FullPaths, - FullPathDir: FullPathDir, - find: find_path + FullPathDir: FullPathDir }; + +// $FlowIgnore +if(options && options.raw) o.raw = {header: header, sectors: sectors}; +return o; } // parse /* [MS-CFB] 2.2 Compound File Header -- read up to major version */ @@ -279,25 +363,8 @@ function build_full_paths(FI, FPD, FP, Paths) { } } -/* [MS-CFB] 2.6.4 */ -function make_find_path(FullPaths, Paths, FileIndex, files, root_name) { - var UCFullPaths = []; - var UCPaths = [], i = 0; - for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - return function find_path(path) { - var k = false; - if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; } - else k = path.indexOf("/") !== -1; - var UCPath = path.toUpperCase().replace(chr0,'').replace(chr1,'!'); - var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); - if(w === -1) return null; - return k === true ? FileIndex[w] : files[Paths[w]]; - }; -} - /** Chase down the rest of the DIFAT chain to build a comprehensive list - DIFAT chains by storing the next sector number as the last 32 bytes */ + DIFAT chains by storing the next sector number as the last 32 bits */ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) { var q = ENDOFCHAIN; if(idx === ENDOFCHAIN) { @@ -315,7 +382,6 @@ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) { /** Follow the linked list of sectors for a given starting point */ function get_sector_list(sectors, start, fat_addrs, ssz, chkd) { - var sl = sectors.length; var buf = [], buf_chain = []; if(!chkd) chkd = []; var modulus = ssz - 1, j = 0, jj = 0; @@ -414,22 +480,254 @@ function read_date(blob, offset) { } var fs; -function readFileSync(filename, options) { +function read_file(filename, options) { if(fs == null) fs = require('fs'); return parse(fs.readFileSync(filename), options); } -function readSync(blob, options) { +function read(blob, options) { switch(options && options.type || "base64") { - case "file": return readFileSync(blob, options); + case "file": return read_file(blob, options); case "base64": return parse(s2a(Base64.decode(blob)), options); case "binary": return parse(s2a(blob), options); } return parse(blob, options); } +function init_cfb(cfb, opts) { + var o = opts || {}, root = o.root || "Root Entry"; + if(!cfb.FullPaths) cfb.FullPaths = []; + if(!cfb.FileIndex) cfb.FileIndex = []; + if(cfb.FullPaths.length !== cfb.FileIndex.length) throw new Error("inconsistent CFB structure"); + if(cfb.FullPaths.length === 0) { + cfb.FullPaths[0] = root + "/"; + cfb.FileIndex[0] = ({ name: root, type: 5 }); + } + if(o.CLSID) cfb.FileIndex[0].clsid = o.CLSID; + seed_cfb(cfb); +} +function seed_cfb(cfb) { + var nm = "\u0001Sh33tJ5"; + if(CFB.find(cfb, "/" + nm)) return; + var p = new_buf(4); p[0] = 55; p[1] = p[3] = 50; p[2] = 54; + cfb.FileIndex.push(({ name: nm, type: 2, content:p, size:4, L:69, R:69, C:69 })); + cfb.FullPaths.push(cfb.FullPaths[0] + nm); + rebuild_cfb(cfb); +} +function rebuild_cfb(cfb, f) { + init_cfb(cfb); + var gc = false, s = false; + for(var i = cfb.FullPaths.length - 1; i >= 0; --i) { + var _file = cfb.FileIndex[i]; + switch(_file.type) { + case 0: + if(s) gc = true; + else { cfb.FileIndex.pop(); cfb.FullPaths.pop(); } + break; + case 1: case 2: case 5: + s = true; + if(isNaN(_file.R * _file.L * _file.C)) gc = true; + if(_file.R > -1 && _file.L > -1 && _file.R == _file.L) gc = true; + break; + default: gc = true; break; + } + } + if(!gc && !f) return; + + var now = new Date(), j = 0; + var data = []; + for(i = 0; i < cfb.FullPaths.length; ++i) { + if(cfb.FileIndex[i].type === 0) continue; + data.push([cfb.FullPaths[i], cfb.FileIndex[i]]); + } + for(i = 0; i < data.length; ++i) { + var dad = dirname(data[i][0]); + s = false; + for(j = 0; j < data.length; ++j) if(data[j][0] === dad) s = true; + if(!s) data.push([dad, ({ + name: filename(dad).replace("/",""), + type: 1, + clsid: HEADER_CLSID, + ct: now, mt: now, + content: null + })]); + } + + data.sort(function(x,y) { return namecmp(x[0], y[0]); }); + cfb.FullPaths = []; cfb.FileIndex = []; + for(i = 0; i < data.length; ++i) { cfb.FullPaths[i] = data[i][0]; cfb.FileIndex[i] = data[i][1]; } + for(i = 0; i < data.length; ++i) { + var elt = cfb.FileIndex[i]; + var nm = cfb.FullPaths[i]; + + elt.name = filename(nm).replace("/",""); + elt.L = elt.R = elt.C = -(elt.color = 1); + elt.size = elt.content ? elt.content.length : 0; + elt.start = 0; + elt.clsid = (elt.clsid || HEADER_CLSID); + if(i === 0) { + elt.C = data.length > 1 ? 1 : -1; + elt.size = 0; + elt.type = 5; + } else if(nm.slice(-1) == "/") { + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==nm) break; + elt.C = j >= data.length ? -1 : j; + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==dirname(nm)) break; + elt.R = j >= data.length ? -1 : j; + elt.type = 1; + } else { + if(dirname(cfb.FullPaths[i+1]||"") == dirname(nm)) elt.R = i + 1; + elt.type = 2; + } + } + +} + +function _write(cfb, options) { + rebuild_cfb(cfb); + var L = (function(cfb){ + var mini_size = 0, fat_size = 0; + for(var i = 0; i < cfb.FileIndex.length; ++i) { + var file = cfb.FileIndex[i]; + if(!file.content) continue; +var flen = file.content.length; + if(flen === 0){} + else if(flen < 0x1000) mini_size += (flen + 0x3F) >> 6; + else fat_size += (flen + 0x01FF) >> 9; + } + var dir_cnt = (cfb.FullPaths.length +3) >> 2; + var mini_cnt = (mini_size + 7) >> 3; + var mfat_cnt = (mini_size + 0x7F) >> 7; + var fat_base = mini_cnt + fat_size + dir_cnt + mfat_cnt; + var fat_cnt = (fat_base + 0x7F) >> 7; + var difat_cnt = fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + while(((fat_base + fat_cnt + difat_cnt + 0x7F) >> 7) > fat_cnt) difat_cnt = ++fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + var L = [1, difat_cnt, fat_cnt, mfat_cnt, dir_cnt, fat_size, mini_size, 0]; + cfb.FileIndex[0].size = mini_size << 6; + L[7] = (cfb.FileIndex[0].start=L[0]+L[1]+L[2]+L[3]+L[4]+L[5])+((L[6]+7) >> 3); + return L; + })(cfb); + var o = new_buf(L[7] << 9); + var i = 0, T = 0; + { + for(i = 0; i < 8; ++i) o.write_shift(1, HEADER_SIG[i]); + for(i = 0; i < 8; ++i) o.write_shift(2, 0); + o.write_shift(2, 0x003E); + o.write_shift(2, 0x0003); + o.write_shift(2, 0xFFFE); + o.write_shift(2, 0x0009); + o.write_shift(2, 0x0006); + for(i = 0; i < 3; ++i) o.write_shift(2, 0); + o.write_shift(4, 0); + o.write_shift(4, L[2]); + o.write_shift(4, L[0] + L[1] + L[2] + L[3] - 1); + o.write_shift(4, 0); + o.write_shift(4, 1<<12); + o.write_shift(4, L[3] ? L[0] + L[1] + L[2] - 1: ENDOFCHAIN); + o.write_shift(4, L[3]); + o.write_shift(-4, L[1] ? L[0] - 1: ENDOFCHAIN); + o.write_shift(4, L[1]); + for(i = 0; i < 109; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + } + if(L[1]) { + for(T = 0; T < L[1]; ++T) { + for(; i < 236 + T * 127; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + o.write_shift(-4, T === L[1] - 1 ? ENDOFCHAIN : T + 1); + } + } + var chainit = function(w) { + for(T += w; i> 9); + } + chainit((L[6] + 7) >> 3); + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + T = i = 0; + for(j = 0; j < cfb.FileIndex.length; ++j) { + file = cfb.FileIndex[j]; + if(!file.content) continue; +flen = file.content.length; + if(!flen || flen >= 0x1000) continue; + file.start = T; + chainit((flen + 0x3F) >> 6); + } + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + for(i = 0; i < L[4]<<2; ++i) { + var nm = cfb.FullPaths[i]; + if(!nm || nm.length === 0) { + for(j = 0; j < 17; ++j) o.write_shift(4, 0); + for(j = 0; j < 3; ++j) o.write_shift(4, -1); + for(j = 0; j < 12; ++j) o.write_shift(4, 0); + continue; + } + file = cfb.FileIndex[i]; + if(i === 0) file.start = file.size ? file.start - 1 : ENDOFCHAIN; + flen = 2*(file.name.length+1); + o.write_shift(64, file.name, "utf16le"); + o.write_shift(2, flen); + o.write_shift(1, file.type); + o.write_shift(1, file.color); + o.write_shift(-4, file.L); + o.write_shift(-4, file.R); + o.write_shift(-4, file.C); + if(!file.clsid) for(j = 0; j < 4; ++j) o.write_shift(4, 0); + else o.write_shift(16, file.clsid, "hex"); + o.write_shift(4, file.state || 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, file.start); + o.write_shift(4, file.size); o.write_shift(4, 0); + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; +if(file.size >= 0x1000) { + o.l = (file.start+1) << 9; + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x1FF; ++j) o.write_shift(1, 0); + } + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; +if(file.size > 0 && file.size < 0x1000) { + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x3F; ++j) o.write_shift(1, 0); + } + } + + return o; +} +/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ function find(cfb, path) { - return cfb.find(path); + //return cfb.find(path); + var UCFullPaths = cfb.FullPaths.map(function(x) { return x.toUpperCase(); }); + var UCPaths = UCFullPaths.map(function(x) { var y = x.split("/"); return y[y.length - (x.slice(-1) == "/" ? 2 : 1)]; }); + var k = false; + if(path.charCodeAt(0) === 47 /* "/" */) { k = true; path = UCFullPaths[0].slice(0, -1) + path; } + else k = path.indexOf("/") !== -1; + var UCPath = path.toUpperCase(); + var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); + if(w !== -1) return cfb.FileIndex[w]; + + UCPath = UCPath.replace(chr0,'').replace(chr1,'!'); + for(w = 0; w < UCFullPaths.length; ++w) { + if(UCFullPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + if(UCPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + } + return null; } /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ @@ -438,6 +736,7 @@ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ var ENDOFCHAIN = -2; /* 2.2 Compound File Header */ var HEADER_SIGNATURE = 'd0cf11e0a1b11ae1'; +var HEADER_SIG = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; var HEADER_CLSID = '00000000000000000000000000000000'; var consts = { /* 2.1 Compund File Sector Numbers and Types */ @@ -456,10 +755,84 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +function write_file(cfb, filename, options) { + var o = _write(cfb, options); +fs.writeFileSync(filename, o); +} + +function a2s(o) { + var out = new Array(o.length); + for(var i = 0; i < o.length; ++i) out[i] = String.fromCharCode(o[i]); + return out.join(""); +} + +function write(cfb, options) { + var o = _write(cfb, options); + switch(options && options.type) { + case "file": fs.writeFileSync(options.filename, (o)); return o; + case "binary": return a2s(o); + case "base64": return Base64.encode(a2s(o)); + } + return o; +} +function cfb_new(opts) { + var o = ({}); + init_cfb(o, opts); + return o; +} + +function cfb_add(cfb, name, content, opts) { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(!file) { + file = ({name: filename(name)}); + cfb.FileIndex.push(file); + cfb.FullPaths.push(name); + CFB.utils.cfb_gc(cfb); + } +file.content = (content); + file.size = content ? content.length : 0; + if(opts) { + if(opts.CLSID) file.clsid = opts.CLSID; + } + return file; +} + +function cfb_del(cfb, name) { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex.splice(j, 1); + cfb.FullPaths.splice(j, 1); + return true; + } + return false; +} + +function cfb_mov(cfb, old_name, new_name) { + init_cfb(cfb); + var file = CFB.find(cfb, old_name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex[j].name = filename(new_name); + cfb.FullPaths[j] = new_name; + return true; + } + return false; +} + +function cfb_gc(cfb) { rebuild_cfb(cfb, true); } + exports.find = find; -exports.read = readSync; +exports.read = read; exports.parse = parse; +exports.write = write; +exports.writeFile = write_file; exports.utils = { + cfb_new: cfb_new, + cfb_add: cfb_add, + cfb_del: cfb_del, + cfb_mov: cfb_mov, + cfb_gc: cfb_gc, ReadShift: ReadShift, CheckField: CheckField, prep_blob: prep_blob, diff --git a/dist/cfb.js b/dist/cfb.js index 3df9d36..85629d5 100644 --- a/dist/cfb.js +++ b/dist/cfb.js @@ -1,11 +1,33 @@ /* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ /*jshint eqnull:true */ +/*exported CFB */ +/*global module, require:false, process:false, Buffer:false, Uint8Array:false */ -var Base64 = (function(){ +var Base64 = (function make_b64(){ var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; return { - decode: function(input) { + encode: function(input) { + var o = ""; + var c1, c2, c3; + var e1, e2, e3, e4; + for(var i = 0; i < input.length; ) { + c1 = input.charCodeAt(i++); + e1 = (c1 >> 2); + + c2 = input.charCodeAt(i++); + e2 = ((c1 & 3) << 4) | (c2 >> 4); + + c3 = input.charCodeAt(i++); + e3 = ((c2 & 15) << 2) | (c3 >> 6); + e4 = (c3 & 63); + if (isNaN(c2)) { e3 = e4 = 64; } + else if (isNaN(c3)) { e4 = 64; } + o += map.charAt(e1) + map.charAt(e2) + map.charAt(e3) + map.charAt(e4); + } + return o; + }, + decode: function b64_decode(input) { var o = ""; var c1, c2, c3; var e1, e2, e3, e4; @@ -28,14 +50,24 @@ var Base64 = (function(){ } }; })(); +var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' && typeof process.versions !== 'undefined' && process.versions.node); + +function new_raw_buf(len) { + /* jshint -W056 */ + // $FlowIgnore + return new (has_buf ? Buffer : Array)(len); + /* jshint +W056 */ +} + +var s2a = function s2a(s) { + if(has_buf) return new Buffer(s, "binary"); + return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); +}; var chr0 = /\u0000/g, chr1 = /[\u0001-\u0006]/; - -var s2a = function _s2a(s) { return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); }; -var _s2a = s2a; var __toBuffer = function(bufs) { var x = []; for(var i = 0; i < bufs[0].length; ++i) { x.push.apply(x, bufs[0][i]); } return x; }; var ___toBuffer = __toBuffer; -var __utf16le = function(b,s,e) { var ss=[]; for(var i=s; i 0 && Buffer.isBuffer(bufs[0][0])) ? Buffer.concat((bufs[0])) : ___toBuffer(bufs);}; @@ -76,7 +108,35 @@ function ReadShift(size, t) { case 4: oI = __readInt32LE(this, this.l); break; case 16: type = 2; oS = __hexlify(this, this.l, size); } - this.l+=size; if(type === 0) return oI; return oS; + this.l += size; if(type === 0) return oI; return oS; +} + +var __writeUInt32LE = function(b, val, idx) { b[idx] = (val & 0xFF); b[idx+1] = ((val >>> 8) & 0xFF); b[idx+2] = ((val >>> 16) & 0xFF); b[idx+3] = ((val >>> 24) & 0xFF); }; +var __writeInt32LE = function(b, val, idx) { b[idx] = (val & 0xFF); b[idx+1] = ((val >> 8) & 0xFF); b[idx+2] = ((val >> 16) & 0xFF); b[idx+3] = ((val >> 24) & 0xFF); }; + +function WriteShift(t, val, f) { + var size = 0, i = 0; + switch(f) { + case "hex": for(; i < t; ++i) { +this[this.l++] = parseInt(val.slice(2*i, 2*i+2), 16)||0; + } return this; + case "utf16le": +var end = this.l + t; + for(i = 0; i < Math.min(val.length, t); ++i) { + var cc = val.charCodeAt(i); + this[this.l++] = cc & 0xff; + this[this.l++] = cc >> 8; + } + while(this.l < end) this[this.l++] = 0; + return this; + } +switch(t) { + case 1: size = 1; this[this.l] = val&0xFF; break; + case 2: size = 2; this[this.l] = val&0xFF; val >>>= 8; this[this.l+1] = val&0xFF; break; + case 4: size = 4; __writeUInt32LE(this, val, this.l); break; + case -4: size = 4; __writeInt32LE(this, val, this.l); break; + } + this.l += size; return this; } function CheckField(hexstr, fld) { @@ -89,20 +149,47 @@ function prep_blob(blob, pos) { blob.l = pos; blob.read_shift = ReadShift; blob.chk = CheckField; + blob.write_shift = WriteShift; +} + +function new_buf(sz) { + var o = (new_raw_buf(sz)); + prep_blob(o, 0); + return o; } /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports = {}; -exports.version = '0.12.0'; +exports.version = '0.13.0'; +/* [MS-CFB] 2.6.4 */ +function namecmp(l, r) { + var L = l.split("/"), R = r.split("/"); + for(var i = 0, c = 0, Z = Math.min(L.length, R.length); i < Z; ++i) { + if((c = L[i].length - R[i].length)) return c; + if(L[i] != R[i]) return L[i] < R[i] ? -1 : 1; + } + return L.length - R.length; +} +function dirname(p) { + if(p.charAt(p.length - 1) == "/") return (p.slice(0,-1).indexOf("/") === -1) ? p : dirname(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(0, c+1); +} + +function filename(p) { + if(p.charAt(p.length - 1) == "/") return filename(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(c+1); +} function parse(file, options) { -var mver = 3; // major version -var ssz = 512; // sector size +var mver = 3; +var ssz = 512; var nmfs = 0; // number of mini FAT sectors -var ndfs = 0; // number of DIFAT sectors -var dir_start = 0; // first directory sector location -var minifat_start = 0; // first mini FAT sector location -var difat_start = 0; // first mini FAT sector location +var difat_sec_cnt = 0; +var dir_start = 0; +var minifat_start = 0; +var difat_start = 0; var fat_addrs = []; // locations of FAT sectors @@ -126,11 +213,10 @@ var header = file.slice(0,ssz); check_shifts(blob, mver); // Number of Directory Sectors -var nds = blob.read_shift(4, 'i'); -if(mver === 3 && nds !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + nds); +var dir_cnt = blob.read_shift(4, 'i'); +if(mver === 3 && dir_cnt !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + dir_cnt); // Number of FAT Sectors -//var nfs = blob.read_shift(4, 'i'); blob.l += 4; // First Directory Sector Location @@ -152,7 +238,7 @@ nmfs = blob.read_shift(4, 'i'); difat_start = blob.read_shift(4, 'i'); // Number of DIFAT Sectors -ndfs = blob.read_shift(4, 'i'); +difat_sec_cnt = blob.read_shift(4, 'i'); // Grab FAT Sector Locations for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ @@ -164,7 +250,7 @@ for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ /** Break the file up into sectors */ var sectors = sectorify(file, ssz); -sleuth_fat(difat_start, ndfs, sectors, ssz, fat_addrs); +sleuth_fat(difat_start, difat_sec_cnt, sectors, ssz, fat_addrs); /** Chains */ var sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz); @@ -180,19 +266,17 @@ var files = {}, Paths = [], FileIndex = [], FullPaths = [], FullPathDir = {}; read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex); build_full_paths(FileIndex, FullPathDir, FullPaths, Paths); +Paths.shift(); -var root_name = Paths.shift(); - -/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ -var find_path = make_find_path(FullPaths, Paths, FileIndex, files, root_name); - -return { - raw: {header: header, sectors: sectors}, +var o = { FileIndex: FileIndex, FullPaths: FullPaths, - FullPathDir: FullPathDir, - find: find_path + FullPathDir: FullPathDir }; + +// $FlowIgnore +if(options && options.raw) o.raw = {header: header, sectors: sectors}; +return o; } // parse /* [MS-CFB] 2.2 Compound File Header -- read up to major version */ @@ -265,7 +349,7 @@ function build_full_paths(FI, FPD, FP, Paths) { if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } @@ -279,25 +363,8 @@ function build_full_paths(FI, FPD, FP, Paths) { } } -/* [MS-CFB] 2.6.4 */ -function make_find_path(FullPaths, Paths, FileIndex, files, root_name) { - var UCFullPaths = []; - var UCPaths = [], i = 0; - for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - return function find_path(path) { - var k = false; - if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; } - else k = path.indexOf("/") !== -1; - var UCPath = path.toUpperCase().replace(chr0,'').replace(chr1,'!'); - var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); - if(w === -1) return null; - return k === true ? FileIndex[w] : files[Paths[w]]; - }; -} - /** Chase down the rest of the DIFAT chain to build a comprehensive list - DIFAT chains by storing the next sector number as the last 32 bytes */ + DIFAT chains by storing the next sector number as the last 32 bits */ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) { var q = ENDOFCHAIN; if(idx === ENDOFCHAIN) { @@ -315,7 +382,6 @@ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) { /** Follow the linked list of sectors for a given starting point */ function get_sector_list(sectors, start, fat_addrs, ssz, chkd) { - var sl = sectors.length; var buf = [], buf_chain = []; if(!chkd) chkd = []; var modulus = ssz - 1, j = 0, jj = 0; @@ -366,7 +432,6 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil var blob = sector.slice(i, i+128); prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o = ({ @@ -387,6 +452,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ -399,7 +465,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)); prep_blob(o.content, 0); } @@ -414,20 +480,255 @@ function read_date(blob, offset) { } var fs; -function readFileSync(filename, options) { +function read_file(filename, options) { if(fs == null) fs = require('fs'); return parse(fs.readFileSync(filename), options); } -function readSync(blob, options) { +function read(blob, options) { switch(options && options.type || "base64") { - case "file": return readFileSync(blob, options); + case "file": return read_file(blob, options); case "base64": return parse(s2a(Base64.decode(blob)), options); case "binary": return parse(s2a(blob), options); } return parse(blob, options); } +function init_cfb(cfb, opts) { + var o = opts || {}, root = o.root || "Root Entry"; + if(!cfb.FullPaths) cfb.FullPaths = []; + if(!cfb.FileIndex) cfb.FileIndex = []; + if(cfb.FullPaths.length !== cfb.FileIndex.length) throw new Error("inconsistent CFB structure"); + if(cfb.FullPaths.length === 0) { + cfb.FullPaths[0] = root + "/"; + cfb.FileIndex[0] = ({ name: root, type: 5 }); + } + if(o.CLSID) cfb.FileIndex[0].clsid = o.CLSID; + seed_cfb(cfb); +} +function seed_cfb(cfb) { + var nm = "\u0001Sh33tJ5"; + if(CFB.find(cfb, "/" + nm)) return; + var p = new_buf(4); p[0] = 55; p[1] = p[3] = 50; p[2] = 54; + cfb.FileIndex.push(({ name: nm, type: 2, content:p, size:4, L:69, R:69, C:69 })); + cfb.FullPaths.push(cfb.FullPaths[0] + nm); + rebuild_cfb(cfb); +} +function rebuild_cfb(cfb, f) { + init_cfb(cfb); + var gc = false, s = false; + for(var i = cfb.FullPaths.length - 1; i >= 0; --i) { + var _file = cfb.FileIndex[i]; + switch(_file.type) { + case 0: + if(s) gc = true; + else { cfb.FileIndex.pop(); cfb.FullPaths.pop(); } + break; + case 1: case 2: case 5: + s = true; + if(isNaN(_file.R * _file.L * _file.C)) gc = true; + if(_file.R > -1 && _file.L > -1 && _file.R == _file.L) gc = true; + break; + default: gc = true; break; + } + } + if(!gc && !f) return; + + var now = new Date(), j = 0; + var data = []; + for(i = 0; i < cfb.FullPaths.length; ++i) { + if(cfb.FileIndex[i].type === 0) continue; + data.push([cfb.FullPaths[i], cfb.FileIndex[i]]); + } + for(i = 0; i < data.length; ++i) { + var dad = dirname(data[i][0]); + s = false; + for(j = 0; j < data.length; ++j) if(data[j][0] === dad) s = true; + if(!s) data.push([dad, ({ + name: filename(dad).replace("/",""), + type: 1, + clsid: HEADER_CLSID, + ct: now, mt: now, + content: null + })]); + } + + data.sort(function(x,y) { return namecmp(x[0], y[0]); }); + cfb.FullPaths = []; cfb.FileIndex = []; + for(i = 0; i < data.length; ++i) { cfb.FullPaths[i] = data[i][0]; cfb.FileIndex[i] = data[i][1]; } + for(i = 0; i < data.length; ++i) { + var elt = cfb.FileIndex[i]; + var nm = cfb.FullPaths[i]; + + elt.name = filename(nm).replace("/",""); + elt.L = elt.R = elt.C = -(elt.color = 1); + elt.size = elt.content ? elt.content.length : 0; + elt.start = 0; + elt.clsid = (elt.clsid || HEADER_CLSID); + if(i === 0) { + elt.C = data.length > 1 ? 1 : -1; + elt.size = 0; + elt.type = 5; + } else if(nm.slice(-1) == "/") { + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==nm) break; + elt.C = j >= data.length ? -1 : j; + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==dirname(nm)) break; + elt.R = j >= data.length ? -1 : j; + elt.type = 1; + } else { + if(dirname(cfb.FullPaths[i+1]||"") == dirname(nm)) elt.R = i + 1; + elt.type = 2; + } + } + +} + +function _write(cfb, options) { + rebuild_cfb(cfb); + var L = (function(cfb){ + var mini_size = 0, fat_size = 0; + for(var i = 0; i < cfb.FileIndex.length; ++i) { + var file = cfb.FileIndex[i]; + if(!file.content) continue; +var flen = file.content.length; + if(flen === 0){} + else if(flen < 0x1000) mini_size += (flen + 0x3F) >> 6; + else fat_size += (flen + 0x01FF) >> 9; + } + var dir_cnt = (cfb.FullPaths.length +3) >> 2; + var mini_cnt = (mini_size + 7) >> 3; + var mfat_cnt = (mini_size + 0x7F) >> 7; + var fat_base = mini_cnt + fat_size + dir_cnt + mfat_cnt; + var fat_cnt = (fat_base + 0x7F) >> 7; + var difat_cnt = fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + while(((fat_base + fat_cnt + difat_cnt + 0x7F) >> 7) > fat_cnt) difat_cnt = ++fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + var L = [1, difat_cnt, fat_cnt, mfat_cnt, dir_cnt, fat_size, mini_size, 0]; + cfb.FileIndex[0].size = mini_size << 6; + L[7] = (cfb.FileIndex[0].start=L[0]+L[1]+L[2]+L[3]+L[4]+L[5])+((L[6]+7) >> 3); + return L; + })(cfb); + var o = new_buf(L[7] << 9); + var i = 0, T = 0; + { + for(i = 0; i < 8; ++i) o.write_shift(1, HEADER_SIG[i]); + for(i = 0; i < 8; ++i) o.write_shift(2, 0); + o.write_shift(2, 0x003E); + o.write_shift(2, 0x0003); + o.write_shift(2, 0xFFFE); + o.write_shift(2, 0x0009); + o.write_shift(2, 0x0006); + for(i = 0; i < 3; ++i) o.write_shift(2, 0); + o.write_shift(4, 0); + o.write_shift(4, L[2]); + o.write_shift(4, L[0] + L[1] + L[2] + L[3] - 1); + o.write_shift(4, 0); + o.write_shift(4, 1<<12); + o.write_shift(4, L[3] ? L[0] + L[1] + L[2] - 1: ENDOFCHAIN); + o.write_shift(4, L[3]); + o.write_shift(-4, L[1] ? L[0] - 1: ENDOFCHAIN); + o.write_shift(4, L[1]); + for(i = 0; i < 109; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + } + if(L[1]) { + for(T = 0; T < L[1]; ++T) { + for(; i < 236 + T * 127; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + o.write_shift(-4, T === L[1] - 1 ? ENDOFCHAIN : T + 1); + } + } + var chainit = function(w) { + for(T += w; i> 9); + } + chainit((L[6] + 7) >> 3); + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + T = i = 0; + for(j = 0; j < cfb.FileIndex.length; ++j) { + file = cfb.FileIndex[j]; + if(!file.content) continue; +flen = file.content.length; + if(!flen || flen >= 0x1000) continue; + file.start = T; + chainit((flen + 0x3F) >> 6); + } + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + for(i = 0; i < L[4]<<2; ++i) { + var nm = cfb.FullPaths[i]; + if(!nm || nm.length === 0) { + for(j = 0; j < 17; ++j) o.write_shift(4, 0); + for(j = 0; j < 3; ++j) o.write_shift(4, -1); + for(j = 0; j < 12; ++j) o.write_shift(4, 0); + continue; + } + file = cfb.FileIndex[i]; + if(i === 0) file.start = file.size ? file.start - 1 : ENDOFCHAIN; + flen = 2*(file.name.length+1); + o.write_shift(64, file.name, "utf16le"); + o.write_shift(2, flen); + o.write_shift(1, file.type); + o.write_shift(1, file.color); + o.write_shift(-4, file.L); + o.write_shift(-4, file.R); + o.write_shift(-4, file.C); + if(!file.clsid) for(j = 0; j < 4; ++j) o.write_shift(4, 0); + else o.write_shift(16, file.clsid, "hex"); + o.write_shift(4, file.state || 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, file.start); + o.write_shift(4, file.size); o.write_shift(4, 0); + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; +if(file.size >= 0x1000) { + o.l = (file.start+1) << 9; + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x1FF; ++j) o.write_shift(1, 0); + } + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; +if(file.size > 0 && file.size < 0x1000) { + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x3F; ++j) o.write_shift(1, 0); + } + } + + return o; +} +/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ +function find(cfb, path) { + //return cfb.find(path); + var UCFullPaths = cfb.FullPaths.map(function(x) { return x.toUpperCase(); }); + var UCPaths = UCFullPaths.map(function(x) { var y = x.split("/"); return y[y.length - (x.slice(-1) == "/" ? 2 : 1)]; }); + var k = false; + if(path.charCodeAt(0) === 47 /* "/" */) { k = true; path = UCFullPaths[0].slice(0, -1) + path; } + else k = path.indexOf("/") !== -1; + var UCPath = path.toUpperCase(); + var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); + if(w !== -1) return cfb.FileIndex[w]; + + UCPath = UCPath.replace(chr0,'').replace(chr1,'!'); + for(w = 0; w < UCFullPaths.length; ++w) { + if(UCFullPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + if(UCPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + } + return null; +} /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ //var MSCSZ = 4096; /* Mini Stream Cutoff Size */ @@ -435,6 +736,7 @@ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ var ENDOFCHAIN = -2; /* 2.2 Compound File Header */ var HEADER_SIGNATURE = 'd0cf11e0a1b11ae1'; +var HEADER_SIG = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; var HEADER_CLSID = '00000000000000000000000000000000'; var consts = { /* 2.1 Compund File Sector Numbers and Types */ @@ -453,9 +755,84 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; -exports.read = readSync; +function write_file(cfb, filename, options) { + var o = _write(cfb, options); +fs.writeFileSync(filename, o); +} + +function a2s(o) { + var out = new Array(o.length); + for(var i = 0; i < o.length; ++i) out[i] = String.fromCharCode(o[i]); + return out.join(""); +} + +function write(cfb, options) { + var o = _write(cfb, options); + switch(options && options.type) { + case "file": fs.writeFileSync(options.filename, (o)); return o; + case "binary": return a2s(o); + case "base64": return Base64.encode(a2s(o)); + } + return o; +} +function cfb_new(opts) { + var o = ({}); + init_cfb(o, opts); + return o; +} + +function cfb_add(cfb, name, content, opts) { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(!file) { + file = ({name: filename(name)}); + cfb.FileIndex.push(file); + cfb.FullPaths.push(name); + CFB.utils.cfb_gc(cfb); + } +file.content = (content); + file.size = content ? content.length : 0; + if(opts) { + if(opts.CLSID) file.clsid = opts.CLSID; + } + return file; +} + +function cfb_del(cfb, name) { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex.splice(j, 1); + cfb.FullPaths.splice(j, 1); + return true; + } + return false; +} + +function cfb_mov(cfb, old_name, new_name) { + init_cfb(cfb); + var file = CFB.find(cfb, old_name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex[j].name = filename(new_name); + cfb.FullPaths[j] = new_name; + return true; + } + return false; +} + +function cfb_gc(cfb) { rebuild_cfb(cfb, true); } + +exports.find = find; +exports.read = read; exports.parse = parse; +exports.write = write; +exports.writeFile = write_file; exports.utils = { + cfb_new: cfb_new, + cfb_add: cfb_add, + cfb_del: cfb_del, + cfb_mov: cfb_mov, + cfb_gc: cfb_gc, ReadShift: ReadShift, CheckField: CheckField, prep_blob: prep_blob, diff --git a/dist/cfb.min.js b/dist/cfb.min.js index 7551704..cf950bd 100644 --- a/dist/cfb.min.js +++ b/dist/cfb.min.js @@ -1,2 +1,2 @@ /* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ -var Base64=function(){var map="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";return{decode:function(input){var o="";var c1,c2,c3;var e1,e2,e3,e4;input=input.replace(/[^\w\+\/\=]/g,"");for(var i=0;i>4;o+=String.fromCharCode(c1);e3=map.indexOf(input.charAt(i++));c2=(e2&15)<<4|e3>>2;if(e3!==64){o+=String.fromCharCode(c2)}e4=map.indexOf(input.charAt(i++));c3=(e3&3)<<6|e4;if(e4!==64){o+=String.fromCharCode(c3)}}return o}}}();var chr0=/\u0000/g,chr1=/[\u0001-\u0006]/;var s2a=function _s2a(s){return s.split("").map(function(x){return x.charCodeAt(0)&255})};var _s2a=s2a;var __toBuffer=function(bufs){var x=[];for(var i=0;i0&&Buffer.isBuffer(bufs[0][0])?Buffer.concat(bufs[0]):___toBuffer(bufs)};s2a=function(s){return new Buffer(s,"binary")};bconcat=function(bufs){return Buffer.isBuffer(bufs[0])?Buffer.concat(bufs):__bconcat(bufs)}}var __readUInt8=function(b,idx){return b[idx]};var __readUInt16LE=function(b,idx){return b[idx+1]*(1<<8)+b[idx]};var __readInt16LE=function(b,idx){var u=b[idx+1]*(1<<8)+b[idx];return u<32768?u:(65535-u+1)*-1};var __readUInt32LE=function(b,idx){return b[idx+3]*(1<<24)+(b[idx+2]<<16)+(b[idx+1]<<8)+b[idx]};var __readInt32LE=function(b,idx){return(b[idx+3]<<24)+(b[idx+2]<<16)+(b[idx+1]<<8)+b[idx]};function ReadShift(size,t){var oI,oS,type=0;switch(size){case 1:oI=__readUInt8(this,this.l);break;case 2:oI=(t!=="i"?__readUInt16LE:__readInt16LE)(this,this.l);break;case 4:oI=__readInt32LE(this,this.l);break;case 16:type=2;oS=__hexlify(this,this.l,size);}this.l+=size;if(type===0)return oI;return oS}function CheckField(hexstr,fld){var m=__hexlify(this,this.l,hexstr.length>>1);if(m!==hexstr)throw new Error(fld+"Expected "+hexstr+" saw "+m);this.l+=hexstr.length>>1}function prep_blob(blob,pos){blob.l=pos;blob.read_shift=ReadShift;blob.chk=CheckField}var CFB=function _CFB(){var exports={};exports.version="0.12.0";function parse(file,options){var mver=3;var ssz=512;var nmfs=0;var ndfs=0;var dir_start=0;var minifat_start=0;var difat_start=0;var fat_addrs=[];var blob=file.slice(0,512);prep_blob(blob,0);var mv=check_get_mver(blob);mver=mv[0];switch(mver){case 3:ssz=512;break;case 4:ssz=4096;break;default:throw new Error("Major Version: Expected 3 or 4 saw "+mver);}if(ssz!==512){blob=file.slice(0,ssz);prep_blob(blob,28)}var header=file.slice(0,ssz);check_shifts(blob,mver);var nds=blob.read_shift(4,"i");if(mver===3&&nds!==0)throw new Error("# Directory Sectors: Expected 0 saw "+nds);blob.l+=4;dir_start=blob.read_shift(4,"i");blob.l+=4;blob.chk("00100000","Mini Stream Cutoff Size: ");minifat_start=blob.read_shift(4,"i");nmfs=blob.read_shift(4,"i");difat_start=blob.read_shift(4,"i");ndfs=blob.read_shift(4,"i");for(var q=-1,j=0;j<109;++j){q=blob.read_shift(4,"i");if(q<0)break;fat_addrs[j]=q}var sectors=sectorify(file,ssz);sleuth_fat(difat_start,ndfs,sectors,ssz,fat_addrs);var sector_list=make_sector_list(sectors,dir_start,fat_addrs,ssz);sector_list[dir_start].name="!Directory";if(nmfs>0&&minifat_start!==ENDOFCHAIN)sector_list[minifat_start].name="!MiniFAT";sector_list[fat_addrs[0]].name="!FAT";sector_list.fat_addrs=fat_addrs;sector_list.ssz=ssz;var files={},Paths=[],FileIndex=[],FullPaths=[],FullPathDir={};read_directory(dir_start,sector_list,sectors,Paths,nmfs,files,FileIndex);build_full_paths(FileIndex,FullPathDir,FullPaths,Paths);var root_name=Paths.shift();var find_path=make_find_path(FullPaths,Paths,FileIndex,files,root_name);return{raw:{header:header,sectors:sectors},FileIndex:FileIndex,FullPaths:FullPaths,FullPathDir:FullPathDir,find:find_path}}function check_get_mver(blob){blob.chk(HEADER_SIGNATURE,"Header Signature: ");blob.chk(HEADER_CLSID,"CLSID: ");var mver=blob.read_shift(2,"u");return[blob.read_shift(2,"u"),mver]}function check_shifts(blob,mver){var shift=9;blob.l+=2;switch(shift=blob.read_shift(2)){case 9:if(mver!=3)throw new Error("Sector Shift: Expected 9 saw "+shift);break;case 12:if(mver!=4)throw new Error("Sector Shift: Expected 12 saw "+shift);break;default:throw new Error("Sector Shift: Expected 9 or 12 saw "+shift);}blob.chk("0600","Mini Sector Shift: ");blob.chk("000000000000","Reserved: ")}function sectorify(file,ssz){var nsectors=Math.ceil(file.length/ssz)-1;var sectors=[];for(var i=1;i>>2)-1;if(!sector)return;for(var i=0;i=0;){chkd[j]=true;buf[buf.length]=j;buf_chain.push(sectors[j]);var addr=fat_addrs[Math.floor(j*4/ssz)];jj=j*4&modulus;if(ssz<4+jj)throw new Error("FAT boundary crossed: "+j+" 4 "+ssz);if(!sectors[addr])break;j=__readInt32LE(sectors[addr],jj)}return{nodes:buf,data:__toBuffer([buf_chain])}}function make_sector_list(sectors,dir_start,fat_addrs,ssz){var sl=sectors.length,sector_list=[];var chkd=[],buf=[],buf_chain=[];var modulus=ssz-1,i=0,j=0,k=0,jj=0;for(i=0;i=sl)k-=sl;if(chkd[k])continue;buf_chain=[];for(j=k;j>=0;){chkd[j]=true;buf[buf.length]=j;buf_chain.push(sectors[j]);var addr=fat_addrs[Math.floor(j*4/ssz)];jj=j*4&modulus;if(ssz<4+jj)throw new Error("FAT boundary crossed: "+j+" 4 "+ssz);if(!sectors[addr])break;j=__readInt32LE(sectors[addr],jj)}sector_list[k]={nodes:buf,data:__toBuffer([buf_chain])}}return sector_list}function read_directory(dir_start,sector_list,sectors,Paths,nmfs,files,FileIndex){var minifat_store=0,pl=Paths.length?2:0;var sector=sector_list[dir_start].data;var i=0,namelen=0,name;for(;i0&&minifat_store!==ENDOFCHAIN)sector_list[minifat_store].name="!StreamData"}else if(o.size>=4096){o.storage="fat";if(sector_list[o.start]===undefined)sector_list[o.start]=get_sector_list(sectors,o.start,sector_list.fat_addrs,sector_list.ssz);sector_list[o.start].name=o.name;o.content=sector_list[o.start].data.slice(0,o.size);prep_blob(o.content,0)}else{o.storage="minifat";if(minifat_store!==ENDOFCHAIN&&o.start!==ENDOFCHAIN){o.content=sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size);prep_blob(o.content,0)}}files[name]=o;FileIndex.push(o)}}function read_date(blob,offset){return new Date((__readUInt32LE(blob,offset+4)/1e7*Math.pow(2,32)+__readUInt32LE(blob,offset)/1e7-11644473600)*1e3)}var fs;function readFileSync(filename,options){if(fs==null)fs=require("fs");return parse(fs.readFileSync(filename),options)}function readSync(blob,options){switch(options&&options.type||"base64"){case"file":return readFileSync(blob,options);case"base64":return parse(s2a(Base64.decode(blob)),options);case"binary":return parse(s2a(blob),options);}return parse(blob,options)}var MSSZ=64;var ENDOFCHAIN=-2;var HEADER_SIGNATURE="d0cf11e0a1b11ae1";var HEADER_CLSID="00000000000000000000000000000000";var consts={MAXREGSECT:-6,DIFSECT:-4,FATSECT:-3,ENDOFCHAIN:ENDOFCHAIN,FREESECT:-1,HEADER_SIGNATURE:HEADER_SIGNATURE,HEADER_MINOR_VERSION:"3e00",MAXREGSID:-6,NOSTREAM:-1,HEADER_CLSID:HEADER_CLSID,EntryTypes:["unknown","storage","stream","lockbytes","property","root"]};exports.read=readSync;exports.parse=parse;exports.utils={ReadShift:ReadShift,CheckField:CheckField,prep_blob:prep_blob,bconcat:bconcat,consts:consts};return exports}();if(typeof require!=="undefined"&&typeof module!=="undefined"&&typeof DO_NOT_EXPORT_CFB==="undefined"){module.exports=CFB} +var Base64=function e(){var e="ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=";return{encode:function(r){var t="";var i,n,a;var f,s,h,l;for(var o=0;o>2;n=r.charCodeAt(o++);s=(i&3)<<4|n>>4;a=r.charCodeAt(o++);h=(n&15)<<2|a>>6;l=a&63;if(isNaN(n)){h=l=64}else if(isNaN(a)){l=64}t+=e.charAt(f)+e.charAt(s)+e.charAt(h)+e.charAt(l)}return t},decode:function r(t){var i="";var n,a,f;var s,h,l,o;t=t.replace(/[^\w\+\/\=]/g,"");for(var u=0;u>4;i+=String.fromCharCode(n);l=e.indexOf(t.charAt(u++));a=(h&15)<<4|l>>2;if(l!==64){i+=String.fromCharCode(a)}o=e.indexOf(t.charAt(u++));f=(l&3)<<6|o;if(o!==64){i+=String.fromCharCode(f)}}return i}}}();var has_buf=typeof Buffer!=="undefined"&&typeof process!=="undefined"&&typeof process.versions!=="undefined"&&process.versions.node;function new_raw_buf(e){return new(has_buf?Buffer:Array)(e)}var s2a=function r(e){if(has_buf)return new Buffer(e,"binary");return e.split("").map(function(e){return e.charCodeAt(0)&255})};var chr0=/\u0000/g,chr1=/[\u0001-\u0006]/;var __toBuffer=function(e){var r=[];for(var t=0;t0&&Buffer.isBuffer(e[0][0])?Buffer.concat(e[0]):___toBuffer(e)};s2a=function(e){return new Buffer(e,"binary")};bconcat=function(e){return Buffer.isBuffer(e[0])?Buffer.concat(e):__bconcat(e)}}var __readUInt8=function(e,r){return e[r]};var __readUInt16LE=function(e,r){return e[r+1]*(1<<8)+e[r]};var __readInt16LE=function(e,r){var t=e[r+1]*(1<<8)+e[r];return t<32768?t:(65535-t+1)*-1};var __readUInt32LE=function(e,r){return e[r+3]*(1<<24)+(e[r+2]<<16)+(e[r+1]<<8)+e[r]};var __readInt32LE=function(e,r){return(e[r+3]<<24)+(e[r+2]<<16)+(e[r+1]<<8)+e[r]};function ReadShift(e,r){var t,i,n=0;switch(e){case 1:t=__readUInt8(this,this.l);break;case 2:t=(r!=="i"?__readUInt16LE:__readInt16LE)(this,this.l);break;case 4:t=__readInt32LE(this,this.l);break;case 16:n=2;i=__hexlify(this,this.l,e);}this.l+=e;if(n===0)return t;return i}var __writeUInt32LE=function(e,r,t){e[t]=r&255;e[t+1]=r>>>8&255;e[t+2]=r>>>16&255;e[t+3]=r>>>24&255};var __writeInt32LE=function(e,r,t){e[t]=r&255;e[t+1]=r>>8&255;e[t+2]=r>>16&255;e[t+3]=r>>24&255};function WriteShift(e,r,t){var i=0,n=0;switch(t){case"hex":for(;n>8}while(this.l>>=8;this[this.l+1]=r&255;break;case 4:i=4;__writeUInt32LE(this,r,this.l);break;case-4:i=4;__writeInt32LE(this,r,this.l);break;}this.l+=i;return this}function CheckField(e,r){var t=__hexlify(this,this.l,e.length>>1);if(t!==e)throw new Error(r+"Expected "+e+" saw "+t);this.l+=e.length>>1}function prep_blob(e,r){e.l=r;e.read_shift=ReadShift;e.chk=CheckField;e.write_shift=WriteShift}function new_buf(e){var r=new_raw_buf(e);prep_blob(r,0);return r}var CFB=function t(){var e={};e.version="0.13.0";function r(e,r){var t=e.split("/"),i=r.split("/");for(var n=0,a=0,f=Math.min(t.length,i.length);n0&&d!==E)y[d].name="!MiniFAT";y[w[0]].name="!FAT";y.fat_addrs=w;y.ssz=i;var S={},A=[],m=[],B=[],k={};c(_,y,C,A,n,S,m);h(m,k,B,A);A.shift();var L={FileIndex:m,FullPaths:B,FullPathDir:k};if(r&&r.raw)L.raw={header:I,sectors:C};return L}function a(e){e.chk(C,"Header Signature: ");e.chk(S,"CLSID: ");var r=e.read_shift(2,"u");return[e.read_shift(2,"u"),r]}function f(e,r){var t=9;e.l+=2;switch(t=e.read_shift(2)){case 9:if(r!=3)throw new Error("Sector Shift: Expected 9 saw "+t);break;case 12:if(r!=4)throw new Error("Sector Shift: Expected 12 saw "+t);break;default:throw new Error("Sector Shift: Expected 9 or 12 saw "+t);}e.chk("0600","Mini Sector Shift: ");e.chk("000000000000","Reserved: ")}function s(e,r){var t=Math.ceil(e.length/r)-1;var i=[];for(var n=1;n>>2)-1;if(!f)return;for(var h=0;h=0;){n[h]=true;a[a.length]=h;f.push(e[h]);var o=t[Math.floor(h*4/i)];l=h*4&s;if(i<4+l)throw new Error("FAT boundary crossed: "+h+" 4 "+i);if(!e[o])break;h=__readInt32LE(e[o],l)}return{nodes:a,data:__toBuffer([f])}}function u(e,r,t,i){var n=e.length,a=[];var f=[],s=[],h=[];var l=i-1,o=0,u=0,c=0,_=0;for(o=0;o=n)c-=n;if(f[c])continue;h=[];for(u=c;u>=0;){f[u]=true;s[s.length]=u;h.push(e[u]);var d=t[Math.floor(u*4/i)];_=u*4&l;if(i<4+_)throw new Error("FAT boundary crossed: "+u+" 4 "+i);if(!e[d])break;u=__readInt32LE(e[d],_)}a[c]={nodes:s,data:__toBuffer([h])}}return a}function c(e,r,t,i,n,a,f){var s=0,h=i.length?2:0;var l=r[e].data;var u=0,c=0,d;for(;u0&&s!==E)r[s].name="!StreamData"}else if(w.size>=4096){w.storage="fat";if(r[w.start]===undefined)r[w.start]=o(t,w.start,r.fat_addrs,r.ssz);r[w.start].name=w.name;w.content=r[w.start].data.slice(0,w.size);prep_blob(w.content,0)}else{w.storage="minifat";if(s!==E&&w.start!==E&&r[s]){w.content=r[s].data.slice(w.start*x,w.start*x+w.size);prep_blob(w.content,0)}}a[d]=w;f.push(w)}}function _(e,r){return new Date((__readUInt32LE(e,r+4)/1e7*Math.pow(2,32)+__readUInt32LE(e,r)/1e7-11644473600)*1e3)}var d;function v(e,r){if(d==null)d=require("fs");return n(d.readFileSync(e),r)}function w(e,r){switch(r&&r.type||"base64"){case"file":return v(e,r);case"base64":return n(s2a(Base64.decode(e)),r);case"binary":return n(s2a(e),r);}return n(e,r)}function p(e,r){var t=r||{},i=t.root||"Root Entry";if(!e.FullPaths)e.FullPaths=[];if(!e.FileIndex)e.FileIndex=[];if(e.FullPaths.length!==e.FileIndex.length)throw new Error("inconsistent CFB structure");if(e.FullPaths.length===0){e.FullPaths[0]=i+"/";e.FileIndex[0]={name:i,type:5}}if(t.CLSID)e.FileIndex[0].clsid=t.CLSID;F(e)}function F(e){var r="Sh33tJ5";if(CFB.find(e,"/"+r))return;var t=new_buf(4);t[0]=55;t[1]=t[3]=50;t[2]=54;e.FileIndex.push({name:r,type:2,content:t,size:4,L:69,R:69,C:69});e.FullPaths.push(e.FullPaths[0]+r);I(e)}function I(e,n){p(e);var a=false,f=false;for(var s=e.FullPaths.length-1;s>=0;--s){var h=e.FileIndex[s];switch(h.type){case 0:if(f)a=true;else{e.FileIndex.pop();e.FullPaths.pop()}break;case 1:;case 2:;case 5:f=true;if(isNaN(h.R*h.L*h.C))a=true;if(h.R>-1&&h.L>-1&&h.R==h.L)a=true;break;default:a=true;break;}}if(!a&&!n)return;var l=new Date,o=0;var u=[];for(s=0;s1?1:-1;_.size=0;_.type=5}else if(d.slice(-1)=="/"){for(o=s+1;o=u.length?-1:o;for(o=s+1;o=u.length?-1:o;_.type=1}else{if(t(e.FullPaths[s+1]||"")==t(d))_.R=s+1;_.type=2}}}function g(e,r){I(e);var t=function(e){var r=0,t=0;for(var i=0;i>6;else t+=a+511>>9}var f=e.FullPaths.length+3>>2;var s=r+7>>3;var h=r+127>>7;var l=s+t+f+h;var o=l+127>>7;var u=o<=109?0:Math.ceil((o-109)/127);while(l+o+u+127>>7>o)u=++o<=109?0:Math.ceil((o-109)/127);var c=[1,u,o,h,f,t,r,0];e.FileIndex[0].size=r<<6;c[7]=(e.FileIndex[0].start=c[0]+c[1]+c[2]+c[3]+c[4]+c[5])+(c[6]+7>>3);return c}(e);var i=new_buf(t[7]<<9);var n=0,a=0;{for(n=0;n<8;++n)i.write_shift(1,y[n]);for(n=0;n<8;++n)i.write_shift(2,0);i.write_shift(2,62);i.write_shift(2,3);i.write_shift(2,65534);i.write_shift(2,9);i.write_shift(2,6);for(n=0;n<3;++n)i.write_shift(2,0);i.write_shift(4,0);i.write_shift(4,t[2]);i.write_shift(4,t[0]+t[1]+t[2]+t[3]-1);i.write_shift(4,0);i.write_shift(4,1<<12);i.write_shift(4,t[3]?t[0]+t[1]+t[2]-1:E);i.write_shift(4,t[3]);i.write_shift(-4,t[1]?t[0]-1:E);i.write_shift(4,t[1]);for(n=0;n<109;++n)i.write_shift(-4,n>9)}f(t[6]+7>>3);while(i.l&511)i.write_shift(-4,A.ENDOFCHAIN);a=n=0;for(s=0;s=4096)continue;l.start=a;f(h+63>>6)}while(i.l&511)i.write_shift(-4,A.ENDOFCHAIN);for(n=0;n=4096){i.l=l.start+1<<9;for(s=0;s0&&l.size<4096){for(s=0;s> 2); + + c2 = input.charCodeAt(i++); + e2 = ((c1 & 3) << 4) | (c2 >> 4); + + c3 = input.charCodeAt(i++); + e3 = ((c2 & 15) << 2) | (c3 >> 6); + e4 = (c3 & 63); + if (isNaN(c2)) { e3 = e4 = 64; } + else if (isNaN(c3)) { e4 = 64; } + o += map.charAt(e1) + map.charAt(e2) + map.charAt(e3) + map.charAt(e4); + } + return o; + }, + decode: function b64_decode(input) { + var o = ""; + var c1, c2, c3; + var e1, e2, e3, e4; + input = input.replace(/[^\w\+\/\=]/g, ""); + for(var i = 0; i < input.length;) { + e1 = map.indexOf(input.charAt(i++)); + e2 = map.indexOf(input.charAt(i++)); + c1 = (e1 << 2) | (e2 >> 4); + o += String.fromCharCode(c1); + + e3 = map.indexOf(input.charAt(i++)); + c2 = ((e2 & 15) << 4) | (e3 >> 2); + if (e3 !== 64) { o += String.fromCharCode(c2); } + + e4 = map.indexOf(input.charAt(i++)); + c3 = ((e3 & 3) << 6) | e4; + if (e4 !== 64) { o += String.fromCharCode(c3); } + } + return o; + } + }; +})(); +var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' && typeof process.versions !== 'undefined' && process.versions.node); + +function new_raw_buf(len) { + /* jshint -W056 */ + // $FlowIgnore + return new (has_buf ? Buffer : Array)(len); + /* jshint +W056 */ +} + +var s2a = function s2a(s) { + if(has_buf) return new Buffer(s, "binary"); + return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); +}; + +var chr0 = /\u0000/g, chr1 = /[\u0001-\u0006]/; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports = {}; -exports.version = '0.12.0'; +exports.version = '0.13.0'; +/* [MS-CFB] 2.6.4 */ +function namecmp(l, r) { + var L = l.split("/"), R = r.split("/"); + for(var i = 0, c = 0, Z = Math.min(L.length, R.length); i < Z; ++i) { + if((c = L[i].length - R[i].length)) return c; + if(L[i] != R[i]) return L[i] < R[i] ? -1 : 1; + } + return L.length - R.length; +} +function dirname(p) { + if(p.charAt(p.length - 1) == "/") return (p.slice(0,-1).indexOf("/") === -1) ? p : dirname(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(0, c+1); +} + +function filename(p) { + if(p.charAt(p.length - 1) == "/") return filename(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(c+1); +} function parse(file, options) { -var mver = 3; // major version -var ssz = 512; // sector size +var mver = 3; +var ssz = 512; var nmfs = 0; // number of mini FAT sectors -var ndfs = 0; // number of DIFAT sectors -var dir_start = 0; // first directory sector location -var minifat_start = 0; // first mini FAT sector location -var difat_start = 0; // first mini FAT sector location +var difat_sec_cnt = 0; +var dir_start = 0; +var minifat_start = 0; +var difat_start = 0; var fat_addrs = []; // locations of FAT sectors @@ -38,11 +121,10 @@ var header = file.slice(0,ssz); check_shifts(blob, mver); // Number of Directory Sectors -var nds = blob.read_shift(4, 'i'); -if(mver === 3 && nds !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + nds); +var dir_cnt = blob.read_shift(4, 'i'); +if(mver === 3 && dir_cnt !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + dir_cnt); // Number of FAT Sectors -//var nfs = blob.read_shift(4, 'i'); blob.l += 4; // First Directory Sector Location @@ -64,7 +146,7 @@ nmfs = blob.read_shift(4, 'i'); difat_start = blob.read_shift(4, 'i'); // Number of DIFAT Sectors -ndfs = blob.read_shift(4, 'i'); +difat_sec_cnt = blob.read_shift(4, 'i'); // Grab FAT Sector Locations for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ @@ -76,7 +158,7 @@ for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ /** Break the file up into sectors */ var sectors = sectorify(file, ssz); -sleuth_fat(difat_start, ndfs, sectors, ssz, fat_addrs); +sleuth_fat(difat_start, difat_sec_cnt, sectors, ssz, fat_addrs); /** Chains */ var sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz); @@ -92,19 +174,17 @@ var files = {}, Paths = [], FileIndex = [], FullPaths = [], FullPathDir = {}; read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex); build_full_paths(FileIndex, FullPathDir, FullPaths, Paths); +Paths.shift(); -var root_name = Paths.shift(); - -/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ -var find_path = make_find_path(FullPaths, Paths, FileIndex, files, root_name); - -return { - raw: {header: header, sectors: sectors}, +var o = { FileIndex: FileIndex, FullPaths: FullPaths, - FullPathDir: FullPathDir, - find: find_path + FullPathDir: FullPathDir }; + +// $FlowIgnore +if(options && options.raw) o.raw = {header: header, sectors: sectors}; +return o; } // parse /* [MS-CFB] 2.2 Compound File Header -- read up to major version */ @@ -177,7 +257,7 @@ function build_full_paths(FI, FPD, FP, Paths) { if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } @@ -191,25 +271,8 @@ function build_full_paths(FI, FPD, FP, Paths) { } } -/* [MS-CFB] 2.6.4 */ -function make_find_path(FullPaths, Paths, FileIndex, files, root_name) { - var UCFullPaths = []; - var UCPaths = [], i = 0; - for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - return function find_path(path) { - var k = false; - if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; } - else k = path.indexOf("/") !== -1; - var UCPath = path.toUpperCase().replace(chr0,'').replace(chr1,'!'); - var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); - if(w === -1) return null; - return k === true ? FileIndex[w] : files[Paths[w]]; - }; -} - /** Chase down the rest of the DIFAT chain to build a comprehensive list - DIFAT chains by storing the next sector number as the last 32 bytes */ + DIFAT chains by storing the next sector number as the last 32 bits */ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) { var q = ENDOFCHAIN; if(idx === ENDOFCHAIN) { @@ -227,7 +290,6 @@ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) { /** Follow the linked list of sectors for a given starting point */ function get_sector_list(sectors, start, fat_addrs, ssz, chkd) { - var sl = sectors.length; var buf = [], buf_chain = []; if(!chkd) chkd = []; var modulus = ssz - 1, j = 0, jj = 0; @@ -278,7 +340,6 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil var blob = sector.slice(i, i+128); prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o = ({ @@ -299,6 +360,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ -311,7 +373,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)); prep_blob(o.content, 0); } @@ -326,20 +388,255 @@ function read_date(blob, offset) { } var fs; -function readFileSync(filename, options) { +function read_file(filename, options) { if(fs == null) fs = require('fs'); return parse(fs.readFileSync(filename), options); } -function readSync(blob, options) { +function read(blob, options) { switch(options && options.type || "base64") { - case "file": return readFileSync(blob, options); + case "file": return read_file(blob, options); case "base64": return parse(s2a(Base64.decode(blob)), options); case "binary": return parse(s2a(blob), options); } return parse(blob, options); } +function init_cfb(cfb, opts) { + var o = opts || {}, root = o.root || "Root Entry"; + if(!cfb.FullPaths) cfb.FullPaths = []; + if(!cfb.FileIndex) cfb.FileIndex = []; + if(cfb.FullPaths.length !== cfb.FileIndex.length) throw new Error("inconsistent CFB structure"); + if(cfb.FullPaths.length === 0) { + cfb.FullPaths[0] = root + "/"; + cfb.FileIndex[0] = ({ name: root, type: 5 }); + } + if(o.CLSID) cfb.FileIndex[0].clsid = o.CLSID; + seed_cfb(cfb); +} +function seed_cfb(cfb) { + var nm = "\u0001Sh33tJ5"; + if(CFB.find(cfb, "/" + nm)) return; + var p = new_buf(4); p[0] = 55; p[1] = p[3] = 50; p[2] = 54; + cfb.FileIndex.push(({ name: nm, type: 2, content:p, size:4, L:69, R:69, C:69 })); + cfb.FullPaths.push(cfb.FullPaths[0] + nm); + rebuild_cfb(cfb); +} +function rebuild_cfb(cfb, f) { + init_cfb(cfb); + var gc = false, s = false; + for(var i = cfb.FullPaths.length - 1; i >= 0; --i) { + var _file = cfb.FileIndex[i]; + switch(_file.type) { + case 0: + if(s) gc = true; + else { cfb.FileIndex.pop(); cfb.FullPaths.pop(); } + break; + case 1: case 2: case 5: + s = true; + if(isNaN(_file.R * _file.L * _file.C)) gc = true; + if(_file.R > -1 && _file.L > -1 && _file.R == _file.L) gc = true; + break; + default: gc = true; break; + } + } + if(!gc && !f) return; + + var now = new Date(), j = 0; + var data = []; + for(i = 0; i < cfb.FullPaths.length; ++i) { + if(cfb.FileIndex[i].type === 0) continue; + data.push([cfb.FullPaths[i], cfb.FileIndex[i]]); + } + for(i = 0; i < data.length; ++i) { + var dad = dirname(data[i][0]); + s = false; + for(j = 0; j < data.length; ++j) if(data[j][0] === dad) s = true; + if(!s) data.push([dad, ({ + name: filename(dad).replace("/",""), + type: 1, + clsid: HEADER_CLSID, + ct: now, mt: now, + content: null + })]); + } + + data.sort(function(x,y) { return namecmp(x[0], y[0]); }); + cfb.FullPaths = []; cfb.FileIndex = []; + for(i = 0; i < data.length; ++i) { cfb.FullPaths[i] = data[i][0]; cfb.FileIndex[i] = data[i][1]; } + for(i = 0; i < data.length; ++i) { + var elt = cfb.FileIndex[i]; + var nm = cfb.FullPaths[i]; + + elt.name = filename(nm).replace("/",""); + elt.L = elt.R = elt.C = -(elt.color = 1); + elt.size = elt.content ? elt.content.length : 0; + elt.start = 0; + elt.clsid = (elt.clsid || HEADER_CLSID); + if(i === 0) { + elt.C = data.length > 1 ? 1 : -1; + elt.size = 0; + elt.type = 5; + } else if(nm.slice(-1) == "/") { + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==nm) break; + elt.C = j >= data.length ? -1 : j; + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==dirname(nm)) break; + elt.R = j >= data.length ? -1 : j; + elt.type = 1; + } else { + if(dirname(cfb.FullPaths[i+1]||"") == dirname(nm)) elt.R = i + 1; + elt.type = 2; + } + } + +} + +function _write(cfb, options) { + rebuild_cfb(cfb); + var L = (function(cfb){ + var mini_size = 0, fat_size = 0; + for(var i = 0; i < cfb.FileIndex.length; ++i) { + var file = cfb.FileIndex[i]; + if(!file.content) continue; +var flen = file.content.length; + if(flen === 0){} + else if(flen < 0x1000) mini_size += (flen + 0x3F) >> 6; + else fat_size += (flen + 0x01FF) >> 9; + } + var dir_cnt = (cfb.FullPaths.length +3) >> 2; + var mini_cnt = (mini_size + 7) >> 3; + var mfat_cnt = (mini_size + 0x7F) >> 7; + var fat_base = mini_cnt + fat_size + dir_cnt + mfat_cnt; + var fat_cnt = (fat_base + 0x7F) >> 7; + var difat_cnt = fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + while(((fat_base + fat_cnt + difat_cnt + 0x7F) >> 7) > fat_cnt) difat_cnt = ++fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + var L = [1, difat_cnt, fat_cnt, mfat_cnt, dir_cnt, fat_size, mini_size, 0]; + cfb.FileIndex[0].size = mini_size << 6; + L[7] = (cfb.FileIndex[0].start=L[0]+L[1]+L[2]+L[3]+L[4]+L[5])+((L[6]+7) >> 3); + return L; + })(cfb); + var o = new_buf(L[7] << 9); + var i = 0, T = 0; + { + for(i = 0; i < 8; ++i) o.write_shift(1, HEADER_SIG[i]); + for(i = 0; i < 8; ++i) o.write_shift(2, 0); + o.write_shift(2, 0x003E); + o.write_shift(2, 0x0003); + o.write_shift(2, 0xFFFE); + o.write_shift(2, 0x0009); + o.write_shift(2, 0x0006); + for(i = 0; i < 3; ++i) o.write_shift(2, 0); + o.write_shift(4, 0); + o.write_shift(4, L[2]); + o.write_shift(4, L[0] + L[1] + L[2] + L[3] - 1); + o.write_shift(4, 0); + o.write_shift(4, 1<<12); + o.write_shift(4, L[3] ? L[0] + L[1] + L[2] - 1: ENDOFCHAIN); + o.write_shift(4, L[3]); + o.write_shift(-4, L[1] ? L[0] - 1: ENDOFCHAIN); + o.write_shift(4, L[1]); + for(i = 0; i < 109; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + } + if(L[1]) { + for(T = 0; T < L[1]; ++T) { + for(; i < 236 + T * 127; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + o.write_shift(-4, T === L[1] - 1 ? ENDOFCHAIN : T + 1); + } + } + var chainit = function(w) { + for(T += w; i> 9); + } + chainit((L[6] + 7) >> 3); + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + T = i = 0; + for(j = 0; j < cfb.FileIndex.length; ++j) { + file = cfb.FileIndex[j]; + if(!file.content) continue; +flen = file.content.length; + if(!flen || flen >= 0x1000) continue; + file.start = T; + chainit((flen + 0x3F) >> 6); + } + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + for(i = 0; i < L[4]<<2; ++i) { + var nm = cfb.FullPaths[i]; + if(!nm || nm.length === 0) { + for(j = 0; j < 17; ++j) o.write_shift(4, 0); + for(j = 0; j < 3; ++j) o.write_shift(4, -1); + for(j = 0; j < 12; ++j) o.write_shift(4, 0); + continue; + } + file = cfb.FileIndex[i]; + if(i === 0) file.start = file.size ? file.start - 1 : ENDOFCHAIN; + flen = 2*(file.name.length+1); + o.write_shift(64, file.name, "utf16le"); + o.write_shift(2, flen); + o.write_shift(1, file.type); + o.write_shift(1, file.color); + o.write_shift(-4, file.L); + o.write_shift(-4, file.R); + o.write_shift(-4, file.C); + if(!file.clsid) for(j = 0; j < 4; ++j) o.write_shift(4, 0); + else o.write_shift(16, file.clsid, "hex"); + o.write_shift(4, file.state || 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, file.start); + o.write_shift(4, file.size); o.write_shift(4, 0); + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; +if(file.size >= 0x1000) { + o.l = (file.start+1) << 9; + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x1FF; ++j) o.write_shift(1, 0); + } + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; +if(file.size > 0 && file.size < 0x1000) { + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x3F; ++j) o.write_shift(1, 0); + } + } + + return o; +} +/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ +function find(cfb, path) { + //return cfb.find(path); + var UCFullPaths = cfb.FullPaths.map(function(x) { return x.toUpperCase(); }); + var UCPaths = UCFullPaths.map(function(x) { var y = x.split("/"); return y[y.length - (x.slice(-1) == "/" ? 2 : 1)]; }); + var k = false; + if(path.charCodeAt(0) === 47 /* "/" */) { k = true; path = UCFullPaths[0].slice(0, -1) + path; } + else k = path.indexOf("/") !== -1; + var UCPath = path.toUpperCase(); + var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); + if(w !== -1) return cfb.FileIndex[w]; + + UCPath = UCPath.replace(chr0,'').replace(chr1,'!'); + for(w = 0; w < UCFullPaths.length; ++w) { + if(UCFullPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + if(UCPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + } + return null; +} /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ //var MSCSZ = 4096; /* Mini Stream Cutoff Size */ @@ -347,6 +644,7 @@ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ var ENDOFCHAIN = -2; /* 2.2 Compound File Header */ var HEADER_SIGNATURE = 'd0cf11e0a1b11ae1'; +var HEADER_SIG = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; var HEADER_CLSID = '00000000000000000000000000000000'; var consts = { /* 2.1 Compund File Sector Numbers and Types */ @@ -365,9 +663,84 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; -exports.read = readSync; +function write_file(cfb, filename, options) { + var o = _write(cfb, options); +fs.writeFileSync(filename, o); +} + +function a2s(o) { + var out = new Array(o.length); + for(var i = 0; i < o.length; ++i) out[i] = String.fromCharCode(o[i]); + return out.join(""); +} + +function write(cfb, options) { + var o = _write(cfb, options); + switch(options && options.type) { + case "file": fs.writeFileSync(options.filename, (o)); return o; + case "binary": return a2s(o); + case "base64": return Base64.encode(a2s(o)); + } + return o; +} +function cfb_new(opts) { + var o = ({}); + init_cfb(o, opts); + return o; +} + +function cfb_add(cfb, name, content, opts) { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(!file) { + file = ({name: filename(name)}); + cfb.FileIndex.push(file); + cfb.FullPaths.push(name); + CFB.utils.cfb_gc(cfb); + } +file.content = (content); + file.size = content ? content.length : 0; + if(opts) { + if(opts.CLSID) file.clsid = opts.CLSID; + } + return file; +} + +function cfb_del(cfb, name) { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex.splice(j, 1); + cfb.FullPaths.splice(j, 1); + return true; + } + return false; +} + +function cfb_mov(cfb, old_name, new_name) { + init_cfb(cfb); + var file = CFB.find(cfb, old_name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex[j].name = filename(new_name); + cfb.FullPaths[j] = new_name; + return true; + } + return false; +} + +function cfb_gc(cfb) { rebuild_cfb(cfb, true); } + +exports.find = find; +exports.read = read; exports.parse = parse; +exports.write = write; +exports.writeFile = write_file; exports.utils = { + cfb_new: cfb_new, + cfb_add: cfb_add, + cfb_del: cfb_del, + cfb_mov: cfb_mov, + cfb_gc: cfb_gc, ReadShift: ReadShift, CheckField: CheckField, prep_blob: prep_blob, diff --git a/index.html b/index.html index 36f7f57..b314bfe 100644 --- a/index.html +++ b/index.html @@ -54,27 +54,26 @@ function fixdata(data) { return o; } +var sprintf = PRINTJ.sprintf; +function fix_string(x/*:string*/)/*:string*/ { return x.replace(/[\u0000-\u001f]/, function($$) { return sprintf("\\u%04X", $$.charCodeAt(0)); }); } function get_manifest(cfb) { var out = []; - var sprintf = PRINTJ.sprintf; - var format_date = function(date/*:Date*/)/*:string*/ { return sprintf("%02u-%02u-%02u %02u:%02u", date.getUTCMonth()+1, date.getUTCDate(), date.getUTCFullYear()%100, date.getUTCHours(), date.getUTCMinutes()); }; var basetime = new Date(1980,0,1); - var cnt = 0; - var rootsize = 0, filesize = 0; + var cnt = 0, rootsize = 0, filesize = 0; out.push(" Length Date Time Name"); out.push(" -------- ---- ---- ----"); - cfb.FileIndex.forEach(function(file, i) { + cfb.FileIndex.forEach(function(file, i/*:number*/) { switch(file.type) { case 5: basetime = file.ct || file.mt || basetime; rootsize = file.size; break; case 2: - out.push(sprintf("%9lu %s %s", file.size, format_date(basetime), cfb.FullPaths[i])); + out.push(sprintf("%9lu %s %s", file.size, format_date(basetime), fix_string(cfb.FullPaths[i]))); filesize += file.size; ++cnt; } @@ -82,12 +81,12 @@ function get_manifest(cfb) { out.push(" -------- -------"); out.push(sprintf("%9lu %lu file%s", rootsize || filesize, cnt, (cnt !== 1 ? "s" : ""))); - return out.join("\n"); + return out.join("\n"); } function process_data(cfb) { var output = get_manifest(cfb); - if(out.innerText === undefined) out.textContent = output + if(out.innerText === undefined) out.textContent = output; else out.innerText = output; } @@ -100,16 +99,9 @@ function handleDrop(e) { var f = files[0]; { var reader = new FileReader(); - //var name = f.name; reader.onload = function(e) { var data = e.target.result; - var cfb; - if(rABS) { - cfb = CFB.read(data, {type: 'binary'}); - } else { - var arr = fixdata(data); - cfb = CFB.read(btoa(arr), {type: 'base64'}); - } + var cfb = CFB.read(rABS ? data : btoa(fixdata(data)), {type: rABS ? 'binary' : 'base64'}); process_data(cfb); }; if(rABS) reader.readAsBinaryString(f); diff --git a/misc/flow.js b/misc/flow.js index 07ab6c9..35f3b0f 100644 --- a/misc/flow.js +++ b/misc/flow.js @@ -4,6 +4,8 @@ type CFBModule = { version:string; find:(cfb:CFBContainer, path:string)=>?CFBEntry; read:(blob:RawBytes|string, opts:CFBReadOpts)=>CFBContainer; + write:(cfb:CFBContainer, opts:CFBWriteOpts)=>RawBytes|string; + writeFile:(cfb:CFBContainer, filename:string, opts:CFBWriteOpts)=>void; parse:(file:RawBytes, opts:CFBReadOpts)=>CFBContainer; utils:CFBUtils; }; @@ -21,22 +23,31 @@ type ReadShiftFunc = { }; type CheckFieldFunc = {(hexstr:string, fld:string):void;}; +type WriteShiftFunc = { + //(size:number, val:string|number, f:?string):any; + (size:1|2|4|-4, val:number):any; + (size:number, val:string, f:"hex"|"utf16le"):any; +} + type RawBytes = Array | Buffer | Uint8Array; class CFBlobArray extends Array { l:number; + write_shift:WriteShiftFunc; read_shift:ReadShiftFunc; chk:CheckFieldFunc; }; interface CFBlobBuffer extends Buffer { l:number; - slice:(start:number, end:?number)=>Buffer; + slice:(start?:number, end:?number)=>Buffer; + write_shift:WriteShiftFunc; read_shift:ReadShiftFunc; chk:CheckFieldFunc; }; interface CFBlobUint8 extends Uint8Array { l:number; - slice:(start:number, end:?number)=>Uint8Array; + slice:(start?:number, end:?number)=>Uint8Array; + write_shift:WriteShiftFunc; read_shift:ReadShiftFunc; chk:CheckFieldFunc; }; @@ -45,13 +56,16 @@ interface CFBlobber { [n:number]:number; l:number; length:number; - slice:(start:number, end:?number)=>RawBytes; + slice:(start:?number, end:?number)=>RawBytes; + write_shift:WriteShiftFunc; read_shift:ReadShiftFunc; chk:CheckFieldFunc; }; type CFBlob = CFBlobArray | CFBlobBuffer | CFBlobUint8; +type CFBWriteOpts = any; + interface CFBReadOpts { type:?string; }; @@ -61,14 +75,13 @@ type CFBFileIndex = Array; type CFBFindPath = (n:string)=>?CFBEntry; type CFBContainer = { - raw:{ + raw?:{ header:any; sectors:Array; }; FileIndex:CFBFileIndex; - FullPathDir:CFBDirectory; + FullPathDir:CFBFullPathDir; FullPaths:Array; - find:CFBFindPath; } type CFBEntry = { diff --git a/misc/suppress_export.js b/misc/suppress_export.js index c7f0105..2d157da 100644 --- a/misc/suppress_export.js +++ b/misc/suppress_export.js @@ -12,4 +12,5 @@ declare var bconcat:any; declare var s2a:any; declare var chr0:any; declare var chr1:any; +declare var new_buf:any; */ diff --git a/package.json b/package.json index da6a56d..ed81b6a 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "cfb", - "version": "0.12.1", + "version": "0.13.0", "author": "sheetjs", "description": "Compound File Binary File Format extractor", "keywords": [ "cfb", "compression", "office" ], @@ -19,6 +19,7 @@ "commander":"~2.11.0" }, "devDependencies": { + "crc-32":"~1.1.1", "mocha":"~2.5.3", "blanket": "~1.2.3", "@sheetjs/uglify-js":"~2.7.3", diff --git a/test.js b/test.js index 0af3c69..76cb137 100644 --- a/test.js +++ b/test.js @@ -2,6 +2,7 @@ var CFB; var fs = require('fs'); describe('source', function() { it('should load', function() { CFB = require('./'); }); }); +if(typeof CRC32 === 'undefined') CRC32 = require('crc-32'); var ex = [".xls",".doc",".ppt"]; if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;}); @@ -16,40 +17,82 @@ var f2013 = fs.readdirSync('test_files/2013').filter(ffunc); var fpres = fs.readdirSync('test_files_pres').filter(ffunc); var dir = "./test_files/"; +var TYPE = "buffer"; + +var names = [ + ["!DocumentSummaryInformation", "\u0005"], + ["!SummaryInformation", "\u0005"], + ["!CompObj", "\u0001"], + ["!DataSpaces", "\u0006"], + ["!DRMContent", "\u0009"], + ["!DRMViewerContent", "\u0009"], + ["!Ole", "\u0001"] +].map(function(x) { return [x[0], x[0].replace("!", x[1])]; }); function parsetest(x, cfb) { describe(x + ' should have basic parts', function() { - /* cfb.find interface */ - it('should find relative path using cfb#find', function() { - switch(x.substr(-4)) { - case '.xls': if(!cfb.find('Workbook') && !cfb.find('Book')) throw new Error("Cannot find workbook for " + x); break; - case '.ppt': if(!cfb.find('PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break; - case '.doc': if(!cfb.find('WordDocument') && !cfb.find('Word Document')) throw new Error("Cannot find doc for " + x); break; - } - }); - it('should find absolute path using cfb#find', function() { - switch(x.substr(-4)) { - case '.xls': if(!cfb.find('/Workbook') && !cfb.find('/Book')) throw new Error("Cannot find workbook for " + x); break; - case '.ppt': if(!cfb.find('/PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break; - case '.doc': if(!cfb.find('/WordDocument') && !cfb.find('/Word Document')) throw new Error("Cannot find doc for " + x); break; - } - }); - - /* CFB.find function */ - it('should find relative path using CFB.find', function() { + it('should find relative path', function() { switch(x.substr(-4)) { case '.xls': if(!CFB.find(cfb, 'Workbook') && !CFB.find(cfb, 'Book')) throw new Error("Cannot find workbook for " + x); break; case '.ppt': if(!CFB.find(cfb, 'PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break; case '.doc': if(!CFB.find(cfb, 'WordDocument') && !CFB.find(cfb, 'Word Document')) throw new Error("Cannot find doc for " + x); break; } }); - it('should find absolute path using CFB.find', function() { + it('should find absolute path', function() { switch(x.substr(-4)) { case '.xls': if(!CFB.find(cfb, '/Workbook') && !CFB.find(cfb, '/Book')) throw new Error("Cannot find workbook for " + x); break; case '.ppt': if(!CFB.find(cfb, '/PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break; case '.doc': if(!CFB.find(cfb, '/WordDocument') && !CFB.find(cfb, '/Word Document')) throw new Error("Cannot find doc for " + x); break; } }); + it('should handle "!" aliases', function() { + names.forEach(function(n) { if(CFB.find(cfb,n[0]) != CFB.find(cfb,n[1])) throw new Error("Bad name: " + n.join(" != ")); }); + }); + }); + describe(x + ' should roundtrip', function() { + var data, newcfb; + it('should roundtrip safely', function() { + data = CFB.write(cfb, {type:TYPE}); + newcfb = CFB.read(data, {type:TYPE}); + }); + it('should preserve content', function() { + var _old, _new; + switch(x.substr(-4)) { + case '.xls': + _old = CFB.find(cfb, '/Workbook') || CFB.find(cfb, '/Book'); + _new = CFB.find(newcfb, '/Workbook') || CFB.find(newcfb, '/Book'); + break; + case '.ppt': + _old = CFB.find(cfb, '/PowerPoint Document'); + _new = CFB.find(newcfb, '/PowerPoint Document'); + break; + case '.doc': + _old = CFB.find(cfb, '/WordDocument') || CFB.find(cfb, '/Word Document'); + _new = CFB.find(newcfb, '/WordDocument') || CFB.find(newcfb, '/Word Document'); + break; + } + if(CRC32.buf(_old.content) != CRC32.buf(_new.content)) throw new Error(x + " failed roundtrip test"); + }); + it('should be idempotent', function() { + var dat2 = CFB.write(newcfb, {type:TYPE}); + var newcfb2 = CFB.read(dat2, {type:TYPE}); + var _old, _new; + switch(x.substr(-4)) { + case '.xls': + _old = CFB.find(newcfb2, '/Workbook') || CFB.find(newcfb2, '/Book'); + _new = CFB.find(newcfb, '/Workbook') || CFB.find(newcfb, '/Book'); + break; + case '.ppt': + _old = CFB.find(newcfb2, '/PowerPoint Document'); + _new = CFB.find(newcfb, '/PowerPoint Document'); + break; + case '.doc': + _old = CFB.find(newcfb2, '/WordDocument') || CFB.find(newcfb2, '/Word Document'); + _new = CFB.find(newcfb, '/WordDocument') || CFB.find(newcfb, '/Word Document'); + break; + } + if(CRC32.buf(_old.content) != CRC32.buf(_new.content)) throw new Error(x + " failed idempotent test"); + }); }); } diff --git a/types/bin_cfb.ts b/types/bin_cfb.ts index b721729..4c6bd2f 100755 --- a/types/bin_cfb.ts +++ b/types/bin_cfb.ts @@ -1,16 +1,17 @@ /* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ /* eslint-env node */ /* vim: set ts=2 ft=javascript: */ -import * as CFB from 'cfb'; +import * as X from 'cfb'; import fs = require('fs'); import program = require('commander'); import PRINTJ = require("printj"); program - .version(CFB.version) + .version(X.version) .usage('[options] ') .option('-q, --quiet', 'process but do not report') .option('-l, --list-files', 'list files') .option('-d, --dump', 'dump internal representation but do not extract') + .option('-r, --repair', 'attempt to repair and garbage-collect archive') .option('--dev', 'development mode') .option('--read', 'read but do not print out contents'); @@ -21,10 +22,10 @@ if(program.args.length === 0 || !fs.existsSync(program.args[0])) { process.exit(1); } -const opts: CFB.CFBParsingOptions = {type:'file'}; +const opts: X.CFBParsingOptions = {type:'file'}; if(program.dev) opts.WTF = true; -const cfb: CFB.CFBContainer = CFB.read(program.args[0], opts); +const cfb: X.CFBContainer = X.read(program.args[0], opts); if(program.quiet) process.exit(0); if(program.dump) { @@ -34,26 +35,30 @@ if(program.dump) { console.log(cfb.FullPathDir); process.exit(0); } -if(program.listFiles) { - const sprintf = PRINTJ.sprintf; +if(program.repair) { + X.writeFile(cfb, program.args[0]); + process.exit(0); +} +const sprintf = PRINTJ.sprintf; +function fix_string(x: string): string { return x.replace(/[\u0000-\u001f]/, function($$) { return sprintf("\\u%04X", $$.charCodeAt(0)); }); } +if(program.listFiles) { const format_date = function(date: Date): string { return sprintf("%02u-%02u-%02u %02u:%02u", date.getUTCMonth()+1, date.getUTCDate(), date.getUTCFullYear()%100, date.getUTCHours(), date.getUTCMinutes()); }; let basetime = new Date(1980,0,1); - let cnt = 0; - let rootsize = 0, filesize = 0; + let cnt = 0, rootsize = 0, filesize = 0; console.log(" Length Date Time Name"); console.log(" -------- ---- ---- ----"); - cfb.FileIndex.forEach(function(file: CFB.CFBEntry, i: number) { + cfb.FileIndex.forEach(function(file: X.CFBEntry, i: number) { switch(file.type) { case 5: basetime = file.ct || file.mt || basetime; rootsize = file.size; break; case 2: - console.log(sprintf("%9lu %s %s", file.size, format_date(basetime), cfb.FullPaths[i])); + console.log(sprintf("%9lu %s %s", file.size, format_date(basetime), fix_string(cfb.FullPaths[i]))); filesize += file.size; ++cnt; } @@ -65,10 +70,10 @@ if(program.listFiles) { } for(let i=0; i!==cfb.FullPaths.length; ++i) { if(cfb.FullPaths[i].slice(-1) === "/") { - console.error("mkdir " + cfb.FullPaths[i]); + console.error("mkdir " + fix_string(cfb.FullPaths[i])); fs.mkdirSync(cfb.FullPaths[i]); } else { - console.error("writing " + cfb.FullPaths[i]); + console.error("write " + fix_string(cfb.FullPaths[i])); fs.writeFileSync(cfb.FullPaths[i], cfb.FileIndex[i].content); } } diff --git a/types/index.d.ts b/types/index.d.ts index 9aa17f2..6cfb4a0 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -13,6 +13,12 @@ export function read(f: CFB$Blob | string, options?: CFBParsingOptions): CFBCont /** Find a file entry given a path or file name */ export function find(cfb: CFBContainer, path: string): CFBEntry | null; +/** Generate a container file */ +export function write(cfb: CFBContainer, options?: any): any; + +/** Write a container file to the filesystem */ +export function writeFile(cfb: CFBContainer, filename: string, options?: any): any; + /** Utility functions */ export const utils: CFB$Utils; @@ -23,6 +29,8 @@ export interface CFBParsingOptions { type?: 'base64' | 'binary' | 'buffer' | 'file' | 'array'; /** If true, throw errors when features are not understood */ WTF?: boolean; + /** If true, include raw data in output */ + raw?: boolean; } export type CFB$Blob = Buffer | number[] | Uint8Array; @@ -76,9 +84,6 @@ export interface CFBDirectory { /* File object */ export interface CFBContainer { - /* search by path or file name */ - find(name: string): CFBEntry; - /* list of streams and storages */ FullPaths: string[]; @@ -89,7 +94,7 @@ export interface CFBContainer { FileIndex: CFBEntry[]; /* Raw Content, in chunks (Buffer when available, Array of bytes otherwise) */ - raw: { + raw?: { header: CFB$Blob, sectors: CFB$Blob[]; }; @@ -97,6 +102,11 @@ export interface CFBContainer { /** General utilities */ export interface CFB$Utils { + cfb_new(opts?: any): CFBContainer; + cfb_add(cfb: CFBContainer, name: string, content: any, opts?: any): CFBEntry; + cfb_del(cfb: CFBContainer, name: string): boolean; + cfb_mov(cfb: CFBContainer, old_name: string, new_name: string): boolean; + cfb_gc(cfb: CFBContainer): void; ReadShift(size: number, t?: string): number|string; WarnField(hexstr: string, fld?: string): void; CheckField(hexstr: string, fld?: string): void; diff --git a/types/roundtrip.ts b/types/roundtrip.ts new file mode 100644 index 0000000..a9dfc84 --- /dev/null +++ b/types/roundtrip.ts @@ -0,0 +1,67 @@ +/* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ +import CFB = require("cfb"); +import { buf } from 'crc-32'; +import { sprintf } from 'printj'; +import { writeFileSync } from 'fs'; + +const F = process.argv[2] || "test_files/BlankSheetTypes.xls"; + +const dumpit = process.env.WTF ? console.log : (msg: string) => {}; + +/* read file */ +const cfb1 = CFB.read(F, {type:"file"}); +dumpit(cfb1.FullPaths); + +/* write to t1.xls */ +const out1 = CFB.write(cfb1, {}); +writeFileSync("t1.xls", out1); + +/* read from memory */ +const cfb2 = CFB.read(out1, {type:"buffer"}); +dumpit(cfb2.FullPaths); + +/* compare subfile contents */ +[ + '/Workbook', + '/!SummaryInformation', + '/!DocumentSummaryInformation', + '/MBD01519A90/\u0001Ole', + '/MBD01519A90/\u0001CompObj', + '/MBD01519A90/\u0001Ole10Native', + '/MBD01519A90/\u0001Ole10ItemName', + '/_VBA_PROJECT_CUR/VBA/ThisWorkbook' +].forEach(path => { + if(!CFB.find(cfb1, path)) return; + const d1 = CFB.find(cfb1, path).content, c1 = buf(d1); + const d2 = CFB.find(cfb2, path).content, c2 = buf(d2); + if(c1 === c2) return; + console.log(path); console.log(d1); console.log(d2); + throw sprintf("%s mismatch: %08X != %08X", path, c1, c2); +}); + +const out2 = CFB.write(cfb2, {}); +const cc1 = buf(out1), cc2 = buf(out2); +if(cc1 !== cc2) throw sprintf("idempotent fail: %08X != %08X", cc1, cc2); +dumpit(sprintf("%08X %08X", cc1, cc2)); + +/* roundtrip through buffer, binary, and base64 types */ +const cfb_1 = CFB.read(out2, {type:"buffer"}); +const out_1 = CFB.write(cfb_1, {type:"binary"}); +dumpit(out_1.substr(0,100)); +const cfb_2 = CFB.read(out_1, {type:"binary"}); +const out_2 = CFB.write(cfb_1, {type:"base64"}); +dumpit(out_2.substr(0,100)); +const cfb_3 = CFB.read(out_2, {type:"base64"}); + +/* save to file and re-read */ +CFB.writeFile(cfb_3, "t2.xls"); +const old_cfb = CFB.read("t2.xls", {type:"file"}); + +/* manually build a new file with the old-style "R" root entry */ +const new_cfb = CFB.utils.cfb_new({root:"R", clsid: old_cfb.FileIndex[0].clsid }); +old_cfb.FullPaths.forEach((p, i) => { + if(p.slice(-1) === "/") return; + CFB.utils.cfb_add(new_cfb, p.replace(/^[^/]*/,"R"), old_cfb.FileIndex[i].content); +}); +dumpit(new_cfb); +CFB.writeFile(new_cfb, "t3.xls"); diff --git a/xlscfb.flow.js b/xlscfb.flow.js index 6eb3fe1..24aab39 100644 --- a/xlscfb.flow.js +++ b/xlscfb.flow.js @@ -12,11 +12,75 @@ declare var bconcat:any; declare var s2a:any; declare var chr0:any; declare var chr1:any; +declare var new_buf:any; */ /* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ /*jshint eqnull:true */ +/*exported CFB */ +/*global module, require:false, process:false, Buffer:false, Uint8Array:false */ +var Base64 = (function make_b64(){ + var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; + return { + encode: function(input/*:string*/)/*:string*/ { + var o = ""; + var c1/*:number*/, c2/*:number*/, c3/*:number*/; + var e1/*:number*/, e2/*:number*/, e3/*:number*/, e4/*:number*/; + for(var i = 0; i < input.length; ) { + c1 = input.charCodeAt(i++); + e1 = (c1 >> 2); + + c2 = input.charCodeAt(i++); + e2 = ((c1 & 3) << 4) | (c2 >> 4); + + c3 = input.charCodeAt(i++); + e3 = ((c2 & 15) << 2) | (c3 >> 6); + e4 = (c3 & 63); + if (isNaN(c2)) { e3 = e4 = 64; } + else if (isNaN(c3)) { e4 = 64; } + o += map.charAt(e1) + map.charAt(e2) + map.charAt(e3) + map.charAt(e4); + } + return o; + }, + decode: function b64_decode(input/*:string*/)/*:string*/ { + var o = ""; + var c1/*:number*/, c2/*:number*/, c3/*:number*/; + var e1/*:number*/, e2/*:number*/, e3/*:number*/, e4/*:number*/; + input = input.replace(/[^\w\+\/\=]/g, ""); + for(var i = 0; i < input.length;) { + e1 = map.indexOf(input.charAt(i++)); + e2 = map.indexOf(input.charAt(i++)); + c1 = (e1 << 2) | (e2 >> 4); + o += String.fromCharCode(c1); + + e3 = map.indexOf(input.charAt(i++)); + c2 = ((e2 & 15) << 4) | (e3 >> 2); + if (e3 !== 64) { o += String.fromCharCode(c2); } + + e4 = map.indexOf(input.charAt(i++)); + c3 = ((e3 & 3) << 6) | e4; + if (e4 !== 64) { o += String.fromCharCode(c3); } + } + return o; + } + }; +})(); +var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' && typeof process.versions !== 'undefined' && process.versions.node); + +function new_raw_buf(len/*:number*/) { + /* jshint -W056 */ + // $FlowIgnore + return new (has_buf ? Buffer : Array)(len); + /* jshint +W056 */ +} + +var s2a = function s2a(s/*:string*/) { + if(has_buf) return new Buffer(s, "binary"); + return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); +}; + +var chr0 = /\u0000/g, chr1 = /[\u0001-\u0006]/; /*:: declare var DO_NOT_EXPORT_CFB:?boolean; type SectorEntry = { @@ -35,15 +99,35 @@ type CFBFiles = {[n:string]:CFBEntry}; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/; -exports.version = '0.12.1'; +exports.version = '0.13.0'; +/* [MS-CFB] 2.6.4 */ +function namecmp(l/*:string*/, r/*:string*/)/*:number*/ { + var L = l.split("/"), R = r.split("/"); + for(var i = 0, c = 0, Z = Math.min(L.length, R.length); i < Z; ++i) { + if((c = L[i].length - R[i].length)) return c; + if(L[i] != R[i]) return L[i] < R[i] ? -1 : 1; + } + return L.length - R.length; +} +function dirname(p/*:string*/)/*:string*/ { + if(p.charAt(p.length - 1) == "/") return (p.slice(0,-1).indexOf("/") === -1) ? p : dirname(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(0, c+1); +} + +function filename(p/*:string*/)/*:string*/ { + if(p.charAt(p.length - 1) == "/") return filename(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(c+1); +} function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ { -var mver = 3; // major version -var ssz = 512; // sector size +var mver = 3; +var ssz = 512; var nmfs = 0; // number of mini FAT sectors -var ndfs = 0; // number of DIFAT sectors -var dir_start = 0; // first directory sector location -var minifat_start = 0; // first mini FAT sector location -var difat_start = 0; // first mini FAT sector location +var difat_sec_cnt = 0; +var dir_start = 0; +var minifat_start = 0; +var difat_start = 0; var fat_addrs/*:Array*/ = []; // locations of FAT sectors @@ -67,11 +151,10 @@ var header/*:RawBytes*/ = file.slice(0,ssz); check_shifts(blob, mver); // Number of Directory Sectors -var nds/*:number*/ = blob.read_shift(4, 'i'); -if(mver === 3 && nds !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + nds); +var dir_cnt/*:number*/ = blob.read_shift(4, 'i'); +if(mver === 3 && dir_cnt !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + dir_cnt); // Number of FAT Sectors -//var nfs = blob.read_shift(4, 'i'); blob.l += 4; // First Directory Sector Location @@ -93,7 +176,7 @@ nmfs = blob.read_shift(4, 'i'); difat_start = blob.read_shift(4, 'i'); // Number of DIFAT Sectors -ndfs = blob.read_shift(4, 'i'); +difat_sec_cnt = blob.read_shift(4, 'i'); // Grab FAT Sector Locations for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ @@ -105,7 +188,7 @@ for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ /** Break the file up into sectors */ var sectors/*:Array*/ = sectorify(file, ssz); -sleuth_fat(difat_start, ndfs, sectors, ssz, fat_addrs); +sleuth_fat(difat_start, difat_sec_cnt, sectors, ssz, fat_addrs); /** Chains */ var sector_list/*:SectorList*/ = make_sector_list(sectors, dir_start, fat_addrs, ssz); @@ -121,19 +204,17 @@ var files/*:CFBFiles*/ = {}, Paths/*:Array*/ = [], FileIndex/*:CFBFileIn read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex); build_full_paths(FileIndex, FullPathDir, FullPaths, Paths); +Paths.shift(); -var root_name/*:string*/ = Paths.shift(); - -/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ -var find_path = make_find_path(FullPaths, Paths, FileIndex, files, root_name); - -return { - raw: {header: header, sectors: sectors}, +var o = { FileIndex: FileIndex, FullPaths: FullPaths, - FullPathDir: FullPathDir, - find: find_path + FullPathDir: FullPathDir }; + +// $FlowIgnore +if(options && options.raw) o.raw = {header: header, sectors: sectors}; +return o; } // parse /* [MS-CFB] 2.2 Compound File Header -- read up to major version */ @@ -220,25 +301,8 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra } } -/* [MS-CFB] 2.6.4 */ -function make_find_path(FullPaths/*:Array*/, Paths/*:Array*/, FileIndex/*:CFBFileIndex*/, files/*:CFBFiles*/, root_name/*:string*/)/*:CFBFindPath*/ { - var UCFullPaths/*:Array*/ = []; - var UCPaths/*:Array*/ = [], i = 0; - for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - return function find_path(path/*:string*/)/*:?CFBEntry*/ { - var k/*:boolean*/ = false; - if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; } - else k = path.indexOf("/") !== -1; - var UCPath/*:string*/ = path.toUpperCase().replace(chr0,'').replace(chr1,'!'); - var w/*:number*/ = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); - if(w === -1) return null; - return k === true ? FileIndex[w] : files[Paths[w]]; - }; -} - /** Chase down the rest of the DIFAT chain to build a comprehensive list - DIFAT chains by storing the next sector number as the last 32 bytes */ + DIFAT chains by storing the next sector number as the last 32 bits */ function sleuth_fat(idx/*:number*/, cnt/*:number*/, sectors/*:Array*/, ssz/*:number*/, fat_addrs)/*:void*/ { var q/*:number*/ = ENDOFCHAIN; if(idx === ENDOFCHAIN) { @@ -256,7 +320,6 @@ function sleuth_fat(idx/*:number*/, cnt/*:number*/, sectors/*:Array*/, /** Follow the linked list of sectors for a given starting point */ function get_sector_list(sectors/*:Array*/, start/*:number*/, fat_addrs/*:Array*/, ssz/*:number*/, chkd/*:?Array*/)/*:SectorEntry*/ { - var sl = sectors.length; var buf/*:Array*/ = [], buf_chain/*:Array*/ = []; if(!chkd) chkd = []; var modulus = ssz - 1, j = 0, jj = 0; @@ -355,22 +418,259 @@ function read_date(blob/*:RawBytes|CFBlob*/, offset/*:number*/)/*:Date*/ { } var fs/*:: = require('fs'); */; -function readFileSync(filename/*:string*/, options/*:CFBReadOpts*/) { +function read_file(filename/*:string*/, options/*:CFBReadOpts*/) { if(fs == null) fs = require('fs'); return parse(fs.readFileSync(filename), options); } -function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { +function read(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { switch(options && options.type || "base64") { - case "file": /*:: if(typeof blob !== 'string') throw "Must pass a filename when type='file'"; */return readFileSync(blob, options); + case "file": /*:: if(typeof blob !== 'string') throw "Must pass a filename when type='file'"; */return read_file(blob, options); case "base64": /*:: if(typeof blob !== 'string') throw "Must pass a base64-encoded binary string when type='file'"; */return parse(s2a(Base64.decode(blob)), options); case "binary": /*:: if(typeof blob !== 'string') throw "Must pass a binary string when type='file'"; */return parse(s2a(blob), options); } return parse(/*::typeof blob == 'string' ? new Buffer(blob, 'utf-8') : */blob, options); } +function init_cfb(cfb/*:CFBContainer*/, opts/*:?any*/)/*:void*/ { + var o = opts || {}, root = o.root || "Root Entry"; + if(!cfb.FullPaths) cfb.FullPaths = []; + if(!cfb.FileIndex) cfb.FileIndex = []; + if(cfb.FullPaths.length !== cfb.FileIndex.length) throw new Error("inconsistent CFB structure"); + if(cfb.FullPaths.length === 0) { + cfb.FullPaths[0] = root + "/"; + cfb.FileIndex[0] = ({ name: root, type: 5 }/*:any*/); + } + if(o.CLSID) cfb.FileIndex[0].clsid = o.CLSID; + seed_cfb(cfb); +} +function seed_cfb(cfb/*:CFBContainer*/)/*:void*/ { + var nm = "\u0001Sh33tJ5"; + if(CFB.find(cfb, "/" + nm)) return; + var p = new_buf(4); p[0] = 55; p[1] = p[3] = 50; p[2] = 54; + cfb.FileIndex.push(({ name: nm, type: 2, content:p, size:4, L:69, R:69, C:69 }/*:any*/)); + cfb.FullPaths.push(cfb.FullPaths[0] + nm); + rebuild_cfb(cfb); +} +function rebuild_cfb(cfb/*:CFBContainer*/, f/*:?boolean*/)/*:void*/ { + init_cfb(cfb); + var gc = false, s = false; + for(var i = cfb.FullPaths.length - 1; i >= 0; --i) { + var _file = cfb.FileIndex[i]; + switch(_file.type) { + case 0: + if(s) gc = true; + else { cfb.FileIndex.pop(); cfb.FullPaths.pop(); } + break; + case 1: case 2: case 5: + s = true; + if(isNaN(_file.R * _file.L * _file.C)) gc = true; + if(_file.R > -1 && _file.L > -1 && _file.R == _file.L) gc = true; + break; + default: gc = true; break; + } + } + if(!gc && !f) return; + + var now = new Date(), j = 0; + var data/*:Array<[string, CFBEntry]>*/ = []; + for(i = 0; i < cfb.FullPaths.length; ++i) { + if(cfb.FileIndex[i].type === 0) continue; + data.push([cfb.FullPaths[i], cfb.FileIndex[i]]); + } + for(i = 0; i < data.length; ++i) { + var dad = dirname(data[i][0]); + s = false; + for(j = 0; j < data.length; ++j) if(data[j][0] === dad) s = true; + if(!s) data.push([dad, ({ + name: filename(dad).replace("/",""), + type: 1, + clsid: HEADER_CLSID, + ct: now, mt: now, + content: null + }/*:any*/)]); + } + + data.sort(function(x,y) { return namecmp(x[0], y[0]); }); + cfb.FullPaths = []; cfb.FileIndex = []; + for(i = 0; i < data.length; ++i) { cfb.FullPaths[i] = data[i][0]; cfb.FileIndex[i] = data[i][1]; } + for(i = 0; i < data.length; ++i) { + var elt = cfb.FileIndex[i]; + var nm = cfb.FullPaths[i]; + + elt.name = filename(nm).replace("/",""); + elt.L = elt.R = elt.C = -(elt.color = 1); + elt.size = elt.content ? elt.content.length : 0; + elt.start = 0; + elt.clsid = (elt.clsid || HEADER_CLSID); + if(i === 0) { + elt.C = data.length > 1 ? 1 : -1; + elt.size = 0; + elt.type = 5; + } else if(nm.slice(-1) == "/") { + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==nm) break; + elt.C = j >= data.length ? -1 : j; + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==dirname(nm)) break; + elt.R = j >= data.length ? -1 : j; + elt.type = 1; + } else { + if(dirname(cfb.FullPaths[i+1]||"") == dirname(nm)) elt.R = i + 1; + elt.type = 2; + } + } + +} + +function _write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes*/ { + rebuild_cfb(cfb); + var L = (function(cfb/*:CFBContainer*/)/*:Array*/{ + var mini_size = 0, fat_size = 0; + for(var i = 0; i < cfb.FileIndex.length; ++i) { + var file = cfb.FileIndex[i]; + if(!file.content) continue; + /*:: if(file.content == null) throw new Error("unreachable"); */ + var flen = file.content.length; + if(flen === 0){} + else if(flen < 0x1000) mini_size += (flen + 0x3F) >> 6; + else fat_size += (flen + 0x01FF) >> 9; + } + var dir_cnt = (cfb.FullPaths.length +3) >> 2; + var mini_cnt = (mini_size + 7) >> 3; + var mfat_cnt = (mini_size + 0x7F) >> 7; + var fat_base = mini_cnt + fat_size + dir_cnt + mfat_cnt; + var fat_cnt = (fat_base + 0x7F) >> 7; + var difat_cnt = fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + while(((fat_base + fat_cnt + difat_cnt + 0x7F) >> 7) > fat_cnt) difat_cnt = ++fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + var L = [1, difat_cnt, fat_cnt, mfat_cnt, dir_cnt, fat_size, mini_size, 0]; + cfb.FileIndex[0].size = mini_size << 6; + L[7] = (cfb.FileIndex[0].start=L[0]+L[1]+L[2]+L[3]+L[4]+L[5])+((L[6]+7) >> 3); + return L; + })(cfb); + var o = new_buf(L[7] << 9); + var i = 0, T = 0; + { + for(i = 0; i < 8; ++i) o.write_shift(1, HEADER_SIG[i]); + for(i = 0; i < 8; ++i) o.write_shift(2, 0); + o.write_shift(2, 0x003E); + o.write_shift(2, 0x0003); + o.write_shift(2, 0xFFFE); + o.write_shift(2, 0x0009); + o.write_shift(2, 0x0006); + for(i = 0; i < 3; ++i) o.write_shift(2, 0); + o.write_shift(4, 0); + o.write_shift(4, L[2]); + o.write_shift(4, L[0] + L[1] + L[2] + L[3] - 1); + o.write_shift(4, 0); + o.write_shift(4, 1<<12); + o.write_shift(4, L[3] ? L[0] + L[1] + L[2] - 1: ENDOFCHAIN); + o.write_shift(4, L[3]); + o.write_shift(-4, L[1] ? L[0] - 1: ENDOFCHAIN); + o.write_shift(4, L[1]); + for(i = 0; i < 109; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + } + if(L[1]) { + for(T = 0; T < L[1]; ++T) { + for(; i < 236 + T * 127; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + o.write_shift(-4, T === L[1] - 1 ? ENDOFCHAIN : T + 1); + } + } + var chainit = function(w/*:number*/)/*:void*/ { + for(T += w; i> 9); + } + chainit((L[6] + 7) >> 3); + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + T = i = 0; + for(j = 0; j < cfb.FileIndex.length; ++j) { + file = cfb.FileIndex[j]; + if(!file.content) continue; + /*:: if(file.content == null) throw new Error("unreachable"); */ + flen = file.content.length; + if(!flen || flen >= 0x1000) continue; + file.start = T; + chainit((flen + 0x3F) >> 6); + } + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + for(i = 0; i < L[4]<<2; ++i) { + var nm = cfb.FullPaths[i]; + if(!nm || nm.length === 0) { + for(j = 0; j < 17; ++j) o.write_shift(4, 0); + for(j = 0; j < 3; ++j) o.write_shift(4, -1); + for(j = 0; j < 12; ++j) o.write_shift(4, 0); + continue; + } + file = cfb.FileIndex[i]; + if(i === 0) file.start = file.size ? file.start - 1 : ENDOFCHAIN; + flen = 2*(file.name.length+1); + o.write_shift(64, file.name, "utf16le"); + o.write_shift(2, flen); + o.write_shift(1, file.type); + o.write_shift(1, file.color); + o.write_shift(-4, file.L); + o.write_shift(-4, file.R); + o.write_shift(-4, file.C); + if(!file.clsid) for(j = 0; j < 4; ++j) o.write_shift(4, 0); + else o.write_shift(16, file.clsid, "hex"); + o.write_shift(4, file.state || 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, file.start); + o.write_shift(4, file.size); o.write_shift(4, 0); + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; + /*:: if(!file.content) throw new Error("unreachable"); */ + if(file.size >= 0x1000) { + o.l = (file.start+1) << 9; + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x1FF; ++j) o.write_shift(1, 0); + } + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; + /*:: if(!file.content) throw new Error("unreachable"); */ + if(file.size > 0 && file.size < 0x1000) { + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x3F; ++j) o.write_shift(1, 0); + } + } + + return o; +} +/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ { - return cfb.find(path); + //return cfb.find(path); + var UCFullPaths/*:Array*/ = cfb.FullPaths.map(function(x) { return x.toUpperCase(); }); + var UCPaths/*:Array*/ = UCFullPaths.map(function(x) { var y = x.split("/"); return y[y.length - (x.slice(-1) == "/" ? 2 : 1)]; }); + var k/*:boolean*/ = false; + if(path.charCodeAt(0) === 47 /* "/" */) { k = true; path = UCFullPaths[0].slice(0, -1) + path; } + else k = path.indexOf("/") !== -1; + var UCPath/*:string*/ = path.toUpperCase(); + var w/*:number*/ = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); + if(w !== -1) return cfb.FileIndex[w]; + + UCPath = UCPath.replace(chr0,'').replace(chr1,'!'); + for(w = 0; w < UCFullPaths.length; ++w) { + if(UCFullPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + if(UCPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + } + return null; } /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ @@ -379,6 +679,7 @@ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ var ENDOFCHAIN = -2; /* 2.2 Compound File Header */ var HEADER_SIGNATURE = 'd0cf11e0a1b11ae1'; +var HEADER_SIG = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; var HEADER_CLSID = '00000000000000000000000000000000'; var consts = { /* 2.1 Compund File Sector Numbers and Types */ @@ -397,10 +698,86 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +function write_file(cfb/*:CFBContainer*/, filename/*:string*/, options/*:CFBWriteOpts*/)/*:void*/ { + var o = _write(cfb, options); + /*:: if(typeof Buffer == 'undefined' || !Buffer.isBuffer(o) || !(o instanceof Buffer)) throw new Error("unreachable"); */ + fs.writeFileSync(filename, o); +} + +function a2s(o/*:RawBytes*/)/*:string*/ { + var out = new Array(o.length); + for(var i = 0; i < o.length; ++i) out[i] = String.fromCharCode(o[i]); + return out.join(""); +} + +function write(cfb/*:CFBContainer*/, options/*:CFBWriteOpts*/)/*:RawBytes|string*/ { + var o = _write(cfb, options); + switch(options && options.type) { + case "file": fs.writeFileSync(options.filename, (o/*:any*/)); return o; + case "binary": return a2s(o); + case "base64": return Base64.encode(a2s(o)); + } + return o; +} +function cfb_new(opts/*:?any*/)/*:CFBContainer*/ { + var o/*:CFBContainer*/ = ({}/*:any*/); + init_cfb(o, opts); + return o; +} + +function cfb_add(cfb/*:CFBContainer*/, name/*:string*/, content/*:?RawBytes*/, opts/*:?any*/)/*:CFBEntry*/ { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(!file) { + file = ({name: filename(name)}/*:any*/); + cfb.FileIndex.push(file); + cfb.FullPaths.push(name); + CFB.utils.cfb_gc(cfb); + } + /*:: if(!file) throw new Error("unreachable"); */ + file.content = (content/*:any*/); + file.size = content ? content.length : 0; + if(opts) { + if(opts.CLSID) file.clsid = opts.CLSID; + } + return file; +} + +function cfb_del(cfb/*:CFBContainer*/, name/*:string*/)/*:boolean*/ { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex.splice(j, 1); + cfb.FullPaths.splice(j, 1); + return true; + } + return false; +} + +function cfb_mov(cfb/*:CFBContainer*/, old_name/*:string*/, new_name/*:string*/)/*:boolean*/ { + init_cfb(cfb); + var file = CFB.find(cfb, old_name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex[j].name = filename(new_name); + cfb.FullPaths[j] = new_name; + return true; + } + return false; +} + +function cfb_gc(cfb/*:CFBContainer*/)/*:void*/ { rebuild_cfb(cfb, true); } + exports.find = find; -exports.read = readSync; +exports.read = read; exports.parse = parse; +exports.write = write; +exports.writeFile = write_file; exports.utils = { + cfb_new: cfb_new, + cfb_add: cfb_add, + cfb_del: cfb_del, + cfb_mov: cfb_mov, + cfb_gc: cfb_gc, ReadShift: ReadShift, CheckField: CheckField, prep_blob: prep_blob, diff --git a/xlscfb.js b/xlscfb.js index b54e680..b8fc040 100644 --- a/xlscfb.js +++ b/xlscfb.js @@ -2,19 +2,102 @@ var DO_NOT_EXPORT_CFB = true; /* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ /* vim: set ts=2: */ /*jshint eqnull:true */ +/*exported CFB */ +/*global module, require:false, process:false, Buffer:false, Uint8Array:false */ +var Base64 = (function make_b64(){ + var map = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/="; + return { + encode: function(input) { + var o = ""; + var c1, c2, c3; + var e1, e2, e3, e4; + for(var i = 0; i < input.length; ) { + c1 = input.charCodeAt(i++); + e1 = (c1 >> 2); + + c2 = input.charCodeAt(i++); + e2 = ((c1 & 3) << 4) | (c2 >> 4); + + c3 = input.charCodeAt(i++); + e3 = ((c2 & 15) << 2) | (c3 >> 6); + e4 = (c3 & 63); + if (isNaN(c2)) { e3 = e4 = 64; } + else if (isNaN(c3)) { e4 = 64; } + o += map.charAt(e1) + map.charAt(e2) + map.charAt(e3) + map.charAt(e4); + } + return o; + }, + decode: function b64_decode(input) { + var o = ""; + var c1, c2, c3; + var e1, e2, e3, e4; + input = input.replace(/[^\w\+\/\=]/g, ""); + for(var i = 0; i < input.length;) { + e1 = map.indexOf(input.charAt(i++)); + e2 = map.indexOf(input.charAt(i++)); + c1 = (e1 << 2) | (e2 >> 4); + o += String.fromCharCode(c1); + + e3 = map.indexOf(input.charAt(i++)); + c2 = ((e2 & 15) << 4) | (e3 >> 2); + if (e3 !== 64) { o += String.fromCharCode(c2); } + + e4 = map.indexOf(input.charAt(i++)); + c3 = ((e3 & 3) << 6) | e4; + if (e4 !== 64) { o += String.fromCharCode(c3); } + } + return o; + } + }; +})(); +var has_buf = (typeof Buffer !== 'undefined' && typeof process !== 'undefined' && typeof process.versions !== 'undefined' && process.versions.node); + +function new_raw_buf(len) { + /* jshint -W056 */ + // $FlowIgnore + return new (has_buf ? Buffer : Array)(len); + /* jshint +W056 */ +} + +var s2a = function s2a(s) { + if(has_buf) return new Buffer(s, "binary"); + return s.split("").map(function(x){ return x.charCodeAt(0) & 0xff; }); +}; + +var chr0 = /\u0000/g, chr1 = /[\u0001-\u0006]/; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports = {}; -exports.version = '0.12.1'; +exports.version = '0.13.0'; +/* [MS-CFB] 2.6.4 */ +function namecmp(l, r) { + var L = l.split("/"), R = r.split("/"); + for(var i = 0, c = 0, Z = Math.min(L.length, R.length); i < Z; ++i) { + if((c = L[i].length - R[i].length)) return c; + if(L[i] != R[i]) return L[i] < R[i] ? -1 : 1; + } + return L.length - R.length; +} +function dirname(p) { + if(p.charAt(p.length - 1) == "/") return (p.slice(0,-1).indexOf("/") === -1) ? p : dirname(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(0, c+1); +} + +function filename(p) { + if(p.charAt(p.length - 1) == "/") return filename(p.slice(0, -1)); + var c = p.lastIndexOf("/"); + return (c === -1) ? p : p.slice(c+1); +} function parse(file, options) { -var mver = 3; // major version -var ssz = 512; // sector size +var mver = 3; +var ssz = 512; var nmfs = 0; // number of mini FAT sectors -var ndfs = 0; // number of DIFAT sectors -var dir_start = 0; // first directory sector location -var minifat_start = 0; // first mini FAT sector location -var difat_start = 0; // first mini FAT sector location +var difat_sec_cnt = 0; +var dir_start = 0; +var minifat_start = 0; +var difat_start = 0; var fat_addrs = []; // locations of FAT sectors @@ -38,11 +121,10 @@ var header = file.slice(0,ssz); check_shifts(blob, mver); // Number of Directory Sectors -var nds = blob.read_shift(4, 'i'); -if(mver === 3 && nds !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + nds); +var dir_cnt = blob.read_shift(4, 'i'); +if(mver === 3 && dir_cnt !== 0) throw new Error('# Directory Sectors: Expected 0 saw ' + dir_cnt); // Number of FAT Sectors -//var nfs = blob.read_shift(4, 'i'); blob.l += 4; // First Directory Sector Location @@ -64,7 +146,7 @@ nmfs = blob.read_shift(4, 'i'); difat_start = blob.read_shift(4, 'i'); // Number of DIFAT Sectors -ndfs = blob.read_shift(4, 'i'); +difat_sec_cnt = blob.read_shift(4, 'i'); // Grab FAT Sector Locations for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ @@ -76,7 +158,7 @@ for(var q = -1, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */ /** Break the file up into sectors */ var sectors = sectorify(file, ssz); -sleuth_fat(difat_start, ndfs, sectors, ssz, fat_addrs); +sleuth_fat(difat_start, difat_sec_cnt, sectors, ssz, fat_addrs); /** Chains */ var sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz); @@ -92,19 +174,17 @@ var files = {}, Paths = [], FileIndex = [], FullPaths = [], FullPathDir = {}; read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex); build_full_paths(FileIndex, FullPathDir, FullPaths, Paths); +Paths.shift(); -var root_name = Paths.shift(); - -/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ -var find_path = make_find_path(FullPaths, Paths, FileIndex, files, root_name); - -return { - raw: {header: header, sectors: sectors}, +var o = { FileIndex: FileIndex, FullPaths: FullPaths, - FullPathDir: FullPathDir, - find: find_path + FullPathDir: FullPathDir }; + +// $FlowIgnore +if(options && options.raw) o.raw = {header: header, sectors: sectors}; +return o; } // parse /* [MS-CFB] 2.2 Compound File Header -- read up to major version */ @@ -191,25 +271,8 @@ function build_full_paths(FI, FPD, FP, Paths) { } } -/* [MS-CFB] 2.6.4 */ -function make_find_path(FullPaths, Paths, FileIndex, files, root_name) { - var UCFullPaths = []; - var UCPaths = [], i = 0; - for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!'); - return function find_path(path) { - var k = false; - if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; } - else k = path.indexOf("/") !== -1; - var UCPath = path.toUpperCase().replace(chr0,'').replace(chr1,'!'); - var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); - if(w === -1) return null; - return k === true ? FileIndex[w] : files[Paths[w]]; - }; -} - /** Chase down the rest of the DIFAT chain to build a comprehensive list - DIFAT chains by storing the next sector number as the last 32 bytes */ + DIFAT chains by storing the next sector number as the last 32 bits */ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) { var q = ENDOFCHAIN; if(idx === ENDOFCHAIN) { @@ -227,7 +290,6 @@ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) { /** Follow the linked list of sectors for a given starting point */ function get_sector_list(sectors, start, fat_addrs, ssz, chkd) { - var sl = sectors.length; var buf = [], buf_chain = []; if(!chkd) chkd = []; var modulus = ssz - 1, j = 0, jj = 0; @@ -326,22 +388,254 @@ function read_date(blob, offset) { } var fs; -function readFileSync(filename, options) { +function read_file(filename, options) { if(fs == null) fs = require('fs'); return parse(fs.readFileSync(filename), options); } -function readSync(blob, options) { +function read(blob, options) { switch(options && options.type || "base64") { - case "file": return readFileSync(blob, options); + case "file": return read_file(blob, options); case "base64": return parse(s2a(Base64.decode(blob)), options); case "binary": return parse(s2a(blob), options); } return parse(blob, options); } +function init_cfb(cfb, opts) { + var o = opts || {}, root = o.root || "Root Entry"; + if(!cfb.FullPaths) cfb.FullPaths = []; + if(!cfb.FileIndex) cfb.FileIndex = []; + if(cfb.FullPaths.length !== cfb.FileIndex.length) throw new Error("inconsistent CFB structure"); + if(cfb.FullPaths.length === 0) { + cfb.FullPaths[0] = root + "/"; + cfb.FileIndex[0] = ({ name: root, type: 5 }); + } + if(o.CLSID) cfb.FileIndex[0].clsid = o.CLSID; + seed_cfb(cfb); +} +function seed_cfb(cfb) { + var nm = "\u0001Sh33tJ5"; + if(CFB.find(cfb, "/" + nm)) return; + var p = new_buf(4); p[0] = 55; p[1] = p[3] = 50; p[2] = 54; + cfb.FileIndex.push(({ name: nm, type: 2, content:p, size:4, L:69, R:69, C:69 })); + cfb.FullPaths.push(cfb.FullPaths[0] + nm); + rebuild_cfb(cfb); +} +function rebuild_cfb(cfb, f) { + init_cfb(cfb); + var gc = false, s = false; + for(var i = cfb.FullPaths.length - 1; i >= 0; --i) { + var _file = cfb.FileIndex[i]; + switch(_file.type) { + case 0: + if(s) gc = true; + else { cfb.FileIndex.pop(); cfb.FullPaths.pop(); } + break; + case 1: case 2: case 5: + s = true; + if(isNaN(_file.R * _file.L * _file.C)) gc = true; + if(_file.R > -1 && _file.L > -1 && _file.R == _file.L) gc = true; + break; + default: gc = true; break; + } + } + if(!gc && !f) return; + + var now = new Date(), j = 0; + var data = []; + for(i = 0; i < cfb.FullPaths.length; ++i) { + if(cfb.FileIndex[i].type === 0) continue; + data.push([cfb.FullPaths[i], cfb.FileIndex[i]]); + } + for(i = 0; i < data.length; ++i) { + var dad = dirname(data[i][0]); + s = false; + for(j = 0; j < data.length; ++j) if(data[j][0] === dad) s = true; + if(!s) data.push([dad, ({ + name: filename(dad).replace("/",""), + type: 1, + clsid: HEADER_CLSID, + ct: now, mt: now, + content: null + })]); + } + + data.sort(function(x,y) { return namecmp(x[0], y[0]); }); + cfb.FullPaths = []; cfb.FileIndex = []; + for(i = 0; i < data.length; ++i) { cfb.FullPaths[i] = data[i][0]; cfb.FileIndex[i] = data[i][1]; } + for(i = 0; i < data.length; ++i) { + var elt = cfb.FileIndex[i]; + var nm = cfb.FullPaths[i]; + + elt.name = filename(nm).replace("/",""); + elt.L = elt.R = elt.C = -(elt.color = 1); + elt.size = elt.content ? elt.content.length : 0; + elt.start = 0; + elt.clsid = (elt.clsid || HEADER_CLSID); + if(i === 0) { + elt.C = data.length > 1 ? 1 : -1; + elt.size = 0; + elt.type = 5; + } else if(nm.slice(-1) == "/") { + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==nm) break; + elt.C = j >= data.length ? -1 : j; + for(j=i+1;j < data.length; ++j) if(dirname(cfb.FullPaths[j])==dirname(nm)) break; + elt.R = j >= data.length ? -1 : j; + elt.type = 1; + } else { + if(dirname(cfb.FullPaths[i+1]||"") == dirname(nm)) elt.R = i + 1; + elt.type = 2; + } + } + +} + +function _write(cfb, options) { + rebuild_cfb(cfb); + var L = (function(cfb){ + var mini_size = 0, fat_size = 0; + for(var i = 0; i < cfb.FileIndex.length; ++i) { + var file = cfb.FileIndex[i]; + if(!file.content) continue; +var flen = file.content.length; + if(flen === 0){} + else if(flen < 0x1000) mini_size += (flen + 0x3F) >> 6; + else fat_size += (flen + 0x01FF) >> 9; + } + var dir_cnt = (cfb.FullPaths.length +3) >> 2; + var mini_cnt = (mini_size + 7) >> 3; + var mfat_cnt = (mini_size + 0x7F) >> 7; + var fat_base = mini_cnt + fat_size + dir_cnt + mfat_cnt; + var fat_cnt = (fat_base + 0x7F) >> 7; + var difat_cnt = fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + while(((fat_base + fat_cnt + difat_cnt + 0x7F) >> 7) > fat_cnt) difat_cnt = ++fat_cnt <= 109 ? 0 : Math.ceil((fat_cnt-109)/0x7F); + var L = [1, difat_cnt, fat_cnt, mfat_cnt, dir_cnt, fat_size, mini_size, 0]; + cfb.FileIndex[0].size = mini_size << 6; + L[7] = (cfb.FileIndex[0].start=L[0]+L[1]+L[2]+L[3]+L[4]+L[5])+((L[6]+7) >> 3); + return L; + })(cfb); + var o = new_buf(L[7] << 9); + var i = 0, T = 0; + { + for(i = 0; i < 8; ++i) o.write_shift(1, HEADER_SIG[i]); + for(i = 0; i < 8; ++i) o.write_shift(2, 0); + o.write_shift(2, 0x003E); + o.write_shift(2, 0x0003); + o.write_shift(2, 0xFFFE); + o.write_shift(2, 0x0009); + o.write_shift(2, 0x0006); + for(i = 0; i < 3; ++i) o.write_shift(2, 0); + o.write_shift(4, 0); + o.write_shift(4, L[2]); + o.write_shift(4, L[0] + L[1] + L[2] + L[3] - 1); + o.write_shift(4, 0); + o.write_shift(4, 1<<12); + o.write_shift(4, L[3] ? L[0] + L[1] + L[2] - 1: ENDOFCHAIN); + o.write_shift(4, L[3]); + o.write_shift(-4, L[1] ? L[0] - 1: ENDOFCHAIN); + o.write_shift(4, L[1]); + for(i = 0; i < 109; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + } + if(L[1]) { + for(T = 0; T < L[1]; ++T) { + for(; i < 236 + T * 127; ++i) o.write_shift(-4, i < L[2] ? L[1] + i : -1); + o.write_shift(-4, T === L[1] - 1 ? ENDOFCHAIN : T + 1); + } + } + var chainit = function(w) { + for(T += w; i> 9); + } + chainit((L[6] + 7) >> 3); + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + T = i = 0; + for(j = 0; j < cfb.FileIndex.length; ++j) { + file = cfb.FileIndex[j]; + if(!file.content) continue; +flen = file.content.length; + if(!flen || flen >= 0x1000) continue; + file.start = T; + chainit((flen + 0x3F) >> 6); + } + while(o.l & 0x1FF) o.write_shift(-4, consts.ENDOFCHAIN); + for(i = 0; i < L[4]<<2; ++i) { + var nm = cfb.FullPaths[i]; + if(!nm || nm.length === 0) { + for(j = 0; j < 17; ++j) o.write_shift(4, 0); + for(j = 0; j < 3; ++j) o.write_shift(4, -1); + for(j = 0; j < 12; ++j) o.write_shift(4, 0); + continue; + } + file = cfb.FileIndex[i]; + if(i === 0) file.start = file.size ? file.start - 1 : ENDOFCHAIN; + flen = 2*(file.name.length+1); + o.write_shift(64, file.name, "utf16le"); + o.write_shift(2, flen); + o.write_shift(1, file.type); + o.write_shift(1, file.color); + o.write_shift(-4, file.L); + o.write_shift(-4, file.R); + o.write_shift(-4, file.C); + if(!file.clsid) for(j = 0; j < 4; ++j) o.write_shift(4, 0); + else o.write_shift(16, file.clsid, "hex"); + o.write_shift(4, file.state || 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, 0); o.write_shift(4, 0); + o.write_shift(4, file.start); + o.write_shift(4, file.size); o.write_shift(4, 0); + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; +if(file.size >= 0x1000) { + o.l = (file.start+1) << 9; + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x1FF; ++j) o.write_shift(1, 0); + } + } + for(i = 1; i < cfb.FileIndex.length; ++i) { + file = cfb.FileIndex[i]; +if(file.size > 0 && file.size < 0x1000) { + for(j = 0; j < file.size; ++j) o.write_shift(1, file.content[j]); + for(; j & 0x3F; ++j) o.write_shift(1, 0); + } + } + + return o; +} +/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */ function find(cfb, path) { - return cfb.find(path); + //return cfb.find(path); + var UCFullPaths = cfb.FullPaths.map(function(x) { return x.toUpperCase(); }); + var UCPaths = UCFullPaths.map(function(x) { var y = x.split("/"); return y[y.length - (x.slice(-1) == "/" ? 2 : 1)]; }); + var k = false; + if(path.charCodeAt(0) === 47 /* "/" */) { k = true; path = UCFullPaths[0].slice(0, -1) + path; } + else k = path.indexOf("/") !== -1; + var UCPath = path.toUpperCase(); + var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath); + if(w !== -1) return cfb.FileIndex[w]; + + UCPath = UCPath.replace(chr0,'').replace(chr1,'!'); + for(w = 0; w < UCFullPaths.length; ++w) { + if(UCFullPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + if(UCPaths[w].replace(chr0,'').replace(chr1,'!') == UCPath) return cfb.FileIndex[w]; + } + return null; } /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ @@ -350,6 +644,7 @@ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ var ENDOFCHAIN = -2; /* 2.2 Compound File Header */ var HEADER_SIGNATURE = 'd0cf11e0a1b11ae1'; +var HEADER_SIG = [0xD0, 0xCF, 0x11, 0xE0, 0xA1, 0xB1, 0x1A, 0xE1]; var HEADER_CLSID = '00000000000000000000000000000000'; var consts = { /* 2.1 Compund File Sector Numbers and Types */ @@ -368,10 +663,84 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +function write_file(cfb, filename, options) { + var o = _write(cfb, options); +fs.writeFileSync(filename, o); +} + +function a2s(o) { + var out = new Array(o.length); + for(var i = 0; i < o.length; ++i) out[i] = String.fromCharCode(o[i]); + return out.join(""); +} + +function write(cfb, options) { + var o = _write(cfb, options); + switch(options && options.type) { + case "file": fs.writeFileSync(options.filename, (o)); return o; + case "binary": return a2s(o); + case "base64": return Base64.encode(a2s(o)); + } + return o; +} +function cfb_new(opts) { + var o = ({}); + init_cfb(o, opts); + return o; +} + +function cfb_add(cfb, name, content, opts) { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(!file) { + file = ({name: filename(name)}); + cfb.FileIndex.push(file); + cfb.FullPaths.push(name); + CFB.utils.cfb_gc(cfb); + } +file.content = (content); + file.size = content ? content.length : 0; + if(opts) { + if(opts.CLSID) file.clsid = opts.CLSID; + } + return file; +} + +function cfb_del(cfb, name) { + init_cfb(cfb); + var file = CFB.find(cfb, name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex.splice(j, 1); + cfb.FullPaths.splice(j, 1); + return true; + } + return false; +} + +function cfb_mov(cfb, old_name, new_name) { + init_cfb(cfb); + var file = CFB.find(cfb, old_name); + if(file) for(var j = 0; j < cfb.FileIndex.length; ++j) if(cfb.FileIndex[j] == file) { + cfb.FileIndex[j].name = filename(new_name); + cfb.FullPaths[j] = new_name; + return true; + } + return false; +} + +function cfb_gc(cfb) { rebuild_cfb(cfb, true); } + exports.find = find; -exports.read = readSync; +exports.read = read; exports.parse = parse; +exports.write = write; +exports.writeFile = write_file; exports.utils = { + cfb_new: cfb_new, + cfb_add: cfb_add, + cfb_del: cfb_del, + cfb_mov: cfb_mov, + cfb_gc: cfb_gc, ReadShift: ReadShift, CheckField: CheckField, prep_blob: prep_blob,