version bump 0.10.2: proper directory/FAT analysis

- reprocess directory sector lists if not available
- search now strips out OLE metacharacters \0-\5
- strip sourceMappingURL references from minified scripts (h/t @vinin3)
- added xlscfb.js for js-xls
This commit is contained in:
SheetJS 2014-11-02 23:02:42 -05:00
parent 597de28cf5
commit fcc05e3567
21 changed files with 984 additions and 69 deletions

8
.gitignore vendored

@ -1,6 +1,8 @@
test_files
node_modules/
*.xls
node_modules
misc/coverage.html
prof.js
v8.log
test_files
test_files_pres
*.xls
*.sheetjs

@ -1,7 +1,16 @@
LIB=cfb
FMT=xls doc ppt misc full
REQS=
ADDONS=
AUXTARGETS=xlscfb.js
ULIB=$(shell echo $(LIB) | tr a-z A-Z)
DEPS=$(sort $(wildcard bits/*.js))
TARGET=$(LIB).js
.PHONY: all
all: $(TARGET) $(AUXTARGETS)
$(TARGET): $(DEPS)
cat $^ | tr -d '\15\32' > $@
@ -11,32 +20,48 @@ bits/31_version.js: package.json
.PHONY: clean
clean:
rm -f $(TARGET)
rm -rf ./test_files/
.PHONY: clean-data
clean-data:
rm -fr ./test_files/ ./test_files_pres/
.PHONY: init
init:
if [ ! -e test_files ]; then git clone https://github.com/SheetJS/test_files; fi
cd test_files; git pull; make
if [ ! -e test_files_pres ]; then git clone https://github.com/SheetJS/test_files_pres; fi
cd test_files_pres; git pull
.PHONY: test mocha
test mocha: test.js
mocha -R spec
mocha -R spec -t 20000
.PHONY: prof
prof:
cat misc/prof.js test.js > prof.js
node --prof prof.js
TESTFMT=$(patsubst %,test_%,$(FMT))
.PHONY: $(TESTFMT)
$(TESTFMT): test_%:
FMTS=$* make test
.PHONY: lint
lint: $(TARGET)
jshint --show-non-errors $(TARGET)
jscs $(TARGET)
jshint --show-non-errors $(TARGET) $(AUXTARGETS)
jscs $(TARGET) $(AUXTARGETS)
.PHONY: cov cov-spin
cov: misc/coverage.html
cov-spin:
make cov & bash misc/spin.sh $$!
COVFMT=$(patsubst %,cov_%,$(FMT))
.PHONY: $(COVFMT)
$(COVFMT): cov_%:
FMTS=$* make cov
misc/coverage.html: $(TARGET) test.js
mocha --require blanket -R html-cov > $@
@ -48,7 +73,22 @@ coveralls-spin:
make coveralls & bash misc/spin.sh $$!
.PHONY: dist
dist: $(TARGET)
dist: dist-deps $(TARGET)
cp $(TARGET) dist/
cp LICENSE dist/
uglifyjs $(TARGET) -o dist/$(LIB).min.js --source-map dist/$(LIB).min.map --preamble "$$(head -n 1 bits/00_header.js)"
misc/strip_sourcemap.sh dist/$(LIB).min.js
.PHONY: aux
aux: $(AUXTARGETS)
.PHONY: xls
xls: xlscfb.js
XLSDEPS=misc/suppress_export.js $(filter-out bits/08_blob.js,$(DEPS))
xlscfb.js: $(XLSDEPS)
cat $^ | tr -d '\15\32' > $@
.PHONY: dist-deps
dist-deps: xlscfb.js
cp xlscfb.js dist/xlscfb.js

@ -1,8 +1,8 @@
# Compound File Binary Format
# Compound File Binary Format
This is a Pure-JS implementation of MS-CFB: Compound File Binary File Format, a
format used in many Microsoft file types (such as XLS, DOC, and other Microsoft
Office file types).
Office file types).
# Utility Installation and Usage
@ -14,7 +14,7 @@ $ cfb path/to/CFB/file
```
The command will extract the storages and streams in the container, generating
files that line up with the tree-based structure of the storage. Metadata
files that line up with the tree-based structure of the storage. Metadata
such as the red-black tree are discarded (and in the future, new CFB containers
will exclusively use black nodes)
@ -55,9 +55,9 @@ It has the following properties and methods:
- `.find(path)` performs a case-insensitive match for the path (or file name, if
there are no slashes) and returns an entry object (described later) or null if
not found
not found
- `.FullPaths` is an array of the names of all of the streams (files) and
- `.FullPaths` is an array of the names of all of the streams (files) and
storages (directories) in the container. The paths are properly prefixed from
the root entry (so the entries are unique)
@ -67,24 +67,27 @@ It has the following properties and methods:
- `.FileIndex` is an array of the objects from `.FullPathDir`, in the same order
as `.FullPaths`.
- `.raw` contains the raw header and sectors
- `.raw` contains the raw header and sectors
## Entry Object Description
The entry objects are available from `FullPathDir` and `FileIndex` elements of the
container object.
The entry objects are available from `FullPathDir` and `FileIndex` elements of
the container object.
- `.name` is the (case sensitive) internal name
- `.type` is the type (`2 (stream)` for files, `1 (storage)` for dirs, `5 (root)` for root)
- `.type` is the type as defined in "Object Type" in [MS-CFB] 2.6.1:
`2 (stream)` for files, `1 (storage)` for dirs, `5 (root)` for root
- `.content` is a Buffer/Array with the raw content
- `.ct`/`.mt` are the creation and modification time (if provided in file)
# Notes
Case comparison has not been verified for non-ASCII character
Case comparison has not been verified for non-ASCII characters.
Writing is not supported. It is in the works, but it has not yet been released.
The `xlscfb.js` file is designed to be embedded in [js-xls](http://git.io/xls).
# License
This implementation is covered under Apache 2.0 license. It complies with the

@ -5,7 +5,9 @@ var fs = require('fs'), program = require('commander');
program
.version(CFB.version)
.usage('[options] <file>')
.option('-q, --quiet', 'print but do not extract')
.option('-q, --quiet', 'process but do not report')
.option('-d, --dump', 'dump internal representation but do not extract')
.option('--dev', 'development mode')
.parse(process.argv);
if(program.args.length === 0 || !fs.existsSync(program.args[0])) {
@ -13,15 +15,17 @@ if(program.args.length === 0 || !fs.existsSync(program.args[0])) {
process.exit(1);
}
var cfb = CFB.read(program.args[0], {type:'file'});
if(program.quiet) {
var opts = {type:'file'};
if(program.dev) opts.WTF = true;
var cfb = CFB.read(program.args[0], opts);
if(program.dump) {
console.log("Full Paths:")
console.log(cfb.FullPaths.map(function(x) { return " " + x; }).join("\n"));
console.log("Full Path Directory:")
console.log(cfb.FullPathDir);
return;
}
for(var i=0; i != cfb.FullPaths.length; ++i) {
if(!program.quiet && !program.dump) for(var i=0; i!=cfb.FullPaths.length; ++i) {
if(cfb.FullPaths[i].slice(-1) === "/") {
console.error("mkdir " + cfb.FullPaths[i]);
fs.mkdirSync(cfb.FullPaths[i]);

@ -1 +1 @@
exports.version = '0.10.1';
exports.version = '0.10.2';

@ -14,7 +14,8 @@ var blob = file.slice(0,512);
prep_blob(blob, 0);
/* major version */
mver = check_get_mver(blob);
var mv = check_get_mver(blob);
mver = mv[0];
switch(mver) {
case 3: ssz = 512; break; case 4: ssz = 4096; break;
default: throw "Major Version: Expected 3 or 4 saw " + mver;
@ -74,6 +75,8 @@ var sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz);
sector_list[dir_start].name = "!Directory";
if(nmfs > 0 && minifat_start !== ENDOFCHAIN) sector_list[minifat_start].name = "!MiniFAT";
sector_list[fat_addrs[0]].name = "!FAT";
sector_list.fat_addrs = fat_addrs;
sector_list.ssz = ssz;
/* [MS-CFB] 2.6.1 Compound File Directory Entry */
var files = {}, Paths = [], FileIndex = [], FullPaths = [], FullPathDir = {};

@ -7,9 +7,9 @@ function check_get_mver(blob) {
blob.chk(HEADER_CLSID, 'CLSID: ');
// minor version 2
blob.l += 2;
var mver = blob.read_shift(2, 'u');
return blob.read_shift(2,'u');
return [blob.read_shift(2,'u'), mver];
}
function check_shifts(blob, mver) {
var shift = 0x09;

@ -2,13 +2,13 @@
function make_find_path(FullPaths, Paths, FileIndex, files, root_name) {
var UCFullPaths = new Array(FullPaths.length);
var UCPaths = new Array(Paths.length), i;
for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase();
for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase();
for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!');
for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!');
return function find_path(path) {
var k;
if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; }
else k = path.indexOf("/") !== -1;
var UCPath = path.toUpperCase();
var UCPath = path.toUpperCase().replace(chr0,'').replace(chr1,'!');
var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath);
if(w === -1) return null;
return k === true ? FileIndex[w] : files[Paths[w]];

@ -14,6 +14,26 @@ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) {
}
}
/** Follow the linked list of sectors for a given starting point */
function get_sector_list(sectors, start, fat_addrs, ssz, chkd) {
var sl = sectors.length;
var buf, buf_chain;
if(!chkd) chkd = new Array(sl);
var modulus = ssz - 1, j, jj;
buf = [];
buf_chain = [];
for(j=start; j>=0;) {
chkd[j] = true;
buf[buf.length] = j;
buf_chain.push(sectors[j]);
var addr = fat_addrs[Math.floor(j*4/ssz)];
jj = ((j*4) & modulus);
if(ssz < 4 + jj) throw "FAT boundary crossed: " + j + " 4 "+ssz;
j = __readInt32LE(sectors[addr], jj);
}
return {nodes: buf, data:__toBuffer([buf_chain])};
}
/** Chase down the sector linked lists */
function make_sector_list(sectors, dir_start, fat_addrs, ssz) {
var sl = sectors.length, sector_list = new Array(sl);

@ -37,7 +37,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
/*minifat_size = o.size;*/
} else if(o.size >= 4096 /* MSCSZ */) {
o.storage = 'fat';
if(sector_list[o.start] === undefined) if((o.start+=dir_start)>=sectors.length) o.start-=sectors.length;
if(sector_list[o.start] === undefined) sector_list[o.start] = get_sector_list(sectors, o.start, sector_list.fat_addrs, sector_list.ssz);
sector_list[o.start].name = o.name;
o.content = sector_list[o.start].data.slice(0,o.size);
prep_blob(o.content, 0);

@ -1,14 +1,14 @@
var fs;
function readFileSync(filename) {
function readFileSync(filename, options) {
if(fs === undefined) fs = require('fs');
return parse(fs.readFileSync(filename));
return parse(fs.readFileSync(filename), options);
}
function readSync(blob, options) {
switch(options !== undefined && options.type !== undefined ? options.type : "base64") {
case "file": return readFileSync(blob);
case "base64": return parse(s2a(Base64.decode(blob)));
case "binary": return parse(s2a(blob));
case "file": return readFileSync(blob, options);
case "base64": return parse(s2a(Base64.decode(blob)), options);
case "binary": return parse(s2a(blob), options);
}
return parse(blob);
}

49
cfb.js

@ -86,7 +86,7 @@ function prep_blob(blob, pos) {
/* [MS-CFB] v20130118 */
var CFB = (function _CFB(){
var exports = {};
exports.version = '0.10.1';
exports.version = '0.10.2';
function parse(file) {
var mver = 3; // major version
var ssz = 512; // sector size
@ -103,7 +103,8 @@ var blob = file.slice(0,512);
prep_blob(blob, 0);
/* major version */
mver = check_get_mver(blob);
var mv = check_get_mver(blob);
mver = mv[0];
switch(mver) {
case 3: ssz = 512; break; case 4: ssz = 4096; break;
default: throw "Major Version: Expected 3 or 4 saw " + mver;
@ -163,6 +164,8 @@ var sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz);
sector_list[dir_start].name = "!Directory";
if(nmfs > 0 && minifat_start !== ENDOFCHAIN) sector_list[minifat_start].name = "!MiniFAT";
sector_list[fat_addrs[0]].name = "!FAT";
sector_list.fat_addrs = fat_addrs;
sector_list.ssz = ssz;
/* [MS-CFB] 2.6.1 Compound File Directory Entry */
var files = {}, Paths = [], FileIndex = [], FullPaths = [], FullPathDir = {};
@ -194,9 +197,9 @@ function check_get_mver(blob) {
blob.chk(HEADER_CLSID, 'CLSID: ');
// minor version 2
blob.l += 2;
var mver = blob.read_shift(2, 'u');
return blob.read_shift(2,'u');
return [blob.read_shift(2,'u'), mver];
}
function check_shifts(blob, mver) {
var shift = 0x09;
@ -272,13 +275,13 @@ function build_full_paths(FI, FPD, FP, Paths) {
function make_find_path(FullPaths, Paths, FileIndex, files, root_name) {
var UCFullPaths = new Array(FullPaths.length);
var UCPaths = new Array(Paths.length), i;
for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase();
for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase();
for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!');
for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!');
return function find_path(path) {
var k;
if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; }
else k = path.indexOf("/") !== -1;
var UCPath = path.toUpperCase();
var UCPath = path.toUpperCase().replace(chr0,'').replace(chr1,'!');
var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath);
if(w === -1) return null;
return k === true ? FileIndex[w] : files[Paths[w]];
@ -301,6 +304,26 @@ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) {
}
}
/** Follow the linked list of sectors for a given starting point */
function get_sector_list(sectors, start, fat_addrs, ssz, chkd) {
var sl = sectors.length;
var buf, buf_chain;
if(!chkd) chkd = new Array(sl);
var modulus = ssz - 1, j, jj;
buf = [];
buf_chain = [];
for(j=start; j>=0;) {
chkd[j] = true;
buf[buf.length] = j;
buf_chain.push(sectors[j]);
var addr = fat_addrs[Math.floor(j*4/ssz)];
jj = ((j*4) & modulus);
if(ssz < 4 + jj) throw "FAT boundary crossed: " + j + " 4 "+ssz;
j = __readInt32LE(sectors[addr], jj);
}
return {nodes: buf, data:__toBuffer([buf_chain])};
}
/** Chase down the sector linked lists */
function make_sector_list(sectors, dir_start, fat_addrs, ssz) {
var sl = sectors.length, sector_list = new Array(sl);
@ -364,7 +387,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
/*minifat_size = o.size;*/
} else if(o.size >= 4096 /* MSCSZ */) {
o.storage = 'fat';
if(sector_list[o.start] === undefined) if((o.start+=dir_start)>=sectors.length) o.start-=sectors.length;
if(sector_list[o.start] === undefined) sector_list[o.start] = get_sector_list(sectors, o.start, sector_list.fat_addrs, sector_list.ssz);
sector_list[o.start].name = o.name;
o.content = sector_list[o.start].data.slice(0,o.size);
prep_blob(o.content, 0);
@ -385,16 +408,16 @@ function read_date(blob, offset) {
}
var fs;
function readFileSync(filename) {
function readFileSync(filename, options) {
if(fs === undefined) fs = require('fs');
return parse(fs.readFileSync(filename));
return parse(fs.readFileSync(filename), options);
}
function readSync(blob, options) {
switch(options !== undefined && options.type !== undefined ? options.type : "base64") {
case "file": return readFileSync(blob);
case "base64": return parse(s2a(Base64.decode(blob)));
case "binary": return parse(s2a(blob));
case "file": return readFileSync(blob, options);
case "base64": return parse(s2a(Base64.decode(blob)), options);
case "binary": return parse(s2a(blob), options);
}
return parse(blob);
}

49
dist/cfb.js vendored

@ -86,7 +86,7 @@ function prep_blob(blob, pos) {
/* [MS-CFB] v20130118 */
var CFB = (function _CFB(){
var exports = {};
exports.version = '0.10.1';
exports.version = '0.10.2';
function parse(file) {
var mver = 3; // major version
var ssz = 512; // sector size
@ -103,7 +103,8 @@ var blob = file.slice(0,512);
prep_blob(blob, 0);
/* major version */
mver = check_get_mver(blob);
var mv = check_get_mver(blob);
mver = mv[0];
switch(mver) {
case 3: ssz = 512; break; case 4: ssz = 4096; break;
default: throw "Major Version: Expected 3 or 4 saw " + mver;
@ -163,6 +164,8 @@ var sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz);
sector_list[dir_start].name = "!Directory";
if(nmfs > 0 && minifat_start !== ENDOFCHAIN) sector_list[minifat_start].name = "!MiniFAT";
sector_list[fat_addrs[0]].name = "!FAT";
sector_list.fat_addrs = fat_addrs;
sector_list.ssz = ssz;
/* [MS-CFB] 2.6.1 Compound File Directory Entry */
var files = {}, Paths = [], FileIndex = [], FullPaths = [], FullPathDir = {};
@ -194,9 +197,9 @@ function check_get_mver(blob) {
blob.chk(HEADER_CLSID, 'CLSID: ');
// minor version 2
blob.l += 2;
var mver = blob.read_shift(2, 'u');
return blob.read_shift(2,'u');
return [blob.read_shift(2,'u'), mver];
}
function check_shifts(blob, mver) {
var shift = 0x09;
@ -272,13 +275,13 @@ function build_full_paths(FI, FPD, FP, Paths) {
function make_find_path(FullPaths, Paths, FileIndex, files, root_name) {
var UCFullPaths = new Array(FullPaths.length);
var UCPaths = new Array(Paths.length), i;
for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase();
for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase();
for(i = 0; i < FullPaths.length; ++i) UCFullPaths[i] = FullPaths[i].toUpperCase().replace(chr0,'').replace(chr1,'!');
for(i = 0; i < Paths.length; ++i) UCPaths[i] = Paths[i].toUpperCase().replace(chr0,'').replace(chr1,'!');
return function find_path(path) {
var k;
if(path.charCodeAt(0) === 47 /* "/" */) { k=true; path = root_name + path; }
else k = path.indexOf("/") !== -1;
var UCPath = path.toUpperCase();
var UCPath = path.toUpperCase().replace(chr0,'').replace(chr1,'!');
var w = k === true ? UCFullPaths.indexOf(UCPath) : UCPaths.indexOf(UCPath);
if(w === -1) return null;
return k === true ? FileIndex[w] : files[Paths[w]];
@ -301,6 +304,26 @@ function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) {
}
}
/** Follow the linked list of sectors for a given starting point */
function get_sector_list(sectors, start, fat_addrs, ssz, chkd) {
var sl = sectors.length;
var buf, buf_chain;
if(!chkd) chkd = new Array(sl);
var modulus = ssz - 1, j, jj;
buf = [];
buf_chain = [];
for(j=start; j>=0;) {
chkd[j] = true;
buf[buf.length] = j;
buf_chain.push(sectors[j]);
var addr = fat_addrs[Math.floor(j*4/ssz)];
jj = ((j*4) & modulus);
if(ssz < 4 + jj) throw "FAT boundary crossed: " + j + " 4 "+ssz;
j = __readInt32LE(sectors[addr], jj);
}
return {nodes: buf, data:__toBuffer([buf_chain])};
}
/** Chase down the sector linked lists */
function make_sector_list(sectors, dir_start, fat_addrs, ssz) {
var sl = sectors.length, sector_list = new Array(sl);
@ -364,7 +387,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
/*minifat_size = o.size;*/
} else if(o.size >= 4096 /* MSCSZ */) {
o.storage = 'fat';
if(sector_list[o.start] === undefined) if((o.start+=dir_start)>=sectors.length) o.start-=sectors.length;
if(sector_list[o.start] === undefined) sector_list[o.start] = get_sector_list(sectors, o.start, sector_list.fat_addrs, sector_list.ssz);
sector_list[o.start].name = o.name;
o.content = sector_list[o.start].data.slice(0,o.size);
prep_blob(o.content, 0);
@ -385,16 +408,16 @@ function read_date(blob, offset) {
}
var fs;
function readFileSync(filename) {
function readFileSync(filename, options) {
if(fs === undefined) fs = require('fs');
return parse(fs.readFileSync(filename));
return parse(fs.readFileSync(filename), options);
}
function readSync(blob, options) {
switch(options !== undefined && options.type !== undefined ? options.type : "base64") {
case "file": return readFileSync(blob);
case "base64": return parse(s2a(Base64.decode(blob)));
case "binary": return parse(s2a(blob));
case "file": return readFileSync(blob, options);
case "base64": return parse(s2a(Base64.decode(blob)), options);
case "binary": return parse(s2a(blob), options);
}
return parse(blob);
}

3
dist/cfb.min.js vendored

File diff suppressed because one or more lines are too long

2
dist/cfb.min.map vendored

File diff suppressed because one or more lines are too long

383
dist/xlscfb.js vendored Normal file

@ -0,0 +1,383 @@
var DO_NOT_EXPORT_CFB = true;
/* cfb.js (C) 2013-2014 SheetJS -- http://sheetjs.com */
/* vim: set ts=2: */
/*jshint eqnull:true */
/* [MS-CFB] v20130118 */
var CFB = (function _CFB(){
var exports = {};
exports.version = '0.10.2';
/**
 * Parse a complete CFB container.
 *
 * Reads the header from the first 512 bytes (re-sliced at 4096 bytes for
 * major version 4), collects FAT sector addresses from the header and from
 * the DIFAT chain, splits the file into sectors, resolves the sector chains
 * and finally reads the directory.
 *
 * @param file container bytes; this function relies on `.slice()` and, via
 *   sectorify, on `.length`
 * @returns object with `raw` ({header, sectors}), `FileIndex`, `FullPaths`,
 *   `FullPathDir` and `find` (the lookup closure from make_find_path)
 * @throws string messages for an unsupported major version or a non-zero
 *   directory sector count in a version 3 file; helpers such as check_shifts
 *   and sleuth_fat throw strings as well
 *
 * NOTE(review): the read helpers later in this file call parse(data, options)
 * but only `file` is declared here, so the options object is ignored.
 */
function parse(file) {
var mver = 3; // major version
var ssz = 512; // sector size
var nmfs = 0; // number of mini FAT sectors
var ndfs = 0; // number of DIFAT sectors
var dir_start = 0; // first directory sector location
var minifat_start = 0; // first mini FAT sector location
var difat_start = 0; // first DIFAT sector location
var fat_addrs = []; // locations of FAT sectors
/* [MS-CFB] 2.2 Compound File Header */
var blob = file.slice(0,512);
prep_blob(blob, 0);
/* major version */
// check_get_mver returns [major, minor]; only the major version is used here
var mv = check_get_mver(blob);
mver = mv[0];
switch(mver) {
case 3: ssz = 512; break; case 4: ssz = 4096; break;
default: throw "Major Version: Expected 3 or 4 saw " + mver;
}
/* reprocess header */
// 28 = 8 (signature) + 16 (CLSID) + 2 + 2 (version words) already consumed by
// check_get_mver; presumably prep_blob's second argument is the read position -- confirm
if(ssz !== 512) { blob = file.slice(0,ssz); prep_blob(blob, 28 /* blob.l */); }
/* Save header for final object */
var header = file.slice(0,ssz);
check_shifts(blob, mver);
// Number of Directory Sectors
var nds = blob.read_shift(4, 'i');
if(mver === 3 && nds !== 0) throw '# Directory Sectors: Expected 0 saw ' + nds;
// Number of FAT Sectors
//var nfs = blob.read_shift(4, 'i');
// the field is skipped rather than read
blob.l += 4;
// First Directory Sector Location
dir_start = blob.read_shift(4, 'i');
// Transaction Signature
blob.l += 4;
// Mini Stream Cutoff Size
blob.chk('00100000', 'Mini Stream Cutoff Size: ');
// First Mini FAT Sector Location
minifat_start = blob.read_shift(4, 'i');
// Number of Mini FAT Sectors
nmfs = blob.read_shift(4, 'i');
// First DIFAT sector location
difat_start = blob.read_shift(4, 'i');
// Number of DIFAT Sectors
ndfs = blob.read_shift(4, 'i');
// Grab FAT Sector Locations
// the first negative entry ends the list of addresses stored in the header
for(var q, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */
q = blob.read_shift(4, 'i');
if(q<0) break;
fat_addrs[j] = q;
}
/** Break the file up into sectors */
var sectors = sectorify(file, ssz);
// append the FAT sector addresses stored in the DIFAT chain to fat_addrs
sleuth_fat(difat_start, ndfs, sectors, ssz, fat_addrs);
/** Chains */
var sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz);
sector_list[dir_start].name = "!Directory";
if(nmfs > 0 && minifat_start !== ENDOFCHAIN) sector_list[minifat_start].name = "!MiniFAT";
sector_list[fat_addrs[0]].name = "!FAT";
// read_directory reads these two properties when it has to build a chain on demand
sector_list.fat_addrs = fat_addrs;
sector_list.ssz = ssz;
/* [MS-CFB] 2.6.1 Compound File Directory Entry */
var files = {}, Paths = [], FileIndex = [], FullPaths = [], FullPathDir = {};
read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex);
build_full_paths(FileIndex, FullPathDir, FullPaths, Paths);
// the first directory entry is treated as the root: its name leaves Paths and
// is kept as Paths.root, so Paths is one shorter than FullPaths and FileIndex
var root_name = Paths.shift();
Paths.root = root_name;
/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */
var find_path = make_find_path(FullPaths, Paths, FileIndex, files, root_name);
return {
raw: {header: header, sectors: sectors},
FileIndex: FileIndex,
FullPaths: FullPaths,
FullPathDir: FullPathDir,
find: find_path
};
} // parse
/* [MS-CFB] 2.2 Compound File Header -- read up to major version */
/**
 * Validate the leading header fields and read both version words.
 *
 * @param blob prepared header blob providing `chk(expected, label)` and
 *   `read_shift(size, type)`
 * @returns two-element array `[major, minor]`
 */
function check_get_mver(blob) {
	// header signature 8
	blob.chk(HEADER_SIGNATURE, 'Header Signature: ');
	// clsid 16
	blob.chk(HEADER_CLSID, 'CLSID: ');
	// minor version 2 -- stored ahead of the major version word
	var minor = blob.read_shift(2, 'u');
	// major version 2
	var major = blob.read_shift(2, 'u');
	return [major, minor];
}
/**
 * Validate the byte order, sector shift, mini sector shift and reserved
 * header fields, in that order.
 *
 * @param blob prepared header blob providing `chk` and `read_shift`
 * @param mver major version; 3 must pair with shift 9 and 4 with shift 12
 * @throws string when the sector shift is unknown or disagrees with `mver`
 */
function check_shifts(blob, mver) {
	// Byte Order
	blob.chk('feff', 'Byte Order: ');

	// Sector Shift
	var sector_shift = blob.read_shift(2);
	if(sector_shift === 0x09) {
		if(mver !== 3) throw 'MajorVersion/SectorShift Mismatch';
	} else if(sector_shift === 0x0c) {
		if(mver !== 4) throw 'MajorVersion/SectorShift Mismatch';
	} else {
		throw 'Sector Shift: Expected 9 or 12 saw ' + sector_shift;
	}

	// Mini Sector Shift
	blob.chk('0600', 'Mini Sector Shift: ');

	// Reserved
	blob.chk('000000000000', 'Reserved: ');
}
/**
 * Break the file up into sectors.
 *
 * The first `ssz` bytes (the header) are not part of the result: element 0
 * is the slice starting at offset `ssz`. The final sector is whatever is
 * left, so it may be shorter than `ssz`.
 *
 * @param file container bytes (needs `.length` and `.slice()`)
 * @param ssz sector size in bytes
 * @returns array of sector slices; empty when the input holds no bytes
 *   beyond the header
 */
function sectorify(file, ssz) {
	var nsectors = Math.ceil(file.length/ssz)-1;
	// Header-only or empty input: without this guard the code below would
	// assign sectors[-1] (a stray property) or call new Array(-1), which throws.
	if(nsectors <= 0) return [];
	var sectors = new Array(nsectors);
	for(var i=1; i < nsectors; ++i) sectors[i-1] = file.slice(i*ssz,(i+1)*ssz);
	sectors[nsectors-1] = file.slice(nsectors*ssz);
	return sectors;
}
/* [MS-CFB] 2.6.4 Red-Black Tree */
/**
 * Compute the full path of every directory entry.
 *
 * FI    directory entries in directory order; reads .L, .R, .C and .type.
 *       -1 in L/R/C means "none". From the way they are used below, C is a
 *       child of the entry and L/R share the entry's parent.
 * FPD   output map, filled with full path -> entry for every index >= 1
 * FP    output array, full path per entry in the same order as FI
 * Paths entry names in the same order as FI
 */
function build_full_paths(FI, FPD, FP, Paths) {
var i = 0, L = 0, R = 0, C = 0, j = 0, pl = Paths.length;
// dad[i] is the index of the parent of entry i; dad[i] === i means "not known yet".
// q is a work queue that starts with every index and grows while it is walked.
var dad = new Array(pl), q = new Array(pl);
for(; i < pl; ++i) { dad[i]=q[i]=i; FP[i]=Paths[i]; }
for(; j < q.length; ++j) {
i = q[j];
L = FI[i].L; R = FI[i].R; C = FI[i].C;
if(dad[i] === i) {
// parent unknown: borrow it from a sibling whose parent is already known
if(L !== -1 /*NOSTREAM*/ && dad[L] !== L) dad[i] = dad[L];
if(R !== -1 && dad[R] !== R) dad[i] = dad[R];
}
if(C !== -1 /*NOSTREAM*/) dad[C] = i;
// siblings get this entry's parent and are queued again so the value propagates
if(L !== -1) { dad[L] = dad[i]; q.push(L); }
if(R !== -1) { dad[R] = dad[i]; q.push(R); }
}
// NOTE(review): L and R are not reloaded in this loop; they still hold the
// values from the last iteration of the loop above -- confirm this is intended
for(i=1; i !== pl; ++i) if(dad[i] === i) {
if(R !== -1 /*NOSTREAM*/ && dad[R] !== R) dad[i] = dad[R];
else if(L !== -1 && dad[L] !== L) dad[i] = dad[L];
}
// prefix each name with the names of its ancestors
for(i=1; i < pl; ++i) {
if(FI[i].type === 0 /* unknown */) continue;
j = dad[i];
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
// NOTE(review): this walk ends when it reaches index 0 without prepending FP[0],
// and it never ends if some dad[j] === j with j !== 0 -- verify with entries
// whose parent has a higher index than the entry itself
else while(j !== 0) {
FP[i] = FP[j] + "/" + FP[i];
j = dad[j];
}
// FP[i] is now final, so later walks that pass through i stop right after it
dad[i] = 0;
}
// every entry that is not a stream (type 2) gets a trailing slash
FP[0] += "/";
for(i=1; i < pl; ++i) {
if(FI[i].type !== 2 /* stream */) FP[i] += "/";
FPD[FP[i]] = FI[i];
}
}
/* [MS-CFB] 2.6.4 */
/**
 * Build the case-insensitive lookup function exposed as `.find`.
 *
 * Lookup keys are upper-cased, then passed through the `chr0` and `chr1`
 * patterns (defined outside this function): matches of `chr0` are removed
 * and matches of `chr1` become "!".
 *
 * @param FullPaths full paths, index-aligned with FileIndex
 * @param Paths bare entry names, used as keys into `files`
 * @param FileIndex entry objects
 * @param files map from entry name to entry object
 * @param root_name name prepended to queries that start with "/"
 * @returns function(path) yielding the matching entry or null
 */
function make_find_path(FullPaths, Paths, FileIndex, files, root_name) {
	/* canonical form shared by the index and by queries */
	function canon(str) {
		return str.toUpperCase().replace(chr0,'').replace(chr1,'!');
	}
	function canon_all(list) {
		var out = new Array(list.length);
		for(var n = 0; n < list.length; ++n) out[n] = canon(list[n]);
		return out;
	}
	var UCFullPaths = canon_all(FullPaths);
	var UCPaths = canon_all(Paths);
	return function find_path(path) {
		var absolute = path.charCodeAt(0) === 47 /* "/" */;
		if(absolute) path = root_name + path;
		// any query containing a slash is matched against the full paths
		var by_full_path = absolute || path.indexOf("/") !== -1;
		var key = canon(path);
		var w = (by_full_path ? UCFullPaths : UCPaths).indexOf(key);
		if(w === -1) return null;
		return by_full_path ? FileIndex[w] : files[Paths[w]];
	};
}
/**
 * Chase down the rest of the DIFAT chain to build a comprehensive list of
 * FAT sector addresses.
 *
 * Every entry of a DIFAT sector except the last is appended to `fat_addrs`
 * until an ENDOFCHAIN entry is met; the final 4 bytes of the sector hold the
 * number of the next DIFAT sector.
 *
 * @param idx DIFAT sector to read; ENDOFCHAIN ends the chain and -1
 *   (FREESECT) stops silently
 * @param cnt number of DIFAT sectors still expected
 * @param sectors sector array
 * @param ssz sector size in bytes
 * @param fat_addrs list that receives the addresses (mutated in place)
 * @throws string when the chain ends while `cnt` is not zero
 */
function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) {
	if(idx === ENDOFCHAIN) {
		if(cnt !== 0) throw "DIFAT chain shorter than expected";
		return;
	}
	if(idx === -1 /*FREESECT*/) return;

	var difat = sectors[idx];
	var nentries = (ssz>>>2)-1;
	for(var k = 0; k < nentries; ++k) {
		var entry = __readInt32LE(difat, k*4);
		if(entry === ENDOFCHAIN) break;
		fat_addrs.push(entry);
	}
	sleuth_fat(__readInt32LE(difat, ssz-4), cnt - 1, sectors, ssz, fat_addrs);
}
/**
 * Follow the linked list of sectors for a given starting point.
 *
 * For each sector in the chain, the number of the next sector is the 32-bit
 * FAT entry for the current sector; the walk ends at the first negative
 * value. A chain that points back at a sector it already contains (corrupt
 * FAT) is cut off at that point instead of being followed forever.
 *
 * @param sectors sector array
 * @param start first sector of the chain
 * @param fat_addrs sector numbers of the FAT sectors, in FAT order
 * @param ssz sector size in bytes
 * @param chkd optional array; every visited sector number is set to true
 * @returns {nodes, data}: sector numbers in chain order and their
 *   concatenated contents
 * @throws string if a FAT entry would straddle a sector boundary
 */
function get_sector_list(sectors, start, fat_addrs, ssz, chkd) {
	var buf = [], buf_chain = [];
	if(!chkd) chkd = new Array(sectors.length);
	var modulus = ssz - 1, j, jj;
	// Sectors already in THIS chain. chkd cannot serve here because a caller
	// may hand in an array that is already marked by other chains.
	var seen = {};
	for(j=start; j>=0;) {
		if(seen[j] === true) break; // chain loops back on itself
		seen[j] = true;
		chkd[j] = true;
		buf[buf.length] = j;
		buf_chain.push(sectors[j]);
		// locate the FAT sector and the byte offset holding the entry for j
		var addr = fat_addrs[Math.floor(j*4/ssz)];
		jj = ((j*4) & modulus);
		// defensive: for the power-of-two sizes parse uses, jj <= ssz-4
		if(ssz < 4 + jj) throw "FAT boundary crossed: " + j + " 4 "+ssz;
		j = __readInt32LE(sectors[addr], jj);
	}
	return {nodes: buf, data:__toBuffer([buf_chain])};
}
/**
 * Chase down the sector linked lists.
 *
 * Starting at `dir_start` and wrapping around, every sector that is not yet
 * part of a chain becomes the head of a new chain, which is followed through
 * the FAT until a negative entry. A chain that points back at one of its own
 * sectors (corrupt FAT) is cut off at that point instead of being followed
 * forever.
 *
 * @param sectors sector array
 * @param dir_start first directory sector; the scan starts here
 * @param fat_addrs sector numbers of the FAT sectors, in FAT order
 * @param ssz sector size in bytes
 * @returns array indexed by chain head with {nodes, data} objects; sectors
 *   that are not the head of a chain stay undefined
 * @throws string if a FAT entry would straddle a sector boundary
 */
function make_sector_list(sectors, dir_start, fat_addrs, ssz) {
	var sl = sectors.length, sector_list = new Array(sl);
	var chkd = new Array(sl), buf, buf_chain, seen;
	var modulus = ssz - 1, i, j, k, jj;
	for(i=0; i < sl; ++i) {
		k = (i + dir_start); if(k >= sl) k-=sl;
		// already swallowed by an earlier chain
		if(chkd[k] === true) continue;
		buf = [];
		buf_chain = [];
		seen = {}; // sectors already in the chain being built
		for(j=k; j>=0;) {
			if(seen[j] === true) break; // chain loops back on itself
			seen[j] = true;
			chkd[j] = true;
			buf[buf.length] = j;
			buf_chain.push(sectors[j]);
			// locate the FAT sector and the byte offset holding the entry for j
			var addr = fat_addrs[Math.floor(j*4/ssz)];
			jj = ((j*4) & modulus);
			if(ssz < 4 + jj) throw "FAT boundary crossed: " + j + " 4 "+ssz;
			j = __readInt32LE(sectors[addr], jj);
		}
		sector_list[k] = {nodes: buf, data:__toBuffer([buf_chain])};
	}
	return sector_list;
}
/* [MS-CFB] 2.6.1 Compound File Directory Entry */
/**
 * Decode the directory chain into entry objects.
 *
 * The data of the chain that starts at dir_start is processed in 128-byte
 * records. For every record with a non-zero name length the name is appended
 * to Paths, the entry object is stored in files[name] (entries that share a
 * name overwrite each other there) and pushed onto FileIndex.
 *
 * dir_start    first directory sector (index into sector_list)
 * sector_list  chains from make_sector_list; its fat_addrs and ssz
 *              properties are read when a chain has to be built on demand
 * sectors      sector array
 * Paths        output: entry names in directory order
 * nmfs         number of mini FAT sectors
 * files        output: name -> entry
 * FileIndex    output: entries in directory order
 */
function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex) {
var blob;
// pl is computed once from the incoming Paths (0 when it starts empty, as in
// parse) and is subtracted from the stored name length below
var minifat_store = 0, pl = (Paths.length?2:0);
var sector = sector_list[dir_start].data;
var i = 0, namelen = 0, name, o, ctime, mtime;
for(; i < sector.length; i+= 128) {
blob = sector.slice(i, i+128);
// reading starts at offset 64 with the name length; the name itself is
// decoded from offset 0
prep_blob(blob, 64);
namelen = blob.read_shift(2);
// records with a zero name length are skipped
if(namelen === 0) continue;
name = __utf16le(blob,0,namelen-pl);
Paths.push(name);
// the fields are consumed from the blob in the order the properties are listed
o = {
name: name,
type: blob.read_shift(1),
color: blob.read_shift(1),
L: blob.read_shift(4, 'i'),
R: blob.read_shift(4, 'i'),
C: blob.read_shift(4, 'i'),
clsid: blob.read_shift(16),
state: blob.read_shift(4, 'i')
};
// the sum of the four 16-bit words only decides whether a timestamp is stored;
// read_date then decodes the same 8 bytes that were just consumed
ctime = blob.read_shift(2) + blob.read_shift(2) + blob.read_shift(2) + blob.read_shift(2);
if(ctime !== 0) {
o.ctime = ctime; o.ct = read_date(blob, blob.l-8);
}
mtime = blob.read_shift(2) + blob.read_shift(2) + blob.read_shift(2) + blob.read_shift(2);
if(mtime !== 0) {
o.mtime = mtime; o.mt = read_date(blob, blob.l-8);
}
o.start = blob.read_shift(4, 'i');
o.size = blob.read_shift(4, 'i');
if(o.type === 5) { /* root */
// the root entry's start sector is the chain the small entries below are sliced from
minifat_store = o.start;
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
/*minifat_size = o.size;*/
} else if(o.size >= 4096 /* MSCSZ */) {
o.storage = 'fat';
// the chain may be missing from sector_list; build it on demand from the FAT
if(sector_list[o.start] === undefined) sector_list[o.start] = get_sector_list(sectors, o.start, sector_list.fat_addrs, sector_list.ssz);
sector_list[o.start].name = o.name;
o.content = sector_list[o.start].data.slice(0,o.size);
prep_blob(o.content, 0);
} else {
o.storage = 'minifat';
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
// NOTE(review): this takes one contiguous range of MSSZ-sized mini sectors and
// does not follow a mini FAT chain -- confirm this is sufficient
o.content = sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size);
prep_blob(o.content, 0);
}
}
files[name] = o;
FileIndex.push(o);
}
}
/**
 * Decode the 8-byte timestamp stored at `offset` into a Date.
 *
 * The value is read as two 32-bit words (low word first) counting 1e-7
 * second ticks; 11644473600 seconds are subtracted before converting to
 * milliseconds, so an all-zero timestamp maps to 1601-01-01T00:00:00Z.
 *
 * @param blob buffer to read from
 * @param offset byte offset of the low word
 * @returns Date
 */
function read_date(blob, offset) {
	var hi = __readUInt32LE(blob, offset+4);
	var lo = __readUInt32LE(blob, offset);
	// each word is scaled to seconds before the two are combined
	var seconds = (hi/1e7)*Math.pow(2,32) + lo/1e7;
	return new Date((seconds - 11644473600)*1000);
}
var fs;
function readFileSync(filename, options) {
if(fs === undefined) fs = require('fs');
return parse(fs.readFileSync(filename), options);
}
/**
 * Read a container from one of several input forms.
 *
 * `options.type` selects how `blob` is interpreted:
 *   "file"    blob is a path, read through readFileSync
 *   "base64"  blob is a Base64 string (also used when options or
 *             options.type is missing)
 *   "binary"  blob is a binary string
 *   other     blob is handed to parse as is
 *
 * @param blob input in the form selected by options.type
 * @param options optional settings; forwarded to parse in every branch
 * @returns the result of parse
 */
function readSync(blob, options) {
	// `!= null` also accepts an explicit null options argument
	var type = options != null && options.type !== undefined ? options.type : "base64";
	switch(type) {
		case "file": return readFileSync(blob, options);
		case "base64": return parse(s2a(Base64.decode(blob)), options);
		case "binary": return parse(s2a(blob), options);
	}
	// unknown type: forward options here too, like the branches above
	return parse(blob, options);
}
/** CFB Constants */
var MSSZ = 64; /* Mini Sector Size = 1<<6 */
//var MSCSZ = 4096; /* Mini Stream Cutoff Size */
/* 2.1 Compound File Sector Numbers and Types */
var ENDOFCHAIN = -2;
/* 2.2 Compound File Header */
var HEADER_SIGNATURE = 'd0cf11e0a1b11ae1';
var HEADER_CLSID = '00000000000000000000000000000000';
var consts = {
/* 2.1 Compund File Sector Numbers and Types */
MAXREGSECT: -6,
DIFSECT: -4,
FATSECT: -3,
ENDOFCHAIN: ENDOFCHAIN,
FREESECT: -1,
/* 2.2 Compound File Header */
HEADER_SIGNATURE: HEADER_SIGNATURE,
HEADER_MINOR_VERSION: '3e00',
MAXREGSID: -6,
NOSTREAM: -1,
HEADER_CLSID: HEADER_CLSID,
/* 2.6.1 Compound File Directory Entry */
EntryTypes: ['unknown','storage','stream','lockbytes','property','root']
};
exports.read = readSync;
exports.parse = parse;
exports.utils = {
ReadShift: ReadShift,
CheckField: CheckField,
prep_blob: prep_blob,
bconcat: bconcat,
consts: consts
};
return exports;
})();
/* Export through CommonJS only when DO_NOT_EXPORT_CFB is not declared; this file declares it on its first line, so CFB stays a plain variable here. */
if(typeof require !== 'undefined' && typeof module !== 'undefined' && typeof DO_NOT_EXPORT_CFB === 'undefined') { module.exports = CFB; }

11
misc/strip_sourcemap.sh Executable file

@ -0,0 +1,11 @@
#!/bin/bash
# strip_sourcemap.sh -- strip sourcemaps from a JS file (missing from uglifyjs)
# Copyright (C) 2014 SheetJS
#
# usage: strip_sourcemap.sh [file]
#   with a file:  delete every line containing "sourceMappingURL" in place,
#                 keeping the original as <file>.sheetjs; a path that does
#                 not exist is ignored
#   without args: filter stdin to stdout

if [ $# -gt 0 ]; then
	if [ -e "$1" ]; then
		# The suffix must be attached to -i: GNU sed reads a detached
		# ".sheetjs" as the script, while BSD sed accepts both spellings.
		sed -i.sheetjs '/sourceMappingURL/d' "$1"
	fi
else
	sed '/sourceMappingURL/d'
fi

1
misc/suppress_export.js Normal file

@ -0,0 +1 @@
var DO_NOT_EXPORT_CFB = true;

@ -1,6 +1,6 @@
{
"name": "cfb",
"version": "0.10.1",
"version": "0.10.2",
"author": "sheetjs",
"description": "Compound File Binary File Format extractor",
"keywords": [ "cfb", "compression", "office" ],

26
test.js

@ -3,21 +3,35 @@ var CFB;
var fs = require('fs');
describe('source', function() { it('should load', function() { CFB = require('./'); }); });
var ffunc = function(x){return x.substr(-4)==".xls" && fails.indexOf(x) === -1;};
var ex = [".xls",".doc",".ppt"];
if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
if(process.env.FMTS === "full") process.env.FMTS = ex.join(":");
if(process.env.FMTS) ex=process.env.FMTS.split(":").map(function(x){return x[0]==="."?x:"."+x;});
var ffunc = function(x){return (ex.indexOf(x.substr(-4))>=0 || ex.indexOf(x.substr(-3))>=0) && fails.indexOf(x) === -1;};
/* File names to exclude, one per line of ./fails.lst; no exclusions when that file is absent. */
var fails = fs.existsSync('./fails.lst') ? fs.readFileSync('./fails.lst', 'utf-8').split("\n") : [];
/* Directory listings of each corpus, filtered through ffunc. */
var files = fs.readdirSync('test_files').filter(ffunc);
var f2011 = fs.readdirSync('test_files/2011').filter(ffunc);
var f2013 = fs.readdirSync('test_files/2013').filter(ffunc);
var fpres = fs.readdirSync('test_files_pres').filter(ffunc);
/* Path of the main corpus directory, with trailing slash. */
var dir = "./test_files/";
/**
 * Register mocha checks that a parsed container exposes the main stream
 * expected for its file type, both by bare name and by absolute path.
 *
 * The expected stream depends on the last four characters of `x`:
 *   .xls -> "Workbook" or "Book"
 *   .ppt -> "PowerPoint Document"
 *   .doc -> "WordDocument" or "Word Document"
 * Any other extension is accepted without a check.
 *
 * The lookup is made only inside the per-extension switch: an unconditional
 * Workbook/Book test ahead of it would reject every .ppt and .doc file.
 *
 * @param {string} x   file name, used for the extension and in messages
 * @param {object} cfb parsed container; only `cfb.find(path)` is used
 */
function parsetest(x, cfb) {
	/* throw unless the main stream for this file type is found under `prefix` */
	function check_main_stream(prefix) {
		switch(x.substr(-4)) {
			case '.xls': if(!cfb.find(prefix + 'Workbook') && !cfb.find(prefix + 'Book')) throw new Error("Cannot find workbook for " + x); break;
			case '.ppt': if(!cfb.find(prefix + 'PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break;
			case '.doc': if(!cfb.find(prefix + 'WordDocument') && !cfb.find(prefix + 'Word Document')) throw new Error("Cannot find doc for " + x); break;
		}
	}
	describe(x + ' should have basic parts', function() {
		it('should find relative path', function() { check_main_stream(''); });
		it('should find absolute path', function() { check_main_stream('/'); });
	});
}
@ -29,6 +43,12 @@ describe('should parse test files', function() {
parsetest(x, cfb);
});
});
/* One test per file listed from test_files_pres: read it from disk, then run the shared parsetest checks. */
fpres.forEach(function(x) {
it('should parse ' + x, function() {
var cfb = CFB.read('./test_files_pres/' + x, {type: "file"});
parsetest(x, cfb);
});
});
f2011.forEach(function(x) {
it('should parse ' + x, function() {
var cfb = CFB.read('./test_files/2011/' + x, {type: "file"});

383
xlscfb.js Normal file

@ -0,0 +1,383 @@
var DO_NOT_EXPORT_CFB = true;
/* cfb.js (C) 2013-2014 SheetJS -- http://sheetjs.com */
/* vim: set ts=2: */
/*jshint eqnull:true */
/* [MS-CFB] v20130118 */
var CFB = (function _CFB(){
var exports = {};
exports.version = '0.10.2';
/**
 * Parse a complete compound file held in memory.
 *
 * Steps, in order: validate and read the header, split the rest of the file
 * into sectors, complete the FAT sector list from the DIFAT chain, follow the
 * sector chains, read the directory, and build full paths plus a lookup
 * function.
 *
 * Only one parameter is declared; the extra `options` argument that
 * readSync/readFileSync pass is ignored here.
 *
 * @param file  the whole file; must provide `.slice` and `.length`
 *              (presumably a Buffer or byte array -- prep_blob and the
 *              read helpers are defined elsewhere)
 * @returns {{raw: {header, sectors}, FileIndex: Array, FullPaths: Array,
 *            FullPathDir: Object, find: function}}
 * @throws a string when the major version is not 3 or 4, or when a version 3
 *         file reports a non-zero directory sector count; the helpers called
 *         here throw on other header problems
 */
function parse(file) {
var mver = 3; // major version
var ssz = 512; // sector size
var nmfs = 0; // number of mini FAT sectors
var ndfs = 0; // number of DIFAT sectors
var dir_start = 0; // first directory sector location
var minifat_start = 0; // first mini FAT sector location
var difat_start = 0; // first DIFAT sector location
var fat_addrs = []; // locations of FAT sectors
/* [MS-CFB] 2.2 Compound File Header */
var blob = file.slice(0,512);
prep_blob(blob, 0);
/* major version */
var mv = check_get_mver(blob);
mver = mv[0];
switch(mver) {
case 3: ssz = 512; break; case 4: ssz = 4096; break;
default: throw "Major Version: Expected 3 or 4 saw " + mver;
}
/* reprocess header */
/* for 4096-byte sectors re-slice the header at full sector size and resume at
   offset 28, just past the fields check_get_mver consumed (8 + 16 + 2 + 2) */
if(ssz !== 512) { blob = file.slice(0,ssz); prep_blob(blob, 28 /* blob.l */); }
/* Save header for final object */
var header = file.slice(0,ssz);
check_shifts(blob, mver);
// Number of Directory Sectors
var nds = blob.read_shift(4, 'i');
if(mver === 3 && nds !== 0) throw '# Directory Sectors: Expected 0 saw ' + nds;
// Number of FAT Sectors
//var nfs = blob.read_shift(4, 'i');
blob.l += 4;
// First Directory Sector Location
dir_start = blob.read_shift(4, 'i');
// Transaction Signature
blob.l += 4;
// Mini Stream Cutoff Size
blob.chk('00100000', 'Mini Stream Cutoff Size: ');
// First Mini FAT Sector Location
minifat_start = blob.read_shift(4, 'i');
// Number of Mini FAT Sectors
nmfs = blob.read_shift(4, 'i');
// First DIFAT sector location
difat_start = blob.read_shift(4, 'i');
// Number of DIFAT Sectors
ndfs = blob.read_shift(4, 'i');
// Grab FAT Sector Locations
/* stops at the first negative entry, so fat_addrs holds only the leading run of in-use slots */
for(var q, j = 0; j < 109; ++j) { /* 109 = (512 - blob.l)>>>2; */
q = blob.read_shift(4, 'i');
if(q<0) break;
fat_addrs[j] = q;
}
/** Break the file up into sectors */
var sectors = sectorify(file, ssz);
/* append the FAT sector locations stored in the DIFAT chain to fat_addrs */
sleuth_fat(difat_start, ndfs, sectors, ssz, fat_addrs);
/** Chains */
var sector_list = make_sector_list(sectors, dir_start, fat_addrs, ssz);
/* label the well-known chains by their first sector */
sector_list[dir_start].name = "!Directory";
if(nmfs > 0 && minifat_start !== ENDOFCHAIN) sector_list[minifat_start].name = "!MiniFAT";
sector_list[fat_addrs[0]].name = "!FAT";
/* read_directory reads these two properties when it has to build a chain itself */
sector_list.fat_addrs = fat_addrs;
sector_list.ssz = ssz;
/* [MS-CFB] 2.6.1 Compound File Directory Entry */
var files = {}, Paths = [], FileIndex = [], FullPaths = [], FullPathDir = {};
read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex);
build_full_paths(FileIndex, FullPathDir, FullPaths, Paths);
/* drop the first directory entry's name from Paths and keep it as Paths.root;
   from here on Paths[i] corresponds to FileIndex[i+1] */
var root_name = Paths.shift();
Paths.root = root_name;
/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */
var find_path = make_find_path(FullPaths, Paths, FileIndex, files, root_name);
return {
raw: {header: header, sectors: sectors},
FileIndex: FileIndex,
FullPaths: FullPaths,
FullPathDir: FullPathDir,
find: find_path
};
} // parse
/**
 * [MS-CFB] 2.2 Compound File Header -- validate the leading fields and read
 * the two version numbers.
 *
 * Checks the 8-byte signature and the 16-byte CLSID with blob.chk, then reads
 * the minor version followed by the major version (2 bytes each).
 *
 * @param blob prepared header blob positioned at offset 0
 * @returns {Array} [major version, minor version]
 */
function check_get_mver(blob) {
	blob.chk(HEADER_SIGNATURE, 'Header Signature: ');
	blob.chk(HEADER_CLSID, 'CLSID: ');
	/* the minor version comes first in the file, the major version is returned first */
	var minor_version = blob.read_shift(2, 'u');
	var major_version = blob.read_shift(2, 'u');
	return [major_version, minor_version];
}
/**
 * Validate the header fields that follow the version numbers: byte order,
 * sector shift, mini sector shift and the reserved bytes.
 *
 * The sector shift must agree with the major version: 9 goes with version 3
 * and 12 with version 4.
 *
 * @param blob prepared header blob positioned just after the major version
 * @param {number} mver major version
 * @throws a string on a version/shift mismatch or an unexpected sector shift;
 *         blob.chk handles the fixed-value fields
 */
function check_shifts(blob, mver) {
	// Byte Order
	blob.chk('feff', 'Byte Order: ');
	// Sector Shift
	var sector_shift = blob.read_shift(2);
	if(sector_shift === 0x09) {
		if(mver !== 3) throw 'MajorVersion/SectorShift Mismatch';
	} else if(sector_shift === 0x0c) {
		if(mver !== 4) throw 'MajorVersion/SectorShift Mismatch';
	} else {
		throw 'Sector Shift: Expected 9 or 12 saw ' + sector_shift;
	}
	// Mini Sector Shift
	blob.chk('0600', 'Mini Sector Shift: ');
	// Reserved
	blob.chk('000000000000', 'Reserved: ');
}
/**
 * Break the file up into sectors.
 *
 * The first `ssz` bytes (the header) are not included: sectors[k] covers the
 * file range [(k+1)*ssz, (k+2)*ssz).  The last sector keeps whatever bytes
 * remain, so it may be shorter than `ssz`.
 *
 * @param file whole file; must provide `.length` and `.slice`
 * @param {number} ssz sector size in bytes
 * @returns {Array} sector slices; empty when the file holds nothing beyond
 *                  the header
 */
function sectorify(file, ssz) {
	var nsectors = Math.ceil(file.length/ssz)-1;
	/* With at most one sector of input there is nothing after the header.
	   Without this guard a header-only file stored a stray sectors[-1] and an
	   empty file made `new Array(-1)` throw a RangeError. */
	if(nsectors <= 0) return [];
	var sectors = new Array(nsectors);
	for(var i=1; i < nsectors; ++i) sectors[i-1] = file.slice(i*ssz,(i+1)*ssz);
	sectors[nsectors-1] = file.slice(nsectors*ssz);
	return sectors;
}
/* [MS-CFB] 2.6.4 Red-Black Tree */
/**
 * Derive each directory entry's full path from the L/R/C links of the
 * entries.
 *
 * @param FI    directory entries (FileIndex); each has L, R, C and type.
 *              The /*NOSTREAM*\/ markers below suggest L and R are sibling
 *              links and C the child link, with -1 meaning "none".
 * @param FPD   output object: full path -> entry, filled for indices >= 1
 * @param FP    output array: FP[i] becomes the full path of entry i
 * @param Paths entry names, parallel to FI
 *
 * Full paths of non-stream entries (type !== 2) end with "/", and so does FP[0].
 */
function build_full_paths(FI, FPD, FP, Paths) {
var i = 0, L = 0, R = 0, C = 0, j = 0, pl = Paths.length;
/* dad[i]: index of the parent of entry i, with dad[i] === i meaning "not yet
   known"; q: work queue, initially holding every index once */
var dad = new Array(pl), q = new Array(pl);
for(; i < pl; ++i) { dad[i]=q[i]=i; FP[i]=Paths[i]; }
/* q grows while it is being walked: every L/R link is appended again so that
   a parent discovered late is pushed on to the linked entries.
   NOTE(review): nothing here stops re-queuing, so this presumably relies on
   the links being acyclic -- confirm behaviour on corrupt directories. */
for(; j < q.length; ++j) {
i = q[j];
L = FI[i].L; R = FI[i].R; C = FI[i].C;
if(dad[i] === i) {
/* parent unknown: take it from a linked entry whose parent is known */
if(L !== -1 /*NOSTREAM*/ && dad[L] !== L) dad[i] = dad[L];
if(R !== -1 && dad[R] !== R) dad[i] = dad[R];
}
if(C !== -1 /*NOSTREAM*/) dad[C] = i;
/* entries linked through L/R get the same parent as this entry */
if(L !== -1) { dad[L] = dad[i]; q.push(L); }
if(R !== -1) { dad[R] = dad[i]; q.push(R); }
}
/* NOTE(review): L and R are not reloaded from FI[i] in this loop; they still
   hold the values from the last entry handled above.  Looks unintended --
   confirm before relying on this pass. */
for(i=1; i !== pl; ++i) if(dad[i] === i) {
if(R !== -1 /*NOSTREAM*/ && dad[R] !== R) dad[i] = dad[R];
else if(L !== -1 && dad[L] !== L) dad[i] = dad[L];
}
/* prefix every name with the names of its ancestors, walking dad[] to index 0 */
for(i=1; i < pl; ++i) {
if(FI[i].type === 0 /* unknown */) continue;
j = dad[i];
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
else while(j !== 0) {
FP[i] = FP[j] + "/" + FP[i];
j = dad[j];
}
/* FP[i] is complete now; pointing dad[i] at 0 ends the walk at i for any
   later entry that has i as an ancestor */
dad[i] = 0;
}
FP[0] += "/";
for(i=1; i < pl; ++i) {
if(FI[i].type !== 2 /* stream */) FP[i] += "/";
FPD[FP[i]] = FI[i];
}
}
/* [MS-CFB] 2.6.4 */
/**
 * Build the lookup function exposed as `find` on a parsed container.
 *
 * Every stored name and every query is normalised the same way before it is
 * compared: upper-cased, with chr0 matches removed and a chr1 match replaced
 * by "!".  The normalised tables are computed once, when this function runs.
 *
 * The returned function treats its argument as a full path when it starts
 * with "/" (the root name is prepended) or contains "/" anywhere; otherwise
 * it is matched against the bare entry names.
 *
 * @param FullPaths full paths, parallel to FileIndex
 * @param Paths     bare entry names used as keys into `files`
 * @param FileIndex directory entries
 * @param files     entry name -> directory entry
 * @param root_name name prepended to queries that start with "/"
 * @returns {function(string): (Object|null)} entry, or null when not found
 */
function make_find_path(FullPaths, Paths, FileIndex, files, root_name) {
	function normalize(s) {
		return s.toUpperCase().replace(chr0,'').replace(chr1,'!');
	}
	var UCFullPaths = new Array(FullPaths.length);
	var UCPaths = new Array(Paths.length);
	var n;
	for(n = 0; n < FullPaths.length; ++n) UCFullPaths[n] = normalize(FullPaths[n]);
	for(n = 0; n < Paths.length; ++n) UCPaths[n] = normalize(Paths[n]);
	return function find_path(path) {
		var is_full;
		if(path.charCodeAt(0) === 47 /* "/" */) {
			is_full = true;
			path = root_name + path;
		} else {
			is_full = path.indexOf("/") !== -1;
		}
		var needle = normalize(path);
		var idx;
		if(is_full === true) {
			idx = UCFullPaths.indexOf(needle);
			return idx === -1 ? null : FileIndex[idx];
		}
		idx = UCPaths.indexOf(needle);
		return idx === -1 ? null : files[Paths[idx]];
	};
}
/**
 * Chase down the rest of the DIFAT chain and append the FAT sector locations
 * it lists to `fat_addrs`.
 *
 * Every DIFAT sector holds (ssz/4)-1 entries followed by the number of the
 * next DIFAT sector in its last 4 bytes (one 32-bit integer).  Entries are
 * taken until an ENDOFCHAIN entry is met.
 *
 * @param {number} idx  first DIFAT sector, ENDOFCHAIN, or -1 (FREESECT)
 * @param {number} cnt  number of DIFAT sectors the header announced
 * @param sectors       sector array
 * @param {number} ssz  sector size in bytes
 * @param {Array} fat_addrs list extended in place
 * @throws a string when the chain ends while `cnt` has not reached zero
 */
function sleuth_fat(idx, cnt, sectors, ssz, fat_addrs) {
	var per_sector = (ssz>>>2)-1, link_offset = ssz-4;
	var cur = idx, remaining = cnt, sector, entry, k;
	while(cur !== ENDOFCHAIN) {
		/* a FREESECT link stops the walk without checking the count */
		if(cur === -1 /*FREESECT*/) return;
		sector = sectors[cur];
		for(k = 0; k < per_sector; ++k) {
			entry = __readInt32LE(sector, k*4);
			if(entry === ENDOFCHAIN) break;
			fat_addrs.push(entry);
		}
		cur = __readInt32LE(sector, link_offset);
		remaining = remaining - 1;
	}
	if(remaining !== 0) throw "DIFAT chain shorter than expected";
}
/**
 * Follow the linked list of sectors for a given starting point.
 *
 * The successor of sector j is the 32-bit FAT entry number j; that entry is
 * stored in FAT sector fat_addrs[floor(j*4/ssz)] at byte offset (j*4) mod ssz.
 * The walk ends at the first negative successor.
 *
 * Two guards protect against damaged files:
 *  - a chain that loops back onto a sector it already contains is cut at
 *    that point instead of being followed forever;
 *  - a FAT sector that is not present in `sectors` ends the chain instead of
 *    raising a TypeError from the read helper.
 *
 * @param sectors       sector array
 * @param {number} start first sector of the chain
 * @param {Array} fat_addrs locations of the FAT sectors
 * @param {number} ssz  sector size in bytes
 * @param {Array} [chkd] optional visited flags; chkd[j] is set to true for
 *                       every sector placed in the chain
 * @returns {{nodes: Array, data: *}} sector numbers in chain order and the
 *          chain contents joined by __toBuffer
 * @throws a string if a FAT entry would straddle a sector boundary
 */
function get_sector_list(sectors, start, fat_addrs, ssz, chkd) {
	var sl = sectors.length;
	if(!chkd) chkd = new Array(sl);
	var modulus = ssz - 1, j, jj, addr;
	var buf = [], buf_chain = [];
	/* sectors already in THIS chain; chkd cannot be used for this because the
	   caller may pass in flags set by other chains */
	var seen = [];
	for(j=start; j>=0;) {
		if(seen[j] === true) break;
		seen[j] = true;
		chkd[j] = true;
		buf[buf.length] = j;
		buf_chain.push(sectors[j]);
		addr = fat_addrs[Math.floor(j*4/ssz)];
		jj = ((j*4) & modulus);
		if(ssz < 4 + jj) throw "FAT boundary crossed: " + j + " 4 "+ssz;
		if(!sectors[addr]) break;
		j = __readInt32LE(sectors[addr], jj);
	}
	return {nodes: buf, data:__toBuffer([buf_chain])};
}
/**
 * Chase down the sector linked lists for the whole file.
 *
 * Sectors are visited starting at `dir_start` and wrapping around.  Each
 * sector that is not yet part of a chain becomes the head of a new one, so
 * the result has an entry at the first sector of every chain found and holes
 * elsewhere.
 *
 * The successor of sector j is the 32-bit FAT entry number j, stored in FAT
 * sector fat_addrs[floor(j*4/ssz)] at byte offset (j*4) mod ssz; a negative
 * successor ends the chain.
 *
 * Two guards protect against damaged files:
 *  - a chain that loops back onto a sector it already contains is cut at
 *    that point instead of being followed forever;
 *  - a FAT sector that is not present in `sectors` ends the chain instead of
 *    raising a TypeError from the read helper.
 *
 * @param sectors          sector array
 * @param {number} dir_start first directory sector
 * @param {Array} fat_addrs locations of the FAT sectors
 * @param {number} ssz     sector size in bytes
 * @returns {Array} sparse array: chain head -> {nodes, data}
 * @throws a string if a FAT entry would straddle a sector boundary
 */
function make_sector_list(sectors, dir_start, fat_addrs, ssz) {
	var sl = sectors.length, sector_list = new Array(sl);
	var chkd = new Array(sl), buf, buf_chain, seen;
	var modulus = ssz - 1, i, j, k, jj, addr;
	for(i=0; i < sl; ++i) {
		buf = [];
		k = (i + dir_start); if(k >= sl) k-=sl;
		if(chkd[k] === true) continue;
		buf_chain = [];
		/* sectors already in the chain being built (chkd spans all chains) */
		seen = [];
		for(j=k; j>=0;) {
			if(seen[j] === true) break;
			seen[j] = true;
			chkd[j] = true;
			buf[buf.length] = j;
			buf_chain.push(sectors[j]);
			addr = fat_addrs[Math.floor(j*4/ssz)];
			jj = ((j*4) & modulus);
			if(ssz < 4 + jj) throw "FAT boundary crossed: " + j + " 4 "+ssz;
			if(!sectors[addr]) break;
			j = __readInt32LE(sectors[addr], jj);
		}
		sector_list[k] = {nodes: buf, data:__toBuffer([buf_chain])};
	}
	return sector_list;
}
/* [MS-CFB] 2.6.1 Compound File Directory Entry */
/**
 * Walk the directory chain in 128-byte entries and record every entry whose
 * name length is non-zero.
 *
 * @param dir_start   first directory sector; sector_list[dir_start].data is
 *                    the directory contents
 * @param sector_list chains from make_sector_list; its fat_addrs and ssz
 *                    properties are used when a chain has to be built here
 * @param sectors     sector array
 * @param Paths       output: entry names, in directory order
 * @param nmfs        number of mini FAT sectors from the header
 * @param files       output: entry name -> entry (a later entry with the same
 *                    name overwrites an earlier one)
 * @param FileIndex   output: entries, in directory order
 *
 * Each entry object has name, type, color, L, R, C, clsid, state, start and
 * size; ctime/ct and mtime/mt are added when the stored timestamp is
 * non-zero, and storage/content for entries other than the root.
 */
function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, FileIndex) {
var blob;
/* pl is fixed before the loop: 2 is subtracted from every name length only
   when Paths already held entries on entry to this function */
var minifat_store = 0, pl = (Paths.length?2:0);
var sector = sector_list[dir_start].data;
var i = 0, namelen = 0, name, o, ctime, mtime;
for(; i < sector.length; i+= 128) {
blob = sector.slice(i, i+128);
/* prep_blob is defined elsewhere; presumably it positions the read cursor at
   offset 64, where the name length is read -- confirm */
prep_blob(blob, 64);
namelen = blob.read_shift(2);
if(namelen === 0) continue;
name = __utf16le(blob,0,namelen-pl);
Paths.push(name);
/* the reads below advance the blob cursor, so their order is the field order */
o = {
name: name,
type: blob.read_shift(1),
color: blob.read_shift(1),
L: blob.read_shift(4, 'i'),
R: blob.read_shift(4, 'i'),
C: blob.read_shift(4, 'i'),
clsid: blob.read_shift(16),
state: blob.read_shift(4, 'i')
};
/* the sum of the four 16-bit words only serves as an "is it non-zero" test
   (it is also what gets stored in o.ctime); the date itself is decoded from
   the 8 bytes just consumed */
ctime = blob.read_shift(2) + blob.read_shift(2) + blob.read_shift(2) + blob.read_shift(2);
if(ctime !== 0) {
o.ctime = ctime; o.ct = read_date(blob, blob.l-8);
}
mtime = blob.read_shift(2) + blob.read_shift(2) + blob.read_shift(2) + blob.read_shift(2);
if(mtime !== 0) {
o.mtime = mtime; o.mt = read_date(blob, blob.l-8);
}
o.start = blob.read_shift(4, 'i');
o.size = blob.read_shift(4, 'i');
if(o.type === 5) { /* root */
/* the root entry's start sector is remembered as the chain that the
   'minifat' branch below slices content from */
minifat_store = o.start;
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
/*minifat_size = o.size;*/
} else if(o.size >= 4096 /* MSCSZ */) {
o.storage = 'fat';
/* build the chain on demand when make_sector_list left no entry at o.start */
if(sector_list[o.start] === undefined) sector_list[o.start] = get_sector_list(sectors, o.start, sector_list.fat_addrs, sector_list.ssz);
sector_list[o.start].name = o.name;
o.content = sector_list[o.start].data.slice(0,o.size);
prep_blob(o.content, 0);
} else {
o.storage = 'minifat';
/* content is one contiguous slice starting at o.start*MSSZ; no mini FAT
   chain is followed here.
   NOTE(review): minifat_store is still 0 for any entry read before the root
   entry -- presumably the root always comes first; confirm. */
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
o.content = sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size);
prep_blob(o.content, 0);
}
}
files[name] = o;
FileIndex.push(o);
}
}
/**
 * Decode the 8-byte timestamp stored at `offset` into a Date.
 *
 * The value is read as two little-endian unsigned 32-bit halves (low half
 * first in the blob) counting units of 1e-7 seconds; 11644473600 seconds are
 * subtracted before converting to milliseconds for the Date constructor.
 *
 * @param blob   byte container understood by __readUInt32LE
 * @param {number} offset position of the first timestamp byte
 * @returns {Date}
 */
function read_date(blob, offset) {
	var hi = __readUInt32LE(blob,offset+4);
	var lo = __readUInt32LE(blob,offset);
	var seconds = (hi/1e7)*Math.pow(2,32) + lo/1e7;
	return new Date((seconds - 11644473600)*1000);
}
/* node `fs` module, loaded on the first readFileSync call */
var fs;
/**
 * Read `filename` from disk and parse its contents.
 *
 * @param {string} filename path handed to fs.readFileSync
 * @param options  passed through to parse
 * @returns the object produced by parse
 */
function readFileSync(filename, options) {
	if(typeof fs === 'undefined') fs = require('fs');
	var contents = fs.readFileSync(filename);
	return parse(contents, options);
}
/**
 * Parse a container supplied in one of several forms, chosen by options.type:
 *   "file"   -- `blob` is a file name, handled by readFileSync
 *   "base64" -- `blob` is Base64 text (the default when no type is given)
 *   "binary" -- `blob` is a binary string
 * Any other type hands `blob` to parse unchanged, without the options.
 *
 * @param blob     input in the form named by options.type
 * @param [options] object whose `type` property selects the input form
 * @returns the object produced by parse
 */
function readSync(blob, options) {
	var type = "base64";
	if(options !== undefined && options.type !== undefined) type = options.type;
	if(type === "file") return readFileSync(blob, options);
	if(type === "base64") return parse(s2a(Base64.decode(blob)), options);
	if(type === "binary") return parse(s2a(blob), options);
	return parse(blob);
}
/** CFB Constants */
var MSSZ = 64; /* Mini Sector Size = 1<<6 */
//var MSCSZ = 4096; /* Mini Stream Cutoff Size */
/* 2.1 Compound File Sector Numbers and Types */
var ENDOFCHAIN = -2;
/* 2.2 Compound File Header */
var HEADER_SIGNATURE = 'd0cf11e0a1b11ae1';
var HEADER_CLSID = '00000000000000000000000000000000';
var consts = {
/* 2.1 Compund File Sector Numbers and Types */
MAXREGSECT: -6,
DIFSECT: -4,
FATSECT: -3,
ENDOFCHAIN: ENDOFCHAIN,
FREESECT: -1,
/* 2.2 Compound File Header */
HEADER_SIGNATURE: HEADER_SIGNATURE,
HEADER_MINOR_VERSION: '3e00',
MAXREGSID: -6,
NOSTREAM: -1,
HEADER_CLSID: HEADER_CLSID,
/* 2.6.1 Compound File Directory Entry */
EntryTypes: ['unknown','storage','stream','lockbytes','property','root']
};
exports.read = readSync;
exports.parse = parse;
exports.utils = {
ReadShift: ReadShift,
CheckField: CheckField,
prep_blob: prep_blob,
bconcat: bconcat,
consts: consts
};
return exports;
})();
if(typeof require !== 'undefined' && typeof module !== 'undefined' && typeof DO_NOT_EXPORT_CFB === 'undefined') { module.exports = CFB; }