version bump 0.12.1: CFB.find
- exported object includes static find method
- CLI option displays file manifest
- ignore mid-table free sectors
- ignore lazily-deleted files (orphan nodes)
This commit is contained in:
parent
17f35153f8
commit
2d0c71ac88
24
README.md
24
README.md
@ -5,7 +5,7 @@ format used in many Microsoft file types (such as XLS and DOC)
|
||||
|
||||
# Utility Installation and Usage
|
||||
|
||||
The package is available on NPM:
|
||||
With [npm](https://www.npmjs.org/package/cfb):
|
||||
|
||||
```bash
|
||||
$ npm install -g cfb
|
||||
@ -14,7 +14,7 @@ $ cfb path/to/CFB/file
|
||||
|
||||
The command will extract the storages and streams in the container, generating
|
||||
files that line up with the tree-based structure of the storage. Metadata such
|
||||
as the red-black tree are discarded.
|
||||
as the red-black tree are discarded. The `-l` option displays a manifest.
|
||||
|
||||
# Library Installation and Usage
|
||||
|
||||
@ -34,9 +34,12 @@ For example, to get the Workbook content from an XLS file:
|
||||
|
||||
```js
|
||||
var cfb = CFB.read(filename, {type: 'file'});
|
||||
var workbook = cfb.find('Workbook')
|
||||
var workbook = cfb.find('Workbook');
|
||||
var data = workbook.content;
|
||||
```
|
||||
|
||||
The `xlscfb.js` file is designed to be embedded in [js-xlsx](http://git.io/xlsx)
|
||||
|
||||
# API
|
||||
|
||||
Typescript definitions are maintained in `types/index.d.ts`.
|
||||
@ -52,14 +55,15 @@ parsed representation of the data.
|
||||
- `base64`: `blob` should be a base64 string
|
||||
- `binary`: `blob` should be a binary string
|
||||
|
||||
`CFB.find(cfb, path)` performs a case-insensitive match for the path (or file
|
||||
name, if there are no slashes) and returns an entry object or null if not found.
|
||||
|
||||
## Container Object Description
|
||||
|
||||
The object returned by `parse` and `read` can be found in the source (`rval`).
|
||||
It has the following properties and methods:
|
||||
|
||||
- `.find(path)` performs a case-insensitive match for the path (or file name, if
|
||||
there are no slashes) and returns an entry object (described later) or null if
|
||||
not found
|
||||
- `.find(path)` is equivalent to `CFB.find(cfb, path)` and should not be used; call `CFB.find(cfb, path)` instead.
|
||||
|
||||
- `.FullPaths` is an array of the names of all of the streams (files) and
|
||||
storages (directories) in the container. The paths are properly prefixed from
|
||||
@ -84,14 +88,6 @@ the container object.
|
||||
- `.content` is a Buffer/Array with the raw content
|
||||
- `.ct`/`.mt` are the creation and modification time (if provided in file)
|
||||
|
||||
# Notes
|
||||
|
||||
Case comparison has not been verified for non-ASCII characters.
|
||||
|
||||
Writing is not supported. It is in the works, but it has not yet been released.
|
||||
|
||||
The `xlscfb.js` file is designed to be embedded in [js-xlsx](http://git.io/xlsx)
|
||||
|
||||
# License
|
||||
|
||||
This implementation is covered under the Apache 2.0 license. It complies with the
|
||||
|
49
bin/cfb.njs
49
bin/cfb.njs
@ -1,14 +1,19 @@
|
||||
#!/usr/bin/env node
|
||||
|
||||
var CFB = require('../');
|
||||
/* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/* eslint-env node */
|
||||
/* vim: set ts=2 ft=javascript: */
|
||||
var X = require('../');
|
||||
var fs = require('fs'), program = require('commander');
|
||||
program
|
||||
.version(CFB.version)
|
||||
.version(X.version)
|
||||
.usage('[options] <file>')
|
||||
.option('-q, --quiet', 'process but do not report')
|
||||
.option('-l, --list-files', 'list files')
|
||||
.option('-d, --dump', 'dump internal representation but do not extract')
|
||||
.option('--dev', 'development mode')
|
||||
.parse(process.argv);
|
||||
.option('--read', 'read but do not print out contents');
|
||||
|
||||
program.parse(process.argv);
|
||||
|
||||
if(program.args.length === 0 || !fs.existsSync(program.args[0])) {
|
||||
console.error("Usage: " + process.argv[1] + " [-q] <cfb_file>");
|
||||
@ -18,14 +23,46 @@ if(program.args.length === 0 || !fs.existsSync(program.args[0])) {
|
||||
var opts = ({type:'file'}/*:any*/);
|
||||
if(program.dev) opts.WTF = true;
|
||||
|
||||
var cfb = CFB.read(program.args[0], opts);
|
||||
var cfb = X.read(program.args[0], opts);
|
||||
if(program.quiet) process.exit(0);
|
||||
|
||||
if(program.dump) {
|
||||
console.log("Full Paths:");
|
||||
console.log(cfb.FullPaths.map(function(x) { return " " + x; }).join("\n"));
|
||||
console.log("Full Path Directory:");
|
||||
console.log(cfb.FullPathDir);
|
||||
process.exit(0);
|
||||
}
|
||||
if(!program.quiet && !program.dump) for(var i=0; i!==cfb.FullPaths.length; ++i) {
|
||||
if(program.listFiles) {
|
||||
var PRINTJ = require("printj"), sprintf = PRINTJ.sprintf;
|
||||
|
||||
var format_date = function(date/*:Date*/)/*:string*/ {
|
||||
return sprintf("%02u-%02u-%02u %02u:%02u", date.getUTCMonth()+1, date.getUTCDate(), date.getUTCFullYear()%100, date.getUTCHours(), date.getUTCMinutes());
|
||||
};
|
||||
|
||||
var basetime = new Date(1980,0,1);
|
||||
var cnt = 0;
|
||||
var rootsize = 0, filesize = 0;
|
||||
console.log(" Length Date Time Name");
|
||||
console.log(" -------- ---- ---- ----");
|
||||
cfb.FileIndex.forEach(function(file, i) {
|
||||
switch(file.type) {
|
||||
case 5:
|
||||
basetime = file.ct || file.mt || basetime;
|
||||
rootsize = file.size;
|
||||
break;
|
||||
case 2:
|
||||
console.log(sprintf("%9lu %s %s", file.size, format_date(basetime), cfb.FullPaths[i]));
|
||||
filesize += file.size;
|
||||
++cnt;
|
||||
}
|
||||
});
|
||||
console.log(" -------- -------");
|
||||
console.log(sprintf("%9lu %lu file%s", rootsize || filesize, cnt, (cnt !== 1 ? "s" : "")));
|
||||
|
||||
process.exit(0);
|
||||
}
|
||||
for(var i=0; i!==cfb.FullPaths.length; ++i) {
|
||||
if(cfb.FullPaths[i].slice(-1) === "/") {
|
||||
console.error("mkdir " + cfb.FullPaths[i]);
|
||||
fs.mkdirSync(cfb.FullPaths[i]);
|
||||
|
@ -1 +1 @@
|
||||
exports.version = '0.12.0';
|
||||
exports.version = '0.12.1';
|
||||
|
@ -25,7 +25,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra
|
||||
if(FI[i].type === 0 /* unknown */) continue;
|
||||
j = dad[i];
|
||||
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
|
||||
else while(j !== 0) {
|
||||
else while(j !== 0 && j !== dad[j]) {
|
||||
FP[i] = FP[j] + "/" + FP[i];
|
||||
j = dad[j];
|
||||
}
|
||||
|
@ -7,7 +7,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
|
||||
var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/;
|
||||
prep_blob(blob, 64);
|
||||
namelen = blob.read_shift(2);
|
||||
if(namelen === 0) continue;
|
||||
name = __utf16le(blob,0,namelen-pl);
|
||||
Paths.push(name);
|
||||
var o/*:CFBEntry*/ = ({
|
||||
@ -28,6 +27,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
|
||||
if(mtime !== 0) o.mt = read_date(blob, blob.l-8);
|
||||
o.start = blob.read_shift(4, 'i');
|
||||
o.size = blob.read_shift(4, 'i');
|
||||
if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; }
|
||||
if(o.type === 5) { /* root */
|
||||
minifat_store = o.start;
|
||||
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
|
||||
@ -40,7 +40,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
|
||||
prep_blob(o.content, 0);
|
||||
} else {
|
||||
o.storage = 'minifat';
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) {
|
||||
o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/);
|
||||
prep_blob(o.content, 0);
|
||||
}
|
||||
|
3
bits/70_find.js
Normal file
3
bits/70_find.js
Normal file
@ -0,0 +1,3 @@
|
||||
function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ {
|
||||
return cfb.find(path);
|
||||
}
|
@ -1,3 +1,4 @@
|
||||
exports.find = find;
|
||||
exports.read = readSync;
|
||||
exports.parse = parse;
|
||||
exports.utils = {
|
||||
|
12
cfb.flow.js
12
cfb.flow.js
@ -109,7 +109,7 @@ type CFBFiles = {[n:string]:CFBEntry};
|
||||
/* [MS-CFB] v20130118 */
|
||||
var CFB = (function _CFB(){
|
||||
var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/;
|
||||
exports.version = '0.12.0';
|
||||
exports.version = '0.12.1';
|
||||
function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ {
|
||||
var mver = 3; // major version
|
||||
var ssz = 512; // sector size
|
||||
@ -280,7 +280,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra
|
||||
if(FI[i].type === 0 /* unknown */) continue;
|
||||
j = dad[i];
|
||||
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
|
||||
else while(j !== 0) {
|
||||
else while(j !== 0 && j !== dad[j]) {
|
||||
FP[i] = FP[j] + "/" + FP[i];
|
||||
j = dad[j];
|
||||
}
|
||||
@ -381,7 +381,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
|
||||
var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/;
|
||||
prep_blob(blob, 64);
|
||||
namelen = blob.read_shift(2);
|
||||
if(namelen === 0) continue;
|
||||
name = __utf16le(blob,0,namelen-pl);
|
||||
Paths.push(name);
|
||||
var o/*:CFBEntry*/ = ({
|
||||
@ -402,6 +401,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
|
||||
if(mtime !== 0) o.mt = read_date(blob, blob.l-8);
|
||||
o.start = blob.read_shift(4, 'i');
|
||||
o.size = blob.read_shift(4, 'i');
|
||||
if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; }
|
||||
if(o.type === 5) { /* root */
|
||||
minifat_store = o.start;
|
||||
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
|
||||
@ -414,7 +414,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
|
||||
prep_blob(o.content, 0);
|
||||
} else {
|
||||
o.storage = 'minifat';
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) {
|
||||
o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/);
|
||||
prep_blob(o.content, 0);
|
||||
}
|
||||
@ -443,6 +443,9 @@ function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) {
|
||||
return parse(/*::typeof blob == 'string' ? new Buffer(blob, 'utf-8') : */blob, options);
|
||||
}
|
||||
|
||||
function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ {
|
||||
return cfb.find(path);
|
||||
}
|
||||
/** CFB Constants */
|
||||
var MSSZ = 64; /* Mini Sector Size = 1<<6 */
|
||||
//var MSCSZ = 4096; /* Mini Stream Cutoff Size */
|
||||
@ -468,6 +471,7 @@ var consts = {
|
||||
EntryTypes: ['unknown','storage','stream','lockbytes','property','root']
|
||||
};
|
||||
|
||||
exports.find = find;
|
||||
exports.read = readSync;
|
||||
exports.parse = parse;
|
||||
exports.utils = {
|
||||
|
12
cfb.js
12
cfb.js
@ -94,7 +94,7 @@ function prep_blob(blob, pos) {
|
||||
/* [MS-CFB] v20130118 */
|
||||
var CFB = (function _CFB(){
|
||||
var exports = {};
|
||||
exports.version = '0.12.0';
|
||||
exports.version = '0.12.1';
|
||||
function parse(file, options) {
|
||||
var mver = 3; // major version
|
||||
var ssz = 512; // sector size
|
||||
@ -265,7 +265,7 @@ function build_full_paths(FI, FPD, FP, Paths) {
|
||||
if(FI[i].type === 0 /* unknown */) continue;
|
||||
j = dad[i];
|
||||
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
|
||||
else while(j !== 0) {
|
||||
else while(j !== 0 && j !== dad[j]) {
|
||||
FP[i] = FP[j] + "/" + FP[i];
|
||||
j = dad[j];
|
||||
}
|
||||
@ -366,7 +366,6 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
|
||||
var blob = sector.slice(i, i+128);
|
||||
prep_blob(blob, 64);
|
||||
namelen = blob.read_shift(2);
|
||||
if(namelen === 0) continue;
|
||||
name = __utf16le(blob,0,namelen-pl);
|
||||
Paths.push(name);
|
||||
var o = ({
|
||||
@ -387,6 +386,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
|
||||
if(mtime !== 0) o.mt = read_date(blob, blob.l-8);
|
||||
o.start = blob.read_shift(4, 'i');
|
||||
o.size = blob.read_shift(4, 'i');
|
||||
if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; }
|
||||
if(o.type === 5) { /* root */
|
||||
minifat_store = o.start;
|
||||
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
|
||||
@ -399,7 +399,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
|
||||
prep_blob(o.content, 0);
|
||||
} else {
|
||||
o.storage = 'minifat';
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) {
|
||||
o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size));
|
||||
prep_blob(o.content, 0);
|
||||
}
|
||||
@ -428,6 +428,9 @@ function readSync(blob, options) {
|
||||
return parse(blob, options);
|
||||
}
|
||||
|
||||
function find(cfb, path) {
|
||||
return cfb.find(path);
|
||||
}
|
||||
/** CFB Constants */
|
||||
var MSSZ = 64; /* Mini Sector Size = 1<<6 */
|
||||
//var MSCSZ = 4096; /* Mini Stream Cutoff Size */
|
||||
@ -453,6 +456,7 @@ var consts = {
|
||||
EntryTypes: ['unknown','storage','stream','lockbytes','property','root']
|
||||
};
|
||||
|
||||
exports.find = find;
|
||||
exports.read = readSync;
|
||||
exports.parse = parse;
|
||||
exports.utils = {
|
||||
|
15
fails.lst
15
fails.lst
@ -5,18 +5,3 @@ xlrd_biff4_no_format_no_window2.xls
|
||||
roo_type_excelx.xls
|
||||
roo_type_openoffice.xls
|
||||
libreoffice_calc_csv-import_malformed-quotes.xls
|
||||
ootest_cellformat_import_biff2.xls
|
||||
ootest_cellformat_import_biff3.xls
|
||||
ootest_cells_import_biff2.xls
|
||||
ootest_cells_import_biff3.xls
|
||||
ootest_cells_import_biff4.xls
|
||||
ootest_drawing_import_biff3.xls
|
||||
ootest_externallink_import_biff2.xls
|
||||
ootest_externallink_import_biff3.xls
|
||||
ootest_formula_import_biff2.xls
|
||||
ootest_formula_import_biff3.xls
|
||||
ootest_oleobject_import_biff3.xls
|
||||
ootest_oleobject_import_biff8_12.xls
|
||||
ootest_sheettypes_import_biff8_12.xls
|
||||
ootest_writeprotection_import_biff3.xls
|
||||
ootest_writeprotection_import_biff4.xls
|
||||
|
64
index.html
64
index.html
@ -18,19 +18,28 @@
|
||||
#b64data{
|
||||
width:100%;
|
||||
}
|
||||
a { text-decoration: none }
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<b>JS-CFB Live Demo</b><br />
|
||||
<pre>
|
||||
<b><a href="http://sheetjs.com">SheetJS CFB Preview Live Demo</a></b>
|
||||
|
||||
<a href="https://github.com/SheetJS/js-cfb">Source Code Repo</a>
|
||||
<a href="https://github.com/SheetJS/js-cfb/issues">Issues? Something look weird? Click here and report an issue</a>
|
||||
<div id="drop">Drop an XLS file here to see the CFB structure.</div>
|
||||
Advanced Demo Options: <br />
|
||||
Use readAsBinaryString: (when available) <input type="checkbox" name="userabs" checked><br />
|
||||
|
||||
<b>Advanced Demo Options:</b>
|
||||
Use readAsBinaryString: (when available) <input type="checkbox" name="userabs" checked>
|
||||
</pre>
|
||||
<pre id="out"></pre>
|
||||
<br />
|
||||
<script src="https://unpkg.com/printj@1.1.0/dist/printj.min.js"></script>
|
||||
<script src="cfb.js"></script>
|
||||
<script>
|
||||
/*jshint browser:true */
|
||||
/* eslint-env browser */
|
||||
/* eslint no-use-before-define:0 */
|
||||
/*global CFB, out */
|
||||
var rABS = typeof FileReader !== "undefined" && typeof FileReader.prototype !== "undefined" && typeof FileReader.prototype.readAsBinaryString !== "undefined";
|
||||
if(!rABS) {
|
||||
@ -45,9 +54,41 @@ function fixdata(data) {
|
||||
return o;
|
||||
}
|
||||
|
||||
function process_data(data) {
|
||||
if(out.innerText === undefined) out.textContent = data.FullPaths;
|
||||
else out.innerText = data.FullPaths.join("\n");
|
||||
function get_manifest(cfb) {
|
||||
var out = [];
|
||||
var sprintf = PRINTJ.sprintf;
|
||||
|
||||
var format_date = function(date/*:Date*/)/*:string*/ {
|
||||
return sprintf("%02u-%02u-%02u %02u:%02u", date.getUTCMonth()+1, date.getUTCDate(), date.getUTCFullYear()%100, date.getUTCHours(), date.getUTCMinutes());
|
||||
};
|
||||
|
||||
var basetime = new Date(1980,0,1);
|
||||
var cnt = 0;
|
||||
var rootsize = 0, filesize = 0;
|
||||
out.push(" Length Date Time Name");
|
||||
out.push(" -------- ---- ---- ----");
|
||||
cfb.FileIndex.forEach(function(file, i) {
|
||||
switch(file.type) {
|
||||
case 5:
|
||||
basetime = file.ct || file.mt || basetime;
|
||||
rootsize = file.size;
|
||||
break;
|
||||
case 2:
|
||||
out.push(sprintf("%9lu %s %s", file.size, format_date(basetime), cfb.FullPaths[i]));
|
||||
filesize += file.size;
|
||||
++cnt;
|
||||
}
|
||||
});
|
||||
out.push(" -------- -------");
|
||||
out.push(sprintf("%9lu %lu file%s", rootsize || filesize, cnt, (cnt !== 1 ? "s" : "")));
|
||||
|
||||
return out.join("\n");
|
||||
}
|
||||
|
||||
function process_data(cfb) {
|
||||
var output = get_manifest(cfb);
|
||||
if(out.innerText === undefined) out.textContent = output
|
||||
else out.innerText = output;
|
||||
}
|
||||
|
||||
var drop = document.getElementById('drop');
|
||||
@ -88,5 +129,16 @@ if(drop.addEventListener) {
|
||||
drop.addEventListener('drop', handleDrop, false);
|
||||
}
|
||||
</script>
|
||||
<script type="text/javascript">
|
||||
var _gaq = _gaq || [];
|
||||
_gaq.push(['_setAccount', 'UA-36810333-1']);
|
||||
_gaq.push(['_trackPageview']);
|
||||
|
||||
(function() {
|
||||
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
|
||||
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
|
||||
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
|
||||
})();
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
|
@ -2,6 +2,7 @@
|
||||
|
||||
type CFBModule = {
|
||||
version:string;
|
||||
find:(cfb:CFBContainer, path:string)=>?CFBEntry;
|
||||
read:(blob:RawBytes|string, opts:CFBReadOpts)=>CFBContainer;
|
||||
parse:(file:RawBytes, opts:CFBReadOpts)=>CFBContainer;
|
||||
utils:CFBUtils;
|
||||
|
@ -5,5 +5,5 @@ declare module '../' { declare var exports:CFBModule; };
|
||||
declare module './' { declare var exports:CFBModule; };
|
||||
|
||||
declare module 'commander' { declare var exports:any; };
|
||||
|
||||
declare module 'printj' { declare var exports:any; };
|
||||
*/
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "cfb",
|
||||
"version": "0.12.0",
|
||||
"version": "0.12.1",
|
||||
"author": "sheetjs",
|
||||
"description": "Compound File Binary File Format extractor",
|
||||
"keywords": [ "cfb", "compression", "office" ],
|
||||
@ -15,6 +15,7 @@
|
||||
"fs": false
|
||||
},
|
||||
"dependencies": {
|
||||
"printj":"~1.1.0",
|
||||
"commander":"~2.11.0"
|
||||
},
|
||||
"devDependencies": {
|
||||
|
21
test.js
21
test.js
@ -19,20 +19,37 @@ var dir = "./test_files/";
|
||||
|
||||
function parsetest(x, cfb) {
|
||||
describe(x + ' should have basic parts', function() {
|
||||
it('should find relative path', function() {
|
||||
/* cfb.find interface */
|
||||
it('should find relative path using cfb#find', function() {
|
||||
switch(x.substr(-4)) {
|
||||
case '.xls': if(!cfb.find('Workbook') && !cfb.find('Book')) throw new Error("Cannot find workbook for " + x); break;
|
||||
case '.ppt': if(!cfb.find('PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break;
|
||||
case '.doc': if(!cfb.find('WordDocument') && !cfb.find('Word Document')) throw new Error("Cannot find doc for " + x); break;
|
||||
}
|
||||
});
|
||||
it('should find absolute path', function() {
|
||||
it('should find absolute path using cfb#find', function() {
|
||||
switch(x.substr(-4)) {
|
||||
case '.xls': if(!cfb.find('/Workbook') && !cfb.find('/Book')) throw new Error("Cannot find workbook for " + x); break;
|
||||
case '.ppt': if(!cfb.find('/PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break;
|
||||
case '.doc': if(!cfb.find('/WordDocument') && !cfb.find('/Word Document')) throw new Error("Cannot find doc for " + x); break;
|
||||
}
|
||||
});
|
||||
|
||||
/* CFB.find function */
|
||||
it('should find relative path using CFB.find', function() {
|
||||
switch(x.substr(-4)) {
|
||||
case '.xls': if(!CFB.find(cfb, 'Workbook') && !CFB.find(cfb, 'Book')) throw new Error("Cannot find workbook for " + x); break;
|
||||
case '.ppt': if(!CFB.find(cfb, 'PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break;
|
||||
case '.doc': if(!CFB.find(cfb, 'WordDocument') && !CFB.find(cfb, 'Word Document')) throw new Error("Cannot find doc for " + x); break;
|
||||
}
|
||||
});
|
||||
it('should find absolute path using CFB.find', function() {
|
||||
switch(x.substr(-4)) {
|
||||
case '.xls': if(!CFB.find(cfb, '/Workbook') && !CFB.find(cfb, '/Book')) throw new Error("Cannot find workbook for " + x); break;
|
||||
case '.ppt': if(!CFB.find(cfb, '/PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break;
|
||||
case '.doc': if(!CFB.find(cfb, '/WordDocument') && !CFB.find(cfb, '/Word Document')) throw new Error("Cannot find doc for " + x); break;
|
||||
}
|
||||
});
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -1,13 +1,20 @@
|
||||
/* vim: set ts=2: */
|
||||
/* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
/* eslint-env node */
|
||||
/* vim: set ts=2 ft=javascript: */
|
||||
import * as CFB from 'cfb';
|
||||
import fs = require('fs');
|
||||
import program = require('commander');
|
||||
import PRINTJ = require("printj");
|
||||
program
|
||||
.version(CFB.version)
|
||||
.usage('[options] <file>')
|
||||
.option('-q, --quiet', 'process but do not report')
|
||||
.option('-l, --list-files', 'list files')
|
||||
.option('-d, --dump', 'dump internal representation but do not extract')
|
||||
.parse(process.argv);
|
||||
.option('--dev', 'development mode')
|
||||
.option('--read', 'read but do not print out contents');
|
||||
|
||||
program.parse(process.argv);
|
||||
|
||||
if(program.args.length === 0 || !fs.existsSync(program.args[0])) {
|
||||
console.error("Usage: " + process.argv[1] + " [-q] <cfb_file>");
|
||||
@ -15,16 +22,48 @@ if(program.args.length === 0 || !fs.existsSync(program.args[0])) {
|
||||
}
|
||||
|
||||
const opts: CFB.CFBParsingOptions = {type:'file'};
|
||||
if(program.dev) opts.WTF = true;
|
||||
|
||||
const cfb: CFB.CFBContainer = CFB.read(program.args[0], opts);
|
||||
if(program.quiet) process.exit(0);
|
||||
|
||||
if(program.dump) {
|
||||
console.log("Full Paths:");
|
||||
console.log(cfb.FullPaths.map((x) => " " + x).join("\n"));
|
||||
console.log("Full Path Directory:");
|
||||
console.log(cfb.FullPathDir);
|
||||
process.exit(0);
|
||||
}
|
||||
if(!program.quiet && !program.dump) for(let i=0; i!==cfb.FullPaths.length; ++i) {
|
||||
if(program.listFiles) {
|
||||
const sprintf = PRINTJ.sprintf;
|
||||
|
||||
const format_date = function(date: Date): string {
|
||||
return sprintf("%02u-%02u-%02u %02u:%02u", date.getUTCMonth()+1, date.getUTCDate(), date.getUTCFullYear()%100, date.getUTCHours(), date.getUTCMinutes());
|
||||
};
|
||||
|
||||
let basetime = new Date(1980,0,1);
|
||||
let cnt = 0;
|
||||
let rootsize = 0, filesize = 0;
|
||||
console.log(" Length Date Time Name");
|
||||
console.log(" -------- ---- ---- ----");
|
||||
cfb.FileIndex.forEach(function(file: CFB.CFBEntry, i: number) {
|
||||
switch(file.type) {
|
||||
case 5:
|
||||
basetime = file.ct || file.mt || basetime;
|
||||
rootsize = file.size;
|
||||
break;
|
||||
case 2:
|
||||
console.log(sprintf("%9lu %s %s", file.size, format_date(basetime), cfb.FullPaths[i]));
|
||||
filesize += file.size;
|
||||
++cnt;
|
||||
}
|
||||
});
|
||||
console.log(" -------- -------");
|
||||
console.log(sprintf("%9lu %lu file%s", rootsize || filesize, cnt, (cnt !== 1 ? "s" : "")));
|
||||
|
||||
process.exit(0);
|
||||
}
|
||||
for(let i=0; i!==cfb.FullPaths.length; ++i) {
|
||||
if(cfb.FullPaths[i].slice(-1) === "/") {
|
||||
console.error("mkdir " + cfb.FullPaths[i]);
|
||||
fs.mkdirSync(cfb.FullPaths[i]);
|
||||
|
11
types/index.d.ts
vendored
11
types/index.d.ts
vendored
@ -10,6 +10,9 @@ export function parse(f: CFB$Blob, options?: CFBParsingOptions): CFBContainer;
|
||||
/** Read a blob or file or binary string */
|
||||
export function read(f: CFB$Blob | string, options?: CFBParsingOptions): CFBContainer;
|
||||
|
||||
/** Find a file entry given a path or file name */
|
||||
export function find(cfb: CFBContainer, path: string): CFBEntry | null;
|
||||
|
||||
/** Utility functions */
|
||||
export const utils: CFB$Utils;
|
||||
|
||||
@ -18,6 +21,8 @@ export const utils: CFB$Utils;
|
||||
export interface CFBParsingOptions {
|
||||
/** Input data encoding */
|
||||
type?: 'base64' | 'binary' | 'buffer' | 'file' | 'array';
|
||||
/** If true, throw errors when features are not understood */
|
||||
WTF?: boolean;
|
||||
}
|
||||
|
||||
export type CFB$Blob = Buffer | number[] | Uint8Array;
|
||||
@ -85,9 +90,9 @@ export interface CFBContainer {
|
||||
|
||||
/* Raw Content, in chunks (Buffer when available, Array of bytes otherwise) */
|
||||
raw: {
|
||||
header: CFB$Blob,
|
||||
sectors: CFB$Blob[];
|
||||
};
|
||||
header: CFB$Blob,
|
||||
sectors: CFB$Blob[];
|
||||
};
|
||||
}
|
||||
|
||||
/** General utilities */
|
||||
|
@ -35,7 +35,7 @@ type CFBFiles = {[n:string]:CFBEntry};
|
||||
/* [MS-CFB] v20130118 */
|
||||
var CFB = (function _CFB(){
|
||||
var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/;
|
||||
exports.version = '0.12.0';
|
||||
exports.version = '0.12.1';
|
||||
function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ {
|
||||
var mver = 3; // major version
|
||||
var ssz = 512; // sector size
|
||||
@ -206,7 +206,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra
|
||||
if(FI[i].type === 0 /* unknown */) continue;
|
||||
j = dad[i];
|
||||
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
|
||||
else while(j !== 0) {
|
||||
else while(j !== 0 && j !== dad[j]) {
|
||||
FP[i] = FP[j] + "/" + FP[i];
|
||||
j = dad[j];
|
||||
}
|
||||
@ -307,7 +307,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
|
||||
var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/;
|
||||
prep_blob(blob, 64);
|
||||
namelen = blob.read_shift(2);
|
||||
if(namelen === 0) continue;
|
||||
name = __utf16le(blob,0,namelen-pl);
|
||||
Paths.push(name);
|
||||
var o/*:CFBEntry*/ = ({
|
||||
@ -328,6 +327,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
|
||||
if(mtime !== 0) o.mt = read_date(blob, blob.l-8);
|
||||
o.start = blob.read_shift(4, 'i');
|
||||
o.size = blob.read_shift(4, 'i');
|
||||
if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; }
|
||||
if(o.type === 5) { /* root */
|
||||
minifat_store = o.start;
|
||||
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
|
||||
@ -340,7 +340,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector
|
||||
prep_blob(o.content, 0);
|
||||
} else {
|
||||
o.storage = 'minifat';
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) {
|
||||
o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/);
|
||||
prep_blob(o.content, 0);
|
||||
}
|
||||
@ -369,6 +369,9 @@ function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) {
|
||||
return parse(/*::typeof blob == 'string' ? new Buffer(blob, 'utf-8') : */blob, options);
|
||||
}
|
||||
|
||||
function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ {
|
||||
return cfb.find(path);
|
||||
}
|
||||
/** CFB Constants */
|
||||
var MSSZ = 64; /* Mini Sector Size = 1<<6 */
|
||||
//var MSCSZ = 4096; /* Mini Stream Cutoff Size */
|
||||
@ -394,6 +397,7 @@ var consts = {
|
||||
EntryTypes: ['unknown','storage','stream','lockbytes','property','root']
|
||||
};
|
||||
|
||||
exports.find = find;
|
||||
exports.read = readSync;
|
||||
exports.parse = parse;
|
||||
exports.utils = {
|
||||
|
12
xlscfb.js
12
xlscfb.js
@ -6,7 +6,7 @@ var DO_NOT_EXPORT_CFB = true;
|
||||
/* [MS-CFB] v20130118 */
|
||||
var CFB = (function _CFB(){
|
||||
var exports = {};
|
||||
exports.version = '0.12.0';
|
||||
exports.version = '0.12.1';
|
||||
function parse(file, options) {
|
||||
var mver = 3; // major version
|
||||
var ssz = 512; // sector size
|
||||
@ -177,7 +177,7 @@ function build_full_paths(FI, FPD, FP, Paths) {
|
||||
if(FI[i].type === 0 /* unknown */) continue;
|
||||
j = dad[i];
|
||||
if(j === 0) FP[i] = FP[0] + "/" + FP[i];
|
||||
else while(j !== 0) {
|
||||
else while(j !== 0 && j !== dad[j]) {
|
||||
FP[i] = FP[j] + "/" + FP[i];
|
||||
j = dad[j];
|
||||
}
|
||||
@ -278,7 +278,6 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
|
||||
var blob = sector.slice(i, i+128);
|
||||
prep_blob(blob, 64);
|
||||
namelen = blob.read_shift(2);
|
||||
if(namelen === 0) continue;
|
||||
name = __utf16le(blob,0,namelen-pl);
|
||||
Paths.push(name);
|
||||
var o = ({
|
||||
@ -299,6 +298,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
|
||||
if(mtime !== 0) o.mt = read_date(blob, blob.l-8);
|
||||
o.start = blob.read_shift(4, 'i');
|
||||
o.size = blob.read_shift(4, 'i');
|
||||
if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; }
|
||||
if(o.type === 5) { /* root */
|
||||
minifat_store = o.start;
|
||||
if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData";
|
||||
@ -311,7 +311,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil
|
||||
prep_blob(o.content, 0);
|
||||
} else {
|
||||
o.storage = 'minifat';
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) {
|
||||
if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) {
|
||||
o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size));
|
||||
prep_blob(o.content, 0);
|
||||
}
|
||||
@ -340,6 +340,9 @@ function readSync(blob, options) {
|
||||
return parse(blob, options);
|
||||
}
|
||||
|
||||
function find(cfb, path) {
|
||||
return cfb.find(path);
|
||||
}
|
||||
/** CFB Constants */
|
||||
var MSSZ = 64; /* Mini Sector Size = 1<<6 */
|
||||
//var MSCSZ = 4096; /* Mini Stream Cutoff Size */
|
||||
@ -365,6 +368,7 @@ var consts = {
|
||||
EntryTypes: ['unknown','storage','stream','lockbytes','property','root']
|
||||
};
|
||||
|
||||
exports.find = find;
|
||||
exports.read = readSync;
|
||||
exports.parse = parse;
|
||||
exports.utils = {
|
||||
|
Loading…
Reference in New Issue
Block a user