version bump 0.6.0: case insensitive find
This commit is contained in:
parent
369f05ae0f
commit
99d86f01b7
4
.jshintrc
Normal file
4
.jshintrc
Normal file
@ -0,0 +1,4 @@
|
||||
{
|
||||
"bitwise": false,
|
||||
"curly": false
|
||||
}
|
75
README.md
75
README.md
@ -4,7 +4,7 @@ This is a Pure-JS implementation of MS-CFB: Compound File Binary File Format, a
|
||||
format used in many Microsoft file types (such as XLS, DOC, and other Microsoft
|
||||
Office file types).
|
||||
|
||||
# Installation and Usage
|
||||
# Utility Installation and Usage
|
||||
|
||||
The package is available on NPM:
|
||||
|
||||
@ -18,6 +18,79 @@ files that line up with the tree-based structure of the storage. Metadata
|
||||
such as the red-black tree are discarded (and in the future, new CFB containers
|
||||
will exclusively use black nodes).
|
||||
|
||||
# Library Installation and Usage
|
||||
|
||||
In the browser:
|
||||
|
||||
<script src="cfb.js" type="text/javascript"></script>
|
||||
|
||||
In node:
|
||||
|
||||
var CFB = require('cfb');
|
||||
|
||||
For example, to get the Workbook content from an XLS file:
|
||||
|
||||
var cfb = CFB.read(filename, {type: 'file'});
|
||||
var workbook = cfb.Directory['Workbook'];
|
||||
|
||||
## API
|
||||
|
||||
The CFB object exposes the following methods and properties:
|
||||
|
||||
`CFB.parse(blob)` takes a nodejs Buffer or an array of bytes and returns a
|
||||
parsed representation of the data.
|
||||
|
||||
`CFB.read(blob, options)` wraps `parse`. `options.type` controls the behavior:
|
||||
|
||||
- `file`: `blob` should be a file name
|
||||
- `base64`: `blob` should be a base64 string
|
||||
- `binary`: `blob` should be a binary string
|
||||
|
||||
## Container Object Description
|
||||
|
||||
The object returned by `parse` and `read` can be found in the source (`rval`).
|
||||
It has the following properties and methods:
|
||||
|
||||
- `.find(path)` performs a case-insensitive match for the path (or file name, if
|
||||
there are no slashes) and returns an entry object (described later) or null if
|
||||
not found
|
||||
|
||||
- `.FullPaths` is an array of the names of all of the streams (files) and
|
||||
storages (directories) in the container. The paths are properly prefixed from
|
||||
the root entry (so the entries are unique)
|
||||
|
||||
- `.FullPathDir` is an object whose keys are entries in `.FullPaths` and whose
|
||||
values are objects with metadata and content (described below)
|
||||
|
||||
- `.FileIndex` is an array of the objects from `.FullPathDir`, in the same order
|
||||
as `.FullPaths`.
|
||||
|
||||
- `.raw` contains the raw header and sectors
|
||||
|
||||
- `.Paths` is an array of the names of all of the streams (files) and storages
|
||||
(directories) in the container. There is no disambiguation in the case of
|
||||
streams with the same name.
|
||||
|
||||
- `.Directory` is an object whose keys are entries in `.Paths` and whose values
|
||||
are objects with metadata and content. Since collisions are not properly
|
||||
handled here, `.FullPathDir` is the better option for new projects.
|
||||
|
||||
## Entry Object Description
|
||||
|
||||
The entry objects are available from `FullPathDir`, `FileIndex`, and `Directory`
|
||||
elements of the container object.
|
||||
|
||||
- `.name` is the (case sensitive) internal name
|
||||
- `.type` is the type (`stream` for files, `storage` for dirs, `root` for root)
|
||||
- `.content` is a Buffer/Array with the raw content
|
||||
- `.ct`/`.mt` are the creation and modification time (if provided in file)
|
||||
|
||||
# Notes
|
||||
|
||||
Case comparison has not been verified for non-ASCII characters.
|
||||
|
||||
Writing is not supported. It is in the works, but it has not yet been released.
|
||||
|
||||
# License
|
||||
|
||||
This implementation is covered under the Apache 2.0 license. It complies with the
|
||||
|
14
cfb.js
14
cfb.js
@ -355,6 +355,7 @@ function read_directory(idx) {
|
||||
}
|
||||
read_directory(dir_start);
|
||||
|
||||
/* [MS-CFB] 2.6.4 Red-Black Tree */
|
||||
function build_full_paths(Dir, pathobj, paths, patharr) {
|
||||
var i;
|
||||
var dad = new Array(patharr.length);
|
||||
@ -389,13 +390,24 @@ build_full_paths(FileIndex, FullPathDir, FullPaths, Paths);
|
||||
var root_name = Paths.shift();
|
||||
Paths.root = root_name;
|
||||
|
||||
/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */
|
||||
/*
 * Case-insensitive lookup of an entry by path or by bare name.
 *
 * - A path starting with "/" is first prefixed with the root entry name.
 * - If the resulting path contains a "/", it is compared against FullPaths
 *   and the entry at the same index in FileIndex is returned.
 * - Otherwise it is compared against Paths and the entry is fetched from
 *   `files` using the stored (original-case) name.
 *
 * Comparison is done by upper-casing both sides with toUpperCase().
 * Returns null when nothing matches.
 */
function find_path(path) {
	var target = path[0] === "/" ? root_name + path : path;
	var is_full = target.indexOf("/") !== -1;
	var candidates = is_full ? FullPaths : Paths;
	var upper = candidates.map(function(name) { return name.toUpperCase(); });
	var idx = upper.indexOf(target.toUpperCase());
	if(idx === -1) return null;
	if(is_full) return FileIndex[idx];
	return files[Paths[idx]];
}
|
||||
|
||||
var rval = {
|
||||
raw: {header: header, sectors: sectors},
|
||||
Paths: Paths,
|
||||
FileIndex: FileIndex,
|
||||
FullPaths: FullPaths,
|
||||
FullPathDir: FullPathDir,
|
||||
Directory: files
|
||||
Directory: files,
|
||||
find: find_path
|
||||
};
|
||||
|
||||
return rval;
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "cfb",
|
||||
"version": "0.5.0",
|
||||
"version": "0.6.0",
|
||||
"author": "Niggler",
|
||||
"description": "Compound File Binary File Format extractor",
|
||||
"keywords": [ "cfb", "compression", "office" ],
|
||||
|
19
test.js
19
test.js
@ -1,11 +1,26 @@
|
||||
/* vim: set ts=2: */
|
||||
var CFB;
|
||||
var fs = require('fs');
|
||||
describe('source', function() { it('should load', function() { CFB = require('./'); }); });
|
||||
|
||||
var files = fs.readdirSync('test_files').filter(function(x){return x.substr(-4)==".xls";});
|
||||
files.forEach(function(x) {
|
||||
describe(x, function() {
|
||||
|
||||
/*
 * Registers the "basic parts" checks for one parsed container.
 *
 * x   - label used in the suite title and in error messages
 * cfb - object exposing a find(path) method
 *
 * Each check passes when find() returns a truthy value for either the
 * 'Workbook' or the 'Book' entry, first by bare name and then by
 * "/"-prefixed path.
 */
function parsetest(x, cfb) {
	/* 'Book' is only probed when 'Workbook' is not found */
	function has_workbook(prefix) {
		return Boolean(cfb.find(prefix + 'Workbook') || cfb.find(prefix + 'Book'));
	}

	describe(x + ' should have basic parts', function() {
		it('should find relative path', function() {
			if(has_workbook('')) return;
			throw new Error("Cannot find workbook for " + x);
		});
		it('should find absolute path', function() {
			if(has_workbook('/')) return;
			throw new Error("Cannot find workbook for " + x);
		});
	});
}
|
||||
|
||||
/* One test case per entry in `files`: read it from ./test_files/ and hand the result to parsetest */
describe('should parse test files', function() {
	function register(name) {
		it('should parse ' + name, function() {
			var container = CFB.read('./test_files/' + name, {type: "file"});
			parsetest(name, container);
		});
	}

	files.forEach(function(name) { register(name); });
});
|
||||
|
Loading…
Reference in New Issue
Block a user