version bump 0.6.0: case insensitive find

This commit is contained in:
SheetJS 2013-10-29 11:50:54 -07:00
parent 369f05ae0f
commit 99d86f01b7
6 changed files with 109 additions and 5 deletions

4
.jshintrc Normal file
View File

@ -0,0 +1,4 @@
{
"bitwise": false,
"curly": false
}

View File

@ -4,7 +4,7 @@ This is a Pure-JS implementation of MS-CFB: Compound File Binary File Format, a
format used in many Microsoft file types (such as XLS, DOC, and other Microsoft
Office file types).
# Installation and Usage
# Utility Installation and Usage
The package is available on NPM:
@ -18,6 +18,79 @@ files that line up with the tree-based structure of the storage. Metadata
such as the red-black tree are discarded (and in the future, new CFB containers
will exclusively use black nodes)
# Library Installation and Usage
In the browser:
<script src="cfb.js" type="text/javascript"></script>
In node:
var CFB = require('cfb');
For example, to get the Workbook content from an XLS file:
var cfb = CFB.read(filename, {type: 'file'});
var workbook = cfb.Directory['Workbook'];
## API
The CFB object exposes the following methods and properties:
`CFB.parse(blob)` takes a nodejs Buffer or an array of bytes and returns a
parsed representation of the data.
`CFB.read(blob, options)` wraps `parse`. `options.type` controls the behavior:
- `file`: `blob` should be a file name
- `base64`: `blob` should be a base64 string
- `binary`: `blob` should be a binary string
## Container Object Description
The object returned by `parse` and `read` can be found in the source (`rval`).
It has the following properties and methods:
- `.find(path)` performs a case-insensitive match for the path (or file name, if
there are no slashes) and returns an entry object (described later) or null if
not found
- `.FullPaths` is an array of the names of all of the streams (files) and
storages (directories) in the container. The paths are properly prefixed from
the root entry (so the entries are unique)
- `.FullPathDir` is an object whose keys are entries in `.FullPaths` and whose
values are objects with metadata and content (described below)
- `.FileIndex` is an array of the objects from `.FullPathDir`, in the same order
as `.FullPaths`.
- `.raw` contains the raw header and sectors
- `.Paths` is an array of the names of all of the streams (files) and storages
(directories) in the container. There is no disambiguation in the case of
streams with the same name.
- `.Directory` is an object whose keys are entries in `.Paths` and whose values
are objects with metadata and content. Since collisions are not properly
handled here, `.FullPathDir` is the better option for new projects.
## Entry Object Description
The entry objects are available from `FullPathDir`, `FileIndex`, and `Directory`
elements of the container object.
- `.name` is the (case sensitive) internal name
- `.type` is the type (`stream` for files, `storage` for dirs, `root` for root)
- `.content` is a Buffer/Array with the raw content
- `.ct`/`.mt` are the creation and modification time (if provided in file)
# Notes
Case comparison has not been verified for non-ASCII characters.
Writing is not supported. It is in the works, but it has not yet been released.
# License
This implementation is covered under Apache 2.0 license. It complies with the

14
cfb.js
View File

@ -355,6 +355,7 @@ function read_directory(idx) {
}
read_directory(dir_start);
/* [MS-CFB] 2.6.4 Red-Black Tree */
function build_full_paths(Dir, pathobj, paths, patharr) {
var i;
var dad = new Array(patharr.length);
@ -389,13 +390,24 @@ build_full_paths(FileIndex, FullPathDir, FullPaths, Paths);
var root_name = Paths.shift();
Paths.root = root_name;
/* [MS-CFB] 2.6.4 (Unicode 3.0.1 case conversion) */
/**
 * Case-insensitive lookup of an entry by name or by path.
 *
 * A path starting with "/" is treated as absolute and is prefixed with the
 * root entry name.  Any path containing a "/" is matched against `FullPaths`
 * and resolved through `FileIndex`; a bare name is matched against `Paths`
 * and resolved through `files`.  Matching compares upper-cased strings and
 * the first match wins.
 *
 * @param {string} path entry name or slash-separated path
 * @returns {Object|null} the matching entry, or null when nothing matches
 */
function find_path(path) {
	var target = path[0] === "/" ? root_name + path : path;
	var by_full_path = target.indexOf("/") !== -1;
	var candidates = by_full_path ? FullPaths : Paths;
	var upper_candidates = candidates.map(function(name) { return name.toUpperCase(); });
	var idx = upper_candidates.indexOf(target.toUpperCase());
	if(idx === -1) return null;
	/* FullPaths and FileIndex share indices; bare names go through the name map */
	if(by_full_path) return FileIndex[idx];
	return files[Paths[idx]];
}
var rval = {
raw: {header: header, sectors: sectors},
Paths: Paths,
FileIndex: FileIndex,
FullPaths: FullPaths,
FullPathDir: FullPathDir,
Directory: files
Directory: files,
find: find_path
};
return rval;

View File

@ -1,6 +1,6 @@
{
"name": "cfb",
"version": "0.5.0",
"version": "0.6.0",
"author": "Niggler",
"description": "Compound File Binary File Format extractor",
"keywords": [ "cfb", "compression", "office" ],

19
test.js
View File

@ -1,11 +1,26 @@
/* vim: set ts=2: */
var CFB;
var fs = require('fs');
describe('source', function() { it('should load', function() { CFB = require('./'); }); });
var files = fs.readdirSync('test_files').filter(function(x){return x.substr(-4)==".xls";});
files.forEach(function(x) {
describe(x, function() {
/**
 * Check that a parsed container exposes the workbook stream.
 *
 * The checks run inline and throw on failure so that the calling test fails
 * directly.  Mocha expects suites and tests to be declared at load time, so
 * registering `describe`/`it` from inside an already-running test is not a
 * reliable way to run these checks.
 *
 * @param {string} x name of the test file (used in the error message)
 * @param {Object} cfb parsed container exposing `find(path)`
 * @throws {Error} when neither `Workbook` nor `Book` can be found, either by
 *   bare name or by absolute path
 */
function parsetest(x, cfb) {
	/* relative lookup: bare stream name */
	if(!cfb.find('Workbook') && !cfb.find('Book')) throw new Error("Cannot find workbook for " + x + " (relative path)");
	/* absolute lookup: leading slash */
	if(!cfb.find('/Workbook') && !cfb.find('/Book')) throw new Error("Cannot find workbook for " + x + " (absolute path)");
}
/* Register one parsing test for the named fixture: the file is read from
 * ./test_files through CFB.read and the container is handed to parsetest. */
function register_parse_test(fname) {
	it('should parse ' + fname, function() {
		var container = CFB.read('./test_files/' + fname, {type: "file"});
		parsetest(fname, container);
	});
}
describe('should parse test files', function() {
	files.forEach(function(fname) { register_parse_test(fname); });
});