diff --git a/README.md b/README.md index a9dc7f2..d7481d5 100644 --- a/README.md +++ b/README.md @@ -5,7 +5,7 @@ format used in many Microsoft file types (such as XLS and DOC) # Utility Installation and Usage -The package is available on NPM: +With [npm](https://www.npmjs.org/package/cfb): ```bash $ npm install -g cfb @@ -14,7 +14,7 @@ $ cfb path/to/CFB/file The command will extract the storages and streams in the container, generating files that line up with the tree-based structure of the storage. Metadata such -as the red-black tree are discarded. +as the red-black tree are discarded. The `-l` option displays a manifest. # Library Installation and Usage @@ -34,9 +34,12 @@ For example, to get the Workbook content from an XLS file: ```js var cfb = CFB.read(filename, {type: 'file'}); -var workbook = cfb.find('Workbook') +var workbook = cfb.find('Workbook'); +var data = workbook.content; ``` +The `xlscfb.js` file is designed to be embedded in [js-xlsx](http://git.io/xlsx) + # API Typescript definitions are maintained in `types/index.d.ts`. @@ -52,14 +55,15 @@ parsed representation of the data. - `base64`: `blob` should be a base64 string - `binary`: `blob` should be a binary string +`CFB.find(cfb, path)` performs a case-insensitive match for the path (or file +name, if there are no slashes) and returns an entry object or null if not found. + ## Container Object Description The object returned by `parse` and `read` can be found in the source (`rval`). It has the following properties and methods: -- `.find(path)` performs a case-insensitive match for the path (or file name, if - there are no slashes) and returns an entry object (described later) or null if - not found +- `.find(path)` is equivalent to `CFB.find(cfb, path)` and should not be used. - `.FullPaths` is an array of the names of all of the streams (files) and storages (directories) in the container. The paths are properly prefixed from @@ -84,14 +88,6 @@ the container object. - `.content` is a Buffer/Array with the raw content - `.ct`/`.mt` are the creation and modification time (if provided in file) -# Notes - -Case comparison has not been verified for non-ASCII characters - -Writing is not supported. It is in the works, but it has not yet been released. - -The `xlscfb.js` file is designed to be embedded in [js-xlsx](http://git.io/xlsx) - # License This implementation is covered under Apache 2.0 license. It complies with the diff --git a/bin/cfb.njs b/bin/cfb.njs index e8fa302..ccc2c81 100755 --- a/bin/cfb.njs +++ b/bin/cfb.njs @@ -1,14 +1,19 @@ #!/usr/bin/env node - -var CFB = require('../'); +/* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ +/* eslint-env node */ +/* vim: set ts=2 ft=javascript: */ +var X = require('../'); var fs = require('fs'), program = require('commander'); program - .version(CFB.version) + .version(X.version) .usage('[options] ') .option('-q, --quiet', 'process but do not report') + .option('-l, --list-files', 'list files') .option('-d, --dump', 'dump internal representation but do not extract') .option('--dev', 'development mode') - .parse(process.argv); + .option('--read', 'read but do not print out contents'); + +program.parse(process.argv); if(program.args.length === 0 || !fs.existsSync(program.args[0])) { console.error("Usage: " + process.argv[1] + " [-q] "); @@ -18,14 +23,46 @@ if(program.args.length === 0 || !fs.existsSync(program.args[0])) { var opts = ({type:'file'}/*:any*/); if(program.dev) opts.WTF = true; -var cfb = CFB.read(program.args[0], opts); +var cfb = X.read(program.args[0], opts); +if(program.quiet) process.exit(0); + if(program.dump) { console.log("Full Paths:"); console.log(cfb.FullPaths.map(function(x) { return " " + x; }).join("\n")); console.log("Full Path Directory:"); console.log(cfb.FullPathDir); + process.exit(0); } -if(!program.quiet && !program.dump) for(var i=0; i!==cfb.FullPaths.length; ++i) { +if(program.listFiles) { + var PRINTJ = require("printj"), sprintf = PRINTJ.sprintf; + + var format_date = function(date/*:Date*/)/*:string*/ { + return sprintf("%02u-%02u-%02u %02u:%02u", date.getUTCMonth()+1, date.getUTCDate(), date.getUTCFullYear()%100, date.getUTCHours(), date.getUTCMinutes()); + }; + + var basetime = new Date(1980,0,1); + var cnt = 0; + var rootsize = 0, filesize = 0; + console.log(" Length Date Time Name"); + console.log(" -------- ---- ---- ----"); + cfb.FileIndex.forEach(function(file, i) { + switch(file.type) { + case 5: + basetime = file.ct || file.mt || basetime; + rootsize = file.size; + break; + case 2: + console.log(sprintf("%9lu %s %s", file.size, format_date(basetime), cfb.FullPaths[i])); + filesize += file.size; + ++cnt; + } + }); + console.log(" -------- -------"); + console.log(sprintf("%9lu %lu file%s", rootsize || filesize, cnt, (cnt !== 1 ? "s" : ""))); + + process.exit(0); +} +for(var i=0; i!==cfb.FullPaths.length; ++i) { if(cfb.FullPaths[i].slice(-1) === "/") { console.error("mkdir " + cfb.FullPaths[i]); fs.mkdirSync(cfb.FullPaths[i]); diff --git a/bits/31_version.js b/bits/31_version.js index dd07128..c011f7f 100644 --- a/bits/31_version.js +++ b/bits/31_version.js @@ -1 +1 @@ -exports.version = '0.12.0'; +exports.version = '0.12.1'; diff --git a/bits/43_rbtree.js b/bits/43_rbtree.js index 9f9e58f..51f0414 100644 --- a/bits/43_rbtree.js +++ b/bits/43_rbtree.js @@ -25,7 +25,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } diff --git a/bits/46_readdir.js b/bits/46_readdir.js index 82b5d0f..3478e73 100644 --- a/bits/46_readdir.js +++ b/bits/46_readdir.js @@ -7,7 +7,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/; prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o/*:CFBEntry*/ = ({ @@ -28,6 +27,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ -40,7 +40,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/); prep_blob(o.content, 0); } diff --git a/bits/70_find.js b/bits/70_find.js new file mode 100644 index 0000000..0e362ea --- /dev/null +++ b/bits/70_find.js @@ -0,0 +1,3 @@ +function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ { + return cfb.find(path); +} diff --git a/bits/78_cfbexports.js b/bits/78_cfbexports.js index d2da979..b6e442b 100644 --- a/bits/78_cfbexports.js +++ b/bits/78_cfbexports.js @@ -1,3 +1,4 @@ +exports.find = find; exports.read = readSync; exports.parse = parse; exports.utils = { diff --git a/cfb.flow.js b/cfb.flow.js index 9b16f2d..f91a4c4 100644 --- a/cfb.flow.js +++ b/cfb.flow.js @@ -109,7 +109,7 @@ type CFBFiles = {[n:string]:CFBEntry}; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/; -exports.version = '0.12.0'; +exports.version = '0.12.1'; function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ { var mver = 3; // major version var ssz = 512; // sector size @@ -280,7 +280,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } @@ -381,7 +381,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/; prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o/*:CFBEntry*/ = ({ @@ -402,6 +401,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ -414,7 +414,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/); prep_blob(o.content, 0); } @@ -443,6 +443,9 @@ function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { return parse(/*::typeof blob == 'string' ? new Buffer(blob, 'utf-8') : */blob, options); } +function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ { + return cfb.find(path); +} /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ //var MSCSZ = 4096; /* Mini Stream Cutoff Size */ @@ -468,6 +471,7 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +exports.find = find; exports.read = readSync; exports.parse = parse; exports.utils = { diff --git a/cfb.js b/cfb.js index 3df9d36..e728a62 100644 --- a/cfb.js +++ b/cfb.js @@ -94,7 +94,7 @@ function prep_blob(blob, pos) { /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports = {}; -exports.version = '0.12.0'; +exports.version = '0.12.1'; function parse(file, options) { var mver = 3; // major version var ssz = 512; // sector size @@ -265,7 +265,7 @@ function build_full_paths(FI, FPD, FP, Paths) { if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } @@ -366,7 +366,6 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil var blob = sector.slice(i, i+128); prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o = ({ @@ -387,6 +386,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ -399,7 +399,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)); prep_blob(o.content, 0); } @@ -428,6 +428,9 @@ function readSync(blob, options) { return parse(blob, options); } +function find(cfb, path) { + return cfb.find(path); +} /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ //var MSCSZ = 4096; /* Mini Stream Cutoff Size */ @@ -453,6 +456,7 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +exports.find = find; exports.read = readSync; exports.parse = parse; exports.utils = { diff --git a/fails.lst b/fails.lst index 4f6d844..2a9ce50 100644 --- a/fails.lst +++ b/fails.lst @@ -5,18 +5,3 @@ xlrd_biff4_no_format_no_window2.xls roo_type_excelx.xls roo_type_openoffice.xls libreoffice_calc_csv-import_malformed-quotes.xls -ootest_cellformat_import_biff2.xls -ootest_cellformat_import_biff3.xls -ootest_cells_import_biff2.xls -ootest_cells_import_biff3.xls -ootest_cells_import_biff4.xls -ootest_drawing_import_biff3.xls -ootest_externallink_import_biff2.xls -ootest_externallink_import_biff3.xls -ootest_formula_import_biff2.xls -ootest_formula_import_biff3.xls -ootest_oleobject_import_biff3.xls -ootest_oleobject_import_biff8_12.xls -ootest_sheettypes_import_biff8_12.xls -ootest_writeprotection_import_biff3.xls -ootest_writeprotection_import_biff4.xls diff --git a/index.html b/index.html index 1758e7f..36f7f57 100644 --- a/index.html +++ b/index.html @@ -18,19 +18,28 @@ #b64data{ width:100%; } +a { text-decoration: none } -JS-CFB Live Demo
+
+SheetJS CFB Preview Live Demo
 
+Source Code Repo
+Issues?  Something look weird?  Click here and report an issue
 
Drop an XLS file here to see the CFB structure.
-Advanced Demo Options:
-Use readAsBinaryString: (when available)
+ +Advanced Demo Options: +Use readAsBinaryString: (when available) +

 
+ + diff --git a/misc/flow.js b/misc/flow.js index ef99213..07ab6c9 100644 --- a/misc/flow.js +++ b/misc/flow.js @@ -2,6 +2,7 @@ type CFBModule = { version:string; + find:(cfb:CFBContainer, path:string)=>?CFBEntry; read:(blob:RawBytes|string, opts:CFBReadOpts)=>CFBContainer; parse:(file:RawBytes, opts:CFBReadOpts)=>CFBContainer; utils:CFBUtils; diff --git a/misc/flowdeps.js b/misc/flowdeps.js index 16cac2a..fb19bc9 100644 --- a/misc/flowdeps.js +++ b/misc/flowdeps.js @@ -5,5 +5,5 @@ declare module '../' { declare var exports:CFBModule; }; declare module './' { declare var exports:CFBModule; }; declare module 'commander' { declare var exports:any; }; - +declare module 'printj' { declare var exports:any; }; */ diff --git a/package.json b/package.json index 8291027..da6a56d 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "cfb", - "version": "0.12.0", + "version": "0.12.1", "author": "sheetjs", "description": "Compound File Binary File Format extractor", "keywords": [ "cfb", "compression", "office" ], @@ -15,6 +15,7 @@ "fs": false }, "dependencies": { + "printj":"~1.1.0", "commander":"~2.11.0" }, "devDependencies": { diff --git a/test.js b/test.js index aacb80e..0af3c69 100644 --- a/test.js +++ b/test.js @@ -19,20 +19,37 @@ var dir = "./test_files/"; function parsetest(x, cfb) { describe(x + ' should have basic parts', function() { - it('should find relative path', function() { + /* cfb.find interface */ + it('should find relative path using cfb#find', function() { switch(x.substr(-4)) { case '.xls': if(!cfb.find('Workbook') && !cfb.find('Book')) throw new Error("Cannot find workbook for " + x); break; case '.ppt': if(!cfb.find('PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break; case '.doc': if(!cfb.find('WordDocument') && !cfb.find('Word Document')) throw new Error("Cannot find doc for " + x); break; } }); - it('should find absolute path', function() { + it('should find absolute path using cfb#find', function() { switch(x.substr(-4)) { case '.xls': if(!cfb.find('/Workbook') && !cfb.find('/Book')) throw new Error("Cannot find workbook for " + x); break; case '.ppt': if(!cfb.find('/PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break; case '.doc': if(!cfb.find('/WordDocument') && !cfb.find('/Word Document')) throw new Error("Cannot find doc for " + x); break; } }); + + /* CFB.find function */ + it('should find relative path using CFB.find', function() { + switch(x.substr(-4)) { + case '.xls': if(!CFB.find(cfb, 'Workbook') && !CFB.find(cfb, 'Book')) throw new Error("Cannot find workbook for " + x); break; + case '.ppt': if(!CFB.find(cfb, 'PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break; + case '.doc': if(!CFB.find(cfb, 'WordDocument') && !CFB.find(cfb, 'Word Document')) throw new Error("Cannot find doc for " + x); break; + } + }); + it('should find absolute path using CFB.find', function() { + switch(x.substr(-4)) { + case '.xls': if(!CFB.find(cfb, '/Workbook') && !CFB.find(cfb, '/Book')) throw new Error("Cannot find workbook for " + x); break; + case '.ppt': if(!CFB.find(cfb, '/PowerPoint Document')) throw new Error("Cannot find presentation for " + x); break; + case '.doc': if(!CFB.find(cfb, '/WordDocument') && !CFB.find(cfb, '/Word Document')) throw new Error("Cannot find doc for " + x); break; + } + }); }); } diff --git a/types/bin_cfb.ts b/types/bin_cfb.ts index ec7855a..b721729 100755 --- a/types/bin_cfb.ts +++ b/types/bin_cfb.ts @@ -1,13 +1,20 @@ -/* vim: set ts=2: */ +/* cfb.js (C) 2013-present SheetJS -- http://sheetjs.com */ +/* eslint-env node */ +/* vim: set ts=2 ft=javascript: */ import * as CFB from 'cfb'; import fs = require('fs'); import program = require('commander'); +import PRINTJ = require("printj"); program .version(CFB.version) .usage('[options] ') .option('-q, --quiet', 'process but do not report') + .option('-l, --list-files', 'list files') .option('-d, --dump', 'dump internal representation but do not extract') - .parse(process.argv); + .option('--dev', 'development mode') + .option('--read', 'read but do not print out contents'); + +program.parse(process.argv); if(program.args.length === 0 || !fs.existsSync(program.args[0])) { console.error("Usage: " + process.argv[1] + " [-q] "); @@ -15,16 +22,48 @@ if(program.args.length === 0 || !fs.existsSync(program.args[0])) { } const opts: CFB.CFBParsingOptions = {type:'file'}; +if(program.dev) opts.WTF = true; const cfb: CFB.CFBContainer = CFB.read(program.args[0], opts); +if(program.quiet) process.exit(0); if(program.dump) { console.log("Full Paths:"); console.log(cfb.FullPaths.map((x) => " " + x).join("\n")); console.log("Full Path Directory:"); console.log(cfb.FullPathDir); + process.exit(0); } -if(!program.quiet && !program.dump) for(let i=0; i!==cfb.FullPaths.length; ++i) { +if(program.listFiles) { + const sprintf = PRINTJ.sprintf; + + const format_date = function(date: Date): string { + return sprintf("%02u-%02u-%02u %02u:%02u", date.getUTCMonth()+1, date.getUTCDate(), date.getUTCFullYear()%100, date.getUTCHours(), date.getUTCMinutes()); + }; + + let basetime = new Date(1980,0,1); + let cnt = 0; + let rootsize = 0, filesize = 0; + console.log(" Length Date Time Name"); + console.log(" -------- ---- ---- ----"); + cfb.FileIndex.forEach(function(file: CFB.CFBEntry, i: number) { + switch(file.type) { + case 5: + basetime = file.ct || file.mt || basetime; + rootsize = file.size; + break; + case 2: + console.log(sprintf("%9lu %s %s", file.size, format_date(basetime), cfb.FullPaths[i])); + filesize += file.size; + ++cnt; + } + }); + console.log(" -------- -------"); + console.log(sprintf("%9lu %lu file%s", rootsize || filesize, cnt, (cnt !== 1 ? "s" : ""))); + + process.exit(0); +} +for(let i=0; i!==cfb.FullPaths.length; ++i) { if(cfb.FullPaths[i].slice(-1) === "/") { console.error("mkdir " + cfb.FullPaths[i]); fs.mkdirSync(cfb.FullPaths[i]); diff --git a/types/index.d.ts b/types/index.d.ts index 2d2255d..9aa17f2 100644 --- a/types/index.d.ts +++ b/types/index.d.ts @@ -10,6 +10,9 @@ export function parse(f: CFB$Blob, options?: CFBParsingOptions): CFBContainer; /** Read a blob or file or binary string */ export function read(f: CFB$Blob | string, options?: CFBParsingOptions): CFBContainer; +/** Find a file entry given a path or file name */ +export function find(cfb: CFBContainer, path: string): CFBEntry | null; + /** Utility functions */ export const utils: CFB$Utils; @@ -18,6 +21,8 @@ export const utils: CFB$Utils; export interface CFBParsingOptions { /** Input data encoding */ type?: 'base64' | 'binary' | 'buffer' | 'file' | 'array'; + /** If true, throw errors when features are not understood */ + WTF?: boolean; } export type CFB$Blob = Buffer | number[] | Uint8Array; @@ -85,9 +90,9 @@ export interface CFBContainer { /* Raw Content, in chunks (Buffer when available, Array of bytes otherwise) */ raw: { - header: CFB$Blob, - sectors: CFB$Blob[]; - }; + header: CFB$Blob, + sectors: CFB$Blob[]; + }; } /** General utilities */ diff --git a/xlscfb.flow.js b/xlscfb.flow.js index 22ba520..6eb3fe1 100644 --- a/xlscfb.flow.js +++ b/xlscfb.flow.js @@ -35,7 +35,7 @@ type CFBFiles = {[n:string]:CFBEntry}; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports/*:CFBModule*/ = /*::(*/{}/*:: :any)*/; -exports.version = '0.12.0'; +exports.version = '0.12.1'; function parse(file/*:RawBytes*/, options/*:CFBReadOpts*/)/*:CFBContainer*/ { var mver = 3; // major version var ssz = 512; // sector size @@ -206,7 +206,7 @@ function build_full_paths(FI/*:CFBFileIndex*/, FPD/*:CFBFullPathDir*/, FP/*:Arra if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } @@ -307,7 +307,6 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector var blob/*:CFBlob*/ = /*::(*/sector.slice(i, i+128)/*:: :any)*/; prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o/*:CFBEntry*/ = ({ @@ -328,6 +327,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ -340,7 +340,7 @@ function read_directory(dir_start/*:number*/, sector_list/*:SectorList*/, sector prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)/*:any*/); prep_blob(o.content, 0); } @@ -369,6 +369,9 @@ function readSync(blob/*:RawBytes|string*/, options/*:CFBReadOpts*/) { return parse(/*::typeof blob == 'string' ? new Buffer(blob, 'utf-8') : */blob, options); } +function find(cfb/*:CFBContainer*/, path/*:string*/)/*:?CFBEntry*/ { + return cfb.find(path); +} /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ //var MSCSZ = 4096; /* Mini Stream Cutoff Size */ @@ -394,6 +397,7 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +exports.find = find; exports.read = readSync; exports.parse = parse; exports.utils = { diff --git a/xlscfb.js b/xlscfb.js index a4b30e2..b54e680 100644 --- a/xlscfb.js +++ b/xlscfb.js @@ -6,7 +6,7 @@ var DO_NOT_EXPORT_CFB = true; /* [MS-CFB] v20130118 */ var CFB = (function _CFB(){ var exports = {}; -exports.version = '0.12.0'; +exports.version = '0.12.1'; function parse(file, options) { var mver = 3; // major version var ssz = 512; // sector size @@ -177,7 +177,7 @@ function build_full_paths(FI, FPD, FP, Paths) { if(FI[i].type === 0 /* unknown */) continue; j = dad[i]; if(j === 0) FP[i] = FP[0] + "/" + FP[i]; - else while(j !== 0) { + else while(j !== 0 && j !== dad[j]) { FP[i] = FP[j] + "/" + FP[i]; j = dad[j]; } @@ -278,7 +278,6 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil var blob = sector.slice(i, i+128); prep_blob(blob, 64); namelen = blob.read_shift(2); - if(namelen === 0) continue; name = __utf16le(blob,0,namelen-pl); Paths.push(name); var o = ({ @@ -299,6 +298,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil if(mtime !== 0) o.mt = read_date(blob, blob.l-8); o.start = blob.read_shift(4, 'i'); o.size = blob.read_shift(4, 'i'); + if(o.size < 0 && o.start < 0) { o.size = o.type = 0; o.start = ENDOFCHAIN; o.name = ""; } if(o.type === 5) { /* root */ minifat_store = o.start; if(nmfs > 0 && minifat_store !== ENDOFCHAIN) sector_list[minifat_store].name = "!StreamData"; @@ -311,7 +311,7 @@ function read_directory(dir_start, sector_list, sectors, Paths, nmfs, files, Fil prep_blob(o.content, 0); } else { o.storage = 'minifat'; - if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN) { + if(minifat_store !== ENDOFCHAIN && o.start !== ENDOFCHAIN && sector_list[minifat_store]) { o.content = (sector_list[minifat_store].data.slice(o.start*MSSZ,o.start*MSSZ+o.size)); prep_blob(o.content, 0); } @@ -340,6 +340,9 @@ function readSync(blob, options) { return parse(blob, options); } +function find(cfb, path) { + return cfb.find(path); +} /** CFB Constants */ var MSSZ = 64; /* Mini Sector Size = 1<<6 */ //var MSCSZ = 4096; /* Mini Stream Cutoff Size */ @@ -365,6 +368,7 @@ var consts = { EntryTypes: ['unknown','storage','stream','lockbytes','property','root'] }; +exports.find = find; exports.read = readSync; exports.parse = parse; exports.utils = {