arithmancy

This commit is contained in:
SheetJS 2022-01-31 06:40:30 -05:00
parent bb997654ef
commit cb8046c447
22 changed files with 332 additions and 9154 deletions

1
.gitignore vendored
View File

@ -27,6 +27,7 @@ tmp
*.[bB][iI][fF][fF][23458]
*.[rR][tT][fF]
*.[eE][tT][hH]
*.[nN][uU][mM][bB][eE][rR][sS]
*.123
*.htm
*.html

View File

@ -1,4 +1,5 @@
test_files/
modules/
packages/
.github/
tests/files/

View File

@ -51,6 +51,7 @@ VueJS
WebSQL
WK_
iOS
iWork
nodejs
node.js
npm

View File

@ -7,6 +7,7 @@ changes may not be included if they are not expected to break existing code.
## v0.18.0
* Browser scripts only expose `XLSX` variable
* Module no longer ships with `dist/jszip.js` browser script
## v0.17.4

View File

@ -94,7 +94,6 @@ dist: dist-deps $(TARGET) bower.json ## Prepare JS files for distribution
dist-deps: ## Copy dependencies for distribution
mkdir -p dist
cp node_modules/codepage/dist/cpexcel.full.js dist/cpexcel.js
cp jszip.js dist/jszip.js
.PHONY: aux
aux: $(AUXTARGETS)

View File

@ -54,6 +54,11 @@ var NUMBERS = (function() {
});
return out;
};
/* Population count: number of set bits in x, treated as a 32-bit integer */
var popcnt = function(x) {
  // every 2-bit field holds the number of set bits of that pair
  var pairs = x - ((x >> 1) & 0x55555555);
  // every 4-bit field holds the sum of two adjacent pair counts
  var nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
  // every byte holds the sum of two adjacent nibble counts
  var bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F;
  // the multiplication accumulates the four byte counts into the top byte
  return (bytes * 0x01010101) >>> 24;
};
// src/proto.ts
function parse_varint49(buf, ptr) {
@ -126,12 +131,16 @@ var NUMBERS = (function() {
break;
case 5:
len = 4;
res = buf.slice(ptr[0], ptr[0] + len);
ptr[0] += len;
break;
case 1:
if (!len)
len = 8;
len = 8;
res = buf.slice(ptr[0], ptr[0] + len);
ptr[0] += len;
break;
case 2:
if (!len)
len = parse_varint49(buf, ptr);
len = parse_varint49(buf, ptr);
res = buf.slice(ptr[0], ptr[0] + len);
ptr[0] += len;
break;
@ -140,7 +149,7 @@ var NUMBERS = (function() {
default:
throw new Error("PB Type ".concat(type, " for Field ").concat(num, " at offset ").concat(off));
}
var v = { offset: off, data: res };
var v = { offset: off, data: res, type: type };
if (out[num] == null)
out[num] = [v];
else
@ -172,14 +181,14 @@ var NUMBERS = (function() {
var t = buf[l++];
var len = buf[l] | buf[l + 1] << 8 | buf[l + 2] << 16;
l += 3;
out.push(process_chunk(t, buf.slice(l, l + len)));
out.push(parse_snappy_chunk(t, buf.slice(l, l + len)));
l += len;
}
if (l !== buf.length)
throw new Error("data is not a valid framed stream!");
return u8concat(out);
}
function process_chunk(type, buf) {
function parse_snappy_chunk(type, buf) {
if (type != 0)
throw new Error("Unexpected Snappy chunk type ".concat(type));
var ptr = [0];
@ -270,6 +279,62 @@ var NUMBERS = (function() {
return out;
}
// src/prebnccell.ts
/*
 * Decode one cell record into a cell object.
 * Returns undefined for cell type 0; throws for unrecognized cell types.
 * String cells carry the raw uint32 index read from the record in `v`.
 */
function parseit(buf, version) {
  var view = u8_to_dataview(buf);
  // the cell type byte is at offset 1 for version 4 and at offset 2 otherwise
  var cell_type = version == 4 ? buf[1] : buf[2];
  var flags = view.getUint32(4, true);
  // skip 4 bytes for every set flag bit selected by the 0x3F8E mask
  var pos = 12 + 4 * popcnt(flags & 0x3F8E);

  // optional values are stored back to back, in this order
  var string_index = -1, number_value = NaN, seconds = NaN;
  if (flags & 0x10) {
    string_index = view.getUint32(pos, true);
    pos += 4;
  }
  if (flags & 0x20) {
    number_value = view.getFloat64(pos, true);
    pos += 8;
  }
  if (flags & 0x40) {
    seconds = view.getFloat64(pos, true);
    pos += 8;
  }

  switch (cell_type) {
    case 0:
      return undefined;
    case 2:
    case 7:
      return { t: "n", v: number_value };
    case 3:
      return { t: "s", v: string_index };
    case 5: {
      // offset in seconds from 2001-01-01 00:00 in the local time zone
      var when = new Date(2001, 0, 1);
      when.setTime(when.getTime() + seconds * 1e3);
      return { t: "d", v: when };
    }
    case 6:
      return { t: "b", v: number_value > 0 };
  }
  throw new Error("Unsupported cell type ".concat(buf.slice(0, 4)));
}
/* Dispatch on the record version stored in the first byte; only 3 and 4 are handled */
function parse(buf) {
  var version = buf[0];
  if (version === 3 || version === 4)
    return parseit(buf, version);
  throw new Error("Unsupported pre-BNC version ".concat(version));
}
// src/numbers.ts
var encode_col = function(C) {
var s = "";
@ -343,7 +408,7 @@ var NUMBERS = (function() {
var pb = parse_shallow(root.data);
var entries = pb[3];
var data = [];
entries == null ? void 0 : entries.forEach(function(entry) {
(entries || []).forEach(function(entry) {
var le = parse_shallow(entry.data);
var key = varint_to_i32(le[1][0].data) >>> 0;
data[key] = u8str(le[3][0].data);
@ -405,50 +470,12 @@ var NUMBERS = (function() {
tiles.forEach(function(tile2) {
tile2.ref.forEach(function(row, R) {
row.forEach(function(buf, C) {
var dv = u8_to_dataview(buf);
var ctype = buf[2];
var addr = encode_cell({ r: R, c: C });
switch (ctype) {
case 0:
{
switch (buf[1]) {
case 3:
ws[addr] = { t: "s", v: sst[dv.getUint32(buf.length - 4, true)] };
break;
case 2:
ws[addr] = { t: "n", v: dv.getFloat64(16, true) };
break;
case 0:
break;
case 5:
break;
case 7:
break;
case 6:
ws[addr] = { t: "b", v: dv.getFloat64(buf.length - 8, true) > 0 };
break;
default:
throw new Error("Unsupported cell type ".concat(buf.slice(0, 4)));
}
}
break;
case 3:
{
ws[addr] = { t: "s", v: sst[dv.getUint32(16, true)] };
}
break;
case 2:
{
ws[addr] = { t: "n", v: dv.getFloat64(buf.length - 12, true) };
}
break;
case 6:
{
ws[addr] = { t: "b", v: dv.getFloat64(16, true) > 0 };
}
break;
default:
throw new Error("Unsupported cell type ".concat(ctype));
var res = parse(buf);
if (res) {
ws[addr] = res;
if (res.t == "s" && typeof res.v == "number")
res.v = sst[res.v];
}
});
});

View File

@ -61,7 +61,7 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
if(safegetzipfile(zip, 'objectdata.xml')) return parse_ods(zip, opts);
/* Numbers */
if(safegetzipfile(zip, 'Index/Document.iwa')) {
if(typeof Uint8Array == "undefined") throw new Error('NUMBERS file parsing requires Uint8Array support')
if(typeof Uint8Array == "undefined") throw new Error('NUMBERS file parsing requires Uint8Array support');
if(typeof NUMBERS != "undefined") {
if(zip.FileIndex) return NUMBERS.parse_numbers(zip);
var _zip = CFB.utils.cfb_new();

9000
dist/jszip.js generated vendored

File diff suppressed because it is too large Load Diff

View File

@ -88,7 +88,7 @@ file but Excel will know how to handle it. This library applies similar logic:
| `0xD0` | CFB Container | BIFF 5/8 or protected XLSX/XLSB or WQ3/QPW or XLR |
| `0x09` | BIFF Stream | BIFF 2/3/4/5 |
| `0x3C` | XML/HTML | SpreadsheetML / Flat ODS / UOS1 / HTML / plain text |
| `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plain text |
| `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or NUMBERS or text |
| `0x49` | Plain Text | SYLK or plain text |
| `0x54` | Plain Text | DIF or plain text |
| `0xEF` | UTF8 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plain text |

View File

@ -20,6 +20,7 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats:
| Lotus Formatted Text (PRN) | ✔ | ✔ |
| UTF-16 Unicode Text (TXT) | ✔ | ✔ |
| **Other Workbook/Worksheet Formats** |:-----:|:-----:|
| Numbers 3.0+ / iWork 2013+ Spreadsheet (NUMBERS) | ✔ | |
| OpenDocument Spreadsheet (ODS) | ✔ | ✔ |
| Flat XML ODF Spreadsheet (FODS) | ✔ | ✔ |
| Uniform Office Format Spreadsheet (标文通 UOS1/UOS2) | ✔ | |
@ -205,6 +206,21 @@ XLR also includes a `WksSSWorkBook` stream similar to Lotus FM3/FMT files.
</details>
#### Numbers 3.0+ / iWork 2013+ Spreadsheet (NUMBERS)
<details>
<summary>(click to show)</summary>
iWork 2013 (Numbers 3.0 / Pages 5.0 / Keynote 6.0) switched from a proprietary
XML-based format to the current file format based on the iWork Archive (IWA).
This format has been used up through the current release (Numbers 11.2).
The parser focuses on extracting raw data from tables. Numbers technically
supports multiple tables in a logical worksheet, including custom titles. This
parser will only extract the raw data from the first table in each worksheet.
</details>
#### OpenDocument Spreadsheet (ODS/FODS)
<details>

Binary file not shown.

Before

Width:  |  Height:  |  Size: 191 KiB

After

Width:  |  Height:  |  Size: 203 KiB

View File

@ -16,6 +16,7 @@ digraph G {
subgraph OLD {
node [style=filled,color=cyan];
nums [label="NUMBERS"];
ods [label="ODS"];
fods [label="FODS"];
uos [label="UOS"];
@ -63,6 +64,7 @@ digraph G {
wk4 -> csf
123 -> csf
qpw -> csf
nums -> csf
}
subgraph WORKSHEET {
edge [color=aquamarine4];

View File

@ -54,6 +54,11 @@ var NUMBERS = (function() {
});
return out;
};
/* Population count: number of set bits in x, treated as a 32-bit integer */
var popcnt = function(x) {
  // every 2-bit field holds the number of set bits of that pair
  var pairs = x - ((x >> 1) & 0x55555555);
  // every 4-bit field holds the sum of two adjacent pair counts
  var nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
  // every byte holds the sum of two adjacent nibble counts
  var bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F;
  // the multiplication accumulates the four byte counts into the top byte
  return (bytes * 0x01010101) >>> 24;
};
// src/proto.ts
function parse_varint49(buf, ptr) {
@ -126,12 +131,16 @@ var NUMBERS = (function() {
break;
case 5:
len = 4;
res = buf.slice(ptr[0], ptr[0] + len);
ptr[0] += len;
break;
case 1:
if (!len)
len = 8;
len = 8;
res = buf.slice(ptr[0], ptr[0] + len);
ptr[0] += len;
break;
case 2:
if (!len)
len = parse_varint49(buf, ptr);
len = parse_varint49(buf, ptr);
res = buf.slice(ptr[0], ptr[0] + len);
ptr[0] += len;
break;
@ -140,7 +149,7 @@ var NUMBERS = (function() {
default:
throw new Error("PB Type ".concat(type, " for Field ").concat(num, " at offset ").concat(off));
}
var v = { offset: off, data: res };
var v = { offset: off, data: res, type: type };
if (out[num] == null)
out[num] = [v];
else
@ -172,14 +181,14 @@ var NUMBERS = (function() {
var t = buf[l++];
var len = buf[l] | buf[l + 1] << 8 | buf[l + 2] << 16;
l += 3;
out.push(process_chunk(t, buf.slice(l, l + len)));
out.push(parse_snappy_chunk(t, buf.slice(l, l + len)));
l += len;
}
if (l !== buf.length)
throw new Error("data is not a valid framed stream!");
return u8concat(out);
}
function process_chunk(type, buf) {
function parse_snappy_chunk(type, buf) {
if (type != 0)
throw new Error("Unexpected Snappy chunk type ".concat(type));
var ptr = [0];
@ -270,6 +279,62 @@ var NUMBERS = (function() {
return out;
}
// src/prebnccell.ts
/*
 * Decode one cell record into a cell object.
 * Returns undefined for cell type 0; throws for unrecognized cell types.
 * String cells carry the raw uint32 index read from the record in `v`.
 */
function parseit(buf, version) {
  var view = u8_to_dataview(buf);
  // the cell type byte is at offset 1 for version 4 and at offset 2 otherwise
  var cell_type = version == 4 ? buf[1] : buf[2];
  var flags = view.getUint32(4, true);
  // skip 4 bytes for every set flag bit selected by the 0x3F8E mask
  var pos = 12 + 4 * popcnt(flags & 0x3F8E);

  // optional values are stored back to back, in this order
  var string_index = -1, number_value = NaN, seconds = NaN;
  if (flags & 0x10) {
    string_index = view.getUint32(pos, true);
    pos += 4;
  }
  if (flags & 0x20) {
    number_value = view.getFloat64(pos, true);
    pos += 8;
  }
  if (flags & 0x40) {
    seconds = view.getFloat64(pos, true);
    pos += 8;
  }

  switch (cell_type) {
    case 0:
      return undefined;
    case 2:
    case 7:
      return { t: "n", v: number_value };
    case 3:
      return { t: "s", v: string_index };
    case 5: {
      // offset in seconds from 2001-01-01 00:00 in the local time zone
      var when = new Date(2001, 0, 1);
      when.setTime(when.getTime() + seconds * 1e3);
      return { t: "d", v: when };
    }
    case 6:
      return { t: "b", v: number_value > 0 };
  }
  throw new Error("Unsupported cell type ".concat(buf.slice(0, 4)));
}
/* Dispatch on the record version stored in the first byte; only 3 and 4 are handled */
function parse(buf) {
  var version = buf[0];
  if (version === 3 || version === 4)
    return parseit(buf, version);
  throw new Error("Unsupported pre-BNC version ".concat(version));
}
// src/numbers.ts
var encode_col = function(C) {
var s = "";
@ -343,7 +408,7 @@ var NUMBERS = (function() {
var pb = parse_shallow(root.data);
var entries = pb[3];
var data = [];
entries == null ? void 0 : entries.forEach(function(entry) {
(entries || []).forEach(function(entry) {
var le = parse_shallow(entry.data);
var key = varint_to_i32(le[1][0].data) >>> 0;
data[key] = u8str(le[3][0].data);
@ -405,50 +470,12 @@ var NUMBERS = (function() {
tiles.forEach(function(tile2) {
tile2.ref.forEach(function(row, R) {
row.forEach(function(buf, C) {
var dv = u8_to_dataview(buf);
var ctype = buf[2];
var addr = encode_cell({ r: R, c: C });
switch (ctype) {
case 0:
{
switch (buf[1]) {
case 3:
ws[addr] = { t: "s", v: sst[dv.getUint32(buf.length - 4, true)] };
break;
case 2:
ws[addr] = { t: "n", v: dv.getFloat64(16, true) };
break;
case 0:
break;
case 5:
break;
case 7:
break;
case 6:
ws[addr] = { t: "b", v: dv.getFloat64(buf.length - 8, true) > 0 };
break;
default:
throw new Error("Unsupported cell type ".concat(buf.slice(0, 4)));
}
}
break;
case 3:
{
ws[addr] = { t: "s", v: sst[dv.getUint32(16, true)] };
}
break;
case 2:
{
ws[addr] = { t: "n", v: dv.getFloat64(buf.length - 12, true) };
}
break;
case 6:
{
ws[addr] = { t: "b", v: dv.getFloat64(16, true) > 0 };
}
break;
default:
throw new Error("Unsupported cell type ".concat(ctype));
var res = parse(buf);
if (res) {
ws[addr] = res;
if (res.t == "s" && typeof res.v == "number")
res.v = sst[res.v];
}
});
});

View File

@ -8,6 +8,10 @@ var f = process.argv[2];
var cfb = read(f, {type: "file"});
var wb = parse_numbers(cfb);
var sn = process.argv[3];
if(sn && !isNaN(+sn)) sn = wb.SheetNames[+sn];
if(wb.SheetNames.indexOf(sn) == -1) sn = wb.SheetNames[0];
console.log(utils.sheet_to_csv(wb.Sheets[sn]));
if(typeof sn == "undefined") {
wb.SheetNames.forEach(sn => console.log(utils.sheet_to_csv(wb.Sheets[sn])));
} else {
if(sn && !isNaN(+sn)) sn = wb.SheetNames[+sn];
if(wb.SheetNames.indexOf(sn) == -1) sn = wb.SheetNames[0];
console.log(utils.sheet_to_csv(wb.Sheets[sn]));
}

View File

@ -1,5 +1,5 @@
/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
import { Ptr, parse_varint49 } from './proto';
import { Ptr, parse_varint49, write_varint49 } from './proto';
import { u8concat } from './util';
function is_framed(buf: Uint8Array): boolean {
@ -19,7 +19,7 @@ function deframe(buf: Uint8Array): Uint8Array {
while(l < buf.length) {
var t = buf[l++];
var len = buf[l] | (buf[l+1]<<8) | (buf[l+2] << 16); l += 3;
out.push(process_chunk(t, buf.slice(l, l + len)));
out.push(parse_snappy_chunk(t, buf.slice(l, l + len)));
l += len;
}
if(l !== buf.length) throw new Error("data is not a valid framed stream!");
@ -27,7 +27,34 @@ function deframe(buf: Uint8Array): Uint8Array {
}
export { deframe };
function process_chunk(type: number, buf: Uint8Array): Uint8Array {
/**
 * Wrap raw bytes in type-0 frames whose payload is a single uncompressed
 * Snappy literal (uncompressed-size varint + literal tag + bytes).
 *
 * Each frame header stores the framed length in 3 bytes, so the input is split
 * into chunks small enough that varint + literal tag + payload fits in 24 bits.
 *
 * @param buf raw bytes to frame
 * @returns concatenation of all frames (empty when `buf` is empty)
 */
function reframe(buf: Uint8Array): Uint8Array {
	var out: Uint8Array[] = [];
	var l = 0;
	/* 24-bit frame length minus worst-case overhead: 4-byte varint + 4-byte literal tag */
	var MAX_CHUNK = 0xFFFFFF - 8;
	while(l < buf.length) {
		var c = Math.min(buf.length - l, MAX_CHUNK);
		/* header is filled in once the total frame length is known */
		var frame = new Uint8Array(4);
		out.push(frame);

		/* uncompressed size of this chunk */
		var usz = write_varint49(c);
		var L = usz.length;
		out.push(usz);

		/* literal tag: lengths up to 60 are stored inline, longer ones use 1-3 trailing length bytes */
		if(c <= 60) { L++; out.push(new Uint8Array([(c - 1)<<2])); }
		else if(c <= 0x100) { L += 2; out.push(new Uint8Array([0xF0, (c-1) & 0xFF])); }
		else if(c <= 0x10000) { L += 3; out.push(new Uint8Array([0xF4, (c-1) & 0xFF, ((c-1) >> 8) & 0xFF])); }
		else { L += 4; out.push(new Uint8Array([0xF8, (c-1) & 0xFF, ((c-1) >> 8) & 0xFF, ((c-1) >> 16) & 0xFF])); }

		out.push(buf.slice(l, l + c)); L += c;

		/* chunk type 0 followed by the 24-bit little-endian frame length */
		frame[0] = 0;
		frame[1] = L & 0xFF; frame[2] = (L >> 8) & 0xFF; frame[3] = (L >> 16) & 0xFF;
		l += c;
	}
	return u8concat(out);
}
export { reframe };
function parse_snappy_chunk(type: number, buf: Uint8Array): Uint8Array {
if(type != 0) throw new Error(`Unexpected Snappy chunk type ${type}`);
var ptr: Ptr = [0];

View File

@ -5,6 +5,7 @@ import { u8str, u8_to_dataview } from './util';
import { parse_shallow, varint_to_i32, parse_varint49, mappa } from './proto';
import { deframe } from './frame';
import { IWAArchiveInfo, IWAMessage, parse_iwa } from './iwa';
import { parse as parse_bnc } from "./prebnccell";
/* written here to avoid a full import of the 'xlsx' library */
var encode_col = (C: number): string => {
@ -59,7 +60,7 @@ function parse_TST_TableDataList(M: IWAMessage[][], root: IWAMessage): string[]
var pb = parse_shallow(root.data);
var entries = pb[3];
var data = [];
entries?.forEach(entry => {
(entries||[]).forEach(entry => {
var le = parse_shallow(entry.data);
var key = varint_to_i32(le[1][0].data)>>>0;
data[key] = u8str(le[3][0].data);
@ -129,33 +130,11 @@ function parse_TST_TableModelArchive(M: IWAMessage[][], root: IWAMessage, ws: Wo
tiles.forEach((tile) => {
tile.ref.forEach((row, R) => {
row.forEach((buf, C) => {
var dv = u8_to_dataview(buf);
//var version = buf[0]; // numbers 3.x use "3", 6.x - 11.x use "4"
/* TODO: find the correct field position of the data type and value. */
var ctype = buf[2];
var addr = encode_cell({r:R,c:C});
switch(ctype) {
case 0: { // TODO: generic ??
switch(buf[1]) {
case 3: ws[addr] = { t: "s", v: sst[dv.getUint32(buf.length - 4,true)] } as CellObject; break;
case 2: ws[addr] = { t: "n", v: dv.getFloat64(16, true) } as CellObject; break;
case 0: break; // ws[addr] = { t: "z" } as CellObject; // blank?
case 5: break; // date-time
case 7: break; // duration
case 6: ws[addr] = { t: "b", v: dv.getFloat64(buf.length - 8, true) > 0 } as CellObject; break;
default: throw new Error(`Unsupported cell type ${buf.slice(0,4)}`);
}
} break;
case 3: { // string
ws[addr] = { t: "s", v: sst[dv.getUint32(16,true)] } as CellObject;
} break;
case 2: { // number
ws[addr] = { t: "n", v: dv.getFloat64(buf.length - 12, true) } as CellObject;
} break;
case 6: { // boolean
ws[addr] = { t: "b", v: dv.getFloat64(16, true) > 0 } as CellObject; // 1 or 0
} break;
default: throw new Error(`Unsupported cell type ${ctype}`);
var res = parse_bnc(buf);
if(res) {
ws[addr] = res as CellObject;
if(res.t == "s" && typeof res.v == "number") res.v = sst[res.v];
}
});
});

41
modules/src/prebnccell.ts Normal file
View File

@ -0,0 +1,41 @@
/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
import { CellObject } from '../../';
import { u8_to_dataview, popcnt } from './util';
/**
 * Decode one cell record into a cell object.
 *
 * @param buf raw cell record
 * @param version record version (first byte of the record)
 * @returns the decoded cell, or undefined for cell type 0; string cells carry
 *          the raw uint32 index read from the record in `v`
 * @throws Error when the cell type is not recognized
 */
function parseit(buf: Uint8Array, version: number): CellObject {
	var view = u8_to_dataview(buf);
	/* the cell type byte is at offset 1 for version 4 and at offset 2 otherwise */
	var cell_type = version == 4 ? buf[1] : buf[2];

	/* TODO: find the correct field position of number formats, formulae, etc */
	var flags = view.getUint32(4, true);
	/* skip 4 bytes for every set flag bit selected by the 0x3F8E mask */
	var pos = 12 + 4 * popcnt(flags & 0x3F8E);

	/* optional values are stored back to back, in this order */
	var string_index = -1, number_value = NaN, seconds = NaN;
	if(flags & 0x10) {
		string_index = view.getUint32(pos, true);
		pos += 4;
	}
	if(flags & 0x20) {
		number_value = view.getFloat64(pos, true);
		pos += 8;
	}
	if(flags & 0x40) {
		seconds = view.getFloat64(pos, true);
		pos += 8;
	}

	var cell;
	switch(cell_type) {
		case 0: // blank? nothing is emitted
			break;
		case 2: // number
		case 7: // duration in seconds TODO: emit [hh]:[mm] style format with adjusted value
			cell = { t: "n", v: number_value };
			break;
		case 3: // string index
			cell = { t: "s", v: string_index };
			break;
		case 5: { // date-time TODO: relative or absolute?
			var when = new Date(2001, 0, 1);
			when.setTime(when.getTime() + seconds * 1000);
			cell = { t: "d", v: when };
		} break;
		case 6: // boolean
			cell = { t: "b", v: number_value > 0 };
			break;
		default:
			throw new Error(`Unsupported cell type ${buf.slice(0,4)}`);
	}
	/* TODO: Some fields appear after the cell data */
	return cell;
}
/**
 * Decode a cell record, dispatching on the version stored in its first byte.
 *
 * @throws Error when the version is neither 3 nor 4
 */
function parse(buf: Uint8Array): CellObject {
	var version = buf[0]; // numbers 3.5 uses "3", 6.x - 11.x use "4"
	if(version === 3 || version === 4) return parseit(buf, version);
	throw new Error(`Unsupported pre-BNC version ${version}`);
}
export { parse };

View File

@ -1,4 +1,6 @@
/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
import { u8concat } from "./util";
type Ptr = [number];
export { Ptr };
@ -18,6 +20,26 @@ function parse_varint49(buf: Uint8Array, ptr?: Ptr): number {
return usz;
}
export { parse_varint49 };
/**
 * Encode a non-negative integer as a base-128 varint (at most 7 bytes / 49 bits).
 *
 * Each output byte carries 7 payload bits, least-significant group first; the
 * high bit is set on every byte except the last.
 *
 * @param v value to encode (expected to be an integer below 2^49)
 * @returns the encoded bytes (1 to 7 bytes)
 */
function write_varint49(v: number): Uint8Array {
	var usz = new Uint8Array(7);
	usz[0] = (v & 0x7F);
	var L = 1;
	sz: if(v > 0x7F) {
		/* bits 7-27 fit in the 32-bit range, so plain shifts are safe */
		usz[L-1] |= 0x80; usz[L] = (v >> 7) & 0x7F; ++L;
		if(v <= 0x3FFF) break sz;
		usz[L-1] |= 0x80; usz[L] = (v >> 14) & 0x7F; ++L;
		if(v <= 0x1FFFFF) break sz;
		usz[L-1] |= 0x80; usz[L] = (v >> 21) & 0x7F; ++L;
		if(v <= 0xFFFFFFF) break sz;
		/* bits 28+ exceed 32-bit shifts: divide first so the wanted 7-bit group lands at bit 21 */
		usz[L-1] |= 0x80; usz[L] = ((v/0x80) >>> 21) & 0x7F; ++L;     // bits 28-34
		if(v <= 0x7FFFFFFFF) break sz;
		usz[L-1] |= 0x80; usz[L] = ((v/0x4000) >>> 21) & 0x7F; ++L;   // bits 35-41
		if(v <= 0x3FFFFFFFFFF) break sz;
		usz[L-1] |= 0x80; usz[L] = ((v/0x200000) >>> 21) & 0x7F; ++L; // bits 42-48
	}
	return usz.slice(0, L);
}
export { write_varint49 };
/** Parse a 32-bit signed integer from the raw varint */
function varint_to_i32(buf: Uint8Array): number {
@ -33,12 +55,13 @@ function varint_to_i32(buf: Uint8Array): number {
export { varint_to_i32 };
interface ProtoItem {
offset: number;
offset?: number;
data: Uint8Array;
type: number;
}
type ProtoField = Array<ProtoItem>
type ProtoMessage = Array<ProtoField>;
export { ProtoItem, ProtoField, ProtoMessage }
export { ProtoItem, ProtoField, ProtoMessage };
/** Shallow parse of a message */
function parse_shallow(buf: Uint8Array): ProtoMessage {
var out: ProtoMessage = [], ptr: Ptr = [0];
@ -55,23 +78,32 @@ function parse_shallow(buf: Uint8Array): ProtoMessage {
while(buf[ptr[0]++] >= 0x80);
res = buf.slice(l, ptr[0]);
} break;
case 5: len = 4;
/* falls through */
case 1: if(!len) len = 8;
/* falls through */
case 2: if(!len) len = parse_varint49(buf, ptr);
res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
case 5: len = 4; res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
case 1: len = 8; res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
case 2: len = parse_varint49(buf, ptr); res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
case 3: // Start group
case 4: // End group
default: throw new Error(`PB Type ${type} for Field ${num} at offset ${off}`);
}
var v: ProtoItem = { offset: off, data: res };
var v: ProtoItem = { offset: off, data: res, type };
if(out[num] == null) out[num] = [v];
else out[num].push(v);
}
return out;
}
export { parse_shallow };
/**
 * Serialize a shallow parse back into protobuf wire format.
 *
 * Every item is written as its key varint (field number * 8 + wire type)
 * followed by its data. Unset field numbers in the sparse message are skipped.
 *
 * @param proto message as produced by `parse_shallow`
 * @returns the encoded message bytes
 */
function write_shallow(proto: ProtoMessage): Uint8Array {
	var out: Uint8Array[] = [];
	proto.forEach((field, idx) => {
		field.forEach(item => {
			out.push(write_varint49(idx * 8 + item.type));
			/* parse_shallow drops the length prefix of length-delimited (type 2) data, so restore it */
			if(item.type == 2) out.push(write_varint49(item.data.length));
			out.push(item.data);
		});
	});
	return u8concat(out);
}
export { write_shallow };
function mappa<U>(data: ProtoField, cb:(Uint8Array) => U): U[] {
if(!data) return [];

View File

@ -37,3 +37,11 @@ function u8indexOf(u8: Uint8Array, data: string | number | Uint8Array, byteOffse
return -1;
}
export { u8indexOf };
/* Hopefully one day this will be added to the language */
/** Population count: number of set bits in `x`, treated as a 32-bit integer */
var popcnt = (x: number): number => {
	/* every 2-bit field holds the number of set bits of that pair */
	var pairs = x - ((x >> 1) & 0x55555555);
	/* every 4-bit field holds the sum of two adjacent pair counts */
	var nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
	/* every byte holds the sum of two adjacent nibble counts */
	var bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F;
	/* the multiplication accumulates the four byte counts into the top byte */
	return (bytes * 0x01010101) >>> 24;
};
export { popcnt };

View File

@ -171,12 +171,16 @@ function parse_shallow(buf) {
break;
case 5:
len = 4;
res = buf.slice(ptr[0], ptr[0] + len);
ptr[0] += len;
break;
case 1:
if (!len)
len = 8;
len = 8;
res = buf.slice(ptr[0], ptr[0] + len);
ptr[0] += len;
break;
case 2:
if (!len)
len = parse_varint49(buf, ptr);
len = parse_varint49(buf, ptr);
res = buf.slice(ptr[0], ptr[0] + len);
ptr[0] += len;
break;
@ -468,6 +472,9 @@ function otorp(buf, builtins = false) {
var otorp_default = otorp;
var is_referenced = (buf, pos) => {
var dv = u8_to_dataview(buf);
for (var leaddr = 0; leaddr > -1 && leaddr < pos; leaddr = u8indexOf(buf, 141, leaddr + 1))
if (dv.getUint32(leaddr + 2, true) == pos - leaddr - 6)
return true;
try {
var headers = parse_macho(buf);
for (var i = 0; i < headers.length; ++i) {
@ -507,6 +514,7 @@ var proto_offsets = (buf) => {
if (buf[--pos] != 10)
continue;
if (!is_referenced(buf, pos)) {
console.error(`Reference to ${name} not found`);
continue;
}
var bin = meta.find((m) => m.offset <= pos && m.offset + m.size >= pos);

View File

@ -1,6 +1,6 @@
{
"name": "otorp",
"version": "0.0.0",
"version": "0.0.1",
"author": "sheetjs",
"description": "Recover protobuf definitions from Mach-O binaries",
"bin": {

View File

@ -55,6 +55,11 @@ export default otorp;
var is_referenced = (buf: Uint8Array, pos: number): boolean => {
var dv = u8_to_dataview(buf);
/* Search for LEA reference (x86) */
for(var leaddr = 0; leaddr > -1 && leaddr < pos; leaddr = u8indexOf(buf, 0x8D, leaddr + 1))
if(dv.getUint32(leaddr + 2, true) == pos - leaddr - 6) return true;
/* Search for absolute reference to address */
try {
var headers = parse_macho(buf);
for(var i = 0; i < headers.length; ++i) {
@ -69,7 +74,6 @@ var is_referenced = (buf: Uint8Array, pos: number): boolean => {
if(u8indexOf(b, ref, 0) > 0) return true;
}
} catch(e) {}
return false;
};
@ -90,7 +94,7 @@ var proto_offsets = (buf: Uint8Array): OffsetList => {
if(off - pos > 250) continue;
var name = u8str(buf.slice(pos + 1, off));
if(buf[--pos] != 0x0A) continue;
if(!is_referenced(buf, pos)) { /* console.error(`Reference to ${name} not found`); */ continue; }
if(!is_referenced(buf, pos)) { console.error(`Reference to ${name} not found`); continue; }
var bin = meta.find(m => m.offset <= pos && m.offset + m.size >= pos);
out.push([pos, name, bin?.type || -1, bin?.subtype || -1]);
}