forked from sheetjs/sheetjs
arithmancy
This commit is contained in:
parent
bb997654ef
commit
cb8046c447
1
.gitignore
vendored
1
.gitignore
vendored
@ -27,6 +27,7 @@ tmp
|
||||
*.[bB][iI][fF][fF][23458]
|
||||
*.[rR][tT][fF]
|
||||
*.[eE][tT][hH]
|
||||
*.[nN][uU][mM][bB][eE][rR][sS]
|
||||
*.123
|
||||
*.htm
|
||||
*.html
|
||||
|
@ -1,4 +1,5 @@
|
||||
test_files/
|
||||
modules/
|
||||
packages/
|
||||
.github/
|
||||
tests/files/
|
||||
|
@ -51,6 +51,7 @@ VueJS
|
||||
WebSQL
|
||||
WK_
|
||||
iOS
|
||||
iWork
|
||||
nodejs
|
||||
node.js
|
||||
npm
|
||||
|
@ -7,6 +7,7 @@ changes may not be included if they are not expected to break existing code.
|
||||
## v0.18.0
|
||||
|
||||
* Browser scripts only expose `XLSX` variable
|
||||
* Module no longer ships with `dist/jszip.js` browser script
|
||||
|
||||
## v0.17.4
|
||||
|
||||
|
1
Makefile
1
Makefile
@ -94,7 +94,6 @@ dist: dist-deps $(TARGET) bower.json ## Prepare JS files for distribution
|
||||
dist-deps: ## Copy dependencies for distribution
|
||||
mkdir -p dist
|
||||
cp node_modules/codepage/dist/cpexcel.full.js dist/cpexcel.js
|
||||
cp jszip.js dist/jszip.js
|
||||
|
||||
.PHONY: aux
|
||||
aux: $(AUXTARGETS)
|
||||
|
@ -54,6 +54,11 @@ var NUMBERS = (function() {
|
||||
});
|
||||
return out;
|
||||
};
|
||||
/* Count the set bits in the low 32 bits of `x` with a parallel bit sum. */
var popcnt = function(x) {
  /* each 2-bit group now holds the number of bits that were set in it */
  var pairs = x - (x >> 1 & 0x55555555);
  /* merge neighbouring groups into 4-bit sums */
  var nibbles = (pairs & 0x33333333) + (pairs >> 2 & 0x33333333);
  /* merge into per-byte sums */
  var bytes = nibbles + (nibbles >> 4) & 0x0F0F0F0F;
  /* the multiply accumulates the four byte sums into the top byte */
  return bytes * 0x01010101 >>> 24;
};
|
||||
|
||||
// src/proto.ts
|
||||
function parse_varint49(buf, ptr) {
|
||||
@ -126,12 +131,16 @@ var NUMBERS = (function() {
|
||||
break;
|
||||
case 5:
|
||||
len = 4;
|
||||
res = buf.slice(ptr[0], ptr[0] + len);
|
||||
ptr[0] += len;
|
||||
break;
|
||||
case 1:
|
||||
if (!len)
|
||||
len = 8;
|
||||
len = 8;
|
||||
res = buf.slice(ptr[0], ptr[0] + len);
|
||||
ptr[0] += len;
|
||||
break;
|
||||
case 2:
|
||||
if (!len)
|
||||
len = parse_varint49(buf, ptr);
|
||||
len = parse_varint49(buf, ptr);
|
||||
res = buf.slice(ptr[0], ptr[0] + len);
|
||||
ptr[0] += len;
|
||||
break;
|
||||
@ -140,7 +149,7 @@ var NUMBERS = (function() {
|
||||
default:
|
||||
throw new Error("PB Type ".concat(type, " for Field ").concat(num, " at offset ").concat(off));
|
||||
}
|
||||
var v = { offset: off, data: res };
|
||||
var v = { offset: off, data: res, type: type };
|
||||
if (out[num] == null)
|
||||
out[num] = [v];
|
||||
else
|
||||
@ -172,14 +181,14 @@ var NUMBERS = (function() {
|
||||
var t = buf[l++];
|
||||
var len = buf[l] | buf[l + 1] << 8 | buf[l + 2] << 16;
|
||||
l += 3;
|
||||
out.push(process_chunk(t, buf.slice(l, l + len)));
|
||||
out.push(parse_snappy_chunk(t, buf.slice(l, l + len)));
|
||||
l += len;
|
||||
}
|
||||
if (l !== buf.length)
|
||||
throw new Error("data is not a valid framed stream!");
|
||||
return u8concat(out);
|
||||
}
|
||||
function process_chunk(type, buf) {
|
||||
function parse_snappy_chunk(type, buf) {
|
||||
if (type != 0)
|
||||
throw new Error("Unexpected Snappy chunk type ".concat(type));
|
||||
var ptr = [0];
|
||||
@ -270,6 +279,62 @@ var NUMBERS = (function() {
|
||||
return out;
|
||||
}
|
||||
|
||||
// src/prebnccell.ts
|
||||
/*
 * Decode one cell record into a cell object ({ t, v }).
 * `version` selects where the cell type byte lives: offset 1 for version 4,
 * offset 2 otherwise.  Returns undefined for cell type 0 and throws for cell
 * types that are not handled here.
 */
function parseit(buf, version) {
  var view = u8_to_dataview(buf);
  var kind = buf[version == 4 ? 1 : 2];
  var flags = view.getUint32(4, true);
  /* each flag bit selected by 0x3F8E pushes the data 4 bytes past offset 12 */
  var pos = 12 + popcnt(flags & 0x3F8E) * 4;
  var sidx = -1, ieee = NaN, dt = NaN;
  /* the optional values are stored in this order: 0x10, 0x20, 0x40 */
  if (flags & 0x10) {
    sidx = view.getUint32(pos, true);
    pos += 4;
  }
  if (flags & 0x20) {
    ieee = view.getFloat64(pos, true);
    pos += 8;
  }
  if (flags & 0x40) {
    dt = view.getFloat64(pos, true);
    pos += 8;
  }
  var cell;
  if (kind === 2 || kind === 7) {
    cell = { t: "n", v: ieee };
  } else if (kind === 3) {
    /* the value is the raw 32-bit index read under flag 0x10 */
    cell = { t: "s", v: sidx };
  } else if (kind === 5) {
    /* `dt` is applied as seconds on top of local midnight, January 1 2001 */
    cell = { t: "d", v: new Date(new Date(2001, 0, 1).getTime() + dt * 1e3) };
  } else if (kind === 6) {
    cell = { t: "b", v: ieee > 0 };
  } else if (kind !== 0) {
    throw new Error("Unsupported cell type ".concat(buf.slice(0, 4)));
  }
  return cell;
}
|
||||
/*
 * Decode a cell record after checking its version byte (first byte of `buf`).
 * Versions 3 and 4 are handed to `parseit`; anything else throws.
 */
function parse(buf) {
  var version = buf[0];
  if (version !== 3 && version !== 4) {
    throw new Error("Unsupported pre-BNC version ".concat(version));
  }
  return parseit(buf, version);
}
|
||||
|
||||
// src/numbers.ts
|
||||
var encode_col = function(C) {
|
||||
var s = "";
|
||||
@ -343,7 +408,7 @@ var NUMBERS = (function() {
|
||||
var pb = parse_shallow(root.data);
|
||||
var entries = pb[3];
|
||||
var data = [];
|
||||
entries == null ? void 0 : entries.forEach(function(entry) {
|
||||
(entries || []).forEach(function(entry) {
|
||||
var le = parse_shallow(entry.data);
|
||||
var key = varint_to_i32(le[1][0].data) >>> 0;
|
||||
data[key] = u8str(le[3][0].data);
|
||||
@ -405,50 +470,12 @@ var NUMBERS = (function() {
|
||||
tiles.forEach(function(tile2) {
|
||||
tile2.ref.forEach(function(row, R) {
|
||||
row.forEach(function(buf, C) {
|
||||
var dv = u8_to_dataview(buf);
|
||||
var ctype = buf[2];
|
||||
var addr = encode_cell({ r: R, c: C });
|
||||
switch (ctype) {
|
||||
case 0:
|
||||
{
|
||||
switch (buf[1]) {
|
||||
case 3:
|
||||
ws[addr] = { t: "s", v: sst[dv.getUint32(buf.length - 4, true)] };
|
||||
break;
|
||||
case 2:
|
||||
ws[addr] = { t: "n", v: dv.getFloat64(16, true) };
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
case 5:
|
||||
break;
|
||||
case 7:
|
||||
break;
|
||||
case 6:
|
||||
ws[addr] = { t: "b", v: dv.getFloat64(buf.length - 8, true) > 0 };
|
||||
break;
|
||||
default:
|
||||
throw new Error("Unsupported cell type ".concat(buf.slice(0, 4)));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
{
|
||||
ws[addr] = { t: "s", v: sst[dv.getUint32(16, true)] };
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
ws[addr] = { t: "n", v: dv.getFloat64(buf.length - 12, true) };
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
{
|
||||
ws[addr] = { t: "b", v: dv.getFloat64(16, true) > 0 };
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new Error("Unsupported cell type ".concat(ctype));
|
||||
var res = parse(buf);
|
||||
if (res) {
|
||||
ws[addr] = res;
|
||||
if (res.t == "s" && typeof res.v == "number")
|
||||
res.v = sst[res.v];
|
||||
}
|
||||
});
|
||||
});
|
||||
|
@ -61,7 +61,7 @@ function parse_zip(zip/*:ZIP*/, opts/*:?ParseOpts*/)/*:Workbook*/ {
|
||||
if(safegetzipfile(zip, 'objectdata.xml')) return parse_ods(zip, opts);
|
||||
/* Numbers */
|
||||
if(safegetzipfile(zip, 'Index/Document.iwa')) {
|
||||
if(typeof Uint8Array == "undefined") throw new Error('NUMBERS file parsing requires Uint8Array support')
|
||||
if(typeof Uint8Array == "undefined") throw new Error('NUMBERS file parsing requires Uint8Array support');
|
||||
if(typeof NUMBERS != "undefined") {
|
||||
if(zip.FileIndex) return NUMBERS.parse_numbers(zip);
|
||||
var _zip = CFB.utils.cfb_new();
|
||||
|
9000
dist/jszip.js
generated
vendored
9000
dist/jszip.js
generated
vendored
File diff suppressed because it is too large
Load Diff
@ -88,7 +88,7 @@ file but Excel will know how to handle it. This library applies similar logic:
|
||||
| `0xD0` | CFB Container | BIFF 5/8 or protected XLSX/XLSB or WQ3/QPW or XLR |
|
||||
| `0x09` | BIFF Stream | BIFF 2/3/4/5 |
|
||||
| `0x3C` | XML/HTML | SpreadsheetML / Flat ODS / UOS1 / HTML / plain text |
|
||||
| `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or plain text |
|
||||
| `0x50` | ZIP Archive | XLSB or XLSX/M or ODS or UOS2 or NUMBERS or text |
|
||||
| `0x49` | Plain Text | SYLK or plain text |
|
||||
| `0x54` | Plain Text | DIF or plain text |
|
||||
| `0xEF` | UTF8 Encoded | SpreadsheetML / Flat ODS / UOS1 / HTML / plain text |
|
||||
|
@ -20,6 +20,7 @@ Despite the library name `xlsx`, it supports numerous spreadsheet file formats:
|
||||
| Lotus Formatted Text (PRN) | ✔ | ✔ |
|
||||
| UTF-16 Unicode Text (TXT) | ✔ | ✔ |
|
||||
| **Other Workbook/Worksheet Formats** |:-----:|:-----:|
|
||||
| Numbers 3.0+ / iWork 2013+ Spreadsheet (NUMBERS) | ✔ | |
|
||||
| OpenDocument Spreadsheet (ODS) | ✔ | ✔ |
|
||||
| Flat XML ODF Spreadsheet (FODS) | ✔ | ✔ |
|
||||
| Uniform Office Format Spreadsheet (标文通 UOS1/UOS2) | ✔ | |
|
||||
@ -205,6 +206,21 @@ XLR also includes a `WksSSWorkBook` stream similar to Lotus FM3/FMT files.
|
||||
|
||||
</details>
|
||||
|
||||
#### Numbers 3.0+ / iWork 2013+ Spreadsheet (NUMBERS)
|
||||
|
||||
<details>
|
||||
<summary>(click to show)</summary>
|
||||
|
||||
iWork 2013 (Numbers 3.0 / Pages 5.0 / Keynote 6.0) switched from a proprietary
|
||||
XML-based format to the current file format based on the iWork Archive (IWA).
|
||||
This format has been used up through the current release (Numbers 11.2).
|
||||
|
||||
The parser focuses on extracting raw data from tables. Numbers technically
|
||||
supports multiple tables in a logical worksheet, including custom titles. This
|
||||
parser will only extract the raw data from the first table in each worksheet.
|
||||
|
||||
</details>
|
||||
|
||||
#### OpenDocument Spreadsheet (ODS/FODS)
|
||||
|
||||
<details>
|
||||
|
BIN
formats.png
BIN
formats.png
Binary file not shown.
Before Width: | Height: | Size: 191 KiB After Width: | Height: | Size: 203 KiB |
@ -16,6 +16,7 @@ digraph G {
|
||||
|
||||
subgraph OLD {
|
||||
node [style=filled,color=cyan];
|
||||
nums [label="NUMBERS"];
|
||||
ods [label="ODS"];
|
||||
fods [label="FODS"];
|
||||
uos [label="UOS"];
|
||||
@ -63,6 +64,7 @@ digraph G {
|
||||
wk4 -> csf
|
||||
123 -> csf
|
||||
qpw -> csf
|
||||
nums -> csf
|
||||
}
|
||||
subgraph WORKSHEET {
|
||||
edge [color=aquamarine4];
|
||||
|
@ -54,6 +54,11 @@ var NUMBERS = (function() {
|
||||
});
|
||||
return out;
|
||||
};
|
||||
/* Count the set bits in the low 32 bits of `x` with a parallel bit sum. */
var popcnt = function(x) {
  /* each 2-bit group now holds the number of bits that were set in it */
  var pairs = x - (x >> 1 & 0x55555555);
  /* merge neighbouring groups into 4-bit sums */
  var nibbles = (pairs & 0x33333333) + (pairs >> 2 & 0x33333333);
  /* merge into per-byte sums */
  var bytes = nibbles + (nibbles >> 4) & 0x0F0F0F0F;
  /* the multiply accumulates the four byte sums into the top byte */
  return bytes * 0x01010101 >>> 24;
};
|
||||
|
||||
// src/proto.ts
|
||||
function parse_varint49(buf, ptr) {
|
||||
@ -126,12 +131,16 @@ var NUMBERS = (function() {
|
||||
break;
|
||||
case 5:
|
||||
len = 4;
|
||||
res = buf.slice(ptr[0], ptr[0] + len);
|
||||
ptr[0] += len;
|
||||
break;
|
||||
case 1:
|
||||
if (!len)
|
||||
len = 8;
|
||||
len = 8;
|
||||
res = buf.slice(ptr[0], ptr[0] + len);
|
||||
ptr[0] += len;
|
||||
break;
|
||||
case 2:
|
||||
if (!len)
|
||||
len = parse_varint49(buf, ptr);
|
||||
len = parse_varint49(buf, ptr);
|
||||
res = buf.slice(ptr[0], ptr[0] + len);
|
||||
ptr[0] += len;
|
||||
break;
|
||||
@ -140,7 +149,7 @@ var NUMBERS = (function() {
|
||||
default:
|
||||
throw new Error("PB Type ".concat(type, " for Field ").concat(num, " at offset ").concat(off));
|
||||
}
|
||||
var v = { offset: off, data: res };
|
||||
var v = { offset: off, data: res, type: type };
|
||||
if (out[num] == null)
|
||||
out[num] = [v];
|
||||
else
|
||||
@ -172,14 +181,14 @@ var NUMBERS = (function() {
|
||||
var t = buf[l++];
|
||||
var len = buf[l] | buf[l + 1] << 8 | buf[l + 2] << 16;
|
||||
l += 3;
|
||||
out.push(process_chunk(t, buf.slice(l, l + len)));
|
||||
out.push(parse_snappy_chunk(t, buf.slice(l, l + len)));
|
||||
l += len;
|
||||
}
|
||||
if (l !== buf.length)
|
||||
throw new Error("data is not a valid framed stream!");
|
||||
return u8concat(out);
|
||||
}
|
||||
function process_chunk(type, buf) {
|
||||
function parse_snappy_chunk(type, buf) {
|
||||
if (type != 0)
|
||||
throw new Error("Unexpected Snappy chunk type ".concat(type));
|
||||
var ptr = [0];
|
||||
@ -270,6 +279,62 @@ var NUMBERS = (function() {
|
||||
return out;
|
||||
}
|
||||
|
||||
// src/prebnccell.ts
|
||||
/*
 * Decode one cell record into a cell object ({ t, v }).
 * `version` selects where the cell type byte lives: offset 1 for version 4,
 * offset 2 otherwise.  Returns undefined for cell type 0 and throws for cell
 * types that are not handled here.
 */
function parseit(buf, version) {
  var view = u8_to_dataview(buf);
  var kind = buf[version == 4 ? 1 : 2];
  var flags = view.getUint32(4, true);
  /* each flag bit selected by 0x3F8E pushes the data 4 bytes past offset 12 */
  var pos = 12 + popcnt(flags & 0x3F8E) * 4;
  var sidx = -1, ieee = NaN, dt = NaN;
  /* the optional values are stored in this order: 0x10, 0x20, 0x40 */
  if (flags & 0x10) {
    sidx = view.getUint32(pos, true);
    pos += 4;
  }
  if (flags & 0x20) {
    ieee = view.getFloat64(pos, true);
    pos += 8;
  }
  if (flags & 0x40) {
    dt = view.getFloat64(pos, true);
    pos += 8;
  }
  var cell;
  if (kind === 2 || kind === 7) {
    cell = { t: "n", v: ieee };
  } else if (kind === 3) {
    /* the value is the raw 32-bit index read under flag 0x10 */
    cell = { t: "s", v: sidx };
  } else if (kind === 5) {
    /* `dt` is applied as seconds on top of local midnight, January 1 2001 */
    cell = { t: "d", v: new Date(new Date(2001, 0, 1).getTime() + dt * 1e3) };
  } else if (kind === 6) {
    cell = { t: "b", v: ieee > 0 };
  } else if (kind !== 0) {
    throw new Error("Unsupported cell type ".concat(buf.slice(0, 4)));
  }
  return cell;
}
|
||||
/*
 * Decode a cell record after checking its version byte (first byte of `buf`).
 * Versions 3 and 4 are handed to `parseit`; anything else throws.
 */
function parse(buf) {
  var version = buf[0];
  if (version !== 3 && version !== 4) {
    throw new Error("Unsupported pre-BNC version ".concat(version));
  }
  return parseit(buf, version);
}
|
||||
|
||||
// src/numbers.ts
|
||||
var encode_col = function(C) {
|
||||
var s = "";
|
||||
@ -343,7 +408,7 @@ var NUMBERS = (function() {
|
||||
var pb = parse_shallow(root.data);
|
||||
var entries = pb[3];
|
||||
var data = [];
|
||||
entries == null ? void 0 : entries.forEach(function(entry) {
|
||||
(entries || []).forEach(function(entry) {
|
||||
var le = parse_shallow(entry.data);
|
||||
var key = varint_to_i32(le[1][0].data) >>> 0;
|
||||
data[key] = u8str(le[3][0].data);
|
||||
@ -405,50 +470,12 @@ var NUMBERS = (function() {
|
||||
tiles.forEach(function(tile2) {
|
||||
tile2.ref.forEach(function(row, R) {
|
||||
row.forEach(function(buf, C) {
|
||||
var dv = u8_to_dataview(buf);
|
||||
var ctype = buf[2];
|
||||
var addr = encode_cell({ r: R, c: C });
|
||||
switch (ctype) {
|
||||
case 0:
|
||||
{
|
||||
switch (buf[1]) {
|
||||
case 3:
|
||||
ws[addr] = { t: "s", v: sst[dv.getUint32(buf.length - 4, true)] };
|
||||
break;
|
||||
case 2:
|
||||
ws[addr] = { t: "n", v: dv.getFloat64(16, true) };
|
||||
break;
|
||||
case 0:
|
||||
break;
|
||||
case 5:
|
||||
break;
|
||||
case 7:
|
||||
break;
|
||||
case 6:
|
||||
ws[addr] = { t: "b", v: dv.getFloat64(buf.length - 8, true) > 0 };
|
||||
break;
|
||||
default:
|
||||
throw new Error("Unsupported cell type ".concat(buf.slice(0, 4)));
|
||||
}
|
||||
}
|
||||
break;
|
||||
case 3:
|
||||
{
|
||||
ws[addr] = { t: "s", v: sst[dv.getUint32(16, true)] };
|
||||
}
|
||||
break;
|
||||
case 2:
|
||||
{
|
||||
ws[addr] = { t: "n", v: dv.getFloat64(buf.length - 12, true) };
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
{
|
||||
ws[addr] = { t: "b", v: dv.getFloat64(16, true) > 0 };
|
||||
}
|
||||
break;
|
||||
default:
|
||||
throw new Error("Unsupported cell type ".concat(ctype));
|
||||
var res = parse(buf);
|
||||
if (res) {
|
||||
ws[addr] = res;
|
||||
if (res.t == "s" && typeof res.v == "number")
|
||||
res.v = sst[res.v];
|
||||
}
|
||||
});
|
||||
});
|
||||
|
@ -8,6 +8,10 @@ var f = process.argv[2];
|
||||
var cfb = read(f, {type: "file"});
|
||||
var wb = parse_numbers(cfb);
|
||||
var sn = process.argv[3];
|
||||
if(sn && !isNaN(+sn)) sn = wb.SheetNames[+sn];
|
||||
if(wb.SheetNames.indexOf(sn) == -1) sn = wb.SheetNames[0];
|
||||
console.log(utils.sheet_to_csv(wb.Sheets[sn]));
|
||||
if(typeof sn == "undefined") {
|
||||
wb.SheetNames.forEach(sn => console.log(utils.sheet_to_csv(wb.Sheets[sn])));
|
||||
} else {
|
||||
if(sn && !isNaN(+sn)) sn = wb.SheetNames[+sn];
|
||||
if(wb.SheetNames.indexOf(sn) == -1) sn = wb.SheetNames[0];
|
||||
console.log(utils.sheet_to_csv(wb.Sheets[sn]));
|
||||
}
|
||||
|
@ -1,5 +1,5 @@
|
||||
/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
import { Ptr, parse_varint49 } from './proto';
|
||||
import { Ptr, parse_varint49, write_varint49 } from './proto';
|
||||
import { u8concat } from './util';
|
||||
|
||||
function is_framed(buf: Uint8Array): boolean {
|
||||
@ -19,7 +19,7 @@ function deframe(buf: Uint8Array): Uint8Array {
|
||||
while(l < buf.length) {
|
||||
var t = buf[l++];
|
||||
var len = buf[l] | (buf[l+1]<<8) | (buf[l+2] << 16); l += 3;
|
||||
out.push(process_chunk(t, buf.slice(l, l + len)));
|
||||
out.push(parse_snappy_chunk(t, buf.slice(l, l + len)));
|
||||
l += len;
|
||||
}
|
||||
if(l !== buf.length) throw new Error("data is not a valid framed stream!");
|
||||
@ -27,7 +27,34 @@ function deframe(buf: Uint8Array): Uint8Array {
|
||||
}
|
||||
export { deframe };
|
||||
|
||||
function process_chunk(type: number, buf: Uint8Array): Uint8Array {
|
||||
/**
 * Wrap raw bytes in a framed stream of uncompressed Snappy chunks.
 *
 * Each emitted frame is a 4-byte header (type byte 0 followed by a 24-bit
 * little-endian chunk length) and a chunk made of: the varint-encoded payload
 * length, one literal tag, and the payload bytes copied verbatim.
 *
 * @param buf raw data to wrap
 * @returns the framed stream; empty when `buf` is empty
 */
function reframe(buf: Uint8Array): Uint8Array {
	var out: Uint8Array[] = [];
	var l = 0;
	/* The frame header only has 24 bits for the chunk length.  Besides the
	   payload, a chunk holds the length varint (at most 4 bytes for payloads
	   this small) and the literal tag (at most 4 bytes), so cap the payload to
	   keep the total within 0xFFFFFF.  Larger inputs are split across frames. */
	var max_payload = 0xFFFFFF - 8;
	while(l < buf.length) {
		var c = Math.min(buf.length - l, max_payload);
		/* reserve the header now; it is filled in once the chunk size is known */
		var frame = new Uint8Array(4);
		out.push(frame);
		var usz = write_varint49(c);
		var L = usz.length;
		out.push(usz);

		/* literal tag: lengths up to 60 fit in the tag byte itself, longer
		   literals store (length - 1) in 1 to 3 trailing little-endian bytes */
		if(c <= 60) { L++; out.push(new Uint8Array([(c - 1)<<2])); }
		else if(c <= 0x100) { L += 2; out.push(new Uint8Array([0xF0, (c-1) & 0xFF])); }
		else if(c <= 0x10000) { L += 3; out.push(new Uint8Array([0xF4, (c-1) & 0xFF, ((c-1) >> 8) & 0xFF])); }
		else { L += 4; out.push(new Uint8Array([0xF8, (c-1) & 0xFF, ((c-1) >> 8) & 0xFF, ((c-1) >> 16) & 0xFF])); }

		out.push(buf.slice(l, l + c)); L += c;

		frame[0] = 0;
		frame[1] = L & 0xFF; frame[2] = (L >> 8) & 0xFF; frame[3] = (L >> 16) & 0xFF;
		l += c;
	}
	return u8concat(out);
}
|
||||
export { reframe };
|
||||
|
||||
function parse_snappy_chunk(type: number, buf: Uint8Array): Uint8Array {
|
||||
if(type != 0) throw new Error(`Unexpected Snappy chunk type ${type}`);
|
||||
var ptr: Ptr = [0];
|
||||
|
||||
|
@ -5,6 +5,7 @@ import { u8str, u8_to_dataview } from './util';
|
||||
import { parse_shallow, varint_to_i32, parse_varint49, mappa } from './proto';
|
||||
import { deframe } from './frame';
|
||||
import { IWAArchiveInfo, IWAMessage, parse_iwa } from './iwa';
|
||||
import { parse as parse_bnc } from "./prebnccell";
|
||||
|
||||
/* written here to avoid a full import of the 'xlsx' library */
|
||||
var encode_col = (C: number): string => {
|
||||
@ -59,7 +60,7 @@ function parse_TST_TableDataList(M: IWAMessage[][], root: IWAMessage): string[]
|
||||
var pb = parse_shallow(root.data);
|
||||
var entries = pb[3];
|
||||
var data = [];
|
||||
entries?.forEach(entry => {
|
||||
(entries||[]).forEach(entry => {
|
||||
var le = parse_shallow(entry.data);
|
||||
var key = varint_to_i32(le[1][0].data)>>>0;
|
||||
data[key] = u8str(le[3][0].data);
|
||||
@ -129,33 +130,11 @@ function parse_TST_TableModelArchive(M: IWAMessage[][], root: IWAMessage, ws: Wo
|
||||
tiles.forEach((tile) => {
|
||||
tile.ref.forEach((row, R) => {
|
||||
row.forEach((buf, C) => {
|
||||
var dv = u8_to_dataview(buf);
|
||||
//var version = buf[0]; // numbers 3.x use "3", 6.x - 11.x use "4"
|
||||
/* TODO: find the correct field position of the data type and value. */
|
||||
var ctype = buf[2];
|
||||
var addr = encode_cell({r:R,c:C});
|
||||
switch(ctype) {
|
||||
case 0: { // TODO: generic ??
|
||||
switch(buf[1]) {
|
||||
case 3: ws[addr] = { t: "s", v: sst[dv.getUint32(buf.length - 4,true)] } as CellObject; break;
|
||||
case 2: ws[addr] = { t: "n", v: dv.getFloat64(16, true) } as CellObject; break;
|
||||
case 0: break; // ws[addr] = { t: "z" } as CellObject; // blank?
|
||||
case 5: break; // date-time
|
||||
case 7: break; // duration
|
||||
case 6: ws[addr] = { t: "b", v: dv.getFloat64(buf.length - 8, true) > 0 } as CellObject; break;
|
||||
default: throw new Error(`Unsupported cell type ${buf.slice(0,4)}`);
|
||||
}
|
||||
} break;
|
||||
case 3: { // string
|
||||
ws[addr] = { t: "s", v: sst[dv.getUint32(16,true)] } as CellObject;
|
||||
} break;
|
||||
case 2: { // number
|
||||
ws[addr] = { t: "n", v: dv.getFloat64(buf.length - 12, true) } as CellObject;
|
||||
} break;
|
||||
case 6: { // boolean
|
||||
ws[addr] = { t: "b", v: dv.getFloat64(16, true) > 0 } as CellObject; // 1 or 0
|
||||
} break;
|
||||
default: throw new Error(`Unsupported cell type ${ctype}`);
|
||||
var res = parse_bnc(buf);
|
||||
if(res) {
|
||||
ws[addr] = res as CellObject;
|
||||
if(res.t == "s" && typeof res.v == "number") res.v = sst[res.v];
|
||||
}
|
||||
});
|
||||
});
|
||||
|
41
modules/src/prebnccell.ts
Normal file
41
modules/src/prebnccell.ts
Normal file
@ -0,0 +1,41 @@
|
||||
/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
import { CellObject } from '../../';
|
||||
import { u8_to_dataview, popcnt } from './util';
|
||||
|
||||
/**
 * Decode one cell record into a cell object.
 *
 * @param buf raw cell record
 * @param version record version; the cell type byte is read from offset 1
 *   when this is 4 and from offset 2 otherwise
 * @returns the decoded cell, or `undefined` for cell type 0
 * @throws Error when the cell type is not handled here
 */
function parseit(buf: Uint8Array, version: number): CellObject {
	var view = u8_to_dataview(buf);
	var cell_type = buf[version == 4 ? 1 : 2];

	/* TODO: find the correct field position of number formats, formulae, etc */
	var flags = view.getUint32(4, true);
	/* each flag bit selected by 0x3F8E pushes the data 4 bytes past offset 12 */
	var pos = 12 + popcnt(flags & 0x3F8E) * 4;

	/* the optional values are stored in this order: 0x10, 0x20, 0x40 */
	var sidx = -1, ieee = NaN, dt = NaN;
	if(flags & 0x10) {
		sidx = view.getUint32(pos, true);
		pos += 4;
	}
	if(flags & 0x20) {
		ieee = view.getFloat64(pos, true);
		pos += 8;
	}
	if(flags & 0x40) {
		dt = view.getFloat64(pos, true);
		pos += 8;
	}

	var cell;
	if(cell_type === 0) {
		/* blank? nothing is returned for this type */
	} else if(cell_type === 2) {
		cell = { t: "n", v: ieee };
	} else if(cell_type === 3) {
		/* the value is the raw 32-bit index read under flag 0x10 */
		cell = { t: "s", v: sidx };
	} else if(cell_type === 5) {
		/* date-time TODO: relative or absolute? */
		var epoch = new Date(2001, 0, 1);
		cell = { t: "d", v: new Date(epoch.getTime() + dt * 1000) };
	} else if(cell_type === 6) {
		cell = { t: "b", v: ieee > 0 };
	} else if(cell_type === 7) {
		/* duration in seconds TODO: emit [hh]:[mm] style format with adjusted value */
		cell = { t: "n", v: ieee };
	} else {
		throw new Error(`Unsupported cell type ${buf.slice(0,4)}`);
	}
	/* TODO: Some fields appear after the cell data */

	return cell;
}
|
||||
|
||||
/**
 * Decode a cell record after checking its version byte.
 *
 * @param buf raw cell record; the first byte is the version
 * @returns whatever `parseit` produces for the record
 * @throws Error when the version is neither 3 nor 4
 */
function parse(buf: Uint8Array): CellObject {
	var version = buf[0]; // numbers 3.5 uses "3", 6.x - 11.x use "4"
	if(version !== 3 && version !== 4) throw new Error(`Unsupported pre-BNC version ${version}`);
	return parseit(buf, version);
}
|
||||
|
||||
export { parse };
|
@ -1,4 +1,6 @@
|
||||
/*! sheetjs (C) 2013-present SheetJS -- http://sheetjs.com */
|
||||
import { u8concat } from "./util";
|
||||
|
||||
type Ptr = [number];
|
||||
export { Ptr };
|
||||
|
||||
@ -18,6 +20,26 @@ function parse_varint49(buf: Uint8Array, ptr?: Ptr): number {
|
||||
return usz;
|
||||
}
|
||||
export { parse_varint49 };
|
||||
/**
 * Encode a non-negative integer as a base-128 varint (least-significant group
 * first, high bit of each byte set when another byte follows).
 *
 * At most 7 bytes are produced, i.e. 7 groups of 7 bits = 49 bits of payload.
 *
 * @param v value to encode
 * @returns the encoded bytes (1 to 7 bytes long)
 */
function write_varint49(v: number): Uint8Array {
	var usz = new Uint8Array(7);
	usz[0] = (v & 0x7F);
	var L = 1;
	sz: if(v > 0x7F) {
		/* bits 7..27 can be extracted with 32-bit shifts */
		usz[L-1] |= 0x80; usz[L] = (v >> 7) & 0x7F; ++L;
		if(v <= 0x3FFF) break sz;
		usz[L-1] |= 0x80; usz[L] = (v >> 14) & 0x7F; ++L;
		if(v <= 0x1FFFFF) break sz;
		usz[L-1] |= 0x80; usz[L] = (v >> 21) & 0x7F; ++L;
		if(v <= 0xFFFFFFF) break sz;
		/* bits 28..48 lie beyond what a 32-bit shift can reach, so divide by
		   2^28, 2^35 and 2^42 to bring each 7-bit group down to the low bits */
		usz[L-1] |= 0x80; usz[L] = Math.floor(v / 0x10000000) & 0x7F; ++L;
		if(v <= 0x7FFFFFFFF) break sz;
		usz[L-1] |= 0x80; usz[L] = Math.floor(v / 0x800000000) & 0x7F; ++L;
		if(v <= 0x3FFFFFFFFFF) break sz;
		usz[L-1] |= 0x80; usz[L] = Math.floor(v / 0x40000000000) & 0x7F; ++L;
	}
	return usz.slice(0, L);
}
|
||||
export { write_varint49 };
|
||||
|
||||
/** Parse a 32-bit signed integer from the raw varint */
|
||||
function varint_to_i32(buf: Uint8Array): number {
|
||||
@ -33,12 +55,13 @@ function varint_to_i32(buf: Uint8Array): number {
|
||||
export { varint_to_i32 };
|
||||
|
||||
interface ProtoItem {
|
||||
offset: number;
|
||||
offset?: number;
|
||||
data: Uint8Array;
|
||||
type: number;
|
||||
}
|
||||
type ProtoField = Array<ProtoItem>
|
||||
type ProtoMessage = Array<ProtoField>;
|
||||
export { ProtoItem, ProtoField, ProtoMessage }
|
||||
export { ProtoItem, ProtoField, ProtoMessage };
|
||||
/** Shallow parse of a message */
|
||||
function parse_shallow(buf: Uint8Array): ProtoMessage {
|
||||
var out: ProtoMessage = [], ptr: Ptr = [0];
|
||||
@ -55,23 +78,32 @@ function parse_shallow(buf: Uint8Array): ProtoMessage {
|
||||
while(buf[ptr[0]++] >= 0x80);
|
||||
res = buf.slice(l, ptr[0]);
|
||||
} break;
|
||||
case 5: len = 4;
|
||||
/* falls through */
|
||||
case 1: if(!len) len = 8;
|
||||
/* falls through */
|
||||
case 2: if(!len) len = parse_varint49(buf, ptr);
|
||||
res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
|
||||
case 5: len = 4; res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
|
||||
case 1: len = 8; res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
|
||||
case 2: len = parse_varint49(buf, ptr); res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
|
||||
case 3: // Start group
|
||||
case 4: // End group
|
||||
default: throw new Error(`PB Type ${type} for Field ${num} at offset ${off}`);
|
||||
}
|
||||
var v: ProtoItem = { offset: off, data: res };
|
||||
var v: ProtoItem = { offset: off, data: res, type };
|
||||
if(out[num] == null) out[num] = [v];
|
||||
else out[num].push(v);
|
||||
}
|
||||
return out;
|
||||
}
|
||||
export { parse_shallow };
|
||||
/**
 * Serialize a shallow parse back into protobuf wire format.
 *
 * Every item is written as its tag (`field number * 8 + wire type`) followed
 * by its payload.  Unset field numbers in the sparse `proto` array are skipped.
 *
 * @param proto message indexed by field number, as produced by `parse_shallow`
 * @returns the encoded message
 */
function write_shallow(proto: ProtoMessage): Uint8Array {
	var out: Uint8Array[] = [];
	proto.forEach((field, idx) => {
		field.forEach(item => {
			out.push(write_varint49(idx * 8 + item.type));
			/* parse_shallow keeps only the payload of length-delimited fields,
			   so the length prefix has to be written back here */
			if(item.type == 2) out.push(write_varint49(item.data.length));
			out.push(item.data);
		});
	});
	return u8concat(out);
}
|
||||
export { write_shallow };
|
||||
|
||||
function mappa<U>(data: ProtoField, cb:(Uint8Array) => U): U[] {
|
||||
if(!data) return [];
|
||||
|
@ -37,3 +37,11 @@ function u8indexOf(u8: Uint8Array, data: string | number | Uint8Array, byteOffse
|
||||
return -1;
|
||||
}
|
||||
export { u8indexOf };
|
||||
|
||||
/* Hopefully one day this will be added to the language */
/** Count the set bits in the low 32 bits of `x` with a parallel bit sum. */
var popcnt = (x: number): number => {
	/* each 2-bit group now holds the number of bits that were set in it */
	var pairs = x - ((x >> 1) & 0x55555555);
	/* merge neighbouring groups into 4-bit sums */
	var nibbles = (pairs & 0x33333333) + ((pairs >> 2) & 0x33333333);
	/* merge into per-byte sums */
	var bytes = (nibbles + (nibbles >> 4)) & 0x0F0F0F0F;
	/* the multiply accumulates the four byte sums into the top byte */
	return (bytes * 0x01010101) >>> 24;
};
|
||||
export { popcnt };
|
||||
|
@ -171,12 +171,16 @@ function parse_shallow(buf) {
|
||||
break;
|
||||
case 5:
|
||||
len = 4;
|
||||
res = buf.slice(ptr[0], ptr[0] + len);
|
||||
ptr[0] += len;
|
||||
break;
|
||||
case 1:
|
||||
if (!len)
|
||||
len = 8;
|
||||
len = 8;
|
||||
res = buf.slice(ptr[0], ptr[0] + len);
|
||||
ptr[0] += len;
|
||||
break;
|
||||
case 2:
|
||||
if (!len)
|
||||
len = parse_varint49(buf, ptr);
|
||||
len = parse_varint49(buf, ptr);
|
||||
res = buf.slice(ptr[0], ptr[0] + len);
|
||||
ptr[0] += len;
|
||||
break;
|
||||
@ -468,6 +472,9 @@ function otorp(buf, builtins = false) {
|
||||
var otorp_default = otorp;
|
||||
var is_referenced = (buf, pos) => {
|
||||
var dv = u8_to_dataview(buf);
|
||||
for (var leaddr = 0; leaddr > -1 && leaddr < pos; leaddr = u8indexOf(buf, 141, leaddr + 1))
|
||||
if (dv.getUint32(leaddr + 2, true) == pos - leaddr - 6)
|
||||
return true;
|
||||
try {
|
||||
var headers = parse_macho(buf);
|
||||
for (var i = 0; i < headers.length; ++i) {
|
||||
@ -507,6 +514,7 @@ var proto_offsets = (buf) => {
|
||||
if (buf[--pos] != 10)
|
||||
continue;
|
||||
if (!is_referenced(buf, pos)) {
|
||||
console.error(`Reference to ${name} not found`);
|
||||
continue;
|
||||
}
|
||||
var bin = meta.find((m) => m.offset <= pos && m.offset + m.size >= pos);
|
||||
|
@ -1,6 +1,6 @@
|
||||
{
|
||||
"name": "otorp",
|
||||
"version": "0.0.0",
|
||||
"version": "0.0.1",
|
||||
"author": "sheetjs",
|
||||
"description": "Recover protobuf definitions from Mach-O binaries",
|
||||
"bin": {
|
||||
|
@ -55,6 +55,11 @@ export default otorp;
|
||||
var is_referenced = (buf: Uint8Array, pos: number): boolean => {
|
||||
var dv = u8_to_dataview(buf);
|
||||
|
||||
/* Search for LEA reference (x86) */
|
||||
for(var leaddr = 0; leaddr > -1 && leaddr < pos; leaddr = u8indexOf(buf, 0x8D, leaddr + 1))
|
||||
if(dv.getUint32(leaddr + 2, true) == pos - leaddr - 6) return true;
|
||||
|
||||
/* Search for absolute reference to address */
|
||||
try {
|
||||
var headers = parse_macho(buf);
|
||||
for(var i = 0; i < headers.length; ++i) {
|
||||
@ -69,7 +74,6 @@ var is_referenced = (buf: Uint8Array, pos: number): boolean => {
|
||||
if(u8indexOf(b, ref, 0) > 0) return true;
|
||||
}
|
||||
} catch(e) {}
|
||||
|
||||
return false;
|
||||
};
|
||||
|
||||
@ -90,7 +94,7 @@ var proto_offsets = (buf: Uint8Array): OffsetList => {
|
||||
if(off - pos > 250) continue;
|
||||
var name = u8str(buf.slice(pos + 1, off));
|
||||
if(buf[--pos] != 0x0A) continue;
|
||||
if(!is_referenced(buf, pos)) { /* console.error(`Reference to ${name} not found`); */ continue; }
|
||||
if(!is_referenced(buf, pos)) { console.error(`Reference to ${name} not found`); continue; }
|
||||
var bin = meta.find(m => m.offset <= pos && m.offset + m.size >= pos);
|
||||
out.push([pos, name, bin?.type || -1, bin?.subtype || -1]);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user