SheetZilla (sheets in ghidra)
This commit is contained in:
parent
5b2e3e4786
commit
16e1d8d216
357
docz/docs/03-demos/32-extensions/41-ghidra.md
Normal file
357
docz/docs/03-demos/32-extensions/41-ghidra.md
Normal file
@ -0,0 +1,357 @@
|
||||
---
|
||||
title: Sheets in Ghidra
|
||||
sidebar_label: Ghidra
|
||||
pagination_prev: demos/cloud/index
|
||||
pagination_next: demos/bigdata/index
|
||||
sidebar_custom_props:
|
||||
summary: Generate spreadsheets from Ghidra-generated bitfield tables
|
||||
---
|
||||
|
||||
import current from '/version.js';
|
||||
import CodeBlock from '@theme/CodeBlock';
|
||||
|
||||
[Ghidra](https://ghidra-sre.org/) is a software reverse engineering platform
|
||||
with a robust Java-based extension system.
|
||||
|
||||
[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing
|
||||
data from spreadsheets.
|
||||
|
||||
The [Complete Demo](#complete-demo) uses SheetJS to export data from a Ghidra
|
||||
script. We'll create an extension that loads the [V8](/docs/demos/engines/v8)
|
||||
JavaScript engine through the Ghidra.js[^1] integration and uses the SheetJS
|
||||
library to export a bitfield table from Apple Numbers to a XLSX workbook.
|
||||
|
||||
:::note Tested Deployments
|
||||
|
||||
This demo was tested by SheetJS users in the following deployments:
|
||||
|
||||
| Architecture | Ghidra | Date |
|
||||
|:-------------|:---------|:-----------|
|
||||
| `darwin-arm` | `11.1.2` | 2024-10-13 |
|
||||
|
||||
:::
|
||||
|
||||
## Integration Details
|
||||
|
||||
Ghidra natively supports scripts that are run in Java. JS extension scripts
|
||||
require a [JavaScript engine](/docs/demos/engines/) with Java bindings.
|
||||
|
||||
Ghidra.js[^1] is a Ghidra integration for [RhinoJS](/docs/demos/engines/rhino),
|
||||
[GraalJS](/docs/demos/engines/graaljs) and [V8](/docs/demos/engines/v8#java).
|
||||
The current version uses [the Javet V8 binding](https://www.caoccao.com/Javet).
|
||||
|
||||
### Loading SheetJS Scripts
|
||||
|
||||
The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be
|
||||
loaded in Ghidra.js scripts using `require`:
|
||||
|
||||
```js title="Loading SheetJS scripts in Ghidra.js"
|
||||
const XLSX = require("xlsx");
|
||||
```
|
||||
|
||||
:::caution pass
|
||||
|
||||
SheetJS NodeJS modules must be installed in a folder in the Ghidra script path!
|
||||
|
||||
:::
|
||||
|
||||
### Bitfields and Sheets
|
||||
|
||||
Binary file formats commonly use bitfields to compactly store a set of Boolean
|
||||
(true or false) flags. For example, in the XLSB file format, the `BrtRowHdr`
|
||||
record[^2] encodes [row properties](/docs/csf/features/rowprops). Bit offsets
|
||||
91-96 are interpreted as flags marking if a row is hidden or if it is collapsed.
|
||||
|
||||
#### Assembly Implementation
|
||||
|
||||
Functions that parse bitfields typically test each bit sequentially:
|
||||
|
||||
```nasm title="x86_64 sample assembly with mnemonics"
|
||||
CASE_1c
|
||||
41 0f ba e5 1c BT R13D,0x1c
|
||||
73 69 JNC CASE_1d
|
||||
|
||||
;; .... Do some work here (bit offset 28)
|
||||
|
||||
CASE_1d
|
||||
41 0f ba e5 1d BT R13D,0x1d
|
||||
73 69 JNC CASE_1e
|
||||
|
||||
;; .... Do some work here (bit offset 29)
|
||||
```
|
||||
|
||||
:::note pass
|
||||
|
||||
The assembly is approximated by the following TypeScript snippet:
|
||||
|
||||
```typescript title="Approximate TypeScript"
|
||||
/* R13 is a 64-bit register */
|
||||
declare let R13: BigInt;
|
||||
/* NOTE: asm R13D is technically a live binding */
|
||||
let R13D: number = Number(R13 & 0xFFFFFFFFn);
|
||||
|
||||
if((R13D >> 28) & 1) {
|
||||
// .... Do some work here (bit offset 28)
|
||||
}
|
||||
|
||||
if((R13D >> 29) & 1) {
|
||||
// .... Do some work here (bit offset 29)
|
||||
}
|
||||
```
|
||||
|
||||
:::
|
||||
|
||||
#### Array of Objects
|
||||
|
||||
A bitmask or bit offset can be paired with a description in a JavaScript object.
|
||||
|
||||
For example, in the `BrtRowHdr` record, bit offset 92 indicates whether the row
|
||||
is hidden (if the bit is set) or visible (if the bit is not set). The offset and
|
||||
description can be stored as fields in an object:
|
||||
|
||||
```js title="Sample metadata for BrtRowHdr offset 92"
|
||||
const metadata_92 = { Offset: 92, Description: "Hidden flag" };
|
||||
```
|
||||
|
||||
Each object can be stored in an array:
|
||||
|
||||
```js title="Array of sample metadata for BrtRowHdr"
|
||||
const metadata = [
|
||||
{ Offset: 91, Description: "Collapsed flag" },
|
||||
{ Offset: 92, Description: "Hidden flag" },
|
||||
// ...
|
||||
];
|
||||
```
|
||||
|
||||
This is an ["Array of Objects"](/docs/api/utilities/array#arrays-of-objects).
|
||||
The SheetJS `json_to_sheet` method[^3] can generate a SheetJS worksheet object
|
||||
from the array:
|
||||
|
||||
```js title="Generating a worksheet from the metadata"
|
||||
const ws = XLSX.utils.json_to_sheet(metadata);
|
||||
```
|
||||
|
||||
The SheetJS `book_new` method[^4] generates a SheetJS workbook object that can
|
||||
be written to the filesystem using the `writeFile` method[^5]:
|
||||
|
||||
```js title="Exporting the worksheet to file"
|
||||
const wb = XLSX.utils.book_new(ws, "Offsets");
|
||||
XLSX.utils.writeFile(wb, "SheetJSGhidra.xlsx");
|
||||
```
|
||||
|
||||
### Java Binding
|
||||
|
||||
Ghidra.js exposes a number of globals for interacting with Ghidra, including:
|
||||
|
||||
- `currentProgram`: information about the loaded program.
|
||||
- `JavaHelper`: Java helper to load classes.
|
||||
|
||||
Ghidra.js automatically bridges instance methods to Java method calls. It also
|
||||
handles the plugin and file extension details.
|
||||
|
||||
#### Launching the Decompiler
|
||||
|
||||
`ghidra.app.decompiler.DecompInterface` is the primary Java interface to the
|
||||
decompiler. In Ghidra.js, `JavaHelper.getClass` will load the class.
|
||||
|
||||
_Java_
|
||||
|
||||
```java title="Launch decompiler process in Java (snippet)"
|
||||
import ghidra.app.script.GhidraScript;
|
||||
import ghidra.app.decompiler.DecompInterface;
|
||||
import ghidra.program.model.listing.Program;
|
||||
|
||||
public class SheetZilla extends GhidraScript {
|
||||
@Override public void run() throws Exception {
|
||||
DecompInterface ifc = new DecompInterface();
|
||||
boolean success = ifc.openProgram(currentProgram);
|
||||
/* ... do work here ... */
|
||||
}
|
||||
}
|
||||
```
|
||||
|
||||
_Ghidra.js_
|
||||
|
||||
```js title="Launch decompiler process in Ghidra.js"
|
||||
const DecompInterface = JavaHelper.getClass('ghidra.app.decompiler.DecompInterface');
|
||||
const decompiler = new DecompInterface();
|
||||
decompiler.openProgram(currentProgram);
|
||||
```
|
||||
|
||||
#### Identifying a Function
|
||||
|
||||
The `getGlobalSymbols` method of a symbol table instance will return an array of
|
||||
symbols matching the given name:
|
||||
|
||||
```js
|
||||
/* name of function to find */
|
||||
const fname = 'MyMethod';
|
||||
|
||||
/* find symbols matching the name */
|
||||
// highlight-next-line
|
||||
const fsymbs = currentProgram.getSymbolTable().getGlobalSymbols(fname);
|
||||
|
||||
/* get first result */
|
||||
const fsymb = fsymbs[0];
|
||||
```
|
||||
|
||||
The `getFunctionAt` method of a function manager instance will take an address
|
||||
and return a reference to a function:
|
||||
|
||||
```js
|
||||
/* get address */
|
||||
const faddr = fsymb.getAddress();
|
||||
|
||||
/* find function */
|
||||
// highlight-next-line
|
||||
const fn = currentProgram.getFunctionManager().getFunctionAt(faddr);
|
||||
```
|
||||
|
||||
#### Decompiling a Function
|
||||
|
||||
The `decompileFunction` method attempts to decompile the referenced function:
|
||||
|
||||
```js
|
||||
/* decompile function */
|
||||
// highlight-next-line
|
||||
const decomp = decompiler.decompileFunction(fn, 10000, null);
|
||||
```
|
||||
|
||||
Once decompiled, it is possible to retrieve the decompiled C code:
|
||||
|
||||
```js
|
||||
/* get generated C code */
|
||||
const src = decomp.getDecompiledFunction().getC();
|
||||
```
|
||||
|
||||
## Complete Demo
|
||||
|
||||
In this demo, we will inspect the `_TSTCellToCellStorage` method within the
|
||||
`TSTables` framework of Apple Numbers 14.2. This particular method handles
|
||||
serialization of cells to the NUMBERS file format.
|
||||
|
||||
The implementation has a number of blocks which look like the following script:
|
||||
|
||||
```js
|
||||
if(flags >> 0x0d & 1) {
|
||||
const field = "numberFormatID";
|
||||
const current_value = cell[field];
|
||||
// ... check if current_value is set, do other stuff
|
||||
}
|
||||
```
|
||||
|
||||
Based on the bit offset and the field name, we will generate the following row:
|
||||
|
||||
```js
|
||||
const mask = 1 << 0x0d; // = 8192 = 0x2000
|
||||
const name = "number format ID";
|
||||
const row = { Mask: "0x" + mask.toString(16), "Internal Name": name };
|
||||
```
|
||||
|
||||
Rows will be generated for each block and the final dataset will be exported.
|
||||
|
||||
### System Setup
|
||||
|
||||
0) Install Ghidra, Xcode, and Apple Numbers.
|
||||
|
||||
<details>
|
||||
<summary><b>Installation Notes</b> (click to show)</summary>
|
||||
|
||||
On macOS, Ghidra was installed using Homebrew:
|
||||
|
||||
```bash
|
||||
brew install --cask ghidra
|
||||
```
|
||||
|
||||
</details>
|
||||
|
||||
1) Add the base Ghidra folder to the PATH variable. The following shell command
|
||||
adds to the path for the current `zsh` or `bash` session:
|
||||
|
||||
```bash
|
||||
export PATH="$PATH":$(dirname $(realpath `which ghidraRun`))
|
||||
```
|
||||
|
||||
2) Install `ghidra.js` globally:
|
||||
|
||||
```bash
|
||||
npm install -g ghidra.js
|
||||
```
|
||||
|
||||
:::note pass
|
||||
|
||||
If the install fails with a permissions issue, install with the root user:
|
||||
|
||||
```bash
|
||||
sudo npm install -g ghidra.js
|
||||
```
|
||||
|
||||
:::
|
||||
|
||||
### Program Preparation
|
||||
|
||||
3) Create a temporary folder to hold the Ghidra project:
|
||||
|
||||
```bash
|
||||
mkdir -p /tmp/sheetjs-ghidra
|
||||
```
|
||||
|
||||
4) Copy the `TSTables` framework to the current directory:
|
||||
|
||||
```bash
|
||||
cp /Applications/Numbers.app/Contents/Frameworks/TSTables.framework/Versions/Current/TSTables .
|
||||
```
|
||||
|
||||
5) Create a "thin" binary by extracting the `x86_64` part of the framework:
|
||||
|
||||
```bash
|
||||
lipo TSTables -thin x86_64 -output TSTables.macho
|
||||
```
|
||||
|
||||
:::info pass
|
||||
|
||||
When this demo was last tested, the headless analyzer did not support Mach-O fat
|
||||
binaries. `lipo` creates a new binary with support for one architecture.
|
||||
|
||||
:::
|
||||
|
||||
6) Analyze the program:
|
||||
|
||||
```bash
|
||||
$(dirname $(realpath `which ghidraRun`))/support/analyzeHeadless /tmp/sheetjs-ghidra Numbers -import TSTables.macho
|
||||
```
|
||||
|
||||
:::note pass
|
||||
|
||||
This process may take a while and print a number of Java stacktraces. The errors
|
||||
can be ignored.
|
||||
|
||||
:::
|
||||
|
||||
### SheetJS Integration
|
||||
|
||||
7) Download [`sheetjs-ghidra.js`](pathname:///ghidra/sheetjs-ghidra.js):
|
||||
|
||||
```bash
|
||||
curl -LO https://docs.sheetjs.com/ghidra/sheetjs-ghidra.js
|
||||
```
|
||||
|
||||
8) Install the [SheetJS NodeJS module](/docs/getting-started/installation/nodejs):
|
||||
|
||||
<CodeBlock language="bash">{`\
|
||||
npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz`}
|
||||
</CodeBlock>
|
||||
|
||||
9) Run the script:
|
||||
|
||||
```bash
|
||||
$(dirname $(realpath `which ghidraRun`))/support/analyzeHeadless /tmp/sheetjs-ghidra Numbers -process TSTables.macho -noanalysis -scriptPath `pwd` -postScript sheetjs-ghidra.js
|
||||
```
|
||||
|
||||
10) Open the generated `SheetJSGhidraTSTCell.xlsx` spreadsheet.
|
||||
|
||||
[^1]: The project does not have a website. The [source repository](https://github.com/vaguue/Ghidra.js) is publicly available.
|
||||
[^2]: `BrtRowHdr` is defined in the [`MS-XLSB` specification](/docs/miscellany/references)
|
||||
[^3]: See [`json_to_sheet` in "Utilities"](/docs/api/utilities/array#array-of-objects-input)
|
||||
[^4]: See [`book_new` in "Utilities"](/docs/api/utilities/wb)
|
||||
[^5]: See [`writeFile` in "Writing Files"](/docs/api/write-options)
|
@ -86,6 +86,7 @@ in the [issue tracker](https://git.sheetjs.com/sheetjs/docs.sheetjs.com/issues)
|
||||
- [`MATLAB`](/docs/demos/extensions/matlab)
|
||||
- [`Stata`](/docs/demos/extensions/stata)
|
||||
- [`Maple`](/docs/demos/extensions/maple)
|
||||
- [`Ghidra`](/docs/demos/extensions/ghidra)
|
||||
|
||||
### Cloud Platforms
|
||||
|
||||
|
@ -181,7 +181,7 @@ const config = {
|
||||
}
|
||||
})
|
||||
},
|
||||
additionalLanguages: [ "visual-basic", "swift", "java", "php", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram", "matlab", "stata", "zig", "diff", "bash", "json" ],
|
||||
additionalLanguages: [ "visual-basic", "swift", "java", "php", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram", "matlab", "stata", "zig", "diff", "bash", "json", "nasm" ],
|
||||
},
|
||||
liveCodeBlock: {
|
||||
playgroundPosition: 'top'
|
||||
|
70
docz/static/ghidra/sheetjs-ghidra.js
Normal file
70
docz/static/ghidra/sheetjs-ghidra.js
Normal file
@ -0,0 +1,70 @@
|
||||
const { utils: { book_new, json_to_sheet }, writeFileXLSX } = require("xlsx");
|
||||
|
||||
/* start decompiler */
|
||||
const DecompInterface = JavaHelper.getClass('ghidra.app.decompiler.DecompInterface');
|
||||
const decompiler = new DecompInterface();
|
||||
decompiler.openProgram(currentProgram);
|
||||
|
||||
/* get decompiled C source */
|
||||
const src = (() => {
|
||||
const fname = '_TSTCellToCellStorage';
|
||||
|
||||
/* find address to function */
|
||||
const fsymbs = currentProgram.getSymbolTable().getGlobalSymbols(fname);
|
||||
if(!fsymbs) throw new Error(`Global Symbol ${fname} cannot be found`);
|
||||
|
||||
/* find function */
|
||||
const fn = currentProgram.getFunctionManager().getFunctionAt(fsymbs[0].getAddress());
|
||||
if(!fn) throw new Error(`Function ${fname} cannot be found`);
|
||||
|
||||
/* decompile function */
|
||||
const decomp = decompiler.decompileFunction(fn, 10000, null);
|
||||
if (decomp.isTimedOut() || !decomp.decompileCompleted()) throw new Error(`Function ${fname} at ${fn} could not be decompiled`);
|
||||
|
||||
/* get and return generated C code */
|
||||
const src = decomp.getDecompiledFunction().getC();
|
||||
if(!src) throw new Error(`Function ${fname} at ${fn} decompilation`);
|
||||
return src;
|
||||
})();
|
||||
|
||||
/* offset[n] will be the name of the field at bit `n` (mask `1 << n`) */
|
||||
let offset = [];
|
||||
|
||||
/* combine split lines and lazily extract data from C source */
|
||||
let n = -1;
|
||||
src.split(/[\r\n]+/).reduce((acc,row,i) => {
|
||||
if(i <= 9) acc.rows.push(row);
|
||||
else if(row.match(/[{};]$/)) {
|
||||
if(acc.buf) { row = acc.buf + row.trim(); acc.buf = ""; }
|
||||
acc.rows.push(row);
|
||||
} else acc.buf += acc.buf ? row.trim() : row.replace(/[\r\n]+$/,"");
|
||||
return acc;
|
||||
}, {rows:[], buf:""}).rows.forEach(line => {
|
||||
if(line.match(/^ if.*(char|short| >> | & )/)) {
|
||||
n = -1;
|
||||
if(!line.match(/Var/)) return;
|
||||
if(line.match(/char/)) n = 7;
|
||||
else if(line.match(/short/)) n = 15;
|
||||
else if(line.match(/>>/)) n = parseInt(line.match(/>> ([1-9]\d*|0x[0-9a-f]+)/)[1]);
|
||||
else if(line.match(/&/)) n = Math.round(Math.log2(parseInt(line.match(/& ([1-9]\d*|0x[0-9a-f]+)/)[1])));
|
||||
} else if(line.match(/PTR__objc_msgSend/) && n >= 0) {
|
||||
if(line.match(/AssertionHandler|NSString|stringWithUTF8String|Failure/)) return;
|
||||
if(!line.match(/,/)) return; // suppress noop
|
||||
const ptr = line.match(/PTR_s_\w+/)[0];
|
||||
offset[+n] = ptr;
|
||||
n = -1;
|
||||
}
|
||||
});
|
||||
|
||||
/* construct objects with "Mask" (hex string) and "Internal Name" */
|
||||
const aoo = offset.map((name, idx) => ({
|
||||
Mask: "0x" + (1 << idx).toString(16),
|
||||
"Internal Name": name.replace(/^PTR_s_|_[0-9a-f]*$/g,"").replace(/[A-Z]/g, " $&").toLowerCase().replace(/ i d/, " ID")
|
||||
}));
|
||||
|
||||
/* create worksheet */
|
||||
const ws = json_to_sheet(aoo);
|
||||
/* write workbook */
|
||||
const wb = book_new(ws, "Offsets");
|
||||
/* write to XLSX */
|
||||
writeFileXLSX(wb, "SheetJSGhidraTSTCell.xlsx");
|
Loading…
Reference in New Issue
Block a user