From 16e1d8d216ef72f107f4f5dde8cb40719eed1dd7 Mon Sep 17 00:00:00 2001 From: SheetJS Date: Mon, 14 Oct 2024 00:45:51 -0400 Subject: [PATCH] SheetZilla (sheets in ghidra) --- docz/docs/03-demos/32-extensions/41-ghidra.md | 357 ++++++++++++++++++ docz/docs/03-demos/index.md | 1 + docz/docusaurus.config.js | 2 +- docz/static/ghidra/sheetjs-ghidra.js | 70 ++++ 4 files changed, 429 insertions(+), 1 deletion(-) create mode 100644 docz/docs/03-demos/32-extensions/41-ghidra.md create mode 100644 docz/static/ghidra/sheetjs-ghidra.js diff --git a/docz/docs/03-demos/32-extensions/41-ghidra.md b/docz/docs/03-demos/32-extensions/41-ghidra.md new file mode 100644 index 0000000..fe01d7e --- /dev/null +++ b/docz/docs/03-demos/32-extensions/41-ghidra.md @@ -0,0 +1,357 @@ +--- +title: Sheets in Ghidra +sidebar_label: Ghidra +pagination_prev: demos/cloud/index +pagination_next: demos/bigdata/index +sidebar_custom_props: + summary: Generate spreadsheets from Ghidra-generated bitfield tables +--- + +import current from '/version.js'; +import CodeBlock from '@theme/CodeBlock'; + +[Ghidra](https://ghidra-sre.org/) is a software reverse engineering platform +with a robust Java-based extension system. + +[SheetJS](https://sheetjs.com) is a JavaScript library for reading and writing +data from spreadsheets. + +The [Complete Demo](#complete-demo) uses SheetJS to export data from a Ghidra +script. We'll create an extension that loads the [V8](/docs/demos/engines/v8) +JavaScript engine through the Ghidra.js[^1] integration and uses the SheetJS +library to export a bitfield table from Apple Numbers to a XLSX workbook. + +:::note Tested Deployments + +This demo was tested by SheetJS users in the following deployments: + +| Architecture | Ghidra | Date | +|:-------------|:---------|:-----------| +| `darwin-arm` | `11.1.2` | 2024-10-13 | + +::: + +## Integration Details + +Ghidra natively supports scripts that are run in Java. JS extension scripts +require a [JavaScript engine](/docs/demos/engines/) with Java bindings. + +Ghidra.js[^1] is a Ghidra integration for [RhinoJS](/docs/demos/engines/rhino), +[GraalJS](/docs/demos/engines/graaljs) and [V8](/docs/demos/engines/v8#java). +The current version uses [the Javet V8 binding](https://www.caoccao.com/Javet). + +### Loading SheetJS Scripts + +The [SheetJS NodeJS module](/docs/getting-started/installation/nodejs) can be +loaded in Ghidra.js scripts using `require`: + +```js title="Loading SheetJS scripts in Ghidra.js" +const XLSX = require("xlsx"); +``` + +:::caution pass + +SheetJS NodeJS modules must be installed in a folder in the Ghidra script path! + +::: + +### Bitfields and Sheets + +Binary file formats commonly use bitfields to compactly store a set of Boolean +(true or false) flags. For example, in the XLSB file format, the `BrtRowHdr` +record[^2] encodes [row properties](/docs/csf/features/rowprops). Bit offsets +91-96 are interpreted as flags marking if a row is hidden or if it is collapsed. + +#### Assembly Implementation + +Functions that parse bitfields typically test each bit sequentially: + +```nasm title="x86_64 sample assembly with mnemonics" + CASE_1c +41 0f ba e5 1c BT R13D,0x1c +73 69 JNC CASE_1d + +;; .... Do some work here (bit offset 28) + + CASE_1d +41 0f ba e5 1d BT R13D,0x1d +73 69 JNC CASE_1e + +;; .... Do some work here (bit offset 29) +``` + +:::note pass + +The assembly is approximated by the following TypeScript snippet: + +```typescript title="Approximate TypeScript" +/* R13 is a 64-bit register */ +declare let R13: BigInt; +/* NOTE: asm R13D is technically a live binding */ +let R13D: number = Number(R13 & 0xFFFFFFFFn); + +if((R13D >> 28) & 1) { + // .... Do some work here (bit offset 28) +} + +if((R13D >> 29) & 1) { + // .... Do some work here (bit offset 29) +} +``` + +::: + +#### Array of Objects + +A bitmask or bit offset can be paired with a description in a JavaScript object. + +For example, in the `BrtRowHdr` record, bit offset 92 indicates whether the row +is hidden (if the bit is set) or visible (if the bit is not set). The offset and +description can be stored as fields in an object: + +```js title="Sample metadata for BrtRowHdr offset 92" +const metadata_92 = { Offset: 92, Description: "Hidden flag" }; +``` + +Each object can be stored in an array: + +```js title="Array of sample metadata for BrtRowHdr" +const metadata = [ + { Offset: 91, Description: "Collapsed flag" }, + { Offset: 92, Description: "Hidden flag" }, + // ... +]; +``` + +This is an ["Array of Objects"](/docs/api/utilities/array#arrays-of-objects). +The SheetJS `json_to_sheet` method[^3] can generate a SheetJS worksheet object +from the array: + +```js title="Generating a worksheet from the metadata" +const ws = XLSX.utils.json_to_sheet(metadata); +``` + +The SheetJS `book_new` method[^4] generates a SheetJS workbook object that can +be written to the filesystem using the `writeFile` method[^5]: + +```js title="Exporting the worksheet to file" +const wb = XLSX.utils.book_new(ws, "Offsets"); +XLSX.utils.writeFile(wb, "SheetJSGhidra.xlsx"); +``` + +### Java Binding + +Ghidra.js exposes a number of globals for interacting with Ghidra, including: + +- `currentProgram`: information about the loaded program. +- `JavaHelper`: Java helper to load classes. + +Ghidra.js automatically bridges instance methods to Java method calls. It also +handles the plugin and file extension details. + +#### Launching the Decompiler + +`ghidra.app.decompiler.DecompInterface` is the primary Java interface to the +decompiler. In Ghidra.js, `JavaHelper.getClass` will load the class. + +_Java_ + +```java title="Launch decompiler process in Java (snippet)" +import ghidra.app.script.GhidraScript; +import ghidra.app.decompiler.DecompInterface; +import ghidra.program.model.listing.Program; + +public class SheetZilla extends GhidraScript { + @Override public void run() throws Exception { + DecompInterface ifc = new DecompInterface(); + boolean success = ifc.openProgram(currentProgram); + /* ... do work here ... */ + } +} +``` + +_Ghidra.js_ + +```js title="Launch decompiler process in Ghidra.js" +const DecompInterface = JavaHelper.getClass('ghidra.app.decompiler.DecompInterface'); +const decompiler = new DecompInterface(); +decompiler.openProgram(currentProgram); +``` + +#### Identifying a Function + +The `getGlobalSymbols` method of a symbol table instance will return an array of +symbols matching the given name: + +```js +/* name of function to find */ +const fname = 'MyMethod'; + +/* find symbols matching the name */ +// highlight-next-line +const fsymbs = currentProgram.getSymbolTable().getGlobalSymbols(fname); + +/* get first result */ +const fsymb = fsymbs[0]; +``` + +The `getFunctionAt` method of a function manager instance will take an address +and return a reference to a function: + +```js +/* get address */ +const faddr = fsymb.getAddress(); + +/* find function */ +// highlight-next-line +const fn = currentProgram.getFunctionManager().getFunctionAt(faddr); +``` + +#### Decompiling a Function + +The `decompileFunction` method attempts to decompile the referenced function: + +```js +/* decompile function */ +// highlight-next-line +const decomp = decompiler.decompileFunction(fn, 10000, null); +``` + +Once decompiled, it is possible to retrieve the decompiled C code: + +```js +/* get generated C code */ +const src = decomp.getDecompiledFunction().getC(); +``` + +## Complete Demo + +In this demo, we will inspect the `_TSTCellToCellStorage` method within the +`TSTables` framework of Apple Numbers 14.2. This particular method handles +serialization of cells to the NUMBERS file format. + +The implementation has a number of blocks which look like the following script: + +```js +if(flags >> 0x0d & 1) { + const field = "numberFormatID"; + const current_value = cell[field]; + // ... check if current_value is set, do other stuff +} +``` + +Based on the bit offset and the field name, we will generate the following row: + +```js +const mask = 1 << 0x0d; // = 8192 = 0x2000 +const name = "number format ID"; +const row = { Mask: "0x" + mask.toString(16), "Internal Name": name }; +``` + +Rows will be generated for each block and the final dataset will be exported. + +### System Setup + +0) Install Ghidra, Xcode, and Apple Numbers. + +
+ Installation Notes (click to show) + +On macOS, Ghidra was installed using Homebrew: + +```bash +brew install --cask ghidra +``` + +
+ +1) Add the base Ghidra folder to the PATH variable. The following shell command +adds to the path for the current `zsh` or `bash` session: + +```bash +export PATH="$PATH":$(dirname $(realpath `which ghidraRun`)) +``` + +2) Install `ghidra.js` globally: + +```bash +npm install -g ghidra.js +``` + +:::note pass + +If the install fails with a permissions issue, install with the root user: + +```bash +sudo npm install -g ghidra.js +``` + +::: + +### Program Preparation + +3) Create a temporary folder to hold the Ghidra project: + +```bash +mkdir -p /tmp/sheetjs-ghidra +``` + +4) Copy the `TSTables` framework to the current directory: + +```bash +cp /Applications/Numbers.app/Contents/Frameworks/TSTables.framework/Versions/Current/TSTables . +``` + +5) Create a "thin" binary by extracting the `x86_64` part of the framework: + +```bash +lipo TSTables -thin x86_64 -output TSTables.macho +``` + +:::info pass + +When this demo was last tested, the headless analyzer did not support Mach-O fat +binaries. `lipo` creates a new binary with support for one architecture. + +::: + +6) Analyze the program: + +```bash +$(dirname $(realpath `which ghidraRun`))/support/analyzeHeadless /tmp/sheetjs-ghidra Numbers -import TSTables.macho +``` + +:::note pass + +This process may take a while and print a number of Java stacktraces. The errors +can be ignored. + +::: + +### SheetJS Integration + +7) Download [`sheetjs-ghidra.js`](pathname:///ghidra/sheetjs-ghidra.js): + +```bash +curl -LO https://docs.sheetjs.com/ghidra/sheetjs-ghidra.js +``` + +8) Install the [SheetJS NodeJS module](/docs/getting-started/installation/nodejs): + +{`\ +npm i --save https://cdn.sheetjs.com/xlsx-${current}/xlsx-${current}.tgz`} + + +9) Run the script: + +```bash +$(dirname $(realpath `which ghidraRun`))/support/analyzeHeadless /tmp/sheetjs-ghidra Numbers -process TSTables.macho -noanalysis -scriptPath `pwd` -postScript sheetjs-ghidra.js +``` + +10) Open the generated `SheetJSGhidraTSTCell.xlsx` spreadsheet. + +[^1]: The project does not have a website. The [source repository](https://github.com/vaguue/Ghidra.js) is publicly available. +[^2]: `BrtRowHdr` is defined in the [`MS-XLSB` specification](/docs/miscellany/references) +[^3]: See [`json_to_sheet` in "Utilities"](/docs/api/utilities/array#array-of-objects-input) +[^4]: See [`book_new` in "Utilities"](/docs/api/utilities/wb) +[^5]: See [`writeFile` in "Writing Files"](/docs/api/write-options) \ No newline at end of file diff --git a/docz/docs/03-demos/index.md b/docz/docs/03-demos/index.md index a30fabc..35cbfaa 100644 --- a/docz/docs/03-demos/index.md +++ b/docz/docs/03-demos/index.md @@ -86,6 +86,7 @@ in the [issue tracker](https://git.sheetjs.com/sheetjs/docs.sheetjs.com/issues) - [`MATLAB`](/docs/demos/extensions/matlab) - [`Stata`](/docs/demos/extensions/stata) - [`Maple`](/docs/demos/extensions/maple) +- [`Ghidra`](/docs/demos/extensions/ghidra) ### Cloud Platforms diff --git a/docz/docusaurus.config.js b/docz/docusaurus.config.js index 0926143..2c3ae08 100644 --- a/docz/docusaurus.config.js +++ b/docz/docusaurus.config.js @@ -181,7 +181,7 @@ const config = { } }) }, - additionalLanguages: [ "visual-basic", "swift", "java", "php", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram", "matlab", "stata", "zig", "diff", "bash", "json" ], + additionalLanguages: [ "visual-basic", "swift", "java", "php", "csharp", "perl", "ruby", "cpp", "applescript", "liquid", "rust", "dart", "wolfram", "matlab", "stata", "zig", "diff", "bash", "json", "nasm" ], }, liveCodeBlock: { playgroundPosition: 'top' diff --git a/docz/static/ghidra/sheetjs-ghidra.js b/docz/static/ghidra/sheetjs-ghidra.js new file mode 100644 index 0000000..e272026 --- /dev/null +++ b/docz/static/ghidra/sheetjs-ghidra.js @@ -0,0 +1,70 @@ +const { utils: { book_new, json_to_sheet }, writeFileXLSX } = require("xlsx"); + +/* start decompiler */ +const DecompInterface = JavaHelper.getClass('ghidra.app.decompiler.DecompInterface'); +const decompiler = new DecompInterface(); +decompiler.openProgram(currentProgram); + +/* get decompiled C source */ +const src = (() => { + const fname = '_TSTCellToCellStorage'; + + /* find address to function */ + const fsymbs = currentProgram.getSymbolTable().getGlobalSymbols(fname); + if(!fsymbs) throw new Error(`Global Symbol ${fname} cannot be found`); + + /* find function */ + const fn = currentProgram.getFunctionManager().getFunctionAt(fsymbs[0].getAddress()); + if(!fn) throw new Error(`Function ${fname} cannot be found`); + + /* decompile function */ + const decomp = decompiler.decompileFunction(fn, 10000, null); + if (decomp.isTimedOut() || !decomp.decompileCompleted()) throw new Error(`Function ${fname} at ${fn} could not be decompiled`); + + /* get and return generated C code */ + const src = decomp.getDecompiledFunction().getC(); + if(!src) throw new Error(`Function ${fname} at ${fn} decompilation`); + return src; +})(); + +/* offset[n] will be the name of the field at bit `n` (mask `1 << n`) */ +let offset = []; + +/* combine split lines and lazily extract data from C source */ +let n = -1; +src.split(/[\r\n]+/).reduce((acc,row,i) => { + if(i <= 9) acc.rows.push(row); + else if(row.match(/[{};]$/)) { + if(acc.buf) { row = acc.buf + row.trim(); acc.buf = ""; } + acc.rows.push(row); + } else acc.buf += acc.buf ? row.trim() : row.replace(/[\r\n]+$/,""); + return acc; +}, {rows:[], buf:""}).rows.forEach(line => { + if(line.match(/^ if.*(char|short| >> | & )/)) { + n = -1; + if(!line.match(/Var/)) return; + if(line.match(/char/)) n = 7; + else if(line.match(/short/)) n = 15; + else if(line.match(/>>/)) n = parseInt(line.match(/>> ([1-9]\d*|0x[0-9a-f]+)/)[1]); + else if(line.match(/&/)) n = Math.round(Math.log2(parseInt(line.match(/& ([1-9]\d*|0x[0-9a-f]+)/)[1]))); + } else if(line.match(/PTR__objc_msgSend/) && n >= 0) { + if(line.match(/AssertionHandler|NSString|stringWithUTF8String|Failure/)) return; + if(!line.match(/,/)) return; // suppress noop + const ptr = line.match(/PTR_s_\w+/)[0]; + offset[+n] = ptr; + n = -1; + } +}); + +/* construct objects with "Mask" (hex string) and "Internal Name" */ +const aoo = offset.map((name, idx) => ({ + Mask: "0x" + (1 << idx).toString(16), + "Internal Name": name.replace(/^PTR_s_|_[0-9a-f]*$/g,"").replace(/[A-Z]/g, " $&").toLowerCase().replace(/ i d/, " ID") +})); + +/* create worksheet */ +const ws = json_to_sheet(aoo); +/* write workbook */ +const wb = book_new(ws, "Offsets"); +/* write to XLSX */ +writeFileXLSX(wb, "SheetJSGhidraTSTCell.xlsx");