#!/usr/bin/env -S deno run -A
/*! otorp (C) 2021-present SheetJS -- http://sheetjs.com */
import { resolve } from "https://deno.land/std@0.171.0/path/mod.ts";
import { TerminalSpinner } from "https://deno.land/x/spinners/mod.ts";

// #region util.ts

var u8_to_dataview = (array: Uint8Array): DataView => new DataView(array.buffer, array.byteOffset, array.byteLength);

var u8str = (u8: Uint8Array): string => new TextDecoder().decode(u8);

var u8concat = (u8a: Uint8Array[]): Uint8Array => {
  var len = u8a.reduce((acc: number, x: Uint8Array) => acc + x.length, 0);
  var out = new Uint8Array(len);
  var off = 0;
  u8a.forEach(u8 => { out.set(u8, off); off += u8.length; });
  return out;
};

var indent = (str: string, depth: number /* = 1 */): string => str.split(/\n/g).map(x => x && "  ".repeat(depth) + x).join("\n");

function u8indexOf(u8: Uint8Array, data: string | number | Uint8Array, byteOffset?: number): number {
  //if(Buffer.isBuffer(u8)) return u8.indexOf(data, byteOffset);
  if(typeof data == "number") return u8.indexOf(data, byteOffset);
  var l = byteOffset;
  if(typeof data == "string") {
    outs: while((l = u8.indexOf(data.charCodeAt(0), l)) > -1) {
      ++l;
      for(var j = 1; j < data.length; ++j) if(u8[l+j-1] != data.charCodeAt(j)) continue outs;
      return l - 1;
    }
  } else {
    outb: while((l = u8.indexOf(data[0], l)) > -1) {
      ++l;
      for(var j = 1; j < data.length; ++j) if(u8[l+j-1] != data[j]) continue outb;
      return l - 1;
    }
  }
  return -1;
}

// #endregion

// #region proto.ts

type Ptr = [number];

/** Parse an integer from the varint that can be exactly stored in a double */
function parse_varint49(buf: Uint8Array, ptr?: Ptr): number {
  var l = ptr ? ptr[0] : 0;
  var usz = buf[l] & 0x7F;
  varint: if(buf[l++] >= 0x80) {
    usz |= (buf[l] & 0x7F) <<  7; if(buf[l++] < 0x80) break varint;
    usz |= (buf[l] & 0x7F) << 14; if(buf[l++] < 0x80) break varint;
    usz |= (buf[l] & 0x7F) << 21; if(buf[l++] < 0x80) break varint;
    usz += (buf[l] & 0x7F) * Math.pow(2, 28); ++l; if(buf[l++] < 0x80) break varint;
    usz += (buf[l] & 0x7F) * Math.pow(2, 35); ++l; if(buf[l++] < 0x80) break varint;
    usz += (buf[l] & 0x7F) * Math.pow(2, 42); ++l; if(buf[l++] < 0x80) break varint;
  }
  if(ptr) ptr[0] = l;
  return usz;
}

function write_varint49(v: number): Uint8Array {
  var usz = new Uint8Array(7);
  usz[0] = (v & 0x7F);
  var L = 1;
  sz: if(v > 0x7F) {
    usz[L-1] |= 0x80; usz[L] = (v >> 7) & 0x7F; ++L;
    if(v <= 0x3FFF) break sz;
    usz[L-1] |= 0x80; usz[L] = (v >> 14) & 0x7F; ++L;
    if(v <= 0x1FFFFF) break sz;
    usz[L-1] |= 0x80; usz[L] = (v >> 21) & 0x7F; ++L;
    if(v <= 0xFFFFFFF) break sz;
    usz[L-1] |= 0x80; usz[L] = ((v/0x100) >>> 21) & 0x7F; ++L;
    if(v <= 0x7FFFFFFFF) break sz;
    usz[L-1] |= 0x80; usz[L] = ((v/0x10000) >>> 21) & 0x7F; ++L;
    if(v <= 0x3FFFFFFFFFF) break sz;
    usz[L-1] |= 0x80; usz[L] = ((v/0x1000000) >>> 21) & 0x7F; ++L;
  }
  return usz.slice(0, L);
}

/** Parse a 32-bit signed integer from the raw varint */
function varint_to_i32(buf: Uint8Array): number {
  var l = 0, i32 = buf[l] & 0x7F;
  varint: if(buf[l++] >= 0x80) {
    i32 |= (buf[l] & 0x7F) <<  7; if(buf[l++] < 0x80) break varint;
    i32 |= (buf[l] & 0x7F) << 14; if(buf[l++] < 0x80) break varint;
    i32 |= (buf[l] & 0x7F) << 21; if(buf[l++] < 0x80) break varint;
    i32 |= (buf[l] & 0x7F) << 28;
  }
  return i32;
}

interface ProtoItem {
  offset?: number;
  data: Uint8Array;
  type: number;
}
type ProtoField = Array<ProtoItem>
type ProtoMessage = Array<ProtoField>;

/** Shallow parse of a message */
function parse_shallow(buf: Uint8Array): ProtoMessage {
  var out: ProtoMessage = [], ptr: Ptr = [0];
  while(ptr[0] < buf.length) {
    var off = ptr[0];
    var num = parse_varint49(buf, ptr);
    var type = num & 0x07; num = Math.floor(num / 8);
    var len = 0;
    var res: Uint8Array;
    if(num == 0) break;
    switch(type) {
      case 0: {
        var l = ptr[0];
        while(buf[ptr[0]++] >= 0x80);
        res = buf.slice(l, ptr[0]);
      } break;
      case 5: len = 4; res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
      case 1: len = 8; res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
      case 2: len = parse_varint49(buf, ptr); res = buf.slice(ptr[0], ptr[0] + len); ptr[0] += len; break;
      case 3: // Start group
      case 4: // End group
      default: throw new Error(`PB Type ${type} for Field ${num} at offset ${off}`);
    }
    var v: ProtoItem = { offset: off, data: res, type };
    if(out[num] == null) out[num] = [v];
    else out[num].push(v);
  }
  return out;
}

/** Serialize a shallow parse */
function write_shallow(proto: ProtoMessage): Uint8Array {
  var out: Uint8Array[] = [];
  proto.forEach((field, idx) => {
    field.forEach(item => {
      out.push(write_varint49(idx * 8 + item.type));
      out.push(item.data);
    });
  });
  return u8concat(out);
}

function mappa<U>(data: ProtoField, cb:(_:Uint8Array) => U): U[] {
  if(!data) return [];
  return data.map((d) => { try {
    return cb(d.data);
  } catch(e) {
    var m = e.message?.match(/at offset (\d+)/);
    if(m) e.message = e.message.replace(/at offset (\d+)/, "at offset " + (+m[1] + (d.offset||0)));
    throw e;
  }});
}

// #endregion

// #region descriptor.ts

var TYPES = [
  "error",
  "double",
  "float",
  "int64",
  "uint64",
  "int32",
  "fixed64",
  "fixed32",
  "bool",
  "string",
  "group",
  "message",
  "bytes",
  "uint32",
  "enum",
  "sfixed32",
  "sfixed64",
  "sint32",
  "sint64"
];


interface FileOptions {
  javaPackage?: string;
  javaOuterClassname?: string;
  javaMultipleFiles?: string;
  goPackage?: string;
}
function parse_FileOptions(buf: Uint8Array): FileOptions {
  var data = parse_shallow(buf);
  var out: FileOptions = {};
  if(data[1]?.[0]) out.javaPackage = u8str(data[1][0].data);
  if(data[8]?.[0]) out.javaOuterClassname = u8str(data[8][0].data);
  if(data[11]?.[0]) out.goPackage = u8str(data[11][0].data);
  return out;
}


interface EnumValue {
  name?: string;
  number?: number;
}
function parse_EnumValue(buf: Uint8Array): EnumValue {
  var data = parse_shallow(buf);
  var out: EnumValue = {};
  if(data[1]?.[0]) out.name = u8str(data[1][0].data);
  if(data[2]?.[0]) out.number = varint_to_i32(data[2][0].data);
  return out;
}


interface Enum {
  name?: string;
  value?: EnumValue[];
}
function parse_Enum(buf: Uint8Array): Enum {
  var data = parse_shallow(buf);
  var out: Enum = {};
  if(data[1]?.[0]) out.name = u8str(data[1][0].data);
  out.value = mappa(data[2], parse_EnumValue);
  return out;
}
var write_Enum = (en: Enum): string => {
  var out = [`enum ${en.name} {`];
  en.value?.forEach(({name, number}) => out.push(`  ${name} = ${number};`));
  return out.concat(`}`).join("\n");
};


interface FieldOptions {
  packed?: boolean;
  deprecated?: boolean;
}
function parse_FieldOptions(buf: Uint8Array): FieldOptions {
  var data = parse_shallow(buf);
  var out: FieldOptions = {};
  if(data[2]?.[0]) out.packed = !!data[2][0].data;
  if(data[3]?.[0]) out.deprecated = !!data[3][0].data;
  return out;
}


interface Field {
  name?: string;
  extendee?: string;
  number?: number;
  label?: number;
  type?: number;
  typeName?: string;
  defaultValue?: string;
  options?: FieldOptions;
}
function parse_Field(buf: Uint8Array): Field {
  var data = parse_shallow(buf);
  var out: Field = {};
  if(data[1]?.[0]) out.name = u8str(data[1][0].data);
  if(data[2]?.[0]) out.extendee = u8str(data[2][0].data);
  if(data[3]?.[0]) out.number = varint_to_i32(data[3][0].data);
  if(data[4]?.[0]) out.label = varint_to_i32(data[4][0].data);
  if(data[5]?.[0]) out.type = varint_to_i32(data[5][0].data);
  if(data[6]?.[0]) out.typeName = u8str(data[6][0].data);
  if(data[7]?.[0]) out.defaultValue = u8str(data[7][0].data);
  if(data[8]?.[0]) out.options = parse_FieldOptions(data[8][0].data);
  return out;
}
function write_Field(field: Field): string {
  var out = [];
  var label = ["", "optional ", "required ", "repeated "][field.label||0] || "";
  var type = field.typeName || TYPES[field.type||69] || "s5s";
  var opts = [];
  if(field.defaultValue) opts.push(`default = ${field.defaultValue}`);
  if(field.options?.packed) opts.push(`packed = true`);
  if(field.options?.deprecated) opts.push(`deprecated = true`);
  var os = opts.length ? ` [${opts.join(", ")}]`: "";
  out.push(`${label}${type} ${field.name} = ${field.number}${os};`);
  return out.length ? indent(out.join("\n"), 1) : "";
}


function write_extensions(ext: Field[], xtra = false, coalesce = true): string {
  var res: string[] = [];
  var xt: Array<[string, Array<Field>]> = [];
  ext.forEach(ext => {
    if(!ext.extendee) return;
    var row = coalesce ?
      xt.find(x => x[0] == ext.extendee) :
      (xt[xt.length - 1]?.[0] == ext.extendee ? xt[xt.length - 1]: null);
    if(row) row[1].push(ext);
    else xt.push([ext.extendee, [ext]]);
  });
  xt.forEach(extrow => {
    var out = [`extend ${extrow[0]} {`];
    extrow[1].forEach(ext => out.push(write_Field(ext)));
    res.push(out.concat(`}`).join("\n") + (xtra ? "\n" : ""));
  });
  return res.join("\n");
}


interface ExtensionRange { start?: number; end?: number; }
interface MessageType {
  name?: string;
  nestedType?: MessageType[];
  enumType?: Enum[];
  field?: Field[];
  extension?: Field[];
  extensionRange?: ExtensionRange[];
}
function parse_mtype(buf: Uint8Array): MessageType {
  var data = parse_shallow(buf);
  var out: MessageType = {};
  if(data[1]?.[0]) out.name = u8str(data[1][0].data);
  if(data[2]?.length >= 1) out.field = mappa(data[2], parse_Field);
  if(data[3]?.length >= 1) out.nestedType = mappa(data[3], parse_mtype);
  if(data[4]?.length >= 1) out.enumType = mappa(data[4], parse_Enum);
  if(data[6]?.length >= 1) out.extension = mappa(data[6], parse_Field);
  if(data[5]?.length >= 1) out.extensionRange = data[5].map(d => {
    var data = parse_shallow(d.data);
    var out: ExtensionRange = {};
    if(data[1]?.[0]) out.start = varint_to_i32(data[1][0].data);
    if(data[2]?.[0]) out.end   = varint_to_i32(data[2][0].data);
    return out;
  });
  return out;
}
var write_mtype = (message: MessageType): string => {
  var out = [ `message ${message.name} {` ];
  message.nestedType?.forEach(m => out.push(indent(write_mtype(m), 1)));
  message.enumType?.forEach(en => out.push(indent(write_Enum(en), 1)));
  message.field?.forEach(field => out.push(write_Field(field)));
  if(message.extensionRange) message.extensionRange.forEach(er => out.push(`  extensions ${er.start} to ${(er.end||0) - 1};`));
  if(message.extension?.length) out.push(indent(write_extensions(message.extension), 1));
  return out.concat(`}`).join("\n");
};


interface Descriptor {
  name?: string;
  package?: string;
  dependency?: string[];
  messageType?: MessageType[];
  enumType?: Enum[];
  extension?: Field[];
  options?: FileOptions;
}
function parse_FileDescriptor(buf: Uint8Array): Descriptor {
  var data = parse_shallow(buf);
  var out: Descriptor = {};
  if(data[1]?.[0]) out.name = u8str(data[1][0].data);
  if(data[2]?.[0]) out.package = u8str(data[2][0].data);
  if(data[3]?.[0]) out.dependency = data[3].map(x => u8str(x.data));

  if(data[4]?.length >= 1) out.messageType = mappa(data[4], parse_mtype);
  if(data[5]?.length >= 1) out.enumType = mappa(data[5], parse_Enum);
  if(data[7]?.length >= 1) out.extension = mappa(data[7], parse_Field);

  if(data[8]?.[0]) out.options = parse_FileOptions(data[8][0].data);

  return out;
}
var write_FileDescriptor = (pb: Descriptor): string => {
  var out = [
    'syntax = "proto2";',
    ''
  ];
  if(pb.dependency) pb.dependency.forEach((n: string) => { if(n) out.push(`import "${n}";`); });
  if(pb.package) out.push(`package ${pb.package};\n`);
  if(pb.options) {
    var o = out.length;

    if(pb.options.javaPackage) out.push(`option java_package = "${pb.options.javaPackage}";`);
    if(pb.options.javaOuterClassname?.replace(/\W/g, "")) out.push(`option java_outer_classname = "${pb.options.javaOuterClassname}";`);
    if(pb.options.javaMultipleFiles) out.push(`option java_multiple_files = true;`);
    if(pb.options.goPackage) out.push(`option go_package = "${pb.options.goPackage}";`);

    if(out.length > o) out.push('');
  }

  pb.enumType?.forEach(en => { if(en.name) out.push(write_Enum(en) + "\n"); });
  pb.messageType?.forEach(m => { if(m.name) { var o = write_mtype(m); if(o) out.push(o + "\n"); }});

  if(pb.extension?.length) {
    var e = write_extensions(pb.extension, true, false);
    if(e) out.push(e);
  }
  return out.join("\n") + "\n";
};

// #endregion

// #region macho.ts

interface MachOEntry {
  type: number;
  subtype: number;
  offset: number;
  size: number;
  align?: number;
  data: Uint8Array;
}
var parse_fat = (buf: Uint8Array): MachOEntry[] => {
  var dv = u8_to_dataview(buf);
  if(dv.getUint32(0, false) !== 0xCAFEBABE) throw new Error("Unsupported file");
  var nfat_arch = dv.getUint32(4, false);
  var out: MachOEntry[] = [];
  for(var i = 0; i < nfat_arch; ++i) {
    var start = i * 20 + 8;

    var cputype = dv.getUint32(start, false);
    var cpusubtype = dv.getUint32(start+4, false);
    var offset = dv.getUint32(start+8, false);
    var size = dv.getUint32(start+12, false);
    var align = dv.getUint32(start+16, false);

    out.push({
      type: cputype,
      subtype: cpusubtype,
      offset,
      size,
      align,
      data: buf.slice(offset, offset + size)
    });
  }
  return out;
};
var parse_macho = (buf: Uint8Array): MachOEntry[] => {
  var dv = u8_to_dataview(buf);
  var magic = dv.getUint32(0, false);
  switch(magic) {
    // fat binary (x86_64 / aarch64)
    case 0xCAFEBABE: return parse_fat(buf);
    // x86_64
    case 0xCFFAEDFE: return [{
      type: dv.getUint32(4, false),
      subtype: dv.getUint32(8, false),
      offset: 0,
      size: buf.length,
      data: buf
    }];
  }
  throw new Error("Unsupported file");
};

// #endregion

// #region otorp.ts

interface OtorpEntry {
  name: string;
  proto: string;
}

/** Find and stringify all relevant protobuf defs */
function otorp(buf: Uint8Array, builtins = false): OtorpEntry[] {
  var res = proto_offsets(buf);
  var registry: {[key: string]: Descriptor} = {};
  var names: Set<string> = new Set();
  var out: OtorpEntry[] = [];

  res.forEach((r, i) => {
    if(!builtins && r[1].startsWith("google/protobuf/")) return;
    var b = buf.slice(r[0], i < res.length - 1 ? res[i+1][0] : buf.length);
    var pb = parse_FileDescriptorProto(b/*, r[1]*/);
    names.add(r[1]);
    registry[r[1]] = pb;
  });

  names.forEach(name => {
    /* ensure partial ordering by dependencies */
    names.delete(name);
    var pb = registry[name];
    var doit = (pb.dependency||[]).every((d: string) => !names.has(d));
    if(!doit) { names.add(name); return; }

    var dups = res.filter(r => r[1] == name);
    if(dups.length == 1) return out.push({ name, proto: write_FileDescriptor(pb) });

    /* in a fat binary, compare the defs for x86_64/aarch64 */
    var pbs = dups.map(r => {
      var i = res.indexOf(r);
      var b = buf.slice(r[0], i < res.length - 1 ? res[i+1][0] : buf.length);
      var pb = parse_FileDescriptorProto(b/*, r[1]*/);
      return write_FileDescriptor(pb);
    });
    for(var l = 1; l < pbs.length; ++l) if(pbs[l] != pbs[0]) throw new Error(`Conflicting definitions for ${name} at offsets 0x${dups[0][0].toString(16)} and 0x${dups[l][0].toString(16)}`);
    return out.push({ name, proto: pbs[0] });
  });

  return out;
}
export default otorp;

/** Determine if an address is being referenced */
var is_referenced = (buf: Uint8Array, pos: number): boolean => {
  var dv = u8_to_dataview(buf);

  /* Search for LEA reference (x86) */
  for(var leaddr = 0; leaddr > -1 && leaddr < pos; leaddr = u8indexOf(buf, 0x8D, leaddr + 1))
    if(dv.getUint32(leaddr + 2, true) == pos - leaddr - 6) return true;

  /* Search for absolute reference to address */
  try {
    var headers = parse_macho(buf);
    for(var i = 0; i < headers.length; ++i) {
      if(pos < headers[i].offset || pos > headers[i].offset + headers[i].size) continue;
      var b = headers[i].data;
      var p = pos - headers[i].offset;
      var ref = new Uint8Array([0,0,0,0,0,0,0,0]);
      var dv = u8_to_dataview(ref);
      dv.setUint32(0, p, true);
      if(u8indexOf(b, ref, 0) > 0) return true;
      ref[4] = 0x01;
      if(u8indexOf(b, ref, 0) > 0) return true;
      ref[4] = 0x00; ref[6] = 0x10;
      if(u8indexOf(b, ref, 0) > 0) return true;
    }
  } catch(e) {throw e}
  return false;
};

type OffsetList = Array<[number, string, number, number]>;
/** Generate a list of potential starting points */
var proto_offsets = (buf: Uint8Array): OffsetList => {
  var meta = parse_macho(buf);
  var out: OffsetList = [];
  var off = 0;
  /* note: this loop only works for names < 128 chars */
  search: while((off = u8indexOf(buf, ".proto", off + 1)) > -1) {
    var pos = off;
    off += 6;
    while(off - pos < 256 && buf[pos] != off - pos - 1) {
      if(buf[pos] > 0x7F || buf[pos] < 0x20) continue search;
      --pos;
    }
    if(off - pos > 250) continue;
    var name = u8str(buf.slice(pos + 1, off));
    if(buf[--pos] != 0x0A) continue;
    if(!is_referenced(buf, pos)) { console.error(`Reference to ${name} at ${pos} not found`); continue; }
    var bin = meta.find(m => m.offset <= pos && m.offset + m.size >= pos);
    out.push([pos, name, bin?.type || -1, bin?.subtype || -1]);
  }
  return out;
};

/** Parse a descriptor that starts with the first byte of the supplied buffer */
var parse_FileDescriptorProto = (buf: Uint8Array): Descriptor => {
  var l = buf.length;
  while(l > 0) try {
    var b = buf.slice(0,l);
    var o = parse_FileDescriptor(b);
    return o;
  } catch(e) {
    var m = e.message.match(/at offset (\d+)/);
    if(m && parseInt(m[1], 10) < buf.length) l = parseInt(m[1], 10) - 1;
    else --l;
  }
  throw new RangeError("no protobuf message in range");
};


// #endregion

let spin: TerminalSpinner;
const width = Deno.consoleSize().columns;
function process(inf: string, outf: string) {
  const fi = Deno.statSync(inf);
  if(fi.isDirectory) for(let info of Deno.readDirSync(inf)) {
    if(spin) spin.set(inf.length > width - 4 ? "…" + inf.slice(-(width-4)) : inf);
    process(inf + (inf.slice(-1) == "/" ? "" : "/") + info.name, outf);
  }
  try {
    const buf: Uint8Array = Deno.readFileSync(inf);
    var dv = u8_to_dataview(buf);
    var magic = dv.getUint32(0, false);
    if(![0xCAFEBABE, 0xCFFAEDFE].includes(magic)) return;

    otorp(buf).forEach(({name, proto}) => {
      if(!outf) return console.log(proto);
      var pth = resolve(outf || "./", name.replace(/[/]/g, "$"));
      try {
        const str = Deno.readTextFileSync(pth);
        if(str == proto) return;
        throw `${pth} definition diverges!`;
      } catch(e) { if(typeof e == "string") throw e; }
      console.error(`writing ${name} to ${pth}`);
      Deno.writeTextFileSync(pth, proto);
    });
  } catch(e) {}
}

function doit() {
  const [ inf, outf ] = Deno.args;
  if(!inf || inf == "-h" || inf == "--help") {
    console.log(`usage: otorp.ts <path/to/bin> [output/folder]

if no output folder specified, log all discovered defs
if output folder specified, attempt to write defs in the folder

$ otorp.ts /Applications/Numbers.app out/                   # search all files
$ otorp.ts /Applications/Numbers.app/Contents/MacOS/Numbers # search one file
`);
    Deno.exit(1);
  }
  if(Deno.statSync(inf).isDirectory) (spin = new TerminalSpinner("")).start();
  if(outf) try { Deno.mkdirSync(outf, { recursive: true }); } catch(e) {}
  process(inf, outf);
  if(spin) spin.stop();
}
doit();