diff --git a/iwa/112.md b/iwa/112.md new file mode 100644 index 0000000..c08cde6 --- /dev/null +++ b/iwa/112.md @@ -0,0 +1,47 @@ +# Protobuf MD5 Checksums for 11.2 + +The checksums are based on definitions from version 11.2 (7032.0.145) + +```proto +MD5 (KNArchives.proto) = 4d15ddb1dacdf0e2a321d09234130abd +MD5 (KNArchives.sos.proto) = 5b8e5101e946eaddca341b78b5f6e660 +MD5 (KNCommandArchives.proto) = 3aa3d1796a8cb1f1cdaccca2d7a67704 +MD5 (KNCommandArchives.sos.proto) = af09afc07db32005ff1eaf183fe99c02 +MD5 (TNArchives.proto) = c6af8ddec038ccfb8d683f2c92bb8b2c +MD5 (TNArchives.sos.proto) = 7a8ce65fa23d554fba3549f6ec5d1aa5 +MD5 (TNCommandArchives.proto) = 4b7e7ebe4583d0a0166118292d338272 +MD5 (TNCommandArchives.sos.proto) = 279eafd525689d2e8805e951f9a0b9a6 +MD5 (TPArchives.proto) = 5ae0d81aded078509f8b1dfc48b1b118 +MD5 (TPCommandArchives.proto) = 1cf18ff056e7635f1cfcb68ec943892b +MD5 (TPCommandArchives.sos.proto) = 7b627bc1e053a03a762952c5d647a97a +MD5 (TSAArchives.proto) = 6a246bb28e425ff796c285c36132b2ef +MD5 (TSAArchives.sos.proto) = 8b55a583851f1d11a2af842f20223bc7 +MD5 (TSACommandArchives.sos.proto) = d0e4741e2fea6dde83b17a7e73d12af7 +MD5 (TSCEArchives.proto) = abe4e1a7256019562a3790eb58c6b9f9 +MD5 (TSCH3DArchives.proto) = 2ac8ee10b2e28c5d201524a388a2eb9e +MD5 (TSCHArchives.Common.proto) = 6cc9c2954517f29d8247e4d59caa980f +MD5 (TSCHArchives.GEN.proto) = 25bd8a10b1646970fb96f411f14821c9 +MD5 (TSCHArchives.proto) = e4f10e18142ee8b0e2bb6f94b03e05d6 +MD5 (TSCHArchives.sos.proto) = 4452d0264a26b946524ebbbb2dec56e0 +MD5 (TSCHCommandArchives.proto) = 75cc30b1ba010f1172d0d08eff8bfa9f +MD5 (TSCHPreUFFArchives.proto) = 5a9c935194bd772a30774071ac7a7f64 +MD5 (TSDArchives.proto) = 5cc5d066f5b394508ec13f64dedba7b7 +MD5 (TSDArchives.sos.proto) = ccbc06fd5b8db95e78b4b404a0d2177f +MD5 (TSDCommandArchives.proto) = 878c419d11b043333e1ed5148a868626 +MD5 (TSKArchives.proto) = bd76c5489f2bfb5a94750f1a83969549 +MD5 (TSKArchives.sos.proto) = 5e68b45687d33b9e2cdc0f64d76988f9 +MD5 
(TSPArchiveMessages.proto) = 5bd640aed4df2758a393143096cbaf70 +MD5 (TSPDatabaseMessages.proto) = ab86cd136e1702555b5080f59609f2f1 +MD5 (TSPMessages.proto) = 1a33eb51dfb1f8ccbabdc6e236690ce0 +MD5 (TSSArchives.proto) = 301ea13a293ada201db8edf9b0f83d1d +MD5 (TSSArchives.sos.proto) = 951c42b9fd732552ffc4944fe414890a +MD5 (TSTArchives.proto) = 59a353cf0dd34b31ee932149c517e80d +MD5 (TSTArchives.sos.proto) = d085aa1e4449f85bcc37689c7d6e4c5f +MD5 (TSTCommandArchives.proto) = 4c1bbca393199455635dd80a89d9f61b +MD5 (TSTStylePropertyArchiving.proto) = 463bb1c64fe02a484b4e9e655d6af391 +MD5 (TSWPArchives.proto) = 7d171fbf72e184957773c6ad84ff4f09 +MD5 (TSWPArchives.sos.proto) = 06be0c76ea913408da04153b292f08e0 +MD5 (TSWPCommandArchives.proto) = dabb9f2e85ffdba52aaca61064303c71 +``` + +[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/notes?pixel)](https://github.com/SheetJS/notes) diff --git a/iwa/120.md b/iwa/120.md new file mode 100644 index 0000000..7f9ba22 --- /dev/null +++ b/iwa/120.md @@ -0,0 +1,47 @@ +# Protobuf MD5 Checksums for 12.0 + +The checksums are based on definitions from version 12.0 (7033.0.134) + +```proto +MD5 (KNArchives.proto) = 4d15ddb1dacdf0e2a321d09234130abd +MD5 (KNArchives.sos.proto) = 5b8e5101e946eaddca341b78b5f6e660 +MD5 (KNCommandArchives.proto) = 3aa3d1796a8cb1f1cdaccca2d7a67704 +MD5 (KNCommandArchives.sos.proto) = af09afc07db32005ff1eaf183fe99c02 +MD5 (TNArchives.proto) = c6af8ddec038ccfb8d683f2c92bb8b2c +MD5 (TNArchives.sos.proto) = 7a8ce65fa23d554fba3549f6ec5d1aa5 +MD5 (TNCommandArchives.proto) = 4b7e7ebe4583d0a0166118292d338272 +MD5 (TNCommandArchives.sos.proto) = 279eafd525689d2e8805e951f9a0b9a6 +MD5 (TPArchives.proto) = a8456e94f9b8f9b30315c51e2b89b18b +MD5 (TPCommandArchives.proto) = 1cf18ff056e7635f1cfcb68ec943892b +MD5 (TPCommandArchives.sos.proto) = 7b627bc1e053a03a762952c5d647a97a +MD5 (TSAArchives.proto) = 6a246bb28e425ff796c285c36132b2ef +MD5 (TSAArchives.sos.proto) = 8b55a583851f1d11a2af842f20223bc7 +MD5 
(TSACommandArchives.sos.proto) = d0e4741e2fea6dde83b17a7e73d12af7 +MD5 (TSCEArchives.proto) = abe4e1a7256019562a3790eb58c6b9f9 +MD5 (TSCH3DArchives.proto) = 2ac8ee10b2e28c5d201524a388a2eb9e +MD5 (TSCHArchives.Common.proto) = 6cc9c2954517f29d8247e4d59caa980f +MD5 (TSCHArchives.GEN.proto) = 25bd8a10b1646970fb96f411f14821c9 +MD5 (TSCHArchives.proto) = e4f10e18142ee8b0e2bb6f94b03e05d6 +MD5 (TSCHArchives.sos.proto) = 4452d0264a26b946524ebbbb2dec56e0 +MD5 (TSCHCommandArchives.proto) = 75cc30b1ba010f1172d0d08eff8bfa9f +MD5 (TSCHPreUFFArchives.proto) = 5a9c935194bd772a30774071ac7a7f64 +MD5 (TSDArchives.proto) = 5cc5d066f5b394508ec13f64dedba7b7 +MD5 (TSDArchives.sos.proto) = ccbc06fd5b8db95e78b4b404a0d2177f +MD5 (TSDCommandArchives.proto) = 878c419d11b043333e1ed5148a868626 +MD5 (TSKArchives.proto) = 8fbe545c0fc2aaaaec2d402ec5ebe661 +MD5 (TSKArchives.sos.proto) = 5e68b45687d33b9e2cdc0f64d76988f9 +MD5 (TSPArchiveMessages.proto) = 7f2f47d6f8f6dc3c3cf55f6016bc75ae +MD5 (TSPDatabaseMessages.proto) = ab86cd136e1702555b5080f59609f2f1 +MD5 (TSPMessages.proto) = 1a33eb51dfb1f8ccbabdc6e236690ce0 +MD5 (TSSArchives.proto) = dcb28462d9fcd774010c34d2371ad579 +MD5 (TSSArchives.sos.proto) = 951c42b9fd732552ffc4944fe414890a +MD5 (TSTArchives.proto) = 6dfb293ca832fc8d4e550c46d4d49815 +MD5 (TSTArchives.sos.proto) = d085aa1e4449f85bcc37689c7d6e4c5f +MD5 (TSTCommandArchives.proto) = a81c34ef4847236a44743b221b6a27f4 +MD5 (TSTStylePropertyArchiving.proto) = 463bb1c64fe02a484b4e9e655d6af391 +MD5 (TSWPArchives.proto) = 3c2b0d6c6e7ab3655d573e959ce4b8d5 +MD5 (TSWPArchives.sos.proto) = 06be0c76ea913408da04153b292f08e0 +MD5 (TSWPCommandArchives.proto) = ade9e806dd98f5d1509d2ef5b0284af1 +``` + +[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/notes?pixel)](https://github.com/SheetJS/notes) diff --git a/iwa/README.md b/iwa/README.md index b5ccdc6..933bf03 100644 --- a/iwa/README.md +++ b/iwa/README.md @@ -1,4 +1,4 @@ -# iWork 2013+ +# iWork 2013+ This covers the standalone ZIP-based 
format. iCloud uses a different format to support differential sync between devices. @@ -6,7 +6,7 @@ support differential sync between devices. The ZIP container holds a number of Mac binary "property list" files (`.plist`) which can be safely ignored or blanked. It also can hold preview images that can be safely ignored. - + ## File Structure The `.numbers` file is a ZIP file containing a number of `.iwa` entries. The @@ -23,7 +23,43 @@ Each block follows the Snappy compressed format as described in apps do not expect a particular compression level, and it is possible to create the equivalent of a "STORED" block. -# Data Storage +## Protocol Buffers + +Most of the data is stored in Protocol Buffer ("protobuf") wire messages. + +The iWork apps (Keynote, Numbers, Pages) include embedded Protocol Buffers +definitions as part of the file format processors. + +The [`otorp` package on `npm`](https://npm.im/otorp) ships with a command-line +tool for extracting definitions from a Mach-O binary. + +Note that some fields marked as `required` have been changed to `optional` in +later versions. File parsers should assume all fields are optional. + +### App-Specific Definitions + +The listed definitions only appear in one app: + +**Keynote** +- `KNArchives.proto` +- `KNArchives.sos.proto` +- `KNCommandArchives.proto` +- `KNCommandArchives.sos.proto` + +**Numbers** +- `TNArchives.proto` +- `TNArchives.sos.proto` +- `TNCommandArchives.proto` +- `TNCommandArchives.sos.proto` + +**Pages** +- `TPArchives.proto` +- `TPCommandArchives.proto` +- `TPCommandArchives.sos.proto` + +The other files are common across the apps. + +## Data Storage The decompressed data is a series of chunks. @@ -39,7 +75,7 @@ The message type from the `MessageInfo` (tag 1, type `uint32`) corresponds to a dynamic registry spread across the embedded frameworks. The actual message data is a protobuf packet. 
-## Dynamic Registry and Message Types +### Dynamic Registry and Message Types The `.TSP.Reference` type acts as a pointer, referencing another message. The references do not include message type info, so readers and writers must be @@ -74,44 +110,134 @@ message .TST.TableModelArchive { } ``` -The reference in field 2 from `.TST.TableInfoArchive` is expected to be of type +The reference in field 2 from `.TST.TableInfoArchive` is expected to be of type `.TST.TableModelArchive` so the latter must be registered. `.TST.DataStore` is the type of field 4 from `.TST.TableModelArchive`. Since it is not referenced indirectly, the message type does not have to be registered. -## iWork Proto Definitions +## Data Storage in Numbers files -The iWork apps (Keynote, Numbers, Pages) include embedded Protocol Buffers -definitions as part of the file format processors. +The root message (type 1) has the following structure: -The [`otorp` package on `npm`](https://npm.im/otorp) ships with a command-line -tool for extracting definitions from a Mach-O binary. +```proto +message .TN.DocumentArchive { + repeated .TSP.Reference sheets = 1; +``` -### App-Specific Definitions +The message referenced in field 1 (type 2) has the following structure: -The listed definitions only appear in one app: +```proto +message .TN.SheetArchive { + required string name = 1; + repeated .TSP.Reference drawable_infos = 2; +``` -**Keynote** -- `KNArchives.proto` -- `KNArchives.sos.proto` -- `KNCommandArchives.proto` -- `KNCommandArchives.sos.proto` +`name` is the name of the worksheet. Each worksheet can contain multiple tables. +The messages referenced in field 2 (type 6000) refer to `.TST.TableInfoArchive`. -**Numbers** -- `TNArchives.proto` -- `TNArchives.sos.proto` -- `TNCommandArchives.proto` -- `TNCommandArchives.sos.proto` +### Table Storage in iWork -**Pages** -- `TPArchives.proto` -- `TPCommandArchives.proto` -- `TPCommandArchives.sos.proto` +Table structure is shared across iWork apps.
The protobuf definitions are +identical. The root element for tables is the `.TST.TableInfoArchive`: -The other files are common across the apps. +```proto +message .TST.TableInfoArchive { + required .TSP.Reference tableModel = 2; +``` -## Determining File Type +The message referenced in field 2 (type 6001) has the following structure: + +```proto +message .TST.TableModelArchive { + required .TST.DataStore base_data_store = 4; + required uint32 number_of_rows = 6; + required uint32 number_of_columns = 7; + // ... +} + +message .TST.DataStore { + required .TST.TileStorage tiles = 3; + required .TSP.Reference stringTable = 4; + optional .TSP.Reference formulaErrorTable = 12; + optional .TSP.Reference rich_text_table = 17; + // ... +} + +message .TST.TileStorage { + message .TST.TileStorage.Tile { + required uint32 tileid = 1; + required .TSP.Reference tile = 2; + } + repeated .TST.TileStorage.Tile tiles = 1; + // ... +} +``` + +Numbers uses a "shared string table" like Excel. Excel stores both plaintext and +rich strings in the same table, while Numbers has two separate tables. + +The message referenced in the tiles (type 6002) has the following structure: + +```proto +message .TST.Tile { + repeated .TST.TileRowInfo rowInfos = 5; + // ... +} + +message .TST.TileRowInfo { + required uint32 tile_row_index = 1; + required uint32 cell_count = 2; + required bytes cell_storage_buffer_pre_bnc = 3; + required bytes cell_offsets_pre_bnc = 4; + // ... +} +``` + +Each `.TST.TileRowInfo` message holds the data and property references for a +single row in the table. + +The cell offset fields are an array of 16-bit integers that describe offsets +within the respective storage buffers. `0xFFFF` indicates that the row has no +cell at the given column index.
+ +A 32-bit flag is stored at offset 4, describing which fields are in the cell: + +| field description | bit mask | size | notes | +|:------------------|---------:|-----:|-------------------------------------| +| Error index | `0x0100` | 4 | index into formula error table | +| Rich text index | `0x0200` | 4 | index into rich shared string table | +| Plaintext index | `0x0010` | 4 | index into shared string table | +| Double value | `0x0020` | 8 | raw value (IEEE754 double) | +| Datetime value | `0x0040` | 8 | number of seconds since 1/1/2001 | + +The starting offset depends on the cell storage version (`0-1` or `2-3`), which +is stored in the first byte of each cell: + +| description | v1 offset | v3 offset | +|:----------------|---------------------------:|----------------------------:| +| Error index |`8 + POPCNT(f & 0x008E) * 4`|`12 + POPCNT(f & 0x0C8E) * 4`| +| Rich text index |`8 + POPCNT(f & 0x018E) * 4`|`12 + POPCNT(f & 0x0D8E) * 4`| +| Plaintext index |`8 + POPCNT(f & 0x138E) * 4`|`12 + POPCNT(f & 0x3F8E) * 4`| +| Double value |`8 + POPCNT(f & 0x139E) * 4`|`12 + POPCNT(f & 0x3F9E) * 4`| +| Datetime value |`8 + POPCNT(f & 0x13BE) * 4`|`12 + POPCNT(f & 0x3FBE) * 4`| + +The cell type is stored at byte offset 2: + +| type | value | +|-----:|:-----------------------------------------------------------------| +| `0` | "blank cell" (no value) | +| `2` | "Double value" (IEEE754 double) | +| `3` | get value from shared string table at "Plaintext index" | +| `5` | interpret "Datetime value" as number of seconds since 1/1/2001 | +| `6` | `true` if "Double value" is greater than zero, `false` otherwise | +| `7` | interpret "Double value" as number of seconds (Duration) | +| `8` | get error from formula error table at "Error index" | +| `9` | get value from rich shared string table at "Rich text index" | + +## Misc + +### Determining File Type All three file types use the same message tag (1) for the root `DocumentArchive` message. 
However, the required fields vary between formats. @@ -143,53 +269,11 @@ Pages is the only format to use and require field 15. Keynote requires field 2, a field that does not appear in Numbers. -## Misc - ### MD5 Checksums -The checksums are based on definitions from version 11.2 (7032.0.145) +- [11.2](./112.md) -```proto -MD5 (KNArchives.proto) = 4d15ddb1dacdf0e2a321d09234130abd -MD5 (KNArchives.sos.proto) = 5b8e5101e946eaddca341b78b5f6e660 -MD5 (KNCommandArchives.proto) = 3aa3d1796a8cb1f1cdaccca2d7a67704 -MD5 (KNCommandArchives.sos.proto) = af09afc07db32005ff1eaf183fe99c02 -MD5 (TNArchives.proto) = c6af8ddec038ccfb8d683f2c92bb8b2c -MD5 (TNArchives.sos.proto) = 7a8ce65fa23d554fba3549f6ec5d1aa5 -MD5 (TNCommandArchives.proto) = 4b7e7ebe4583d0a0166118292d338272 -MD5 (TNCommandArchives.sos.proto) = 279eafd525689d2e8805e951f9a0b9a6 -MD5 (TPArchives.proto) = 5ae0d81aded078509f8b1dfc48b1b118 -MD5 (TPCommandArchives.proto) = 1cf18ff056e7635f1cfcb68ec943892b -MD5 (TPCommandArchives.sos.proto) = 7b627bc1e053a03a762952c5d647a97a -MD5 (TSAArchives.proto) = 6a246bb28e425ff796c285c36132b2ef -MD5 (TSAArchives.sos.proto) = 8b55a583851f1d11a2af842f20223bc7 -MD5 (TSACommandArchives.sos.proto) = d0e4741e2fea6dde83b17a7e73d12af7 -MD5 (TSCEArchives.proto) = abe4e1a7256019562a3790eb58c6b9f9 -MD5 (TSCH3DArchives.proto) = 2ac8ee10b2e28c5d201524a388a2eb9e -MD5 (TSCHArchives.Common.proto) = 6cc9c2954517f29d8247e4d59caa980f -MD5 (TSCHArchives.GEN.proto) = 25bd8a10b1646970fb96f411f14821c9 -MD5 (TSCHArchives.proto) = e4f10e18142ee8b0e2bb6f94b03e05d6 -MD5 (TSCHArchives.sos.proto) = 4452d0264a26b946524ebbbb2dec56e0 -MD5 (TSCHCommandArchives.proto) = 75cc30b1ba010f1172d0d08eff8bfa9f -MD5 (TSCHPreUFFArchives.proto) = 5a9c935194bd772a30774071ac7a7f64 -MD5 (TSDArchives.proto) = 5cc5d066f5b394508ec13f64dedba7b7 -MD5 (TSDArchives.sos.proto) = ccbc06fd5b8db95e78b4b404a0d2177f -MD5 (TSDCommandArchives.proto) = 878c419d11b043333e1ed5148a868626 -MD5 (TSKArchives.proto) = bd76c5489f2bfb5a94750f1a83969549 
-MD5 (TSKArchives.sos.proto) = 5e68b45687d33b9e2cdc0f64d76988f9 -MD5 (TSPArchiveMessages.proto) = 5bd640aed4df2758a393143096cbaf70 -MD5 (TSPDatabaseMessages.proto) = ab86cd136e1702555b5080f59609f2f1 -MD5 (TSPMessages.proto) = 1a33eb51dfb1f8ccbabdc6e236690ce0 -MD5 (TSSArchives.proto) = 301ea13a293ada201db8edf9b0f83d1d -MD5 (TSSArchives.sos.proto) = 951c42b9fd732552ffc4944fe414890a -MD5 (TSTArchives.proto) = 59a353cf0dd34b31ee932149c517e80d -MD5 (TSTArchives.sos.proto) = d085aa1e4449f85bcc37689c7d6e4c5f -MD5 (TSTCommandArchives.proto) = 4c1bbca393199455635dd80a89d9f61b -MD5 (TSTStylePropertyArchiving.proto) = 463bb1c64fe02a484b4e9e655d6af391 -MD5 (TSWPArchives.proto) = 7d171fbf72e184957773c6ad84ff4f09 -MD5 (TSWPArchives.sos.proto) = 06be0c76ea913408da04153b292f08e0 -MD5 (TSWPCommandArchives.proto) = dabb9f2e85ffdba52aaca61064303c71 -``` +- [12.0](./120.md) [![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/notes?pixel)](https://github.com/SheetJS/notes)