This commit is contained in:
SheetJS 2022-04-10 20:51:28 -04:00
parent fbb4b03953
commit d32a56a766
3 changed files with 250 additions and 72 deletions

47
iwa/112.md Normal file
View File

@ -0,0 +1,47 @@
# Protobuf MD5 Checksums for 11.2
The checksums are based on definitions from version 11.2 (7032.0.145):
```proto
MD5 (KNArchives.proto) = 4d15ddb1dacdf0e2a321d09234130abd
MD5 (KNArchives.sos.proto) = 5b8e5101e946eaddca341b78b5f6e660
MD5 (KNCommandArchives.proto) = 3aa3d1796a8cb1f1cdaccca2d7a67704
MD5 (KNCommandArchives.sos.proto) = af09afc07db32005ff1eaf183fe99c02
MD5 (TNArchives.proto) = c6af8ddec038ccfb8d683f2c92bb8b2c
MD5 (TNArchives.sos.proto) = 7a8ce65fa23d554fba3549f6ec5d1aa5
MD5 (TNCommandArchives.proto) = 4b7e7ebe4583d0a0166118292d338272
MD5 (TNCommandArchives.sos.proto) = 279eafd525689d2e8805e951f9a0b9a6
MD5 (TPArchives.proto) = 5ae0d81aded078509f8b1dfc48b1b118
MD5 (TPCommandArchives.proto) = 1cf18ff056e7635f1cfcb68ec943892b
MD5 (TPCommandArchives.sos.proto) = 7b627bc1e053a03a762952c5d647a97a
MD5 (TSAArchives.proto) = 6a246bb28e425ff796c285c36132b2ef
MD5 (TSAArchives.sos.proto) = 8b55a583851f1d11a2af842f20223bc7
MD5 (TSACommandArchives.sos.proto) = d0e4741e2fea6dde83b17a7e73d12af7
MD5 (TSCEArchives.proto) = abe4e1a7256019562a3790eb58c6b9f9
MD5 (TSCH3DArchives.proto) = 2ac8ee10b2e28c5d201524a388a2eb9e
MD5 (TSCHArchives.Common.proto) = 6cc9c2954517f29d8247e4d59caa980f
MD5 (TSCHArchives.GEN.proto) = 25bd8a10b1646970fb96f411f14821c9
MD5 (TSCHArchives.proto) = e4f10e18142ee8b0e2bb6f94b03e05d6
MD5 (TSCHArchives.sos.proto) = 4452d0264a26b946524ebbbb2dec56e0
MD5 (TSCHCommandArchives.proto) = 75cc30b1ba010f1172d0d08eff8bfa9f
MD5 (TSCHPreUFFArchives.proto) = 5a9c935194bd772a30774071ac7a7f64
MD5 (TSDArchives.proto) = 5cc5d066f5b394508ec13f64dedba7b7
MD5 (TSDArchives.sos.proto) = ccbc06fd5b8db95e78b4b404a0d2177f
MD5 (TSDCommandArchives.proto) = 878c419d11b043333e1ed5148a868626
MD5 (TSKArchives.proto) = bd76c5489f2bfb5a94750f1a83969549
MD5 (TSKArchives.sos.proto) = 5e68b45687d33b9e2cdc0f64d76988f9
MD5 (TSPArchiveMessages.proto) = 5bd640aed4df2758a393143096cbaf70
MD5 (TSPDatabaseMessages.proto) = ab86cd136e1702555b5080f59609f2f1
MD5 (TSPMessages.proto) = 1a33eb51dfb1f8ccbabdc6e236690ce0
MD5 (TSSArchives.proto) = 301ea13a293ada201db8edf9b0f83d1d
MD5 (TSSArchives.sos.proto) = 951c42b9fd732552ffc4944fe414890a
MD5 (TSTArchives.proto) = 59a353cf0dd34b31ee932149c517e80d
MD5 (TSTArchives.sos.proto) = d085aa1e4449f85bcc37689c7d6e4c5f
MD5 (TSTCommandArchives.proto) = 4c1bbca393199455635dd80a89d9f61b
MD5 (TSTStylePropertyArchiving.proto) = 463bb1c64fe02a484b4e9e655d6af391
MD5 (TSWPArchives.proto) = 7d171fbf72e184957773c6ad84ff4f09
MD5 (TSWPArchives.sos.proto) = 06be0c76ea913408da04153b292f08e0
MD5 (TSWPCommandArchives.proto) = dabb9f2e85ffdba52aaca61064303c71
```
[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/notes?pixel)](https://github.com/SheetJS/notes)

47
iwa/120.md Normal file
View File

@ -0,0 +1,47 @@
# Protobuf MD5 Checksums for 12.0
The checksums are based on definitions from version 12.0 (7033.0.134):
```proto
MD5 (KNArchives.proto) = 4d15ddb1dacdf0e2a321d09234130abd
MD5 (KNArchives.sos.proto) = 5b8e5101e946eaddca341b78b5f6e660
MD5 (KNCommandArchives.proto) = 3aa3d1796a8cb1f1cdaccca2d7a67704
MD5 (KNCommandArchives.sos.proto) = af09afc07db32005ff1eaf183fe99c02
MD5 (TNArchives.proto) = c6af8ddec038ccfb8d683f2c92bb8b2c
MD5 (TNArchives.sos.proto) = 7a8ce65fa23d554fba3549f6ec5d1aa5
MD5 (TNCommandArchives.proto) = 4b7e7ebe4583d0a0166118292d338272
MD5 (TNCommandArchives.sos.proto) = 279eafd525689d2e8805e951f9a0b9a6
MD5 (TPArchives.proto) = a8456e94f9b8f9b30315c51e2b89b18b
MD5 (TPCommandArchives.proto) = 1cf18ff056e7635f1cfcb68ec943892b
MD5 (TPCommandArchives.sos.proto) = 7b627bc1e053a03a762952c5d647a97a
MD5 (TSAArchives.proto) = 6a246bb28e425ff796c285c36132b2ef
MD5 (TSAArchives.sos.proto) = 8b55a583851f1d11a2af842f20223bc7
MD5 (TSACommandArchives.sos.proto) = d0e4741e2fea6dde83b17a7e73d12af7
MD5 (TSCEArchives.proto) = abe4e1a7256019562a3790eb58c6b9f9
MD5 (TSCH3DArchives.proto) = 2ac8ee10b2e28c5d201524a388a2eb9e
MD5 (TSCHArchives.Common.proto) = 6cc9c2954517f29d8247e4d59caa980f
MD5 (TSCHArchives.GEN.proto) = 25bd8a10b1646970fb96f411f14821c9
MD5 (TSCHArchives.proto) = e4f10e18142ee8b0e2bb6f94b03e05d6
MD5 (TSCHArchives.sos.proto) = 4452d0264a26b946524ebbbb2dec56e0
MD5 (TSCHCommandArchives.proto) = 75cc30b1ba010f1172d0d08eff8bfa9f
MD5 (TSCHPreUFFArchives.proto) = 5a9c935194bd772a30774071ac7a7f64
MD5 (TSDArchives.proto) = 5cc5d066f5b394508ec13f64dedba7b7
MD5 (TSDArchives.sos.proto) = ccbc06fd5b8db95e78b4b404a0d2177f
MD5 (TSDCommandArchives.proto) = 878c419d11b043333e1ed5148a868626
MD5 (TSKArchives.proto) = 8fbe545c0fc2aaaaec2d402ec5ebe661
MD5 (TSKArchives.sos.proto) = 5e68b45687d33b9e2cdc0f64d76988f9
MD5 (TSPArchiveMessages.proto) = 7f2f47d6f8f6dc3c3cf55f6016bc75ae
MD5 (TSPDatabaseMessages.proto) = ab86cd136e1702555b5080f59609f2f1
MD5 (TSPMessages.proto) = 1a33eb51dfb1f8ccbabdc6e236690ce0
MD5 (TSSArchives.proto) = dcb28462d9fcd774010c34d2371ad579
MD5 (TSSArchives.sos.proto) = 951c42b9fd732552ffc4944fe414890a
MD5 (TSTArchives.proto) = 6dfb293ca832fc8d4e550c46d4d49815
MD5 (TSTArchives.sos.proto) = d085aa1e4449f85bcc37689c7d6e4c5f
MD5 (TSTCommandArchives.proto) = a81c34ef4847236a44743b221b6a27f4
MD5 (TSTStylePropertyArchiving.proto) = 463bb1c64fe02a484b4e9e655d6af391
MD5 (TSWPArchives.proto) = 3c2b0d6c6e7ab3655d573e959ce4b8d5
MD5 (TSWPArchives.sos.proto) = 06be0c76ea913408da04153b292f08e0
MD5 (TSWPCommandArchives.proto) = ade9e806dd98f5d1509d2ef5b0284af1
```
[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/notes?pixel)](https://github.com/SheetJS/notes)

View File

@ -1,4 +1,4 @@
# iWork 2013+
# iWork 2013+
This covers the standalone ZIP-based format. iCloud uses a different format to
support differential sync between devices.
@ -6,7 +6,7 @@ support differential sync between devices.
The ZIP container holds a number of Mac binary "property list" files (`.plist`)
which can be safely ignored or blanked. It also can hold preview images that
can be safely ignored.
## File Structure
The `.numbers` file is a ZIP file containing a number of `.iwa` entries. The
@ -23,7 +23,43 @@ Each block follows the Snappy compressed format as described in
apps do not expect a particular compression level, and it is possible to create
the equivalent of a "STORED" block.
# Data Storage
## Protocol Buffers
Most of the data is stored in Protocol Buffer ("protobuf") wire messages.
The iWork apps (Keynote, Numbers, Pages) include embedded Protocol Buffers
definitions as part of the file format processors.
The [`otorp` package on `npm`](https://npm.im/otorp) ships with a command-line
tool for extracting definitions from a Mach-O binary.
Note that some fields marked as `required` have been changed to `optional` in
later versions. File parsers should assume all fields are optional.
### App-Specific Definitions
Each of the following definitions appears in only one app:
**Keynote**
- `KNArchives.proto`
- `KNArchives.sos.proto`
- `KNCommandArchives.proto`
- `KNCommandArchives.sos.proto`
**Numbers**
- `TNArchives.proto`
- `TNArchives.sos.proto`
- `TNCommandArchives.proto`
- `TNCommandArchives.sos.proto`
**Pages**
- `TPArchives.proto`
- `TPCommandArchives.proto`
- `TPCommandArchives.sos.proto`
The other files are common across the apps.
## Data Storage
The decompressed data is a series of chunks.
@ -39,7 +75,7 @@ The message type from the `MessageInfo` (tag 1, type `uint32`) corresponds to a
dynamic registry spread across the embedded frameworks. The actual message data
is a protobuf packet.
## Dynamic Registry and Message Types
### Dynamic Registry and Message Types
The `.TSP.Reference` type acts as a pointer, referencing another message. The
references do not include message type info, so readers and writers must be
@ -74,44 +110,134 @@ message .TST.TableModelArchive {
}
```
The reference in field 2 from `.TST.TableInfoArchive` is expected to be of type
The reference in field 2 from `.TST.TableInfoArchive` is expected to be of type
`.TST.TableModelArchive` so the latter must be registered.
`.TST.DataStore` is the type of field 4 from `.TST.TableModelArchive`. Since it
is not referenced indirectly, the message type does not have to be registered.
## iWork Proto Definitions
## Data Storage in Numbers files
The iWork apps (Keynote, Numbers, Pages) include embedded Protocol Buffers
definitions as part of the file format processors.
The root message (type 1) has the following structure:
The [`otorp` package on `npm`](https://npm.im/otorp) ships with a command-line
tool for extracting definitions from a Mach-O binary.
```proto
message .TN.DocumentArchive {
repeated .TSP.Reference sheets = 1;
// ...
}
```
### App-Specific Definitions
The message referenced in field 1 (type 2) has the following structure:
The listed definitions only appear in one app:
```proto
message .TN.SheetArchive {
required string name = 1;
repeated .TSP.Reference drawable_infos = 2;
// ...
}
```
**Keynote**
- `KNArchives.proto`
- `KNArchives.sos.proto`
- `KNCommandArchives.proto`
- `KNCommandArchives.sos.proto`
`name` is the name of the worksheet. Each worksheet can contain multiple tables.
The messages referenced in field 2 (type 6000) are of type `.TST.TableInfoArchive`.
**Numbers**
- `TNArchives.proto`
- `TNArchives.sos.proto`
- `TNCommandArchives.proto`
- `TNCommandArchives.sos.proto`
### Table Storage in iWork
**Pages**
- `TPArchives.proto`
- `TPCommandArchives.proto`
- `TPCommandArchives.sos.proto`
Table structure is shared across iWork apps. The protobuf definitions are
identical. The root element for tables is the `.TST.TableInfoArchive`:
The other files are common across the apps.
```proto
message .TST.TableInfoArchive {
required .TSP.Reference tableModel = 2;
// ...
}
```
## Determining File Type
The message referenced in field 2 (type 6001) has the following structure:
```proto
message .TST.TableModelArchive {
required .TST.DataStore base_data_store = 4;
required uint32 number_of_rows = 6;
required uint32 number_of_columns = 7;
// ...
}
message .TST.DataStore {
required .TST.TileStorage tiles = 3;
required .TSP.Reference stringTable = 4;
optional .TSP.Reference formulaErrorTable = 12;
optional .TSP.Reference rich_text_table = 17;
// ...
}
message .TST.TileStorage {
message .TST.TileStorage.Tile {
required uint32 tileid = 1;
required .TSP.Reference tile = 2;
}
repeated .TST.TileStorage.Tile tiles = 1;
// ...
}
```
Numbers uses a "shared string table" like Excel. Excel stores both plaintext and
rich strings in the same table, while Numbers has two separate tables.
The message referenced in the tiles (type 6002) has the following structure:
```proto
message .TST.Tile {
repeated .TST.TileRowInfo rowInfos = 5;
// ...
}
message .TST.TileRowInfo {
required uint32 tile_row_index = 1;
required uint32 cell_count = 2;
required bytes cell_storage_buffer_pre_bnc = 3;
required bytes cell_offsets_pre_bnc = 4;
// ...
}
```
Each `.TST.TileRowInfo` message holds the data and property references for a
single row in the table.
Each cell offset field is an array of 16-bit integers that describe offsets
within the respective storage buffer. An offset of `0xFFFF` indicates that the
given row does not include a cell at that column index.
A 32-bit flag field is stored at byte offset 4, describing which fields are present in the cell:
| field description | bit mask | size | notes |
|:------------------|---------:|-----:|-------------------------------------|
| Error index | `0x0100` | 4 | index into formula error table |
| Rich text index | `0x0200` | 4 | index into rich shared string table |
| Plaintext index | `0x0010` | 4 | index into shared string table |
| Double value | `0x0020` | 8 | raw value (IEEE754 double) |
| Datetime value | `0x0040` | 8 | number of seconds since 1/1/2001 |
The starting offset of each field depends on the cell storage version (`0-1` or
`2-3`, listed below as "v1" and "v3" respectively), which is stored in the first
byte of each cell:
| description | v1 offset | v3 offset |
|:----------------|---------------------------:|----------------------------:|
| Error index |`8 + POPCNT(f & 0x008E) * 4`|`12 + POPCNT(f & 0x0C8E) * 4`|
| Rich text index |`8 + POPCNT(f & 0x018E) * 4`|`12 + POPCNT(f & 0x0D8E) * 4`|
| Plaintext index |`8 + POPCNT(f & 0x138E) * 4`|`12 + POPCNT(f & 0x3F8E) * 4`|
| Double value |`8 + POPCNT(f & 0x139E) * 4`|`12 + POPCNT(f & 0x3F9E) * 4`|
| Datetime value |`8 + POPCNT(f & 0x13BE) * 4`|`12 + POPCNT(f & 0x3FBE) * 4`|
The cell type is stored at byte offset 2:
| type | value |
|-----:|:-----------------------------------------------------------------|
| `0` | "blank cell" (no value) |
| `2` | "Double value" (IEEE754 double) |
| `3` | get value from shared string table at "Plaintext index" |
| `5` | interpret "Datetime value" as number of seconds since 1/1/2001 |
| `6` | `true` if "Double value" is greater than zero, `false` otherwise |
| `7` | interpret "Double value" as number of seconds (Duration) |
| `8` | get error from formula error table at "Error index" |
| `9` | get value from rich shared string table at "Rich text index" |
## Misc
### Determining File Type
All three file types use the same message tag (1) for the root `DocumentArchive`
message. However, the required fields vary between formats.
@ -143,53 +269,11 @@ Pages is the only format to use and require field 15. Keynote requires field 2,
a field that does not appear in Numbers.
## Misc
### MD5 Checksums
The checksums are based on definitions from version 11.2 (7032.0.145)
- [11.2](./112.md)
```proto
MD5 (KNArchives.proto) = 4d15ddb1dacdf0e2a321d09234130abd
MD5 (KNArchives.sos.proto) = 5b8e5101e946eaddca341b78b5f6e660
MD5 (KNCommandArchives.proto) = 3aa3d1796a8cb1f1cdaccca2d7a67704
MD5 (KNCommandArchives.sos.proto) = af09afc07db32005ff1eaf183fe99c02
MD5 (TNArchives.proto) = c6af8ddec038ccfb8d683f2c92bb8b2c
MD5 (TNArchives.sos.proto) = 7a8ce65fa23d554fba3549f6ec5d1aa5
MD5 (TNCommandArchives.proto) = 4b7e7ebe4583d0a0166118292d338272
MD5 (TNCommandArchives.sos.proto) = 279eafd525689d2e8805e951f9a0b9a6
MD5 (TPArchives.proto) = 5ae0d81aded078509f8b1dfc48b1b118
MD5 (TPCommandArchives.proto) = 1cf18ff056e7635f1cfcb68ec943892b
MD5 (TPCommandArchives.sos.proto) = 7b627bc1e053a03a762952c5d647a97a
MD5 (TSAArchives.proto) = 6a246bb28e425ff796c285c36132b2ef
MD5 (TSAArchives.sos.proto) = 8b55a583851f1d11a2af842f20223bc7
MD5 (TSACommandArchives.sos.proto) = d0e4741e2fea6dde83b17a7e73d12af7
MD5 (TSCEArchives.proto) = abe4e1a7256019562a3790eb58c6b9f9
MD5 (TSCH3DArchives.proto) = 2ac8ee10b2e28c5d201524a388a2eb9e
MD5 (TSCHArchives.Common.proto) = 6cc9c2954517f29d8247e4d59caa980f
MD5 (TSCHArchives.GEN.proto) = 25bd8a10b1646970fb96f411f14821c9
MD5 (TSCHArchives.proto) = e4f10e18142ee8b0e2bb6f94b03e05d6
MD5 (TSCHArchives.sos.proto) = 4452d0264a26b946524ebbbb2dec56e0
MD5 (TSCHCommandArchives.proto) = 75cc30b1ba010f1172d0d08eff8bfa9f
MD5 (TSCHPreUFFArchives.proto) = 5a9c935194bd772a30774071ac7a7f64
MD5 (TSDArchives.proto) = 5cc5d066f5b394508ec13f64dedba7b7
MD5 (TSDArchives.sos.proto) = ccbc06fd5b8db95e78b4b404a0d2177f
MD5 (TSDCommandArchives.proto) = 878c419d11b043333e1ed5148a868626
MD5 (TSKArchives.proto) = bd76c5489f2bfb5a94750f1a83969549
MD5 (TSKArchives.sos.proto) = 5e68b45687d33b9e2cdc0f64d76988f9
MD5 (TSPArchiveMessages.proto) = 5bd640aed4df2758a393143096cbaf70
MD5 (TSPDatabaseMessages.proto) = ab86cd136e1702555b5080f59609f2f1
MD5 (TSPMessages.proto) = 1a33eb51dfb1f8ccbabdc6e236690ce0
MD5 (TSSArchives.proto) = 301ea13a293ada201db8edf9b0f83d1d
MD5 (TSSArchives.sos.proto) = 951c42b9fd732552ffc4944fe414890a
MD5 (TSTArchives.proto) = 59a353cf0dd34b31ee932149c517e80d
MD5 (TSTArchives.sos.proto) = d085aa1e4449f85bcc37689c7d6e4c5f
MD5 (TSTCommandArchives.proto) = 4c1bbca393199455635dd80a89d9f61b
MD5 (TSTStylePropertyArchiving.proto) = 463bb1c64fe02a484b4e9e655d6af391
MD5 (TSWPArchives.proto) = 7d171fbf72e184957773c6ad84ff4f09
MD5 (TSWPArchives.sos.proto) = 06be0c76ea913408da04153b292f08e0
MD5 (TSWPCommandArchives.proto) = dabb9f2e85ffdba52aaca61064303c71
```
- [12.0](./120.md)
[![Analytics](https://ga-beacon.appspot.com/UA-36810333-1/SheetJS/notes?pixel)](https://github.com/SheetJS/notes)