From 2cbd88a15b19bcb1fe0ef709a8e4186933248e38 Mon Sep 17 00:00:00 2001 From: Kevin Bortis Date: Sun, 29 Mar 2026 15:28:08 +0200 Subject: [PATCH] spec: clarify LZMA properties prefix in cmpLength and cmpCrc64 scope --- docs/spec/appendixes/compression.adoc | 31 ++++++++++++++++++++++-- docs/spec/blocks/data.adoc | 4 +-- docs/spec/blocks/datastream_payload.adoc | 4 +-- docs/spec/blocks/ddt.adoc | 4 +-- docs/spec/blocks/ddt2.adoc | 4 +-- 5 files changed, 37 insertions(+), 10 deletions(-) diff --git a/docs/spec/appendixes/compression.adoc b/docs/spec/appendixes/compression.adoc index 09c0a76..c560331 100644 --- a/docs/spec/appendixes/compression.adoc +++ b/docs/spec/appendixes/compression.adoc @@ -12,13 +12,40 @@ For the latest and most accurate listing, refer to the `libaaruformat` source. |Algorithm |0 |None -|1 |LZMA — stream prepended by 5 bytes of parameters +|1 |LZMA — 5-byte properties prefix followed by compressed stream |2 |FLAC -|3 |LZMA after Claunia Subchannel Transform (see Appendix D) — stream prepended by 5 bytes of parameters +|3 |LZMA after Claunia Subchannel Transform (see Appendix D) — 5-byte properties prefix followed by compressed stream |4 |Zstandard (zstd) — standard zstd frame, no prefix |5 |Zstandard after Claunia Subchannel Transform (see Appendix D) — standard zstd frame, no prefix |=== +=== 📝 Notes on LZMA Properties Prefix (IDs 1 and 3) + +For LZMA-compressed blocks, the on-disk payload immediately following the block header is: + +---- +[ 5-byte LZMA properties ] [ compressed stream ] +---- + +The `cmpLength` field in the block header covers the **entire payload** including the 5-byte properties prefix: + +---- +cmpLength = 5 + compressed_stream_size +---- + +The `cmpCrc64` checksum covers only the **compressed stream**, excluding the properties prefix: + +---- +cmpCrc64 = CRC64(payload[5 .. cmpLength - 1]) +---- + +To decompress: + +1. Read `cmpLength` bytes from the file position after the block header. +2. 
The first 5 bytes are the LZMA properties (see LZMA SDK specification). +3. The remaining `cmpLength - 5` bytes are the compressed stream; `cmpCrc64` is computed over these bytes only. +4. Decompress into `length` bytes using the properties and the compressed stream. + === 📝 Notes on Zstandard (IDs 4 and 5) Unlike LZMA (IDs 1 and 3), Zstandard compressed payloads are stored as self-contained zstd frames. diff --git a/docs/spec/blocks/data.adoc b/docs/spec/blocks/data.adoc index f3aecb5..ed55723 100644 --- a/docs/spec/blocks/data.adoc +++ b/docs/spec/blocks/data.adoc @@ -56,7 +56,7 @@ typedef struct BlockHeader |uint32_t |4 bytes |cmpLength -|The size in bytes of the compressed data that follows this header. +|The total size in bytes of the compressed payload that follows this header. For LZMA (IDs 1, 3) this includes the 5-byte properties prefix; see Appendix C for details. |uint32_t |4 bytes @@ -66,7 +66,7 @@ typedef struct BlockHeader |uint64_t |8 bytes |cmpCrc64 -|The CRC64-ECMA checksum of the compressed data that follows this header. +|CRC64-ECMA checksum of the compressed payload. For LZMA (IDs 1, 3) the checksum covers the compressed stream only, excluding the 5-byte properties prefix; see Appendix C for details. |uint64_t |8 bytes diff --git a/docs/spec/blocks/datastream_payload.adoc b/docs/spec/blocks/datastream_payload.adoc index faf98a7..e9341f5 100644 --- a/docs/spec/blocks/datastream_payload.adoc +++ b/docs/spec/blocks/datastream_payload.adoc @@ -54,7 +54,7 @@ typedef struct DataStreamPayloadHeader |uint32_t |4 bytes |cmpLength -|Compressed length in bytes (includes LZMA properties if compression = Lzma) +|Total compressed payload size in bytes. For LZMA this includes the 5-byte properties prefix; see Appendix C. |uint32_t |4 bytes @@ -64,7 +64,7 @@ typedef struct DataStreamPayloadHeader |uint64_t |8 bytes |cmpCrc64 -|CRC64-ECMA checksum of the compressed payload data +|CRC64-ECMA checksum of the compressed payload. For LZMA the checksum covers the compressed stream only, excluding the 5-byte properties prefix; see Appendix C.
|uint64_t |8 bytes diff --git a/docs/spec/blocks/ddt.adoc b/docs/spec/blocks/ddt.adoc index b0469d0..23cc468 100644 --- a/docs/spec/blocks/ddt.adoc +++ b/docs/spec/blocks/ddt.adoc @@ -61,7 +61,7 @@ typedef struct DdtHeader |uint64_t |8 bytes |cmpLength -|The size in bytes of the compressed table that follows this header. +|The total size in bytes of the compressed payload that follows this header. For LZMA this includes the 5-byte properties prefix; see Appendix C. |uint64_t |8 bytes @@ -71,7 +71,7 @@ typedef struct DdtHeader |uint64_t |8 bytes |cmpCrc64 -|The CRC64-ECMA checksum of the compressed table that follows this header. +|The CRC64-ECMA checksum of the compressed payload. For LZMA the checksum covers the compressed stream only, excluding the 5-byte properties prefix; see Appendix C. |uint64_t |8 bytes diff --git a/docs/spec/blocks/ddt2.adoc b/docs/spec/blocks/ddt2.adoc index e1d034f..1f5ae32 100644 --- a/docs/spec/blocks/ddt2.adoc +++ b/docs/spec/blocks/ddt2.adoc @@ -114,7 +114,7 @@ Writers *MUST NOT* produce this identifier in new images. |uint64_t |8 bytes |cmpLength -|The size in bytes of the compressed table that follows this header. +|The total size in bytes of the compressed payload that follows this header. For LZMA this includes the 5-byte properties prefix; see Appendix C. |uint64_t |8 bytes @@ -124,7 +124,7 @@ Writers *MUST NOT* produce this identifier in new images. |uint64_t |8 bytes |cmpCrc64 -|The CRC64-ECMA checksum of the compressed table that follows this header. +|The CRC64-ECMA checksum of the compressed payload. For LZMA the checksum covers the compressed stream only, excluding the 5-byte properties prefix; see Appendix C. |uint64_t |8 bytes