Enhance documentation for various structures with detailed descriptions and formatting improvements

2025-12-16 19:24:40 +00:00 · 2025-10-01 05:35:39 +01:00
parent 1f91ad1e08
commit 41aee42c53
16 changed files with 1935 additions and 1273 deletions
--- a/include/aaruformat/structs/checksum.h
+++ b/include/aaruformat/structs/checksum.h
@@ -19,29 +19,80 @@
 #ifndef LIBAARUFORMAT_CHECKSUM_H
 #define LIBAARUFORMAT_CHECKSUM_H

+#include <stdint.h>  // Fixed-width integer types for on-disk structures.
+
 #pragma pack(push, 1)

 /**
- *     Checksum block, contains a checksum of all user data sectors (except for optical discs that is 2352 uint8_ts raw
- *     sector if available
- *  */
-typedef struct ChecksumHeader {
-    /**Identifier, <see cref="BlockType.ChecksumBlock" /> */
-    uint32_t identifier;
-    /**Length in uint8_ts of the block */
-    uint32_t length;
-    /**How many checksums follow */
-    uint8_t entries;
+ * \file aaruformat/structs/checksum.h
+ * \brief On-disk layout definitions for the checksum block (BlockType::ChecksumBlock).
+ *
+ * A checksum block stores one or more whole-image (user data) checksums. For optical media the
+ * user data definition follows the format's raw sector rules (e.g. 2352-byte raw sector when available).
+ *
+ * Binary layout (all integers are little-endian, structure is packed):
+ *
+ *  +------------------------------+-------------------------------+
+ *  | Field                        | Size (bytes)                  |
+ *  +==============================+===============================+
+ *  | ChecksumHeader               | sizeof(ChecksumHeader)=9      |
+ *  |   identifier                 | 4 (BlockType::ChecksumBlock)  |
+ *  |   length                     | 4 (payload bytes that follow) |
+ *  |   entries                    | 1 (number of checksum entries)|
+ *  +------------------------------+-------------------------------+
+ *  | Repeated for each entry:                                     |
+ *  |   ChecksumEntry              | sizeof(ChecksumEntry)=5       |
+ *  |     type                     | 1 (ChecksumAlgorithm)         |
+ *  |     length                   | 4 (digest length)             |
+ *  |   digest bytes               | length                        |
+ *  +------------------------------+-------------------------------+
+ *
+ * Thus, the payload size (ChecksumHeader.length) MUST equal the sum over all entries of:
+ *   sizeof(ChecksumEntry) + entry.length.
+ *
+ * Typical digest lengths:
+ *  - Md5: 16 bytes
+ *  - Sha1: 20 bytes
+ *  - Sha256: 32 bytes
+ *  - SpamSum: variable length ASCII, NOT null-terminated on disk (a terminating '\0' may be appended in memory).
+ *
+ * \warning The structures are packed; never rely on host compiler default padding or directly casting from a buffer
+ *          without ensuring correct endianness if porting to big-endian systems (current implementation assumes LE).
+ *
+ * \see BlockType
+ * \see ChecksumAlgorithm
+ */
+
+/**
+ * \struct ChecksumHeader
+ * \brief Header that precedes the sequence of checksum entries for a checksum block.
+ *
+ * After this header, exactly \ref ChecksumHeader::length bytes follow containing \ref ChecksumHeader::entries
+ * consecutive \ref ChecksumEntry records, each immediately followed by its digest payload.
+ */
+typedef struct ChecksumHeader
+{
+    uint32_t identifier;  ///< Block identifier, must be BlockType::ChecksumBlock.
+    uint32_t length;      ///< Length in bytes of the payload (all entries + their digest data, excluding this header).
+    uint8_t  entries;     ///< Number of checksum entries that follow in the payload.
 } ChecksumHeader;

-/**Checksum entry, followed by checksum data itself */
-typedef struct ChecksumEntry {
-    /**Checksum algorithm */
-    uint8_t type;
-    /**Length in uint8_ts of checksum that follows this structure */
-    uint32_t length;
+/**
+ * \struct ChecksumEntry
+ * \brief Per-checksum metadata immediately followed by the digest / signature bytes.
+ *
+ * For fixed-length algorithms the \ref length MUST match the known digest size. For SpamSum it is variable.
+ * The bytes immediately following this structure (not null-terminated) constitute the digest and are exactly
+ * \ref length bytes long.
+ *
+ * Order of entries is not mandated; readers should scan all entries and match by \ref type.
+ */
+typedef struct ChecksumEntry
+{
+    uint8_t  type;    ///< Algorithm used (value from \ref ChecksumAlgorithm).
+    uint32_t length;  ///< Length in bytes of the digest that immediately follows this structure.
 } ChecksumEntry;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_CHECKSUM_H
+#endif  // LIBAARUFORMAT_CHECKSUM_H
--- a/include/aaruformat/structs/data.h
+++ b/include/aaruformat/structs/data.h
@@ -19,37 +19,82 @@
 #ifndef LIBAARUFORMAT_DATA_H
 #define LIBAARUFORMAT_DATA_H

+#include <stdint.h>  // Fixed width integer types used in on-disk packed structs.
+
 #pragma pack(push, 1)

-/**Block header, precedes block data */
-typedef struct BlockHeader {
-    /**Identifier, <see cref="BlockType.DataBlock" /> */
-    uint32_t identifier;
-    /**Type of data contained by this block */
-    uint16_t type;
-    /**Compression algorithm used to compress the block */
-    uint16_t compression;
-    /**Size in uint8_ts of each sector contained in this block */
-    uint32_t sectorSize;
-    /**Compressed length for the block */
-    uint32_t cmpLength;
-    /**Uncompressed length for the block */
-    uint32_t length;
-    /**CRC64-ECMA of the compressed block */
-    uint64_t cmpCrc64;
-    /**CRC64-ECMA of the uncompressed block */
-    uint64_t crc64;
+/**
+ * \file aaruformat/structs/data.h
+ * \brief On-disk layout structures for data-bearing and geometry blocks.
+ *
+ * These packed structures describe the headers that precede variable-length payloads
+ * inside blocks whose identifiers are enumerated in \ref BlockType.
+ * All integer fields are stored little-endian on disk. The library currently assumes a
+ * little-endian host; if ported to a big-endian architecture explicit byte swapping will be required.
+ *
+ * Layout of a data block (BlockType::DataBlock):
+ *   BlockHeader (sizeof(BlockHeader) bytes)
+ *   Compressed payload (cmpLength bytes)
+ *
+ * Payload decoding:
+ *   - Apply the algorithm indicated by \ref BlockHeader::compression (\ref CompressionType) to the
+ *     cmpLength bytes following the header to obtain exactly \ref BlockHeader::length bytes.
+ *   - The uncompressed data MUST be an integer multiple of \ref BlockHeader::sectorSize.
+ *   - A CRC64-ECMA is provided for both compressed (cmpCrc64) and uncompressed (crc64) forms to allow
+ *     validation at either stage of the pipeline.
+ *
+ * Geometry block (BlockType::GeometryBlock) has a \ref GeometryBlockHeader followed by no additional
+ * fixed payload in the current format version; it conveys legacy CHS-style logical geometry metadata.
+ *
+ * \warning These structs are packed; do not take their address and assume natural alignment.
+ * \see BlockType
+ * \see DataType
+ * \see CompressionType
+ */
+
+/**
+ * \struct BlockHeader
+ * \brief Header preceding the compressed data payload of a data block (BlockType::DataBlock).
+ *
+ * Invariants:
+ *  - cmpLength > 0 unless length == 0 (empty block)
+ *  - length == 0 implies cmpLength == 0
+ *  - If compression == CompressionType::None then cmpLength == length
+ *  - length % sectorSize == 0
+ *
+ * Validation strategy (recommended for readers):
+ *  1. Verify identifier == BlockType::DataBlock.
+ *  2. Verify sectorSize is non-zero and a power-of-two or a commonly used size (512/1024/2048/4096/2352).
+ *  3. Verify invariants above and CRCs after (de)compression.
+ */
+typedef struct BlockHeader
+{
+    uint32_t identifier;   ///< Block identifier, must be BlockType::DataBlock.
+    uint16_t type;         ///< Logical data classification (value from \ref DataType).
+    uint16_t compression;  ///< Compression algorithm used (value from \ref CompressionType).
+    uint32_t sectorSize;   ///< Size in bytes of each logical sector represented in this block.
+    uint32_t cmpLength;    ///< Size in bytes of the compressed payload immediately following this header.
+    uint32_t length;       ///< Size in bytes of the uncompressed payload resulting after decompression.
+    uint64_t cmpCrc64;     ///< CRC64-ECMA of the compressed payload (cmpLength bytes).
+    uint64_t crc64;        ///< CRC64-ECMA of the uncompressed payload (length bytes).
 } BlockHeader;

-/**Geometry block, contains physical geometry information */
-typedef struct GeometryBlockHeader {
-    /**Identifier, <see cref="BlockType.GeometryBlock" /> */
-    uint32_t identifier;
-    uint32_t cylinders;
-    uint32_t heads;
-    uint32_t sectorsPerTrack;
+/**
+ * \struct GeometryBlockHeader
+ * \brief Legacy CHS style logical geometry metadata (BlockType::GeometryBlock).
+ *
+ * Total logical sectors implied by this header is cylinders * heads * sectorsPerTrack.
+ * Sector size is not included here and must be derived from context (e.g., accompanying metadata
+ * or defaulting to 512 for many block devices).
+ */
+typedef struct GeometryBlockHeader
+{
+    uint32_t identifier;       ///< Block identifier, must be BlockType::GeometryBlock.
+    uint32_t cylinders;        ///< Number of cylinders.
+    uint32_t heads;            ///< Number of heads (tracks per cylinder).
+    uint32_t sectorsPerTrack;  ///< Number of sectors per track.
 } GeometryBlockHeader;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_DATA_H
+#endif  // LIBAARUFORMAT_DATA_H
--- a/include/aaruformat/structs/ddt.h
+++ b/include/aaruformat/structs/ddt.h
@@ -19,71 +19,149 @@
 #ifndef LIBAARUFORMAT_DDT_H
 #define LIBAARUFORMAT_DDT_H

+#include <stdint.h>  // fixed-width types for on-disk layout
+
 #pragma pack(push, 1)

-/**Header for a deduplication table. Table follows it */
+/** \file aaruformat/structs/ddt.h
+ *  \brief On-disk headers for Deduplication Data Tables (DDT) versions 1 and 2.
+ *
+ * A DDT maps logical sector indices (LBAs within an image's logical address space) to (block, sector)
+ * pairs plus a base file offset, enabling content de-duplication inside the container. Two generations
+ * exist:
+ *  - DdtHeader  ("version 1") flat table.
+ *  - DdtHeader2 ("version 2") hierarchical, multi-level subtables for scalability.
+ *
+ * All integers are little-endian. Structures are packed (1-byte alignment). When porting to a big-endian
+ * architecture callers must perform byte swapping. Do not rely on compiler-introduced padding.
+ *
+ * Compression of the table body (entries array) follows the same conventions as data blocks: first
+ * decompress according to the compression enum, then validate CRC64 for uncompressed contents.
+ *
+ * Related enumerations:
+ *  - BlockType::DeDuplicationTable / BlockType::DeDuplicationTable2
+ *  - CompressionType
+ *  - DataType
+ *  - DdtSizeType (for DdtHeader2::sizeType)
+ */
+
+/**
+ * \struct DdtHeader
+ * \brief Header preceding a version 1 (flat) deduplication table body.
+ *
+ * Immediately after this header there are \ref entries table records (compressed if \ref compression != None).
+ * Each table record encodes a pointer combining a byte offset in the file with a sector offset inside a block:
+ *   logicalEntryValue = ((uint64_t)fileByteOffset << shift) + sectorOffsetWithinBlock
+ * where fileByteOffset is measured in bytes (granularity depends on shift) and sectorOffsetWithinBlock is
+ * relative to the start of the referenced data block. The sector size must be taken from the corresponding
+ * data block(s) (see BlockHeader::sectorSize) or higher-level metadata.
+ *
+ * Invariants:
+ *  - cmpLength == length if compression == CompressionType::None
+ *  - length % (entrySize) == 0 after decompression (implementation-defined entry size)
+ *  - entries * entrySize == length
+ *  - entries > 0 implies length > 0
+ */
 typedef struct DdtHeader
 {
-    /**Identifier, <see cref="BlockType.DeDuplicationTable" /> */
-    uint32_t identifier;
-    /**Type of data pointed by this DDT */
-    uint16_t type;
-    /**Compression algorithm used to compress the DDT */
-    uint16_t compression;
-    /**Each entry is ((uint8_t offset in file) &lt;&lt; shift) + (sector offset in block) */
-    uint8_t  shift;
-    /**How many entries are in the table */
-    uint64_t entries;
-    /**Compressed length for the DDT */
-    uint64_t cmpLength;
-    /**Uncompressed length for the DDT */
-    uint64_t length;
-    /**CRC64-ECMA of the compressed DDT */
-    uint64_t cmpCrc64;
-    /**CRC64-ECMA of the uncompressed DDT */
-    uint64_t crc64;
+    uint32_t identifier;   ///< Block identifier, must be BlockType::DeDuplicationTable.
+    uint16_t type;         ///< Data classification (\ref DataType) for sectors referenced by this table.
+    uint16_t compression;  ///< Compression algorithm for the table body (\ref CompressionType).
+    uint8_t  shift;        ///< Left shift applied to per-entry file offset component forming logicalEntryValue.
+    uint64_t entries;      ///< Number of deduplication entries contained in (uncompressed) table.
+    uint64_t cmpLength;    ///< Size in bytes of compressed entries payload.
+    uint64_t length;       ///< Size in bytes of uncompressed entries payload.
+    uint64_t cmpCrc64;     ///< CRC64-ECMA of the compressed payload.
+    uint64_t crc64;        ///< CRC64-ECMA of the uncompressed payload.
 } DdtHeader;

+/**
+ * \struct DdtHeader2
+ * \brief Header preceding a version 2 hierarchical deduplication table.
+ *
+ * Version 2 introduces multi-level tables to efficiently address very large images by subdividing
+ * the logical address space. Tables at higher levels partition regions; leaves contain direct
+ * (block, sector) entry mappings. Navigation uses \ref tableLevel (0 = root) and \ref levels (total depth).
+ *
+ * Logical sector (LBA) mapping (actual implementation in decode_ddt_{single,multi}_level_v2):
+ *  1. Let L be the requested logical sector (can be negative externally). Internal index I = L + negative.
+ *     Valid range: 0 <= I < blocks. (Total user-data sectors often = blocks - negative - overflow.)
+ *  2. If tableShift == 0 (single-level): entryIndex = I.
+ *     Else (multi-level):
+ *        itemsPerPrimaryEntry = 1 << tableShift
+ *        primaryIndex  = I / itemsPerPrimaryEntry
+ *        secondaryIndex = I % itemsPerPrimaryEntry
+ *        The primary table entry at primaryIndex yields a secondary DDT file offset (scaled by 2^blockAlignmentShift),
+ *        whose table entries are then indexed by secondaryIndex.
+ *  3. Read raw DDT entry value E (16-bit if sizeType == SmallDdtSizeType, 32-bit if BigDdtSizeType).
+ *  4. If E == 0: sector_status = SectorStatusNotDumped; offset=block_offset=0.
+ *     Otherwise extract:
+ *        statusBits = E >> 12  (small) or E >> 28 (big)
+ *        baseBits   = E & 0x0FFF (small) or E & 0x0FFFFFFF (big)
+ *        sectorOffsetWithinBlock = baseBits & ((1 << dataShift) - 1)
+ *        blockIndex              = baseBits >> dataShift
+ *        block_offset (bytes)    = blockIndex << blockAlignmentShift
+ *        offset (sector units inside block) = sectorOffsetWithinBlock
+ *  5. The consumer combines block_offset, offset, and the (external) logical sector size to locate data.
+ *
+ * Field roles:
+ *  - negative:   Count of leading negative LBAs supported; added to L to form internal index.
+ *  - overflow:   Count of trailing LBAs beyond the user area upper bound that are still dumped and have
+ *                normal DDT entries (e.g. optical disc lead-out). Symmetrical to 'negative' on the high end.
+ *  - start:      For secondary tables, base internal index covered (written when creating new tables). Current decoding
+ *                logic does not consult this field (future-proof placeholder).
+ *  - blockAlignmentShift: log2 alignment of stored data blocks (byte granularity of block_offset).
+ *  - dataShift:  Bit width of the in-block sector offset (low bits of baseBits); blockIndex is baseBits >> dataShift.
+ *  - tableShift: log2 of number of logical sectors covered by a single primary-table pointer (multi-level only).
+ *  - sizeType:   Selects entry width (small=16b, big=32b) impacting available bits for blockIndex+offset.
+ *
+ * Notes & current limitations:
+ *  - User area sector count = blocks - negative - overflow.
+ *  - Valid external LBA range exposed by the image = [-negative, (blocks - negative - 1)].
+ *    * Negative range: [-negative, -1]
+ *    * User area range: [0, (blocks - negative - overflow - 1)]
+ *    * Overflow range: [(blocks - negative - overflow), (blocks - negative - 1)]
+ *  - Both negative and overflow ranges are stored with normal DDT entries (if present), enabling complete
+ *    reproduction of lead-in / lead-out or similar padding regions.
+ *  - start is presently ignored during decoding; integrity checks against it may be added in future revisions.
+ *  - No masking is applied to I besides array bounds; callers must ensure L is within representable range.
+ *
+ * Example (Compact Disc):
+ *  Disc has 360000 user sectors. Lead-in captured as 15000 negative sectors and lead-out as 15000 overflow sectors.
+ *    negative = 15000
+ *    overflow = 15000
+ *    user sectors = 360000
+ *    blocks (internal span) = negative + user + overflow = 390000
+ *    External LBA spans: -15000 .. 374999
+ *      * Negative: -15000 .. -1 (15000 sectors)
+ *      * User:      0 .. 359999 (360000 sectors)
+ *      * Overflow:  360000 .. 374999 (15000 sectors)
+ *  Internal index I for any external L is I = L + negative.
+ *  User area sector count reported to callers (ctx->imageInfo.Sectors) = blocks - negative - overflow = 360000.
+ */
 typedef struct DdtHeader2
 {
-    /**Identifier, <see cref="BlockType.DeDuplicationTable" /> */
-    uint32_t identifier;
-    /**Type of data pointed by this DDT */
-    uint16_t type;
-    /**Compression algorithm used to compress the DDT */
-    uint16_t compression;
-    /**How many levels of subtables are present */
-    uint8_t  levels;
-    /**Which level this table belongs to */
-    uint8_t  tableLevel;
-    /**Pointer to absolute byte offset in file where the previous level table is located */
-    uint64_t previousLevelOffset;
-    /**Negative displacement of LBAs */
-    uint16_t negative;
-    /**Number of blocks in media */
-    uint64_t blocks;
-    /**Positive overflow displacement of LBAs */
-    uint16_t overflow;
-    /**First LBA contained in this table */
-    uint64_t start;
-    /**Block alignment boundaries */
-    uint8_t  blockAlignmentShift;
-    /**Data shift */
-    uint8_t  dataShift;
-    /**Table shift */
-    uint8_t  tableShift;
-    /**Size type */
-    uint8_t  sizeType;
-    /**Entries in this table */
-    uint64_t entries;
-    /**Compressed length for the DDT */
-    uint64_t cmpLength;
-    /**Uncompressed length for the DDT */
-    uint64_t length;
-    /**CRC64-ECMA of the compressed DDT */
-    uint64_t cmpCrc64;
-    /**CRC64-ECMA of the uncompressed DDT */
-    uint64_t crc64;
+    uint32_t identifier;           ///< Block identifier, must be BlockType::DeDuplicationTable2.
+    uint16_t type;                 ///< Data classification (\ref DataType) for sectors referenced by this table.
+    uint16_t compression;          ///< Compression algorithm for this table body (\ref CompressionType).
+    uint8_t  levels;               ///< Total number of hierarchy levels (root depth); > 0.
+    uint8_t  tableLevel;           ///< Zero-based level index of this table (0 = root, increases downward).
+    uint64_t previousLevelOffset;  ///< Absolute byte offset of the parent (previous) level table; 0 if root.
+    uint16_t negative;             ///< Leading negative LBA count; added to external L to build internal index.
+    uint64_t blocks;               ///< Total internal span (negative + usable + overflow) in logical sectors.
+    uint16_t overflow;  ///< Trailing dumped sectors beyond user area (overflow range), still mapped with entries.
+    uint64_t
+            start;  ///< Base internal index covered by this table (used for secondary tables; currently informational).
+    uint8_t blockAlignmentShift;  ///< 2^blockAlignmentShift = block alignment boundary in bytes.
+    uint8_t dataShift;            ///< 2^dataShift = sectors represented per increment in blockIndex field.
+    uint8_t tableShift;  ///< 2^tableShift = number of logical sectors per primary entry (multi-level only; 0 for
+                         ///< single-level or secondary tables).
+    uint8_t sizeType;    ///< Entry size variant (\ref DdtSizeType) controlling width of E.
+    uint64_t entries;    ///< Number of entries contained in (uncompressed) table payload.
+    uint64_t cmpLength;  ///< Compressed payload size in bytes.
+    uint64_t length;     ///< Uncompressed payload size in bytes.
+    uint64_t cmpCrc64;   ///< CRC64-ECMA of compressed table payload.
+    uint64_t crc64;      ///< CRC64-ECMA of uncompressed table payload.
 } DdtHeader2;

 #pragma pack(pop)
--- a/include/aaruformat/structs/dump.h
+++ b/include/aaruformat/structs/dump.h
@@ -19,42 +19,109 @@
 #ifndef LIBAARUFORMAT_DUMP_H
 #define LIBAARUFORMAT_DUMP_H

+#include <stdint.h> /* Fixed-width integer types for on‑disk packed structures */
+
 #pragma pack(push, 1)

-/**Dump hardware block, contains a list of hardware used to dump the media on this image */
-typedef struct DumpHardwareHeader {
-    /**Identifier, <see cref="BlockType.DumpHardwareBlock" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint16_t entries;
-    /**Size of the whole block, not including this header, in uint8_ts */
-    uint32_t length;
-    /**CRC64-ECMA of the block */
-    uint64_t crc64;
+/** \file aaruformat/structs/dump.h
+ *  \brief Packed on-disk structures describing hardware and software used during image acquisition.
+ *
+ *  A Dump Hardware block (identifier = BlockType::DumpHardwareBlock) records one or more dump "environments" –
+ *  typically combinations of a physical device (drive, controller, adapter) and the software stack that
+ *  performed the read operation. Each environment is represented by a \ref DumpHardwareEntry followed by a
+ *  sequence of UTF-8 strings and an optional array of extent ranges (\ref DumpExtent, defined in context.h) that
+ *  delimit portions of the medium this environment contributed to.
+ *
+ *  Binary layout (little-endian, packed, all multi-byte integers LE):
+ *
+ *    DumpHardwareHeader (sizeof = 18 bytes)
+ *      identifier  (4)  -> BlockType::DumpHardwareBlock
+ *      entries     (2)  -> number of following hardware entries
+ *      length      (4)  -> total bytes of payload that follow this header
+ *      crc64       (8)  -> CRC64-ECMA of the payload bytes
+ *
+ *    Repeated for i in [0, entries):
+ *      DumpHardwareEntry (36 bytes)
+ *        manufacturerLength (4)
+ *        modelLength        (4)
+ *        revisionLength     (4)
+ *        firmwareLength     (4)
+ *        serialLength       (4)
+ *        softwareNameLength (4)
+ *        softwareVersionLength (4)
+ *        softwareOperatingSystemLength (4)
+ *        extents (4) -> number of DumpExtent structs after the strings
+ *
+ *      Variable-length UTF-8 strings (not NUL-terminated on disk) appear immediately after the entry, in the
+ *      exact order of the length fields above; each string is present only if its length > 0. The reader allocates
+ *      an extra byte to append '\0' for in-memory convenience.
+ *
+ *      Array of 'extents' DumpExtent structures (each 16 bytes: start, end) follows the strings if extents > 0.
+ *      Each extent describes an inclusive [start, end] logical sector (or unit) range contributed by
+ *      this hardware/software combination.
+ *
+ *  CRC semantics:
+ *   - crc64 covers exactly 'length' bytes immediately following the header.
+ *   - For legacy images with header.imageMajorVersion <= AARUF_VERSION_V1 the original C# writer produced a
+ *     byte-swapped CRC; the library compensates internally (see process_dumphw_block()).
+ *
+ *  Invariants / validation recommendations:
+ *   - identifier == BlockType::DumpHardwareBlock
+ *   - Accumulated size of all (entry + strings + extents arrays) == length
+ *   - All length fields are trusted only after bounds checking against remaining payload bytes
+ *   - Strings are raw UTF-8 data with no implicit terminator
+ *   - extents * sizeof(DumpExtent) fits inside remaining payload
+ *
+ *  Memory management notes (runtime library):
+ *   - Each string is malloc'ed with +1 byte for terminator during processing.
+ *   - Extents array is malloc'ed per entry when extents > 0.
+ *   - See aaruformatContext::dumpHardwareEntriesWithData for owning pointers.
+ *
+ *  \warning Structures are packed; never rely on natural alignment when mapping from a byte buffer.
+ *  \see DumpHardwareHeader
+ *  \see DumpHardwareEntry
+ *  \see DumpExtent (in context.h)
+ *  \see BlockType
+ */
+
+/** \struct DumpHardwareHeader
+ *  \brief Header that precedes a sequence of dump hardware entries and their variable-length payload.
+ */
+typedef struct DumpHardwareHeader
+{
+    uint32_t identifier;  ///< Block identifier, must be BlockType::DumpHardwareBlock.
+    uint16_t entries;     ///< Number of DumpHardwareEntry records that follow.
+    uint32_t length;      ///< Total payload bytes after this header (sum of entries, strings, and extents arrays).
+    uint64_t crc64;       ///< CRC64-ECMA of the payload (byte-swapped for legacy v1 images, handled automatically).
 } DumpHardwareHeader;

-/**Dump hardware entry, contains length of strings that follow, in the same order as the length, this structure */
-typedef struct DumpHardwareEntry {
-    /**Length of UTF-8 manufacturer string */
-    uint32_t manufacturerLength;
-    /**Length of UTF-8 model string */
-    uint32_t modelLength;
-    /**Length of UTF-8 revision string */
-    uint32_t revisionLength;
-    /**Length of UTF-8 firmware version string */
-    uint32_t firmwareLength;
-    /**Length of UTF-8 serial string */
-    uint32_t serialLength;
-    /**Length of UTF-8 software name string */
-    uint32_t softwareNameLength;
-    /**Length of UTF-8 software version string */
-    uint32_t softwareVersionLength;
-    /**Length of UTF-8 software operating system string */
-    uint32_t softwareOperatingSystemLength;
-    /**How many extents are after the strings */
-    uint32_t extents;
+/** \struct DumpHardwareEntry
+ *  \brief Per-environment length table describing subsequent UTF-8 strings and optional extent array.
+ *
+ *  Immediately after this structure the variable-length UTF-8 strings appear in the documented order, each
+ *  present only if its corresponding length is non-zero. No padding is present between strings. When all
+ *  strings are consumed, an array of \ref DumpExtent follows if \ref extents > 0.
+ *
+ *  All length fields measure bytes (not characters) and exclude any in-memory NUL terminator added by the reader.
+ *
+ *  Typical semantics:
+ *   - manufacturer/model/revision/firmware/serial identify the hardware device.
+ *   - softwareName/softwareVersion/softwareOperatingSystem identify the acquisition software environment.
+ *   - extents list which logical ranges this environment actually dumped (useful for multi-device composites).
+ */
+typedef struct DumpHardwareEntry
+{
+    uint32_t manufacturerLength;             ///< Length in bytes of manufacturer UTF-8 string.
+    uint32_t modelLength;                    ///< Length in bytes of model UTF-8 string.
+    uint32_t revisionLength;                 ///< Length in bytes of revision / hardware revision string.
+    uint32_t firmwareLength;                 ///< Length in bytes of firmware version string.
+    uint32_t serialLength;                   ///< Length in bytes of device serial number string.
+    uint32_t softwareNameLength;             ///< Length in bytes of dumping software name string.
+    uint32_t softwareVersionLength;          ///< Length in bytes of dumping software version string.
+    uint32_t softwareOperatingSystemLength;  ///< Length in bytes of host operating system string.
+    uint32_t extents;                        ///< Number of DumpExtent records following the strings (0 = none).
 } DumpHardwareEntry;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_DUMP_H
+#endif  // LIBAARUFORMAT_DUMP_H
--- a/include/aaruformat/structs/header.h
+++ b/include/aaruformat/structs/header.h
@@ -19,73 +19,111 @@
 #ifndef LIBAARUFORMAT_HEADER_H
 #define LIBAARUFORMAT_HEADER_H

-#define AARU_HEADER_APP_NAME_LEN 64
-#define GUID_SIZE 16
+/** \file aaruformat/structs/header.h
+ *  \brief On-disk container header structures (v1 and v2) for Aaru images.
+ *
+ *  These packed headers appear at the very beginning (offset 0) of every Aaru image file and
+ *  advertise container format version, creator application, indexing offset and optional extended
+ *  feature capability bitfields (v2+). All multi-byte integers are little-endian. Strings stored
+ *  in the fixed-size application field are UTF‑16LE and zero padded (not necessarily NUL-terminated
+ *  if fully filled). The GUID field (v2) allows derivative / child images to reference an origin.
+ *
+ *  Version progression:
+ *   - v1: \ref AaruHeader (no GUID, no alignment or shift metadata, no feature bitfields).
+ *   - v2: \ref AaruHeaderV2 introduces GUID, block/data/table shift hints (mirroring DDT metadata),
+ *         and three 64‑bit feature bitmaps to negotiate reader/writer compatibility.
+ *
+ *  Compatibility handling (recommended logic for consumers):
+ *   1. If any bit set in featureIncompatible is not implemented by the reader: abort (cannot safely read/write).
+ *   2. Else if any bit set in featureCompatibleRo is not implemented: allow read‑only operations.
+ *   3. Bits only present in featureCompatible but not implemented MAY be ignored for both read/write while
+ *      still preserving round‑trip capability (writer should not clear unknown bits when re‑saving).
+ *
+ *  Alignment & shift semantics (duplicated here for quick reference, see DdtHeader2 for full details):
+ *   - blockAlignmentShift: underlying blocks are aligned to 2^blockAlignmentShift bytes.
+ *   - dataShift: data pointer / DDT entry low bits encode offsets modulo 2^dataShift sectors/items.
+ *   - tableShift: primary DDT entries span 2^tableShift logical sectors (0 implies single-level tables).
+ *
+ *  Invariants:
+ *   - identifier == AARU_MAGIC (external constant; not defined here).
+ *   - For v1: sizeof(AaruHeader) exact and indexOffset > 0 (indexOffset == 0 => corrupt/unreadable image).
+ *   - For v2: sizeof(AaruHeaderV2) exact; indexOffset > 0; blockAlignmentShift, dataShift, tableShift within
+ *             sane bounds (e.g. < 63). Zero is permissible only for the shift fields (not for indexOffset).
+ *
+ *  Security / robustness considerations:
+ *   - Always bounds-check indexOffset against file size before seeking.
+ *   - Treat application field as untrusted UTF‑16LE; validate surrogate pairs if necessary.
+ *   - Unknown feature bits MUST be preserved if a file is rewritten to avoid capability loss.
+ */
+
+#define AARU_HEADER_APP_NAME_LEN 64 /**< Size in bytes of the UTF-16LE application name field (32 code units). */
+#define GUID_SIZE                16 /**< Size in bytes of GUID / UUID-like binary identifier. */

 #pragma pack(push, 1)

-/**Header, at start of file */
-typedef struct AaruHeader {
-    /**Header identifier, <see cref="AARU_MAGIC" /> */
-    uint64_t identifier;
-    /**UTF-16LE name of the application that created the image */
-    uint8_t application[AARU_HEADER_APP_NAME_LEN];
-    /**Image format major version. A new major version means a possibly incompatible change of format */
-    uint8_t imageMajorVersion;
-    /**Image format minor version. A new minor version indicates a compatible change of format */
-    uint8_t imageMinorVersion;
-    /**Major version of the application that created the image */
-    uint8_t applicationMajorVersion;
-    /**Minor version of the application that created the image */
-    uint8_t applicationMinorVersion;
-    /**Type of media contained on image */
-    uint32_t mediaType;
-    /**Offset to index */
-    uint64_t indexOffset;
-    /**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
-    int64_t creationTime;
-    /**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
-    int64_t lastWrittenTime;
+/** \struct AaruHeader
+ *  \brief Version 1 container header (104 bytes, packed) placed at offset 0 for legacy / initial format.
+ *
+ *  Field summary:
+ *   - identifier: magic signature (AARU_MAGIC) identifying the container.
+ *   - application: UTF‑16LE creator application name (fixed 64 bytes, zero padded).
+ *   - imageMajorVersion / imageMinorVersion: container format version of the file itself (not the app).
+ *   - applicationMajorVersion / applicationMinorVersion: version of the creating application.
+ *   - mediaType: media type enumeration (\ref MediaType).
+ *   - indexOffset: absolute byte offset to the primary index block (must be > 0).
+ *   - creationTime / lastWrittenTime: 64-bit Windows FILETIME timestamps (100 ns intervals since 1601-01-01 UTC).
+ */
+typedef struct AaruHeader
+{
+    uint64_t identifier;                             ///< File magic (AARU_MAGIC).
+    uint8_t  application[AARU_HEADER_APP_NAME_LEN];  ///< UTF-16LE creator application name (64 bytes, zero padded).
+    uint8_t  imageMajorVersion;        ///< Container format major version (incompatible changes when incremented).
+    uint8_t  imageMinorVersion;        ///< Container format minor version (backward compatible evolutions).
+    uint8_t  applicationMajorVersion;  ///< Creator application major version.
+    uint8_t  applicationMinorVersion;  ///< Creator application minor version.
+    uint32_t mediaType;                ///< Media type enumeration (value from \ref MediaType).
+    uint64_t indexOffset;      ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
+    int64_t  creationTime;     ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
+    int64_t  lastWrittenTime;  ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
 } AaruHeader;

-/**Header, at start of file */
-typedef struct AaruHeaderV2 {
-    /**Header identifier, see AARU_MAGIC */
-    uint64_t identifier;
-    /**UTF-16LE name of the application that created the image */
-    uint8_t application[AARU_HEADER_APP_NAME_LEN];
-    /**Image format major version. A new major version means a possibly incompatible change of format */
-    uint8_t imageMajorVersion;
-    /**Image format minor version. A new minor version indicates a compatible change of format */
-    uint8_t imageMinorVersion;
-    /**Major version of the application that created the image */
-    uint8_t applicationMajorVersion;
-    /**Minor version of the application that created the image */
-    uint8_t applicationMinorVersion;
-    /**Type of media contained on image */
-    uint32_t mediaType;
-    /**Offset to index */
-    uint64_t indexOffset;
-    /**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
-    int64_t creationTime;
-    /**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
-    int64_t lastWrittenTime;
-    /**Unique identifier that allows children images to recognize and find this image.*/
-    uint8_t guid[GUID_SIZE];
-    /**Block alignment shift. All blocks in the image are aligned at 2 << blockAlignmentShift bytes */
-    uint8_t blockAlignmentShift;
-    /**Data shift. All data blocks in the image contain 2 << dataShift items at most */
-    uint8_t dataShift;
-    /**Table shift. All deduplication tables in the image use this shift to calculate the position of an item */
-    uint8_t tableShift;
-    /**Features used in this image that if unsupported are still compatible for reading and writing implementations */
-    uint64_t featureCompatible;
-    /**Features used in this image that if unsupported are still compatible for reading implementations but not for writing */
-    uint64_t featureCompatibleRo;
-    /**Featured used in this image that if unsupported prevent reading or writing the image*/
-    uint64_t featureIncompatible;
+/** \struct AaruHeaderV2
+ *  \brief Version 2 container header (147 bytes, packed) with GUID, alignment shifts, and feature bitmaps.
+ *
+ *  Additions over v1:
+ *   - guid: stable 128-bit identifier enabling linkage by derivative images.
+ *   - blockAlignmentShift / dataShift / tableShift: global structural hints copied into data & DDT blocks.
+ *   - featureCompatible / featureCompatibleRo / featureIncompatible: capability bitmasks.
+ *
+ *  Feature bitmask semantics:
+ *   - featureCompatible: Optional features; absence of implementation should not impact R/W correctness.
+ *   - featureCompatibleRo: If unimplemented, image MAY be opened read-only.
+ *   - featureIncompatible: If any bit unimplemented, image MUST NOT be opened (prevent misinterpretation).
+ *
+ *  Readers should test each header mask against the complement of their supported bit set (mask & ~supported)
+ *  to decide access level (see file documentation). Writers must preserve unknown bits when saving an image.
+ */
+typedef struct AaruHeaderV2
+{
+    uint64_t identifier;                             ///< File magic (AARU_MAGIC).
+    uint8_t  application[AARU_HEADER_APP_NAME_LEN];  ///< UTF-16LE creator application name (fixed 64 bytes).
+    uint8_t  imageMajorVersion;                      ///< Container format major version.
+    uint8_t  imageMinorVersion;                      ///< Container format minor version.
+    uint8_t  applicationMajorVersion;                ///< Creator application major version.
+    uint8_t  applicationMinorVersion;                ///< Creator application minor version.
+    uint32_t mediaType;                              ///< Media type enumeration (value from \ref MediaType).
+    uint64_t indexOffset;      ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
+    int64_t  creationTime;     ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
+    int64_t  lastWrittenTime;  ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
+    uint8_t  guid[GUID_SIZE];  ///< 128-bit image GUID (binary, not text); lets child images find this image.
+    uint8_t  blockAlignmentShift;  ///< log2 block alignment (block size alignment = 2^blockAlignmentShift bytes).
+    uint8_t  dataShift;            ///< log2 sectors/items per block-index increment in DDT entries (2^dataShift).
+    uint8_t  tableShift;           ///< log2 sectors spanned by each primary DDT entry (0 = single-level).
+    uint64_t featureCompatible;    ///< Feature bits: unimplemented bits are ignorable (still R/W safe).
+    uint64_t featureCompatibleRo;  ///< Feature bits: unimplemented -> degrade to read-only access.
+    uint64_t featureIncompatible;  ///< Feature bits: any unimplemented -> abort (cannot open safely).
 } AaruHeaderV2;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_HEADER_H
+#endif  // LIBAARUFORMAT_HEADER_H
--- a/include/aaruformat/structs/index.h
+++ b/include/aaruformat/structs/index.h
@@ -21,50 +21,95 @@

 #pragma pack(push, 1)

-/**Header for the index, followed by entries */
+/** \file aaruformat/structs/index.h
+ *  \brief On‑disk index block header and entry structures (versions 1, 2 and 3).
+ *
+ *  The index provides a directory of all blocks contained in an Aaru image. Each index block starts with
+ *  a versioned header (IndexHeader / IndexHeader2 / IndexHeader3) followed by a contiguous array of
+ *  fixed‑size \ref IndexEntry records. Version 3 adds support for hierarchical (chained / nested) subindexes.
+ *
+ *  Version mapping by block identifier (see \ref BlockType):
+ *   - IndexBlock  (v1) -> \ref IndexHeader  (16‑bit entry count) followed by that many entries.
+ *   - IndexBlock2 (v2) -> \ref IndexHeader2 (64‑bit entry count) followed by that many entries.
+ *   - IndexBlock3 (v3) -> \ref IndexHeader3 with optional hierarchical subindex references.
+ *
+ *  CRC coverage & endianness:
+ *   - The crc64 field stores a CRC64-ECMA over the entries array ONLY (header bytes are excluded).
+ *   - For images with imageMajorVersion <= AARUF_VERSION_V1 a legacy writer byte-swapped the CRC; readers
+ *     compensate (see verify_index_v1/v2/v3). The value in the header remains whatever was originally written.
+ *
+ *  Hierarchical (v3) behavior:
+ *   - Entries whose blockType == IndexBlock3 refer to subindex blocks; readers recursively load and flatten.
+ *   - IndexHeader3::previous can point to a preceding index segment (for append / incremental scenarios) or 0.
+ *   - CRC of the main index does NOT cover subindex contents; each subindex has its own header + CRC.
+ *
+ *  Invariants / validation recommendations:
+ *   - identifier must equal the expected BlockType variant for that version.
+ *   - entries > 0 implies the entries array byte size == entries * sizeof(IndexEntry).
+ *   - crc64 must match recomputed CRC64( entries array ) (after legacy byte swap handling if required).
+ *   - For v3, if previous != 0 it should point to another IndexBlock3 header (optional best‑effort check).
+ *
+ *  Notes:
+ *   - Structures are packed (1‑byte alignment). All multi-byte integers are little‑endian on disk.
+ *   - The index does not store per-entry CRC; integrity relies on each individual block's own CRC plus the index CRC.
+ *   - dataType in \ref IndexEntry is meaningful only for block types that carry typed data (e.g. DataBlock,
+ *     DumpHardwareBlock, etc.).
+ *
+ *  See also: verify_index_v1(), verify_index_v2(), verify_index_v3() for integrity procedures.
+ */
+
+/** \struct IndexHeader
+ *  \brief Index header (version 1) for legacy images (identifier == IndexBlock).
+ *
+ *  Uses a 16‑bit entry counter, limiting a v1 index to at most 65535 entries; 14 bytes when packed.
+ */
 typedef struct IndexHeader
 {
-    /**Identifier, <see cref="BlockType.Index" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint16_t entries;
-    /**CRC64-ECMA of the index */
-    uint64_t crc64;
+    uint32_t identifier;  ///< Block identifier (must be BlockType::IndexBlock).
+    uint16_t entries;     ///< Number of \ref IndexEntry records that follow immediately.
+    uint64_t crc64;       ///< CRC64-ECMA of the entries array (legacy byte-swapped for early images).
 } IndexHeader;

-/**Header for the index, followed by entries */
+/** \struct IndexHeader2
+ *  \brief Index header (version 2) with 64‑bit entry counter (identifier == IndexBlock2).
+ *
+ *  Widens the entry count to 64 bits for large images; field order otherwise matches v1 (20 bytes when packed).
+ */
 typedef struct IndexHeader2
 {
-    /**Identifier, <see cref="BlockType.Index" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint64_t entries;
-    /**CRC64-ECMA of the index */
-    uint64_t crc64;
+    uint32_t identifier;  ///< Block identifier (must be BlockType::IndexBlock2).
+    uint64_t entries;     ///< Number of \ref IndexEntry records that follow immediately.
+    uint64_t crc64;  ///< CRC64-ECMA of the entries array (legacy byte-swapped rule still applies for old versions).
 } IndexHeader2;

-/**Header for the index, followed by entries */
+/** \struct IndexHeader3
+ *  \brief Index header (version 3) adding hierarchical chaining (identifier == IndexBlock3); 28 bytes when packed.
+ *
+ *  Supports hierarchical indexes: entries whose blockType is IndexBlock3 reference subindex blocks.
+ *  The 'previous' pointer allows chaining earlier index segments (e.g., incremental append) enabling
+ *  cumulative discovery without rewriting earlier headers.
+ */
 typedef struct IndexHeader3
 {
-    /**Identifier, <see cref="BlockType.Index" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint64_t entries;
-    /**CRC64-ECMA of the index */
-    uint64_t crc64;
-    /**Pointer to the previous index header */
-    uint64_t previous;
+    uint32_t identifier;  ///< Block identifier (must be BlockType::IndexBlock3).
+    uint64_t entries;     ///< Number of \ref IndexEntry records that follow in this (sub)index block.
+    uint64_t crc64;       ///< CRC64-ECMA of the local entries array (does NOT cover subindexes or previous chains).
+    uint64_t previous;    ///< File offset of a previous IndexBlock3 header (0 if none / root segment).
 } IndexHeader3;

-/**Index entry */
+/** \struct IndexEntry
+ *  \brief Single index entry (14 bytes, packed): a block's type, optional data classification, and file offset.
+ *
+ *  Semantics by blockType (see \ref BlockType):
+ *   - DataBlock / GeometryBlock / ChecksumBlock / etc.: dataType conveys specific stored data category (\ref DataType).
+ *   - Deduplication (DDT) or Index blocks: dataType may be ignored or set to a sentinel.
+ *   - IndexBlock3: this entry refers to a subindex; offset points to another IndexHeader3.
+ */
 typedef struct IndexEntry
 {
-    /**Type of item pointed by this entry */
-    uint32_t blockType;
-    /**Type of data contained by the block pointed by this entry */
-    uint16_t dataType;
-    /**Offset in file where item is stored */
-    uint64_t offset;
+    uint32_t blockType;  ///< Block identifier of the referenced block (value from \ref BlockType).
+    uint16_t dataType;   ///< Data classification (value from \ref DataType) or unused for untyped blocks.
+    uint64_t offset;     ///< Absolute byte offset in the image where the referenced block header begins.
 } IndexEntry;

 #pragma pack(pop)
--- a/include/aaruformat/structs/metadata.h
+++ b/include/aaruformat/structs/metadata.h
@@ -21,73 +21,95 @@

 #pragma pack(push, 1)

-/**Metadata block, contains metadata */
-typedef struct MetadataBlockHeader {
-    /**Identifier, <see cref="BlockType.MetadataBlock" /> */
-    uint32_t identifier;
-    /**Size in uint8_ts of this whole metadata block */
-    uint32_t blockSize;
-    /**Sequence of media set this media belongs to */
-    int32_t mediaSequence;
-    /**Total number of media on the media set this media belongs to */
-    int32_t lastMediaSequence;
-    /**Offset to start of creator string from start of this block */
-    uint32_t creatorOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t creatorLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t commentsOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t commentsLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaTitleOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaTitleLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaManufacturerOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaManufacturerLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaModelOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaModelLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaSerialNumberOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaSerialNumberLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaBarcodeOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaBarcodeLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaPartNumberOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaPartNumberLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t driveManufacturerOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t driveManufacturerLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t driveModelOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t driveModelLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t driveSerialNumberOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t driveSerialNumberLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t driveFirmwareRevisionOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t driveFirmwareRevisionLength;
+/** \file aaruformat/structs/metadata.h
+ *  \brief Packed on-disk metadata block headers for descriptive strings and CICM XML (if present).
+ *
+ *  Two metadata-related block header layouts are defined:
+ *   - \ref MetadataBlockHeader (BlockType::MetadataBlock): offsets + lengths for several UTF-16LE strings.
+ *   - \ref CicmMetadataBlock (BlockType::CicmBlock): length of embedded CICM XML metadata payload.
+ *
+ *  All multi-byte integers are little-endian. Structures are packed (1-byte alignment). All textual fields
+ *  referenced by offsets are UTF-16LE, null-terminated (0x0000). Length fields include the terminating
+ *  null (i.e. a non-zero length is even and >= 2). Offsets are relative to the start of the corresponding block
+ *  header (byte 0 = first byte of the header). No padding is implicitly added between strings; producers
+ *  may pack them tightly or align them manually (alignment not required by the specification).
+ *
+ *  Metadata block layout (conceptual):
+ *    MetadataBlockHeader (fixed size)
+ *    <variable region holding each present UTF-16LE string in any order chosen by the writer>
+ *
+ *  Invariants / validation recommendations for MetadataBlockHeader:
+ *   - identifier == BlockType::MetadataBlock
+ *   - blockSize >= sizeof(MetadataBlockHeader)
+ *   - For every (offset,length) pair where length > 0:
+ *       * offset >= sizeof(MetadataBlockHeader)
+ *       * offset + length <= blockSize
+ *       * length % 2 == 0
+ *       * The 16-bit code unit at (offset + length - 2) == 0x0000 (null terminator)
+ *   - mediaSequence >= 0 and lastMediaSequence >= 0; if lastMediaSequence > 0 then mediaSequence <=
+ *     lastMediaSequence (strictly less when the producer numbers media from 0)
+ *
+ *  CICM metadata block layout:
+ *    CicmMetadataBlock (header)
+ *    <length bytes of CICM XML payload (typically UTF-8 encoded, not null-terminated)>
+ *
+ *  NOTE: The library code reading these blocks must not assume strings are present; a zero length means the
+ *  corresponding field is omitted. Offsets for omitted fields MAY be zero or arbitrary; readers should skip them
+ *  whenever length == 0.
+ */
+
+/** \struct MetadataBlockHeader
+ *  \brief Metadata block header (112 bytes, packed): offsets and lengths of UTF-16LE descriptive strings.
+ *
+ *  Descriptive fields (all optional): creator, comments, media title/manufacturer/model/serial/barcode/part number,
+ *  drive manufacturer/model/serial/firmware revision. Strings can be used to describe both physical medium and
+ *  acquisition hardware. Length values include the UTF-16LE null terminator (two zero bytes).
+ */
+typedef struct MetadataBlockHeader
+{
+    uint32_t identifier;         ///< Block identifier, must be BlockType::MetadataBlock.
+    uint32_t blockSize;          ///< Total size in bytes of the entire metadata block (header + strings).
+    int32_t  mediaSequence;      ///< Sequence number within a multi-disc / multi-volume set (0-based or 1-based as
+                                 ///< producer defines).
+    int32_t  lastMediaSequence;  ///< Total number of media in the set; 0 or 1 if single item.
+    uint32_t creatorOffset;      ///< Offset from block start to UTF-16LE creator string (ignore if creatorLength==0).
+    uint32_t creatorLength;      ///< Length in bytes (including null) of creator string (0 if absent).
+    uint32_t commentsOffset;     ///< Offset to UTF-16LE comments string.
+    uint32_t commentsLength;     ///< Length in bytes (including null) of comments string.
+    uint32_t mediaTitleOffset;   ///< Offset to UTF-16LE media title string.
+    uint32_t mediaTitleLength;   ///< Length in bytes (including null) of media title string.
+    uint32_t mediaManufacturerOffset;      ///< Offset to UTF-16LE media manufacturer string.
+    uint32_t mediaManufacturerLength;      ///< Length in bytes (including null) of media manufacturer string.
+    uint32_t mediaModelOffset;             ///< Offset to UTF-16LE media model string.
+    uint32_t mediaModelLength;             ///< Length in bytes (including null) of media model string.
+    uint32_t mediaSerialNumberOffset;      ///< Offset to UTF-16LE media serial number string.
+    uint32_t mediaSerialNumberLength;      ///< Length in bytes (including null) of media serial number string.
+    uint32_t mediaBarcodeOffset;           ///< Offset to UTF-16LE media barcode string.
+    uint32_t mediaBarcodeLength;           ///< Length in bytes (including null) of media barcode string.
+    uint32_t mediaPartNumberOffset;        ///< Offset to UTF-16LE media part number string.
+    uint32_t mediaPartNumberLength;        ///< Length in bytes (including null) of media part number string.
+    uint32_t driveManufacturerOffset;      ///< Offset to UTF-16LE drive manufacturer string.
+    uint32_t driveManufacturerLength;      ///< Length in bytes (including null) of drive manufacturer string.
+    uint32_t driveModelOffset;             ///< Offset to UTF-16LE drive model string.
+    uint32_t driveModelLength;             ///< Length in bytes (including null) of drive model string.
+    uint32_t driveSerialNumberOffset;      ///< Offset to UTF-16LE drive serial number string.
+    uint32_t driveSerialNumberLength;      ///< Length in bytes (including null) of drive serial number string.
+    uint32_t driveFirmwareRevisionOffset;  ///< Offset to UTF-16LE drive firmware revision string.
+    uint32_t driveFirmwareRevisionLength;  ///< Length in bytes (including null) of drive firmware revision string.
 } MetadataBlockHeader;

-/**Geometry block, contains physical geometry information */
-typedef struct CicmMetadataBlock {
-    /**Identifier, <see cref="BlockType.CicmBlock" /> */
-    uint32_t identifier;
-    uint32_t length;
+/** \struct CicmMetadataBlock
+ *  \brief Header (8 bytes, packed) for a CICM XML metadata block (identifier == BlockType::CicmBlock).
+ *
+ *  The 'length' bytes immediately following the header contain the CICM XML payload. Encoding is typically
+ *  UTF-8; the payload is not required to be null-terminated.
+ */
+typedef struct CicmMetadataBlock
+{
+    uint32_t identifier;  ///< Block identifier, must be BlockType::CicmBlock.
+    uint32_t length;      ///< Length in bytes of the CICM metadata payload that follows.
 } CicmMetadataBlock;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_METADATA_H
+#endif  // LIBAARUFORMAT_METADATA_H
--- a/include/aaruformat/structs/optical.h
+++ b/include/aaruformat/structs/optical.h
@@ -21,36 +21,65 @@

 #pragma pack(push, 1)

-/**Contains list of optical disc tracks */
-typedef struct TracksHeader {
-    /**Identifier, <see cref="BlockType.TracksBlock" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint16_t entries;
-    /**CRC64-ECMA of the block */
-    uint64_t crc64;
+/** \file aaruformat/structs/optical.h
+ *  \brief On-disk structures describing optical disc tracks (Track list block).
+ *
+ *  An optical tracks block (identifier == BlockType::TracksBlock) stores a list of \ref TrackEntry
+ *  records describing the logical layout of tracks and sessions for CD/DVD/BD and similar media.
+ *
+ *  Layout:
+ *    TracksHeader (fixed)
+ *    TrackEntry[ entries ] (array, packed)
+ *
+ *  CRC semantics:
+ *   - TracksHeader::crc64 is a CRC64-ECMA over the contiguous TrackEntry array ONLY (header excluded).
+ *   - For legacy images (imageMajorVersion <= AARUF_VERSION_V1) a byte swap is applied when verifying.
+ *
+ *  Field semantics (TrackEntry):
+ *   - sequence: Logical track number (1..99 typical for CD). Values outside that range may encode extras.
+ *   - type: Value from \ref TrackType (Audio, Data, Mode variants, etc.).
+ *   - start / end: Inclusive Logical Block Address (LBA) bounds for the track. end >= start.
+ *   - pregap: Number of sectors of pre-gap *preceding* the track's first user-accessible sector (can be 0 or negative
+ *             if representing lead-in semantics; negative interpretation is implementation-defined).
+ *   - session: Session number starting at 1 for multi-session discs (1 for single session).
+ *   - isrc: 13-byte ISRC (raw code, no terminating null). If fewer significant characters, remaining bytes are 0.
+ *   - flags: Bitmask of track/control flags. Unless otherwise specified, recommended mapping (mirrors CD subchannel Q
+ *            control bits) is: bit0 Pre-emphasis, bit1 Copy permitted, bit2 Data track, bit3 Four-channel audio,
+ *            bits4-7 reserved. Actual semantics may be extended by the format specification.
+ *
+ *  Invariants / validation recommendations:
+ *   - identifier == BlockType::TracksBlock
+ *   - entries * sizeof(TrackEntry) bytes are present after the header in the block image.
+ *   - 1 <= sequence <= 99 for standard CD tracks (non-conforming values allowed but should be documented).
+ *   - start <= end; pregap >= 0 (if negative pregaps unsupported in implementation).
+ *   - ISRC bytes either all zero (no ISRC) or ASCII alphanumerics (A-Z, 0-9) per ISO 3901 (no hyphen separators).
+ */
+
+/** \struct TracksHeader
+ *  \brief Header (14 bytes, packed) for an optical tracks block listing track entries.
+ */
+typedef struct TracksHeader
+{
+    uint32_t identifier;  ///< Block identifier (must be BlockType::TracksBlock).
+    uint16_t entries;     ///< Number of \ref TrackEntry records following this header.
+    uint64_t crc64;  ///< CRC64-ECMA of the TrackEntry array (header excluded, legacy byte-swap for early versions).
 } TracksHeader;

-/**Optical disc track */
-typedef struct TrackEntry {
-    /**Track sequence */
-    uint8_t sequence;
-    /**Track type */
-    uint8_t type;
-    /**Track starting LBA */
-    int64_t start;
-    /**Track last LBA */
-    int64_t end;
-    /**Track pregap in sectors */
-    int64_t pregap;
-    /**Track session */
-    uint8_t session;
-    /**Track's ISRC in ASCII */
-    uint8_t isrc[13];
-    /**Track flags */
-    uint8_t flags;
+/** \struct TrackEntry
+ *  \brief Single optical disc track descriptor, 41 bytes packed (sequence, type, LBAs, session, ISRC, flags).
+ */
+typedef struct TrackEntry
+{
+    uint8_t sequence;  ///< Track number (1..99 typical for CD audio/data). 0 may indicate placeholder/non-standard.
+    uint8_t type;      ///< Track type (value from \ref TrackType).
+    int64_t start;     ///< Inclusive starting LBA of the track.
+    int64_t end;       ///< Inclusive ending LBA of the track.
+    int64_t pregap;    ///< Pre-gap length in sectors preceding track start (0 if none).
+    uint8_t session;   ///< Session number (1-based). 1 for single-session discs.
+    uint8_t isrc[13];  ///< ISRC field (all zeros if absent). NOTE(review): ISRC is 12 chars; confirm role of byte 13.
+    uint8_t flags;     ///< Control / attribute bitfield (see file documentation for suggested bit mapping).
 } TrackEntry;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_OPTICAL_H
+#endif  // LIBAARUFORMAT_OPTICAL_H
--- a/include/aaruformat/structs/options.h
+++ b/include/aaruformat/structs/options.h
@@ -19,19 +19,214 @@
 #ifndef LIBAARUFORMAT_OPTIONS_H
 #define LIBAARUFORMAT_OPTIONS_H

+#include <stdbool.h>  // For bool type used in aaru_options.
+#include <stdint.h>   // For fixed-width integer types.
+
+/** \file aaruformat/structs/options.h
+ *  \brief Image creation / open tuning options structure and related semantics.
+ *
+ *  The library accepts a semicolon-delimited key=value options string (see parse_options()). Recognized keys:
+ *    compress=true|false          Enable/disable block compression (LZMA for data blocks, FLAC for audio tracks).
+ *    deduplicate=true|false       If true, identical (duplicate) sectors are stored once (DDT entries point to same
+ *                                 physical block). If false, duplicates are still tracked in DDT but each occurrence
+ *                                 is stored independently (no storage savings). DDT itself is always present.
+ *    dictionary=<bytes>           LZMA dictionary size in bytes (fallback default 33554432 if 0 or invalid).
+ *    table_shift=<n>              DDT v2 table shift (default 9) (items per primary entry = 2^n when multi-level).
+ *    data_shift=<n>               Global data shift (default 12). Defines per-block address granularity: the low
+ *                                 n bits encode the sector (or unit) offset within a block; higher bits combine
+ *                                 with block_alignment to derive block file offsets. Used by DDT but not limited to it.
+ *    block_alignment=<n>          log2 alignment of underlying data blocks (default 9 => 512 bytes) (block size = 2^n).
+ *    md5=true|false               Generate MD5 checksum (stored in checksum block if true).
+ *    sha1=true|false              Generate SHA-1 checksum.
+ *    sha256=true|false            Generate SHA-256 checksum.
+ *    blake3=true|false            Generate BLAKE3 checksum (may require build-time support; ignored if unsupported).
+ *    spamsum=true|false           Generate SpamSum fuzzy hash.
+ *
+ *  Defaults (when option string NULL or key omitted):
+ *    compress=true, deduplicate=true, dictionary=33554432, table_shift=9, data_shift=12,
+ *    block_alignment=9, md5=false, sha1=false, sha256=false, blake3=false, spamsum=false.
+ *
+ *  Validation / normalization done in parse_options():
+ *   - Zero / missing dictionary resets to default 33554432.
+ *   - Zero table_shift resets to 9.
+ *   - Zero data_shift resets to 12.
+ *   - Zero block_alignment resets to 9.
+ *
+ *  Rationale:
+ *   - table_shift, data_shift and block_alignment mirror fields stored in on-disk headers (see AaruHeaderV2 &
+ *     DdtHeader2); data_shift is a global per-block granularity exponent (not DDT-specific) governing how
+ *     in-block offsets are encoded.
+ *   - compress selects adaptive codec usage: LZMA applied to generic/data blocks, FLAC applied to audio track payloads.
+ *   - deduplicate toggles storage optimization only: the DDT directory is always built for addressing; disabling simply
+ *     forces each sector's content to be written even if already present (useful for forensic byte-for-byte
+ *     duplication).
+ *   - dictionary tunes compression ratio/memory use; large values increase memory footprint.
+ *   - Checksums are optional; enabling multiple increases CPU time at write finalization.
+ *
+ *  Performance / space trade-offs (deduplicate=false):
+ *   - Significantly larger image size: every repeated sector payload is written again.
+ *   - Higher write I/O and longer creation time for highly redundant sources (e.g., zero-filled regions) compared to
+ *     deduplicate=true, although CPU time spent on duplicate detection/hash lookups is reduced.
+ *   - Potentially simpler post-process forensic validation (physical ordering preserved without logical coalescing).
+ *   - Use when exact physical repetition is more critical than storage efficiency, or to benchmark raw device
+ *     throughput.
+ *   - For typical archival use-cases with large zero / repeated patterns, deduplicate=true markedly reduces footprint.
+ *
+ *  Approximate in-RAM hash map usage for deduplication (deduplicate=true):
+ *   The on-disk DDT can span many secondary tables, but only the primary table plus a currently loaded secondary (and
+ *   possibly a small cache) reside in memory; their footprint is typically <<5% of total indexed media space and is
+ *   often negligible compared to the hash map used to detect duplicate sectors. Therefore we focus here on the
+ *   hash / lookup structure ("hash_map") memory, not the entire DDT on-disk size.
+ *
+ *   Worst-case (all sectors unique) per 1 GiB of user data:
+ *     sectors_per_GiB = 2^30 / sector_size
+ *     hash_bytes ≈ sectors_per_GiB * H   (H ≈ 16 bytes: 8-byte fingerprint + ~8 bytes map overhead)
+ *
+ *   Resulting hash_map RAM per GiB (unique sectors):
+ *     +--------------+------------------+------------------------------+
+ *     | Sector size  | Sectors / GiB    | Hash map (~16 B / sector)    |
+ *     +--------------+------------------+------------------------------+
+ *     |   512 bytes  | 2,097,152        | ~32.0 MiB  (≈30.0–34.0 MiB)  |
+ *     |  2048 bytes  |   524,288        | ~ 8.0 MiB  (≈7.5–8.5  MiB)   |
+ *     |  4096 bytes  |   262,144        | ~ 4.0 MiB  (≈3.8–4.3  MiB)   |
+ *     +--------------+------------------+------------------------------+
+ *
+ *   (Range reflects allocator + load factor variation.)
+ *
+ *   Targeted projections (hash map only, R=1):
+ *     2048‑byte sectors (~8 MiB per GiB unique)
+ *       Capacity | Hash map (MiB) | Hash map (GiB)
+ *       ---------+---------------+----------------
+ *         25 GiB |     ~200       |   0.20
+ *         50 GiB |     ~400       |   0.39
+ *
+ *     512‑byte sectors (~32 MiB per GiB unique)
+ *       Capacity | Hash map (MiB) | Hash map (GiB)
+ *       ---------+---------------+----------------
+ *        128 GiB |   ~4096        |   4.00
+ *        500 GiB |  ~16000        |  15.63
+ *      1   TiB*  |  ~32768        |  32.00
+ *      2   TiB*  |  ~65536        |  64.00
+ *
+ *     *TiB = 1024 GiB binary. For decimal TB reduce by ~9% (×0.91).
+ *
+ *   Duplicate ratio scaling:
+ *     Effective hash RAM ≈ table_value * R, where R = unique_sectors / total_sectors.
+ *     Example: 500 GiB @512 B, R=0.4 ⇒ ~16000 MiB * 0.4 ≈ 6400 MiB (~6.25 GiB).
+ *
+ *   Quick rule of thumb (hash only):
+ *     hash_bytes_per_GiB ≈ 16 * (2^30 / sector_size) ≈ (17.1799e9 / sector_size) bytes
+ *       → ≈ 32 MiB (512 B), 8 MiB (2048 B), 4 MiB (4096 B) per GiB unique.
+ *
+ *   Memory planning tip:
+ *     If projected hash_map usage risks exceeding available RAM, consider:
+ *       - Increasing table_shift (reduces simultaneous secondary loads / contention)
+ *       - Lowering data_shift (if practical) to encourage earlier big DDT adoption with fewer unique blocks
+ *       - Segmenting the dump into phases (if workflow permits)
+ *       - Accepting higher duplicate ratio by pre-zero detection or sparse treatment externally.
+ *       - Resuming the dump in multiple passes: each resume rebuilds the hash_map from scratch, so peak RAM still
+ *         matches a single-pass estimate, but average RAM over total wall time can drop if you unload between passes.
+ *
+ *   NOTE: DDT in-RAM portion (primary + one secondary) usually adds only a few additional MiB even for very large
+ *   images, hence omitted from sizing tables. Include +5% safety margin if extremely tight on memory.
+ *
+ *  Guidance for table_shift / data_shift selection:
+ *   Let:
+ *     S = total logical sectors expected in image (estimate if unknown).
+ *     T = table_shift (items per primary DDT entry = 2^T when multi-level; 0 => single-level).
+ *     D = data_shift (in-block sector offset span = 2^D).
+ *     BA = block_alignment (bytes) = 2^block_alignment.
+ *     SS = sector size (bytes).
+ *
+ *   1. data_shift constraints:
+ *      - For SMALL DDT entries (12 payload bits after status): D must satisfy 0 < D < 12 and (12 - D) >= 1 so that at
+ *        least one bit remains for block index. Practical range for small DDT: 6..10 (leaves 2+ bits for block index).
+ *      - For BIG DDT entries (28 payload bits after status): D may be larger (up to 27) but values >16 rarely useful.
+ *      - Effective address granularity inside a block = min(2^D * SS, physical block span implied by BA).
+ *      - Choosing D too large wastes bits (larger offset range than block actually contains) and reduces the number of
+ *        block index bits within a small entry, potentially forcing upgrade to big DDT earlier.
+ *
+ *      Recommended starting points:
+ *        * 512‑byte sectors, 512‑byte block alignment: D=9 (512 offsets) or D=8 (256 offsets) keeps small DDT viable.
+ *        * 2048‑byte optical sectors, 2048‑byte alignment: D=8 (256 offsets) typically sufficient.
+ *        * Mixed / large logical block sizes: keep D so that (2^D * SS) ≈ typical dedup block region you want
+ *          addressable.
+ *
+ *   2. block capacity within an entry:
+ *      - SMALL DDT: usable block index bits = 12 - D.
+ *        Max representable block index (small) = 2^(12-D) - 1.
+ *      - BIG DDT: usable block index bits = 28 - D.
+ *        Max representable block index (big)   = 2^(28-D) - 1.
+ *      - If (requiredBlockIndex > max) you must either reduce D or rely on big DDT.
+ *
+ *      Approximate requiredBlockIndex ≈ (TotalUniqueBlocks) where
+ *        TotalUniqueBlocks ≈ (S * SS) / (BA * (2^D * SS / (SS))) = S / (2^D * (BA / SS))
+ *        Simplified (assuming BA = SS): TotalUniqueBlocks ≈ S / 2^D.
+ *
+ *   3. table_shift considerations (multi-level DDT):
+ *      - Primary entries count ≈ ceil(S / 2^T). Choose T so this count fits memory and keeps lookup fast.
+ *      - Larger T reduces primary table size, increasing secondary table dereferences.
+ *      - Typical balanced values: T in [8..12] (256..4096 sectors per primary entry).
+ *      - T=0 means single-level (for small S that fits in memory); NOTE: parse_options() resets table_shift=0 to 9.
+ *
+ *      Memory rough estimate for single-level SMALL DDT:
+ *        bytes ≈ S * 2  (each small entry 2 bytes). For BIG DDT: bytes ≈ S * 4.
+ *      Multi-level: primary table bytes ≈ (S / 2^T) * entrySize + sum(secondary tables).
+ *
+ *   4. Example scenarios:
+ *      - 50M sectors (≈25 GiB @512B), want small DDT: pick D=8 (256); block index bits=4 (max 16 blocks) insufficient.
+ *        Even D=6 gives only 6 block index bits (64 block indices), so accept BIG DDT (28-8=20 bits => million+
+ *        blocks). So prefer BIG DDT here.
+ *      - 2M sectors, 2048B alignment, optical: D=8 gives S/2^D ≈ 7812 blocks; small DDT block index bits=4 (max 16)
+ *        inadequate → choose D=6 (offset span 64 sectors) giving 6 block index bits (max 64) or just use big DDT.
+ *
+ *   5. Practical recommendations:
+ *      - If unsure and image > ~1M sectors: keep defaults (data_shift=12, table_shift=9) and allow big DDT.
+ *      - For small archival (<100k sectors): T=0 (single-level), D≈8..10 to keep small DDT feasible.
+ *      - Benchmark before lowering D purely to stay in small DDT; increased secondary lookups or larger primary tables
+ *        can offset saved space.
+ *
+ *   Recommended presets (approximate bands):
+ *     +-----------------------+----------------------+---------------------------+-------------------------------+
+ *     | Total logical sectors | table_shift (T)      | data_shift (D)            | Notes                         |
+ *     +-----------------------+----------------------+---------------------------+-------------------------------+
+ *     |   <   50,000          | 0                    | 8 – 10                    | Single-level small DDT likely |
+ *     | 50K –   1,000,000     | 8 – 9                | 9 – 10                    | Still feasible small DDT      |
+ *     | 1M  –  10,000,000     | 9 – 10               | 10 – 12                   | Borderline small -> big DDT   |
+ *     | 10M – 100,000,000     | 10 – 11              | 11 – 12                   | Prefer big DDT; tune T for mem|
+ *     |   > 100,000,000       | 11 – 12              | 12                        | Big DDT; higher T saves memory|
+ *     +-----------------------+----------------------+---------------------------+-------------------------------+
+ *     Ranges show typical stable regions; pick the lower end of table_shift if memory is ample, higher if minimizing
+ *     primary table size. Always validate actual unique block count vs payload bits.
+ *
+ *   NOTE: The library will automatically fall back to BIG DDT where needed; these settings bias structure, they do not
+ *         guarantee small DDT retention.
+ *
+ *  Thread-safety: aaru_options is a plain POD struct; caller may copy freely. parse_options() returns by value.
+ *
+ *  Future compatibility: unknown keys are ignored by current parser; consumers should preserve original option
+ *  strings if round-tripping is required.
+ */
+
+/** \struct aaru_options
+ *  \brief Parsed user-specified tunables controlling compression, deduplication, hashing and DDT geometry.
+ *
+ *  All shifts are exponents of two.
+ */
 typedef struct
 {
-    bool     compress;
-    bool     deduplicate;
-    uint32_t dictionary;
-    uint8_t  table_shift;
-    uint8_t  data_shift;
-    uint8_t  block_alignment;
-    bool     md5;
-    bool     sha1;
-    bool     sha256;
-    bool     blake3;
-    bool     spamsum;
+    bool     compress;     ///< Enable adaptive compression (LZMA for data blocks, FLAC for audio). Default: true.
+    bool     deduplicate;  ///< Storage dedup flag (DDT always exists). true=share identical sector content, false=store
+                           ///< each instance. Default: true.
+    uint32_t dictionary;   ///< LZMA dictionary size in bytes (>= 4096 recommended). Default: 33554432 (32 MiB).
+    uint8_t  table_shift;  ///< DDT table shift (multi-level fan-out exponent). Default: 9.
+    uint8_t  data_shift;   ///< Global data shift: low bits encode in-block sector offset (2^n span). Default: 12.
+    uint8_t  block_alignment;  ///< log2 underlying block alignment (2^n bytes). Default: 9 (512 bytes).
+    bool     md5;              ///< Generate MD5 checksum (ChecksumAlgorithm::Md5) when finalizing image.
+    bool     sha1;             ///< Generate SHA-1 checksum (ChecksumAlgorithm::Sha1) when finalizing image.
+    bool     sha256;           ///< Generate SHA-256 checksum (ChecksumAlgorithm::Sha256) when finalizing image.
+    bool     blake3;           ///< Generate BLAKE3 checksum if supported (not stored if algorithm unavailable).
+    bool     spamsum;          ///< Generate SpamSum fuzzy hash (ChecksumAlgorithm::SpamSum) if enabled.
 } aaru_options;

 #endif  // LIBAARUFORMAT_OPTIONS_H