Enhance documentation for various structures with detailed descriptions and formatting improvements

2025-12-16 19:24:40 +00:00 · 2025-10-01 05:35:39 +01:00
parent 1f91ad1e08
commit 41aee42c53
16 changed files with 1935 additions and 1273 deletions
--- a/include/aaru.h
+++ b/include/aaru.h
--- a/include/aaruformat/consts.h
+++ b/include/aaruformat/consts.h
@@ -24,43 +24,90 @@
 #pragma ide diagnostic ignored "OCUnusedMacroInspection"
 #endif

-/** Magic identidier = "DICMFRMT". */
-#define DIC_MAGIC              0x544D52464D434944
-/** Magic identidier = "AARUFRMT". */
-#define AARU_MAGIC             0x544D524655524141
-/** Image format version. A change in this number indicates an incompatible change to the format that prevents older
- * implementations from reading it correctly, if at all. */
-#define AARUF_VERSION          2
-/** First version of AaruFormat, created in C#.
- * CRC64 was byte-swapped
+/** \file aaruformat/consts.h
+ *  \brief Core public constants and compile‑time limits for the Aaru container format implementation.
+ *
+ *  This header exposes magic identifiers, format version selectors, resource limits, codec parameter bounds,
+ *  and bit masks used across libaaruformat. All values are immutable interface contracts; changing them breaks
+ *  backward compatibility unless a new format version is declared.
+ *
+ *  Summary:
+ *   - Magic numbers (DIC_MAGIC, AARU_MAGIC) identify container families (legacy DiscImageChef vs AaruFormat).
+ *   - Version macros distinguish format generations (V1 C# / legacy CRC endianness, V2 current C implementation).
+ *   - Cache and table size limits provide protective upper bounds against runaway memory consumption.
+ *   - Audio constants (SAMPLES_PER_SECTOR, MIN/MAX_FLAKE_BLOCK) align with Red Book (CD‑DA) and FLAC encoding best
+ *     practices.
+ *   - CD_* masks assist with extracting flags / positional subfields in deduplicated Compact Disc sector tables.
+ *   - CRC64 constants implement ECMA‑182 polynomial and standard seed, enabling deterministic end‑to‑end block
+ *     integrity.
+ *
+ *  Notes:
+ *   - Magic values are stored little‑endian on disk when written as 64‑bit integers; when inspecting raw bytes make
+ *     sure to account for host endianness.
+ *   - AARUF_VERSION must be incremented only when an incompatible on‑disk layout change is introduced.
+ *   - MAX_DDT_ENTRY_CACHE is a soft upper bound sized to balance deduplication hit rate vs RAM; tune in future builds
+ *     via configuration if adaptive heuristics are introduced.
+ *   - The LZMA properties length (5) derives from the standard LZMA header (lc/lp/pb + dict size) and is constant for
+ *     raw LZMA streams used here.
+ *   - FLAC sample block guidance: empirical evaluation shows >4608 samples per block does not yield meaningful ratio
+ *     gains for typical optical audio captures while increasing decode buffer size.
+ *
+ *  Thread safety: All macros are compile‑time constants; no synchronization required.
+ *  Portability: Constants chosen to fit within 64‑bit targets; arithmetic assumes two's complement.
 */
-#define AARUF_VERSION_V1       1
-/** Second version of AaruFormat, created in C.
- * Introduced new header, many new features, and blocks.
- */
-#define AARUF_VERSION_V2       2
-/** Maximum read cache size, 512MiB. */
-#define MAX_CACHE_SIZE         536870912
-/** Size in bytes of LZMA properties. */
-#define LZMA_PROPERTIES_LENGTH 5
-/** Maximum number of entries for the DDT cache. */
-#define MAX_DDT_ENTRY_CACHE    16000000
-/** How many samples are contained in a RedBook sector. */
-#define SAMPLES_PER_SECTOR     588
-/** Maximum number of samples for a FLAC block. Bigger than 4608 gives no benefit. */
-#define MAX_FLAKE_BLOCK        4608
-/** Minimum number of samples for a FLAC block. CUETools.Codecs.FLAKE does not support it to be smaller than 256. */
-#define MIN_FLAKE_BLOCK        256
-/** This mask is to check for flags in CompactDisc suffix/prefix DDT */
-#define CD_XFIX_MASK           0xFF000000
-/** This mask is to check for position in CompactDisc suffix/prefix deduplicated block */
-#define CD_DFIX_MASK           0x00FFFFFF

-#define CRC64_ECMA_POLY 0xC96C5795D7870F42
-#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF
+/** Magic identifier for legacy DiscImageChef container (ASCII "DICMFRMT").
+ *  Retained for backward compatibility / migration tooling. */
+#define DIC_MAGIC  0x544D52464D434944ULL
+/** Magic identifier for AaruFormat container (ASCII "AARUFRMT").
+ *  Used in the primary header to assert correct file type. */
+#define AARU_MAGIC 0x544D524655524141ULL
+
+/** Current image format major version (incompatible changes bump this).
+ *  Readers should reject headers with a higher number unless explicitly forward compatible. */
+#define AARUF_VERSION    2
+/** First on‑disk version (C# implementation).
+ *  Quirk: CRC64 values were stored byte‑swapped relative to ECMA‑182 canonical output. */
+#define AARUF_VERSION_V1 1
+/** Second on‑disk version (C implementation).
+ *  Introduced: extended header (GUID, feature bitmaps), hierarchical DDT v2, improved index (v2/v3),
+ *  multi‑codec compression, refined metadata blocks. */
+#define AARUF_VERSION_V2 2
+
+/** Maximum read cache size (bytes). 512 MiB chosen to prevent excessive resident memory while
+ *  still enabling efficient sequential and moderate random access patterns. */
+#define MAX_CACHE_SIZE 536870912ULL
+
+/** Size in bytes of the fixed LZMA properties header (lc/lp/pb + dictionary size). */
+#define LZMA_PROPERTIES_LENGTH 5
+
+/** Maximum number of cached DDT entry descriptors retained in memory for fast duplicate detection.
+ *  At 16,000,000 entries with a compact structure, this caps hash_map overhead while covering large images.
+ *  (Approx memory just for lookup bookkeeping: ~16 bytes * N ≈ 256 MB worst case; typical effective <50% of cap.) */
+#define MAX_DDT_ENTRY_CACHE 16000000
+
+/** Red Book (CD‑DA) PCM samples per 2352‑byte sector: 44,100 Hz / 75 sectors per second = 588 samples. */
+#define SAMPLES_PER_SECTOR 588
+
+/** FLAC maximum block size used for encoding audio sectors.
+ *  Empirically >4608 samples yields diminishing compression returns and higher decode latency. */
+#define MAX_FLAKE_BLOCK 4608
+/** FLAC minimum block size. CUETools.Codecs.FLAKE does not accept blocks smaller than 256 samples. */
+#define MIN_FLAKE_BLOCK 256
+
+/** Mask for extracting correction / fix flags in Compact Disc suffix/prefix DDT entries.
+ *  High 8 bits store status (see SectorStatus / CdFixFlags relationships). */
+#define CD_XFIX_MASK 0xFF000000U
+/** Mask for extracting positional index (lower 24 bits) in Compact Disc suffix/prefix deduplicated block entries. */
+#define CD_DFIX_MASK 0x00FFFFFFU
+
+/** ECMA‑182 CRC64 polynomial (reflected form used in standard implementations). */
+#define CRC64_ECMA_POLY 0xC96C5795D7870F42ULL
+/** Initial seed value for CRC64 computations (all bits set). */
+#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFFULL

 #ifndef _MSC_VER
 #pragma clang diagnostic pop
 #endif

-#endif  // LIBAARUFORMAT_CONSTS_H
+#endif  // LIBAARUFORMAT_CONSTS_H
--- a/include/aaruformat/context.h
+++ b/include/aaruformat/context.h
@@ -25,6 +25,41 @@
 #include "structs.h"
 #include "utarray.h"

+/** \file aaruformat/context.h
+ *  \brief Central runtime context structures for libaaruformat (image state, caches, checksum buffers).
+ *
+ *  The principal structure, \ref aaruformatContext, aggregates: header metadata, open stream handle, deduplication
+ *  tables (DDT) currently in memory, optical disc auxiliary data (sector prefix/suffix/subchannel), track listings,
+ *  geometry & metadata blocks, checksum accumulators, CRC & ECC helper contexts, hash map for deduplication, and
+ *  transient write buffers.
+ *
+ *  Memory ownership model (unless otherwise stated): if a pointer field is non-NULL it is owned by the context and
+ *  will be freed (or otherwise released) during context close / destruction. Callers must not free or reallocate
+ *  these pointers directly. External callers should treat all internal buffers as read‑only unless explicitly writing.
+ *
+ *  Threading: a single context instance is NOT thread-safe; serialize access if used across threads.
+ *  Lifetime: allocate, initialize/open, perform read/write/verify operations, then close/free.
+ *
+ *  Deduplication tables (DDT): only a subset (primary table + an active secondary + optional cache) is retained in RAM;
+ *  large images may rely on lazy loading of secondary tables. Flags (inMemoryDdt, userDataDdt*, cachedSecondary*)
+ *  indicate what is currently resident.
+ *
+ *  Optical auxiliary buffers (sectorPrefix / sectorSuffix / subchannel / corrected variants) are populated only for
+ *  images where those components exist (e.g., raw CD dumps). They may be NULL for block devices / non‑optical media.
+ *
+ *  Index handling: indexEntries (UT_array) holds a flattened list of \ref IndexEntry structures (regardless of
+ *  v1/v2/v3). hash_map_t *sectorHashMap provides fast duplicate detection keyed by content fingerprint / sparse sector
+ *  key.
+ *
+ *  Invariants / sanity expectations (not strictly enforced everywhere):
+ *   - magic == AARU_MAGIC after successful open/create.
+ *   - header.imageMajorVersion <= AARUF_VERSION.
+ *   - imageStream != NULL when any I/O method is in progress.
+ *   - If deduplicate == false, sectorHashMap may still be populated for bookkeeping but duplicates are stored
+ *     independently.
+ *   - If userDataDdtMini != NULL then userDataDdtBig == NULL (and vice versa) for a given level.
+ */
+
 #ifndef MD5_DIGEST_LENGTH
 #define MD5_DIGEST_LENGTH 16
 #endif
@@ -37,121 +72,186 @@
 #define SHA256_DIGEST_LENGTH 32
 #endif

+/** \struct Crc64Context
+ *  \brief Internal (legacy) CRC64 computation context (superseded by \ref crc64_ctx usage).
+ *
+ *  Kept for compatibility with earlier code paths; new code should prefer the opaque crc64_ctx API.
+ */
 typedef struct Crc64Context
 {
-    uint64_t finalSeed;
-    uint64_t table[256];
-    uint64_t hashInt;
+    uint64_t finalSeed;   ///< Final CRC value (post processing) or running seed.
+    uint64_t table[256];  ///< Precomputed 256-entry lookup table for the ECMA polynomial.
+    uint64_t hashInt;     ///< Intermediate accumulator.
 } Crc64Context;

+/** \struct CdEccContext
+ *  \brief Lookup tables and state for Compact Disc EDC/ECC (P/Q) regeneration / verification.
+ *
+ *  Fields may be lazily allocated; inited_edc indicates tables are ready.
+ */
 typedef struct CdEccContext
 {
-    bool      inited_edc;
-    uint8_t  *ecc_b_table;
-    uint8_t  *ecc_f_table;
-    uint32_t *edc_table;
+    bool      inited_edc;   ///< True once EDC/ECC tables have been initialized.
+    uint8_t  *ecc_b_table;  ///< Backward (B) ECC table (allocated, size implementation-defined).
+    uint8_t  *ecc_f_table;  ///< Forward (F) ECC table.
+    uint32_t *edc_table;    ///< EDC (CRC) lookup table.
 } CdEccContext;

+/** \struct Checksums
+ *  \brief Collected whole‑image checksums / hashes present in a checksum block.
+ *
+ *  Only hash arrays with corresponding has* flags set contain valid data. spamsum is a dynamically allocated
+ *  NUL‑terminated buffer (original SpamSum signature bytes followed by appended '\0').
+ */
 typedef struct Checksums
 {
-    bool     hasMd5;
-    bool     hasSha1;
-    bool     hasSha256;
-    bool     hasSpamSum;
-    uint8_t  md5[MD5_DIGEST_LENGTH];
-    uint8_t  sha1[SHA1_DIGEST_LENGTH];
-    uint8_t  sha256[SHA256_DIGEST_LENGTH];
-    uint8_t *spamsum;
+    bool     hasMd5;                        ///< True if md5[] buffer populated.
+    bool     hasSha1;                       ///< True if sha1[] buffer populated.
+    bool     hasSha256;                     ///< True if sha256[] buffer populated.
+    bool     hasSpamSum;                    ///< True if spamsum pointer allocated and signature read.
+    uint8_t  md5[MD5_DIGEST_LENGTH];        ///< MD5 digest (16 bytes).
+    uint8_t  sha1[SHA1_DIGEST_LENGTH];      ///< SHA-1 digest (20 bytes).
+    uint8_t  sha256[SHA256_DIGEST_LENGTH];  ///< SHA-256 digest (32 bytes).
+    uint8_t *spamsum;                       ///< SpamSum fuzzy hash (ASCII), allocated length+1 with trailing 0.
 } Checksums;

+/** \struct mediaTagEntry
+ *  \brief Hash table entry for an arbitrary media tag (e.g., proprietary drive/medium descriptor).
+ *
+ *  Stored via uthash (hh handle). Type is a format‑specific integer identifier mapping to external interpretation.
+ */
 typedef struct mediaTagEntry
 {
-    uint8_t       *data;
-    int32_t        type;
-    uint32_t       length;
-    UT_hash_handle hh;
+    uint8_t       *data;    ///< Tag data blob (opaque to library core); length bytes long.
+    int32_t        type;    ///< Numeric type identifier.
+    uint32_t       length;  ///< Length in bytes of data.
+    UT_hash_handle hh;      ///< uthash linkage.
 } mediaTagEntry;

+/** \struct aaruformatContext
+ *  \brief Master context representing an open or in‑creation Aaru image.
+ *
+ *  Contains stream handle, parsed headers, deduplication structures, optical extras, metadata blocks, checksum
+ *  information, caches, and write-state. Allocate with library factory (or zero‑init + explicit open) and destroy
+ *  with corresponding close/free routine.
+ *
+ *  Field grouping:
+ *   - Core & header: magic, library*Version, imageStream, header.
+ *   - Optical sector adjuncts: sectorPrefix/sectorSuffix/subchannel plus corrected variants & mode2Subheaders.
+ *   - Deduplication: inMemoryDdt, userDataDdt*, userDataDdtHeader, mini/big/cached secondary arrays, version tags.
+ *   - Metadata & geometry: geometryBlock, metadataBlockHeader+metadataBlock, cicmBlockHeader+cicmBlock, tracksHeader.
+ *   - Tracks & hardware: trackEntries, dataTracks, dumpHardwareHeader, dumpHardwareEntriesWithData.
+ *   - Integrity & ECC: checksums, eccCdContext, crc64Context.
+ *   - Index & dedup lookup: indexEntries (UT_array of IndexEntry), sectorHashMap (duplicate detection), deduplicate
+ *     flag.
+ *   - Write path: isWriting, currentBlockHeader, writingBuffer(+position/offset), nextBlockPosition.
+ *
+ *  Notes:
+ *   - userDataDdt points to memory-mapped or fully loaded DDT (legacy path); userDataDdtMini / userDataDdtBig
+ *     supersede.
+ *   - shift retained for backward compatibility with earlier single‑level address shift semantics.
+ *   - mappedMemoryDdtSize is meaningful only if userDataDdt references an mmapped region.
+ */
 typedef struct aaruformatContext
 {
-    uint64_t                            magic;
-    uint8_t                             libraryMajorVersion;
-    uint8_t                             libraryMinorVersion;
-    FILE                               *imageStream;
-    AaruHeaderV2                        header;
-    uint8_t                            *sectorPrefix;
-    uint8_t                            *sectorPrefixCorrected;
-    uint8_t                            *sectorSuffix;
-    uint8_t                            *sectorSuffixCorrected;
-    uint8_t                            *sectorSubchannel;
-    uint8_t                            *mode2Subheaders;
-    uint8_t                             shift;
-    bool                                inMemoryDdt;
-    uint64_t                           *userDataDdt;
-    size_t                              mappedMemoryDdtSize;
-    uint32_t                           *sectorPrefixDdt;
-    uint32_t                           *sectorSuffixDdt;
-    GeometryBlockHeader                 geometryBlock;
-    MetadataBlockHeader                 metadataBlockHeader;
-    uint8_t                            *metadataBlock;
-    TracksHeader                        tracksHeader;
-    TrackEntry                         *trackEntries;
-    CicmMetadataBlock                   cicmBlockHeader;
-    uint8_t                            *cicmBlock;
-    DumpHardwareHeader                  dumpHardwareHeader;
-    struct DumpHardwareEntriesWithData *dumpHardwareEntriesWithData;
-    ImageInfo                           imageInfo;
-    CdEccContext                       *eccCdContext;
-    uint8_t                             numberOfDataTracks;
-    TrackEntry                         *dataTracks;
-    bool                               *readableSectorTags;
-    struct CacheHeader                  blockHeaderCache;
-    struct CacheHeader                  blockCache;
-    Checksums                           checksums;
-    mediaTagEntry                      *mediaTags;
-    DdtHeader2                          userDataDdtHeader;
-    int                                 ddtVersion;
-    uint16_t                           *userDataDdtMini;
-    uint32_t                           *userDataDdtBig;
-    uint16_t                           *sectorPrefixDdtMini;
-    uint16_t                           *sectorSuffixDdtMini;
-    uint64_t                            cachedDdtOffset;
-    uint64_t                            cachedDdtPosition;
-    uint64_t                            primaryDdtOffset;
-    uint16_t                           *cachedSecondaryDdtSmall;
-    uint32_t                           *cachedSecondaryDdtBig;
-    bool                                isWriting;
-    BlockHeader                         currentBlockHeader;
-    uint8_t                            *writingBuffer;
-    int                                 currentBlockOffset;
-    crc64_ctx                          *crc64Context;
-    int                                 writingBufferPosition;
-    uint64_t                            nextBlockPosition;
-    UT_array                           *indexEntries;
-    hash_map_t                         *sectorHashMap;
-    bool                                deduplicate;
+    uint64_t     magic;                ///< File magic (AARU_MAGIC) post-open.
+    uint8_t      libraryMajorVersion;  ///< Linked library major version.
+    uint8_t      libraryMinorVersion;  ///< Linked library minor version.
+    FILE        *imageStream;          ///< Underlying FILE* stream (binary mode).
+    AaruHeaderV2 header;               ///< Parsed container header (v2).
+
+    /* Optical auxiliary buffers (NULL if not present) */
+    uint8_t *sectorPrefix;           ///< Raw per-sector prefix (e.g., sync+header) uncorrected.
+    uint8_t *sectorPrefixCorrected;  ///< Corrected variant (post error correction) if stored.
+    uint8_t *sectorSuffix;           ///< Raw per-sector suffix (EDC/ECC) uncorrected.
+    uint8_t *sectorSuffixCorrected;  ///< Corrected suffix if stored separately.
+    uint8_t *sectorSubchannel;       ///< Raw 96-byte subchannel (if captured).
+    uint8_t *mode2Subheaders;        ///< MODE2 Form1/Form2 8-byte subheaders (concatenated).
+
+    uint8_t   shift;                ///< Legacy overall shift (deprecated by data_shift/table_shift).
+    bool      inMemoryDdt;          ///< True if primary (and possibly secondary) DDT loaded.
+    uint64_t *userDataDdt;          ///< Legacy flat DDT pointer (NULL when using v2 mini/big arrays).
+    size_t    mappedMemoryDdtSize;  ///< Length of mmapped DDT if userDataDdt is mmapped.
+    uint32_t *sectorPrefixDdt;      ///< Legacy CD sector prefix DDT (deprecated by *_Mini/Big).
+    uint32_t *sectorSuffixDdt;      ///< Legacy CD sector suffix DDT.
+
+    GeometryBlockHeader                 geometryBlock;        ///< Logical geometry block (if present).
+    MetadataBlockHeader                 metadataBlockHeader;  ///< Metadata block header.
+    uint8_t                            *metadataBlock;        ///< Raw metadata UTF-16LE concatenated strings.
+    TracksHeader                        tracksHeader;         ///< Tracks header (optical) if present.
+    TrackEntry                         *trackEntries;         ///< Full track list (tracksHeader.entries elements).
+    CicmMetadataBlock                   cicmBlockHeader;      ///< CICM metadata header (if present).
+    uint8_t                            *cicmBlock;            ///< CICM XML payload.
+    DumpHardwareHeader                  dumpHardwareHeader;   ///< Dump hardware header.
+    struct DumpHardwareEntriesWithData *dumpHardwareEntriesWithData;  ///< Array of dump hardware entries + strings.
+    ImageInfo                           imageInfo;                    ///< Exposed high-level image info summary.
+
+    CdEccContext *eccCdContext;        ///< CD ECC/EDC helper tables (allocated on demand).
+    uint8_t       numberOfDataTracks;  ///< Count of tracks considered "data" (sequence 1..99 heuristics).
+    TrackEntry   *dataTracks;          ///< Filtered list of data tracks (subset of trackEntries).
+    bool         *readableSectorTags;  ///< Per-sector boolean array (optical tags read successfully?).
+
+    struct CacheHeader blockHeaderCache;  ///< LRU/Cache header for block headers.
+    struct CacheHeader blockCache;        ///< LRU/Cache header for block payloads.
+
+    Checksums      checksums;  ///< Whole-image checksums discovered.
+    mediaTagEntry *mediaTags;  ///< Hash table of extra media tags (uthash root).
+
+    DdtHeader2 userDataDdtHeader;    ///< Active user data DDT v2 header (primary table meta).
+    int        ddtVersion;           ///< DDT version in use (1=legacy, 2=v2 hierarchical).
+    uint16_t  *userDataDdtMini;      ///< DDT entries (small variant) primary/secondary current.
+    uint32_t  *userDataDdtBig;       ///< DDT entries (big variant) primary/secondary current.
+    uint16_t  *sectorPrefixDdtMini;  ///< CD sector prefix corrected DDT (small) if present.
+    uint16_t  *sectorSuffixDdtMini;  ///< CD sector suffix corrected DDT (small) if present.
+
+    uint64_t  cachedDdtOffset;          ///< File offset of currently cached secondary DDT (0=none).
+    uint64_t  cachedDdtPosition;        ///< Position index of cached secondary DDT.
+    uint64_t  primaryDdtOffset;         ///< File offset of the primary DDT v2 table.
+    uint16_t *cachedSecondaryDdtSmall;  ///< Cached secondary table (small entries) or NULL.
+    uint32_t *cachedSecondaryDdtBig;    ///< Cached secondary table (big entries) or NULL.
+
+    bool        isWriting;              ///< True if context opened/created for writing.
+    BlockHeader currentBlockHeader;     ///< Header for block currently being assembled (write path).
+    uint8_t    *writingBuffer;          ///< Accumulation buffer for current block data.
+    int         currentBlockOffset;     ///< Logical offset inside block (units: bytes or sectors depending on path).
+    crc64_ctx  *crc64Context;           ///< Opaque CRC64 context for streaming updates.
+    int         writingBufferPosition;  ///< Current size / position within writingBuffer.
+    uint64_t    nextBlockPosition;      ///< Absolute file offset where next block will be written.
+
+    UT_array   *indexEntries;   ///< Flattened index entries (UT_array of IndexEntry).
+    hash_map_t *sectorHashMap;  ///< Deduplication hash map (fingerprint->entry mapping).
+    bool        deduplicate;    ///< Storage deduplication active (duplicates coalesce).
 } aaruformatContext;

+/** \struct DumpHardwareEntriesWithData
+ *  \brief In-memory representation of a dump hardware entry plus decoded variable-length fields & extents.
+ *
+ *  All string pointers are NUL-terminated UTF-8 copies of on-disk data (or NULL if absent). extents array may be NULL
+ *  when no ranges were recorded. Freed during context teardown.
+ */
 typedef struct DumpHardwareEntriesWithData
 {
-    DumpHardwareEntry  entry;
-    struct DumpExtent *extents;
-    uint8_t           *manufacturer;
-    uint8_t           *model;
-    uint8_t           *revision;
-    uint8_t           *firmware;
-    uint8_t           *serial;
-    uint8_t           *softwareName;
-    uint8_t           *softwareVersion;
-    uint8_t           *softwareOperatingSystem;
+    DumpHardwareEntry  entry;                    ///< Fixed-size header with lengths & counts.
+    struct DumpExtent *extents;                  ///< Array of extents (entry.extents elements) or NULL.
+    uint8_t           *manufacturer;             ///< Manufacturer string (UTF-8) or NULL.
+    uint8_t           *model;                    ///< Model string or NULL.
+    uint8_t           *revision;                 ///< Hardware revision string or NULL.
+    uint8_t           *firmware;                 ///< Firmware version string or NULL.
+    uint8_t           *serial;                   ///< Serial number string or NULL.
+    uint8_t           *softwareName;             ///< Dump software name or NULL.
+    uint8_t           *softwareVersion;          ///< Dump software version or NULL.
+    uint8_t           *softwareOperatingSystem;  ///< Host operating system string or NULL.
 } DumpHardwareEntriesWithData;

 #pragma pack(push, 1)

+/** \struct DumpExtent
+ *  \brief Inclusive [start,end] logical sector range contributed by a single hardware environment.
+ */
 typedef struct DumpExtent
 {
-    uint64_t start;
-    uint64_t end;
+    uint64_t start;  ///< Starting LBA (inclusive).
+    uint64_t end;    ///< Ending LBA (inclusive); >= start.
 } DumpExtent;

 #pragma pack(pop)
--- a/include/aaruformat/crc64.h
+++ b/include/aaruformat/crc64.h
@@ -20,11 +20,49 @@
 #define LIBAARUFORMAT_CRC64_H
 #include <stdint.h>

+/** \file aaruformat/crc64.h
+ *  \brief CRC64 (ECMA-182) core context and precomputed slicing-by-4 tables.
+ *
+ *  Exposes:
+ *   - \ref crc64_ctx: minimal incremental state (initialize crc to CRC64_ECMA_SEED).
+ *   - crc64_table[4][256]: 4-way (slicing-by-4) lookup tables for high-throughput updates.
+ *   - CRC64_ECMA_POLY / CRC64_ECMA_SEED macros matching ECMA-182 (reflected polynomial, all-bits-set seed).
+ *
+ *  Algorithm characteristics:
+ *   - Polynomial: 0xC96C5795D7870F42 (reflected form).
+ *   - Seed / initial value: 0xFFFFFFFFFFFFFFFFULL.
+ *   - Final XOR: none (raw accumulator is the result).
+ *   - Bit order: reflected; least significant bit processed first.
+ *
+ *  Table layout & optimization:
+ *   Four 256-entry tables are used (slicing-by-4) allowing 4-byte chunks to be folded per iteration, reducing data
+ *   dependency chains compared to a single-table approach. This improves throughput on modern CPUs with abundant ILP.
+ *
+ *  Incremental usage (pseudo-code):
+ *  \code{.c}
+ *  crc64_ctx ctx = { .crc = CRC64_ECMA_SEED };
+ *  ctx.crc = crc64_update(ctx.crc, buf, len); // internal helper using crc64_table
+ *  // ctx.crc now holds ECMA-182 CRC64 value.
+ *  \endcode
+ *
+ *  Thread safety: The table is read-only; each thread must use its own crc64_ctx.
+ *  Endianness: Table values are host-endian 64-bit constants; algorithm result is endianness-agnostic.
+ */
+
+/** \struct crc64_ctx
+ *  \brief Minimal ECMA-182 CRC64 incremental state container (running value only).
+ */
 typedef struct
 {
-    uint64_t crc;
+    uint64_t crc;  ///< Running CRC value (initialize to CRC64_ECMA_SEED before first update).
 } crc64_ctx;

+/** \var crc64_table
+ *  \brief Precomputed slicing-by-4 ECMA-182 CRC64 lookup tables (4 * 256 * 8 = 8192 bytes).
+ *
+ *  Each row corresponds to one byte lane in a 4-byte block update; actual folding logic resides in the implementation.
+ *  Content generated offline; do not modify manually.
+ */
 const static uint64_t crc64_table[4][256] = {
    {0x0000000000000000, 0xB32E4CBE03A75F6F, 0xF4843657A840A05B, 0x47AA7AE9ABE7FF34, 0x7BD0C384FF8F5E33,
     0xC8FE8F3AFC28015C, 0x8F54F5D357CFFE68, 0x3C7AB96D5468A107, 0xF7A18709FF1EBC66, 0x448FCBB7FCB9E309,
@@ -236,7 +274,9 @@ const static uint64_t crc64_table[4][256] = {
     0x1E5CD90C6EC2440D}
 };

-#define CRC64_ECMA_POLY 0xC96C5795D7870F42
-#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF
+/** ECMA-182 reflected polynomial constant. */
+#define CRC64_ECMA_POLY 0xC96C5795D7870F42ULL
+/** ECMA-182 initial seed (all bits set). */
+#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFFULL

 #endif  // LIBAARUFORMAT_CRC64_H
--- a/include/aaruformat/errors.h
+++ b/include/aaruformat/errors.h
@@ -19,35 +19,136 @@
 #ifndef LIBAARUFORMAT_ERRORS_H
 #define LIBAARUFORMAT_ERRORS_H

-#define AARUF_ERROR_NOT_AARUFORMAT            (-1)
-#define AARUF_ERROR_FILE_TOO_SMALL            (-2)
-#define AARUF_ERROR_INCOMPATIBLE_VERSION      (-3)
-#define AARUF_ERROR_CANNOT_READ_INDEX         (-4)
-#define AARUF_ERROR_SECTOR_OUT_OF_BOUNDS      (-5)
-#define AARUF_ERROR_CANNOT_READ_HEADER        (-6)
-#define AARUF_ERROR_CANNOT_READ_BLOCK         (-7)
-#define AARUF_ERROR_UNSUPPORTED_COMPRESSION   (-8)
-#define AARUF_ERROR_NOT_ENOUGH_MEMORY         (-9)
-#define AARUF_ERROR_BUFFER_TOO_SMALL          (-10)
-#define AARUF_ERROR_MEDIA_TAG_NOT_PRESENT     (-11)
-#define AARUF_ERROR_INCORRECT_MEDIA_TYPE      (-12)
-#define AARUF_ERROR_TRACK_NOT_FOUND           (-13)
-#define AARUF_ERROR_REACHED_UNREACHABLE_CODE  (-14)
-#define AARUF_ERROR_INVALID_TRACK_FORMAT      (-15)
-#define AARUF_ERROR_SECTOR_TAG_NOT_PRESENT    (-16)
-#define AARUF_ERROR_CANNOT_DECOMPRESS_BLOCK   (-17)
-#define AARUF_ERROR_INVALID_BLOCK_CRC         (-18)
-#define AARUF_ERROR_CANNOT_CREATE_FILE        (-19)
-#define AARUF_ERROR_INVALID_APP_NAME_LENGTH   (-20)
-#define AARUF_ERROR_CANNOT_WRITE_HEADER       (-21)
-#define AARUF_READ_ONLY                       (-22)
-#define AARUF_ERROR_CANNOT_WRITE_BLOCK_HEADER (-23)
-#define AARUF_ERROR_CANNOT_WRITE_BLOCK_DATA   (-24)
-#define AARUF_ERROR_CANNOT_SET_DDT_ENTRY      (-25)
+/** \file aaruformat/errors.h
+ *  \brief Public error and status code definitions for libaaruformat.
+ *
+ *  Negative values represent fatal / non-recoverable error conditions returned by library functions.
+ *  Non-negative values (>=0) are either success (0) or sector-level status annotations used when
+ *  decoding per-sector metadata (e.g. a sector not dumped or with corrected/unrecoverable errors).
+ *
+ *  Usage guidelines:
+ *   - Always test for < 0 to check generic failure without enumerating all codes.
+ *   - Use exact comparisons for caller-specific handling (e.g. retry on AARUF_ERROR_CANNOT_READ_BLOCK).
+ *   - Sector status codes are never returned as fatal function results; they appear in output parameters
+ *     populated by read/identify routines.
+ *
+ *  Helper: see aaruformat_error_string() for a human-readable textual description suitable for logs.
+ */

-#define AARUF_STATUS_OK                 0
-#define AARUF_STATUS_SECTOR_NOT_DUMPED  1
-#define AARUF_STATUS_SECTOR_WITH_ERRORS 2
-#define AARUF_STATUS_SECTOR_DELETED     3
+/** \name Fatal / library-level error codes (negative)
+ *  @{ */
+#define AARUF_ERROR_NOT_AARUFORMAT            (-1)   ///< Input file/stream failed magic or structural validation.
+#define AARUF_ERROR_FILE_TOO_SMALL            (-2)   ///< File size insufficient for mandatory header / structures.
+#define AARUF_ERROR_INCOMPATIBLE_VERSION      (-3)   ///< Image uses a newer incompatible on-disk version.
+#define AARUF_ERROR_CANNOT_READ_INDEX         (-4)   ///< Index block unreadable / truncated / bad identifier.
+#define AARUF_ERROR_SECTOR_OUT_OF_BOUNDS      (-5)   ///< Requested logical sector outside media bounds.
+#define AARUF_ERROR_CANNOT_READ_HEADER        (-6)   ///< Failed to read container header.
+#define AARUF_ERROR_CANNOT_READ_BLOCK         (-7)   ///< Generic block read failure (seek/read error).
+#define AARUF_ERROR_UNSUPPORTED_COMPRESSION   (-8)   ///< Block marked with unsupported compression algorithm.
+#define AARUF_ERROR_NOT_ENOUGH_MEMORY         (-9)   ///< Memory allocation failure (critical).
+#define AARUF_ERROR_BUFFER_TOO_SMALL          (-10)  ///< Caller-supplied buffer insufficient for data.
+#define AARUF_ERROR_MEDIA_TAG_NOT_PRESENT     (-11)  ///< Requested media tag absent.
+#define AARUF_ERROR_INCORRECT_MEDIA_TYPE      (-12)  ///< Operation incompatible with image media type.
+#define AARUF_ERROR_TRACK_NOT_FOUND           (-13)  ///< Referenced track number not present.
+#define AARUF_ERROR_REACHED_UNREACHABLE_CODE  (-14)  ///< Internal logic assertion hit unexpected path.
+#define AARUF_ERROR_INVALID_TRACK_FORMAT      (-15)  ///< Track metadata internally inconsistent or malformed.
+#define AARUF_ERROR_SECTOR_TAG_NOT_PRESENT    (-16)  ///< Requested sector tag (e.g. subchannel/prefix) not stored.
+#define AARUF_ERROR_CANNOT_DECOMPRESS_BLOCK   (-17)  ///< Decompression routine failed or size mismatch.
+#define AARUF_ERROR_INVALID_BLOCK_CRC         (-18)  ///< CRC64 mismatch indicating corruption.
+#define AARUF_ERROR_CANNOT_CREATE_FILE        (-19)  ///< Output file could not be created / opened for write.
+#define AARUF_ERROR_INVALID_APP_NAME_LENGTH   (-20)  ///< Application name field length invalid (sanity limit).
+#define AARUF_ERROR_CANNOT_WRITE_HEADER       (-21)  ///< Failure writing container header.
+#define AARUF_READ_ONLY                       (-22)  ///< Operation requires write mode but context is read-only.
+#define AARUF_ERROR_CANNOT_WRITE_BLOCK_HEADER (-23)  ///< Failure writing block header.
+#define AARUF_ERROR_CANNOT_WRITE_BLOCK_DATA   (-24)  ///< Failure writing block payload.
+#define AARUF_ERROR_CANNOT_SET_DDT_ENTRY      (-25)  ///< Failed to encode/store a DDT entry (overflow or IO).
+/** @} */
+
+/** \name Non-fatal sector status codes (non-negative)
+ *  Returned through output parameters to describe individual sector state.
+ *  @{ */
+#define AARUF_STATUS_OK                 0  ///< Sector present and read without uncorrectable errors.
+#define AARUF_STATUS_SECTOR_NOT_DUMPED  1  ///< Sector not captured (gap / missing / intentionally skipped).
+#define AARUF_STATUS_SECTOR_WITH_ERRORS 2  ///< Sector present but with unrecoverable or flagged errors.
+#define AARUF_STATUS_SECTOR_DELETED     3  ///< Sector logically marked deleted (e.g. filesystem deleted area).
+
+/** @} */
+
+/** \brief Convert an AaruFormat error or status code to a static human-readable string.
+ *
+ *  Designed for diagnostics / logging; returns a constant string literal. Unknown codes yield
+ *  "Unknown error/status". This helper is inline to avoid adding a separate translation unit.
+ *
+ *  \param code Error (<0) or status (>=0) numeric code.
+ *  \return Constant C string describing the code.
+ */
+static inline const char *aaruformat_error_string(int code)
+{
+    switch(code)
+    {
+        /* Errors */
+        case AARUF_ERROR_NOT_AARUFORMAT:
+            return "Not an AaruFormat image";
+        case AARUF_ERROR_FILE_TOO_SMALL:
+            return "File too small";
+        case AARUF_ERROR_INCOMPATIBLE_VERSION:
+            return "Incompatible image version";
+        case AARUF_ERROR_CANNOT_READ_INDEX:
+            return "Cannot read index";
+        case AARUF_ERROR_SECTOR_OUT_OF_BOUNDS:
+            return "Sector out of bounds";
+        case AARUF_ERROR_CANNOT_READ_HEADER:
+            return "Cannot read header";
+        case AARUF_ERROR_CANNOT_READ_BLOCK:
+            return "Cannot read block";
+        case AARUF_ERROR_UNSUPPORTED_COMPRESSION:
+            return "Unsupported compression";
+        case AARUF_ERROR_NOT_ENOUGH_MEMORY:
+            return "Not enough memory";
+        case AARUF_ERROR_BUFFER_TOO_SMALL:
+            return "Buffer too small";
+        case AARUF_ERROR_MEDIA_TAG_NOT_PRESENT:
+            return "Media tag not present";
+        case AARUF_ERROR_INCORRECT_MEDIA_TYPE:
+            return "Incorrect media type";
+        case AARUF_ERROR_TRACK_NOT_FOUND:
+            return "Track not found";
+        case AARUF_ERROR_REACHED_UNREACHABLE_CODE:
+            return "Internal unreachable code reached";
+        case AARUF_ERROR_INVALID_TRACK_FORMAT:
+            return "Invalid track format";
+        case AARUF_ERROR_SECTOR_TAG_NOT_PRESENT:
+            return "Sector tag not present";
+        case AARUF_ERROR_CANNOT_DECOMPRESS_BLOCK:
+            return "Cannot decompress block";
+        case AARUF_ERROR_INVALID_BLOCK_CRC:
+            return "Invalid block CRC";
+        case AARUF_ERROR_CANNOT_CREATE_FILE:
+            return "Cannot create file";
+        case AARUF_ERROR_INVALID_APP_NAME_LENGTH:
+            return "Invalid application name length";
+        case AARUF_ERROR_CANNOT_WRITE_HEADER:
+            return "Cannot write header";
+        case AARUF_READ_ONLY:
+            return "Read-only context";
+        case AARUF_ERROR_CANNOT_WRITE_BLOCK_HEADER:
+            return "Cannot write block header";
+        case AARUF_ERROR_CANNOT_WRITE_BLOCK_DATA:
+            return "Cannot write block data";
+        case AARUF_ERROR_CANNOT_SET_DDT_ENTRY:
+            return "Cannot set DDT entry";
+
+        /* Status */
+        case AARUF_STATUS_OK:
+            return "OK";
+        case AARUF_STATUS_SECTOR_NOT_DUMPED:
+            return "Sector not dumped";
+        case AARUF_STATUS_SECTOR_WITH_ERRORS:
+            return "Sector with errors";
+        case AARUF_STATUS_SECTOR_DELETED:
+            return "Sector deleted";
+    }
+    return "Unknown error/status";
+}

 #endif  // LIBAARUFORMAT_ERRORS_H
--- a/include/aaruformat/hash_map.h
+++ b/include/aaruformat/hash_map.h
@@ -22,22 +22,40 @@
 #include <stdbool.h>
 #include <stdlib.h>

+/** \struct kv_pair_t
+ *  \brief Single key/value slot used internally by the open-addressing hash map.
+ *
+ *  Collision resolution strategy (implementation detail): linear or quadratic probing (see source). An empty
+ *  slot is typically represented by a key sentinel (e.g. 0 or another reserved value) – callers never interact
+ *  with individual kv_pair_t entries directly; they are managed through the map API.
+ */
 typedef struct
 {
-    uint64_t key;
-    uint64_t value;
+    uint64_t key;    ///< Stored key (64-bit). May use a reserved sentinel to denote an empty slot.
+    uint64_t value;  ///< Associated value payload (64-bit) stored alongside the key.
 } kv_pair_t;

+/** \struct hash_map_t
+ *  \brief Minimal open-addressing hash map for 64-bit key/value pairs used in deduplication lookup.
+ *
+ *  Fields:
+ *   - table: Pointer to contiguous array of kv_pair_t entries (capacity == size).
+ *   - size:  Total number of slots allocated in table (must be >= 1).
+ *   - count: Number of occupied (non-empty) slots currently in use.
+ *
+ *  Load factor guidance: insert performance degrades as count approaches size; callers may rebuild with a larger
+ *  size when (count * 100 / size) exceeds a chosen threshold (e.g. 70 – 80%). No automatic resizing is performed.
+ */
 typedef struct
 {
-    kv_pair_t *table;
-    size_t     size;
-    size_t     count;
+    kv_pair_t *table;  ///< Array of key/value slots of length == size.
+    size_t     size;   ///< Allocated slot capacity of table.
+    size_t     count;  ///< Number of active (filled) entries.
 } hash_map_t;

 hash_map_t *create_map(size_t size);
-void free_map(hash_map_t *map);
-bool insert_map(hash_map_t *map, uint64_t key, uint64_t value);
-bool lookup_map(const hash_map_t *map, uint64_t key, uint64_t *out_value);
+void        free_map(hash_map_t *map);
+bool        insert_map(hash_map_t *map, uint64_t key, uint64_t value);
+bool        lookup_map(const hash_map_t *map, uint64_t key, uint64_t *out_value);

 #endif  // LIBAARUFORMAT_HASH_MAP_H
--- a/include/aaruformat/lru.h
+++ b/include/aaruformat/lru.h
@@ -8,49 +8,49 @@
 #include <stdint.h>
 #include <uthash.h>

+/** \struct CacheEntry
+ *  \brief Single hash entry in the in-memory cache.
+ *
+ *  This structure is managed by uthash (a macro-based hash table that resolves collisions by bucket chaining).
+ *  It represents one key/value association tracked by the cache. The cache implementation supports
+ *  both string keys (null-terminated) and 64-bit numeric keys; numeric keys are stored by casting
+ *  to a temporary string buffer upstream (see implementation). Callers do not allocate or free
+ *  individual entries directly; use the cache API helpers.
+ *
+ *  Lifetime & ownership:
+ *   - key points either to a heap-allocated C string owned by the cache or to a short-lived buffer
+ *     duplicated internally; callers must not free it after insertion.
+ *   - value is an opaque pointer supplied by caller; the cache does not take ownership of the pointee
+ *     (caller remains responsible for the underlying object unless documented otherwise).
+ */
 struct CacheEntry
 {
-    char          *key;
-    void          *value;
-    UT_hash_handle hh;
+    char          *key;    ///< Null-terminated key string (unique within the cache). May encode numeric keys.
+    void          *value;  ///< Opaque value pointer associated with key (not freed automatically on eviction/clear).
+    UT_hash_handle hh;  ///< uthash handle linking this entry into the hash table (uthash allows it at any position).
 };

+/** \struct CacheHeader
+ *  \brief Cache top-level descriptor encapsulating the hash table root and capacity limit.
+ *
+ *  The cache enforces an upper bound (max_items) on the number of tracked entries. Insert helpers are expected
+ *  to evict (or refuse) when the limit is exceeded (strategy defined in implementation; current behavior may be
+ *  simple non-evicting if not yet implemented as a true LRU). The cache pointer holds the uthash root (NULL when
+ * empty).
+ *
+ *  Fields:
+ *   - max_items: Maximum number of entries allowed; 0 means "no explicit limit" if accepted by implementation.
+ *   - cache:     uthash root pointer; NULL when the cache is empty.
+ */
 struct CacheHeader
 {
-    uint64_t           max_items;
-    struct CacheEntry *cache;
+    uint64_t max_items;        ///< Hard limit for number of entries (policy: enforce/ignore depends on implementation).
+    struct CacheEntry *cache;  ///< Hash root (uthash). NULL when empty.
 };

-/**
- * Finds an item in the specified cache
- * @param cache Pointer to the cache header
- * @param key Key
- * @return Value if found, NULL if not
- */
 void *find_in_cache(struct CacheHeader *cache, const char *key);
-
-/**
- * Adds an item to the specified cache
- * @param cache Pointer to the cache header
- * @param key Key
- * @param value Value
- */
-void add_to_cache(struct CacheHeader *cache, const char *key, void *value);
-
-/**
- * Finds an item in the specified cache using a 64-bit integer key
- * @param cache Pointer to the cache header
- * @param key Key
- * @return Value if found, NULL if not
- */
+void  add_to_cache(struct CacheHeader *cache, const char *key, void *value);
 void *find_in_cache_uint64(struct CacheHeader *cache, uint64_t key);
-
-/**
- * Adds an item to the specified cache using a 64-bit integer key
- * @param cache Pointer to the cache header
- * @param key Key
- * @param value Value
- */
-void add_to_cache_uint64(struct CacheHeader *cache, uint64_t key, void *value);
+void  add_to_cache_uint64(struct CacheHeader *cache, uint64_t key, void *value);

 #endif  // LIBAARUFORMAT_LRU_H
--- a/include/aaruformat/structs/checksum.h
+++ b/include/aaruformat/structs/checksum.h
@@ -19,29 +19,80 @@
 #ifndef LIBAARUFORMAT_CHECKSUM_H
 #define LIBAARUFORMAT_CHECKSUM_H

+#include <stdint.h>  // Fixed-width integer types for on-disk structures.
+
 #pragma pack(push, 1)

 /**
- *     Checksum block, contains a checksum of all user data sectors (except for optical discs that is 2352 uint8_ts raw
- *     sector if available
- *  */
-typedef struct ChecksumHeader {
-    /**Identifier, <see cref="BlockType.ChecksumBlock" /> */
-    uint32_t identifier;
-    /**Length in uint8_ts of the block */
-    uint32_t length;
-    /**How many checksums follow */
-    uint8_t entries;
+ * \file aaruformat/structs/checksum.h
+ * \brief On-disk layout definitions for the checksum block (BlockType::ChecksumBlock).
+ *
+ * A checksum block stores one or more whole-image (user data) checksums. For optical media the
+ * user data definition follows the format's raw sector rules (e.g. 2352-byte raw sector when available).
+ *
+ * Binary layout (all integers are little-endian, structure is packed):
+ *
+ *  +------------------------------+-------------------------------+
+ *  | Field                        | Size (bytes)                  |
+ *  +==============================+===============================+
+ *  | ChecksumHeader               | sizeof(ChecksumHeader)=9      |
+ *  |   identifier                 | 4 (BlockType::ChecksumBlock)  |
+ *  |   length                     | 4 (payload bytes that follow) |
+ *  |   entries                    | 1 (number of checksum entries)|
+ *  +------------------------------+-------------------------------+
+ *  | Repeated for each entry:                                     |
+ *  |   ChecksumEntry              | sizeof(ChecksumEntry)=5       |
+ *  |     type                     | 1 (ChecksumAlgorithm)         |
+ *  |     length                   | 4 (digest length)             |
+ *  |   digest bytes               | length                        |
+ *  +------------------------------+-------------------------------+
+ *
+ * Thus, the payload size (ChecksumHeader.length) MUST equal the sum over all entries of:
+ *   sizeof(ChecksumEntry) + entry.length.
+ *
+ * Typical digest lengths:
+ *  - Md5: 16 bytes
+ *  - Sha1: 20 bytes
+ *  - Sha256: 32 bytes
+ *  - SpamSum: variable length ASCII, NOT null-terminated on disk (a terminating '\0' may be appended in memory).
+ *
+ * \warning The structures are packed; never rely on host compiler default padding or directly casting from a buffer
+ *          without ensuring correct endianness if porting to big-endian systems (current implementation assumes LE).
+ *
+ * \see BlockType
+ * \see ChecksumAlgorithm
+ */
+
+/**
+ * \struct ChecksumHeader
+ * \brief Header that precedes the sequence of checksum entries for a checksum block.
+ *
+ * After this header, exactly \ref ChecksumHeader::length bytes follow containing \ref ChecksumHeader::entries
+ * consecutive \ref ChecksumEntry records, each immediately followed by its digest payload.
+ */
+typedef struct ChecksumHeader
+{
+    uint32_t identifier;  ///< Block identifier, must be BlockType::ChecksumBlock.
+    uint32_t length;      ///< Length in bytes of the payload (all entries + their digest data, excluding this header).
+    uint8_t  entries;     ///< Number of checksum entries that follow in the payload.
 } ChecksumHeader;

-/**Checksum entry, followed by checksum data itself */
-typedef struct ChecksumEntry {
-    /**Checksum algorithm */
-    uint8_t type;
-    /**Length in uint8_ts of checksum that follows this structure */
-    uint32_t length;
+/**
+ * \struct ChecksumEntry
+ * \brief Per-checksum metadata immediately followed by the digest / signature bytes.
+ *
+ * For fixed-length algorithms the \ref length MUST match the known digest size. For SpamSum it is variable.
+ * The bytes immediately following this structure (not null-terminated) constitute the digest and are exactly
+ * \ref length bytes long.
+ *
+ * Order of entries is not mandated; readers should scan all entries and match by \ref type.
+ */
+typedef struct ChecksumEntry
+{
+    uint8_t  type;    ///< Algorithm used (value from \ref ChecksumAlgorithm).
+    uint32_t length;  ///< Length in bytes of the digest that immediately follows this structure.
 } ChecksumEntry;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_CHECKSUM_H
+#endif  // LIBAARUFORMAT_CHECKSUM_H
--- a/include/aaruformat/structs/data.h
+++ b/include/aaruformat/structs/data.h
@@ -19,37 +19,82 @@
 #ifndef LIBAARUFORMAT_DATA_H
 #define LIBAARUFORMAT_DATA_H

+#include <stdint.h>  // Fixed width integer types used in on-disk packed structs.
+
 #pragma pack(push, 1)

-/**Block header, precedes block data */
-typedef struct BlockHeader {
-    /**Identifier, <see cref="BlockType.DataBlock" /> */
-    uint32_t identifier;
-    /**Type of data contained by this block */
-    uint16_t type;
-    /**Compression algorithm used to compress the block */
-    uint16_t compression;
-    /**Size in uint8_ts of each sector contained in this block */
-    uint32_t sectorSize;
-    /**Compressed length for the block */
-    uint32_t cmpLength;
-    /**Uncompressed length for the block */
-    uint32_t length;
-    /**CRC64-ECMA of the compressed block */
-    uint64_t cmpCrc64;
-    /**CRC64-ECMA of the uncompressed block */
-    uint64_t crc64;
+/**
+ * \file aaruformat/structs/data.h
+ * \brief On-disk layout structures for data-bearing and geometry blocks.
+ *
+ * These packed structures describe the headers that precede variable-length payloads
+ * inside blocks whose identifiers are enumerated in \ref BlockType.
+ * All integer fields are stored little-endian on disk. The library currently assumes a
+ * little-endian host; if ported to a big-endian architecture explicit byte swapping will be required.
+ *
+ * Layout of a data block (BlockType::DataBlock):
+ *   BlockHeader (sizeof(BlockHeader) bytes)
+ *   Compressed payload (cmpLength bytes)
+ *
+ * Payload decoding:
+ *   - Apply the algorithm indicated by \ref BlockHeader::compression (\ref CompressionType) to the
+ *     cmpLength bytes following the header to obtain exactly \ref BlockHeader::length bytes.
+ *   - The uncompressed data MUST be an integer multiple of \ref BlockHeader::sectorSize.
+ *   - A CRC64-ECMA is provided for both compressed (cmpCrc64) and uncompressed (crc64) forms to allow
+ *     validation at either stage of the pipeline.
+ *
+ * Geometry block (BlockType::GeometryBlock) has a \ref GeometryBlockHeader followed by no additional
+ * fixed payload in the current format version; it conveys legacy CHS-style logical geometry metadata.
+ *
+ * \warning These structs are packed; do not take their address and assume natural alignment.
+ * \see BlockType
+ * \see DataType
+ * \see CompressionType
+ */
+
+/**
+ * \struct BlockHeader
+ * \brief Header preceding the compressed data payload of a data block (BlockType::DataBlock).
+ *
+ * Invariants:
+ *  - cmpLength > 0 unless length == 0 (empty block)
+ *  - length == 0 implies cmpLength == 0
+ *  - If compression == CompressionType::None then cmpLength == length
+ *  - length % sectorSize == 0
+ *
+ * Validation strategy (recommended for readers):
+ *  1. Verify identifier == BlockType::DataBlock.
+ *  2. Verify sectorSize is non-zero and a power-of-two or a commonly used size (512/1024/2048/4096/2352).
+ *  3. Verify invariants above and CRCs after (de)compression.
+ */
+typedef struct BlockHeader
+{
+    uint32_t identifier;   ///< Block identifier, must be BlockType::DataBlock.
+    uint16_t type;         ///< Logical data classification (value from \ref DataType).
+    uint16_t compression;  ///< Compression algorithm used (value from \ref CompressionType).
+    uint32_t sectorSize;   ///< Size in bytes of each logical sector represented in this block.
+    uint32_t cmpLength;    ///< Size in bytes of the compressed payload immediately following this header.
+    uint32_t length;       ///< Size in bytes of the uncompressed payload resulting after decompression.
+    uint64_t cmpCrc64;     ///< CRC64-ECMA of the compressed payload (cmpLength bytes).
+    uint64_t crc64;        ///< CRC64-ECMA of the uncompressed payload (length bytes).
 } BlockHeader;

-/**Geometry block, contains physical geometry information */
-typedef struct GeometryBlockHeader {
-    /**Identifier, <see cref="BlockType.GeometryBlock" /> */
-    uint32_t identifier;
-    uint32_t cylinders;
-    uint32_t heads;
-    uint32_t sectorsPerTrack;
+/**
+ * \struct GeometryBlockHeader
+ * \brief Legacy CHS style logical geometry metadata (BlockType::GeometryBlock).
+ *
+ * Total logical sectors implied by this header is cylinders * heads * sectorsPerTrack.
+ * Sector size is not included here and must be derived from context (e.g., accompanying metadata
+ * or defaulting to 512 for many block devices).
+ */
+typedef struct GeometryBlockHeader
+{
+    uint32_t identifier;       ///< Block identifier, must be BlockType::GeometryBlock.
+    uint32_t cylinders;        ///< Number of cylinders.
+    uint32_t heads;            ///< Number of heads (tracks per cylinder).
+    uint32_t sectorsPerTrack;  ///< Number of sectors per track.
 } GeometryBlockHeader;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_DATA_H
+#endif  // LIBAARUFORMAT_DATA_H
--- a/include/aaruformat/structs/ddt.h
+++ b/include/aaruformat/structs/ddt.h
@@ -19,71 +19,149 @@
 #ifndef LIBAARUFORMAT_DDT_H
 #define LIBAARUFORMAT_DDT_H

+#include <stdint.h>  // fixed-width types for on-disk layout
+
 #pragma pack(push, 1)

-/**Header for a deduplication table. Table follows it */
+/** \file aaruformat/structs/ddt.h
+ *  \brief On-disk headers for Deduplication Data Tables (DDT) versions 1 and 2.
+ *
+ * A DDT maps logical sector indices (LBAs within an image's logical address space) to (block, sector)
+ * pairs plus a base file offset, enabling content de-duplication inside the container. Two generations
+ * exist:
+ *  - DdtHeader  ("version 1") flat table.
+ *  - DdtHeader2 ("version 2") hierarchical, multi-level subtables for scalability.
+ *
+ * All integers are little-endian. Structures are packed (1-byte alignment). When porting to a big-endian
+ * architecture callers must perform byte swapping. Do not rely on compiler-introduced padding.
+ *
+ * Compression of the table body (entries array) follows the same conventions as data blocks: first
+ * decompress according to the compression enum, then validate CRC64 for uncompressed contents.
+ *
+ * Related enumerations:
+ *  - BlockType::DeDuplicationTable / BlockType::DeDuplicationTable2
+ *  - CompressionType
+ *  - DataType
+ *  - DdtSizeType (for DdtHeader2::sizeType)
+ */
+
+/**
+ * \struct DdtHeader
+ * \brief Header preceding a version 1 (flat) deduplication table body.
+ *
+ * Immediately after this header there are \ref entries table records (compressed if \ref compression != None).
+ * Each table record encodes a pointer using a file byte-offset component and a sector offset inside a block:
+ *   logicalEntryValue = ((uint64_t)fileByteOffset << shift) + sectorOffsetWithinBlock
+ * where fileByteOffset is measured in bytes (granularity depends on shift) and sectorOffsetWithinBlock is
+ * relative to the start of the referenced data block. The sector size must be taken from the corresponding
+ * data block(s) (see BlockHeader::sectorSize) or higher-level metadata.
+ *
+ * Invariants:
+ *  - cmpLength == length if compression == CompressionType::None
+ *  - length % (entrySize) == 0 after decompression (implementation-defined entry size)
+ *  - entries * entrySize == length
+ *  - entries > 0 implies length > 0
+ */
 typedef struct DdtHeader
 {
-    /**Identifier, <see cref="BlockType.DeDuplicationTable" /> */
-    uint32_t identifier;
-    /**Type of data pointed by this DDT */
-    uint16_t type;
-    /**Compression algorithm used to compress the DDT */
-    uint16_t compression;
-    /**Each entry is ((uint8_t offset in file) &lt;&lt; shift) + (sector offset in block) */
-    uint8_t  shift;
-    /**How many entries are in the table */
-    uint64_t entries;
-    /**Compressed length for the DDT */
-    uint64_t cmpLength;
-    /**Uncompressed length for the DDT */
-    uint64_t length;
-    /**CRC64-ECMA of the compressed DDT */
-    uint64_t cmpCrc64;
-    /**CRC64-ECMA of the uncompressed DDT */
-    uint64_t crc64;
+    uint32_t identifier;   ///< Block identifier, must be BlockType::DeDuplicationTable.
+    uint16_t type;         ///< Data classification (\ref DataType) for sectors referenced by this table.
+    uint16_t compression;  ///< Compression algorithm for the table body (\ref CompressionType).
+    uint8_t  shift;        ///< Left shift applied to per-entry file offset component forming logicalEntryValue.
+    uint64_t entries;      ///< Number of deduplication entries contained in (uncompressed) table.
+    uint64_t cmpLength;    ///< Size in bytes of compressed entries payload.
+    uint64_t length;       ///< Size in bytes of uncompressed entries payload.
+    uint64_t cmpCrc64;     ///< CRC64-ECMA of the compressed payload.
+    uint64_t crc64;        ///< CRC64-ECMA of the uncompressed payload.
 } DdtHeader;

+/**
+ * \struct DdtHeader2
+ * \brief Header preceding a version 2 hierarchical deduplication table.
+ *
+ * Version 2 introduces multi-level tables to efficiently address very large images by subdividing
+ * the logical address space. Tables at higher levels partition regions; leaves contain direct
+ * (block, sector) entry mappings. Navigation uses \ref tableLevel (0 = root) and \ref levels (total depth).
+ *
+ * Logical sector (LBA) mapping (actual implementation in decode_ddt_{single,multi}_level_v2):
+ *  1. Let L be the requested logical sector (can be negative externally). Internal index I = L + negative.
+ *     Valid range: 0 <= I < blocks. (Total user-data sectors often = blocks - negative - overflow.)
+ *  2. If tableShift == 0 (single-level): entryIndex = I.
+ *     Else (multi-level):
+ *        itemsPerPrimaryEntry = 1 << tableShift
+ *        primaryIndex  = I / itemsPerPrimaryEntry
+ *        secondaryIndex = I % itemsPerPrimaryEntry
+ *        The primary table entry at primaryIndex yields a secondary DDT file offset (scaled by 2^blockAlignmentShift),
+ *        whose table entries are then indexed by secondaryIndex.
+ *  3. Read raw DDT entry value E (16-bit if sizeType == SmallDdtSizeType, 32-bit if BigDdtSizeType).
+ *  4. If E == 0: sector_status = SectorStatusNotDumped; offset=block_offset=0.
+ *     Otherwise extract:
+ *        statusBits = E >> 12  (small) or E >> 28 (big)
+ *        baseBits   = E & 0x0FFF (small) or E & 0x0FFFFFFF (big)
+ *        sectorOffsetWithinBlock = baseBits & ((1 << dataShift) - 1)
+ *        blockIndex              = baseBits >> dataShift
+ *        block_offset (bytes)    = blockIndex << blockAlignmentShift
+ *        offset (sector units inside block) = sectorOffsetWithinBlock
+ *  5. The consumer combines block_offset, offset, and the (external) logical sector size to locate data.
+ *
+ * Field roles:
+ *  - negative:   Count of leading negative LBAs supported; added to L to form internal index.
+ *  - overflow:   Count of trailing LBAs beyond the user area upper bound that are still dumped and have
+ *                normal DDT entries (e.g. optical disc lead-out). Symmetrical to 'negative' on the high end.
+ *  - start:      For secondary tables, base internal index covered (written when creating new tables). Current decoding
+ *                logic does not consult this field (future-proof placeholder).
+ *  - blockAlignmentShift: log2 alignment of stored data blocks (byte granularity of block_offset).
+ *  - dataShift:  Number of low bits of baseBits holding sectorOffsetWithinBlock (2^dataShift sectors per blockIndex).
+ *  - tableShift: log2 of number of logical sectors covered by a single primary-table pointer (multi-level only).
+ *  - sizeType:   Selects entry width (small=16b, big=32b) impacting available bits for blockIndex+offset.
+ *
+ * Notes & current limitations:
+ *  - User area sector count = blocks - negative - overflow.
+ *  - Valid external LBA range exposed by the image = [-negative, (blocks - negative - 1)].
+ *    * Negative range: [-negative, -1]
+ *    * User area range: [0, (blocks - negative - overflow - 1)]
+ *    * Overflow range: [(blocks - negative - overflow), (blocks - negative - 1)]
+ *  - Both negative and overflow ranges are stored with normal DDT entries (if present), enabling complete
+ *    reproduction of lead-in / lead-out or similar padding regions.
+ *  - start is presently ignored during decoding; integrity checks against it may be added in future revisions.
+ *  - No masking is applied to I besides array bounds; callers must ensure L is within representable range.
+ *
+ * Example (Compact Disc):
+ *  Disc has 360000 user sectors. Lead-in captured as 15000 negative sectors and lead-out as 15000 overflow sectors.
+ *    negative = 15000
+ *    overflow = 15000
+ *    user sectors = 360000
+ *    blocks (internal span) = negative + user + overflow = 390000
+ *    External LBA spans: -15000 .. 374999
+ *      * Negative: -15000 .. -1 (15000 sectors)
+ *      * User:      0 .. 359999 (360000 sectors)
+ *      * Overflow:  360000 .. 374999 (15000 sectors)
+ *  Internal index I for any external L is I = L + negative.
+ *  User area sector count reported to callers (ctx->imageInfo.Sectors) = blocks - negative - overflow = 360000.
+ */
 typedef struct DdtHeader2
 {
-    /**Identifier, <see cref="BlockType.DeDuplicationTable" /> */
-    uint32_t identifier;
-    /**Type of data pointed by this DDT */
-    uint16_t type;
-    /**Compression algorithm used to compress the DDT */
-    uint16_t compression;
-    /**How many levels of subtables are present */
-    uint8_t  levels;
-    /**Which level this table belongs to */
-    uint8_t  tableLevel;
-    /**Pointer to absolute byte offset in file where the previous level table is located */
-    uint64_t previousLevelOffset;
-    /**Negative displacement of LBAs */
-    uint16_t negative;
-    /**Number of blocks in media */
-    uint64_t blocks;
-    /**Positive overflow displacement of LBAs */
-    uint16_t overflow;
-    /**First LBA contained in this table */
-    uint64_t start;
-    /**Block alignment boundaries */
-    uint8_t  blockAlignmentShift;
-    /**Data shift */
-    uint8_t  dataShift;
-    /**Table shift */
-    uint8_t  tableShift;
-    /**Size type */
-    uint8_t  sizeType;
-    /**Entries in this table */
-    uint64_t entries;
-    /**Compressed length for the DDT */
-    uint64_t cmpLength;
-    /**Uncompressed length for the DDT */
-    uint64_t length;
-    /**CRC64-ECMA of the compressed DDT */
-    uint64_t cmpCrc64;
-    /**CRC64-ECMA of the uncompressed DDT */
-    uint64_t crc64;
+    uint32_t identifier;           ///< Block identifier, must be BlockType::DeDuplicationTable2.
+    uint16_t type;                 ///< Data classification (\ref DataType) for sectors referenced by this table.
+    uint16_t compression;          ///< Compression algorithm for this table body (\ref CompressionType).
+    uint8_t  levels;               ///< Total number of hierarchy levels (root depth); > 0.
+    uint8_t  tableLevel;           ///< Zero-based level index of this table (0 = root, increases downward).
+    uint64_t previousLevelOffset;  ///< Absolute byte offset of the parent (previous) level table; 0 if root.
+    uint16_t negative;             ///< Leading negative LBA count; added to external L to build internal index.
+    uint64_t blocks;               ///< Total internal span (negative + usable + overflow) in logical sectors.
+    uint16_t overflow;  ///< Trailing dumped sectors beyond user area (overflow range), still mapped with entries.
+    uint64_t
+            start;  ///< Base internal index covered by this table (used for secondary tables; currently informational).
+    uint8_t blockAlignmentShift;  ///< 2^blockAlignmentShift = block alignment boundary in bytes.
+    uint8_t dataShift;            ///< 2^dataShift = sectors represented per increment in blockIndex field.
+    uint8_t tableShift;  ///< 2^tableShift = number of logical sectors per primary entry (multi-level only; 0 for
+                         ///< single-level or secondary tables).
+    uint8_t sizeType;    ///< Entry size variant (\ref DdtSizeType) controlling width of E.
+    uint64_t entries;    ///< Number of entries contained in (uncompressed) table payload.
+    uint64_t cmpLength;  ///< Compressed payload size in bytes.
+    uint64_t length;     ///< Uncompressed payload size in bytes.
+    uint64_t cmpCrc64;   ///< CRC64-ECMA of compressed table payload.
+    uint64_t crc64;      ///< CRC64-ECMA of uncompressed table payload.
 } DdtHeader2;

 #pragma pack(pop)
--- a/include/aaruformat/structs/dump.h
+++ b/include/aaruformat/structs/dump.h
@@ -19,42 +19,109 @@
 #ifndef LIBAARUFORMAT_DUMP_H
 #define LIBAARUFORMAT_DUMP_H

+#include <stdint.h> /* Fixed-width integer types for on‑disk packed structures */
+
 #pragma pack(push, 1)

-/**Dump hardware block, contains a list of hardware used to dump the media on this image */
-typedef struct DumpHardwareHeader {
-    /**Identifier, <see cref="BlockType.DumpHardwareBlock" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint16_t entries;
-    /**Size of the whole block, not including this header, in uint8_ts */
-    uint32_t length;
-    /**CRC64-ECMA of the block */
-    uint64_t crc64;
+/** \file aaruformat/structs/dump.h
+ *  \brief Packed on-disk structures describing hardware and software used during image acquisition.
+ *
+ *  A Dump Hardware block (identifier = BlockType::DumpHardwareBlock) records one or more dump "environments" –
+ *  typically combinations of a physical device (drive, controller, adapter) and the software stack that
+ *  performed the read operation. Each environment is represented by a \ref DumpHardwareEntry followed by a
+ *  sequence of UTF‑8 strings and an optional array of extent ranges (\ref DumpExtent, defined in context.h) that
+ *  delimit portions of the medium this environment contributed to.
+ *
+ *  Binary layout (little-endian, packed, all multi-byte integers LE):
+ *
+ *    DumpHardwareHeader (sizeof = 18 bytes)
+ *      identifier  (4)  -> BlockType::DumpHardwareBlock
+ *      entries     (2)  -> number of following hardware entries
+ *      length      (4)  -> total bytes of payload that follow this header
+ *      crc64       (8)  -> CRC64-ECMA of the payload bytes
+ *
+ *    Repeated for i in [0, entries):
+ *      DumpHardwareEntry (36 bytes)
+ *        manufacturerLength (4)
+ *        modelLength        (4)
+ *        revisionLength     (4)
+ *        firmwareLength     (4)
+ *        serialLength       (4)
+ *        softwareNameLength (4)
+ *        softwareVersionLength (4)
+ *        softwareOperatingSystemLength (4)
+ *        extents (4) -> number of DumpExtent structs after the strings
+ *
+ *      Variable-length UTF-8 strings (not NUL-terminated on disk) appear immediately after the entry, in the
+ *      exact order of the length fields above; each string is present only if its length > 0. The reader allocates
+ *      an extra byte to append '\0' for in-memory convenience.
+ *
+ *      Array of 'extents' DumpExtent structures (each 16 bytes: start, end) follows the strings if extents > 0.
+ *      Each extent denotes an inclusive [start, end] logical sector (or unit) range contributed by
+ *      this hardware/software combination.
+ *
+ *  CRC semantics:
+ *   - crc64 covers exactly 'length' bytes immediately following the header.
+ *   - For legacy images with header.imageMajorVersion <= AARUF_VERSION_V1 the original C# writer produced a
+ *     byte-swapped CRC; the library compensates internally (see process_dumphw_block()).
+ *
+ *  Invariants / validation recommendations:
+ *   - identifier == BlockType::DumpHardwareBlock
+ *   - Accumulated size of all (entry + strings + extents arrays) == length
+ *   - All length fields are trusted only after bounds checking against remaining payload bytes
+ *   - Strings are raw UTF-8 data with no implicit terminator
+ *   - extents * sizeof(DumpExtent) fits inside remaining payload
+ *
+ *  Memory management notes (runtime library):
+ *   - Each string is malloc'ed with +1 byte for terminator during processing.
+ *   - Extents array is malloc'ed per entry when extents > 0.
+ *   - See aaruformatContext::dumpHardwareEntriesWithData for owning pointers.
+ *
+ *  \warning Structures are packed; never rely on natural alignment when mapping from a byte buffer.
+ *  \see DumpHardwareHeader
+ *  \see DumpHardwareEntry
+ *  \see DumpExtent (in context.h)
+ *  \see BlockType
+ */
+
+/** \struct DumpHardwareHeader
+ *  \brief Header that precedes a sequence of dump hardware entries and their variable-length payload.
+ */
+typedef struct DumpHardwareHeader
+{
+    uint32_t identifier;  ///< Block identifier, must be BlockType::DumpHardwareBlock.
+    uint16_t entries;     ///< Number of DumpHardwareEntry records that follow.
+    uint32_t length;      ///< Total payload bytes after this header (sum of entries, strings, and extents arrays).
+    uint64_t crc64;       ///< CRC64-ECMA of the payload (byte-swapped for legacy v1 images, handled automatically).
 } DumpHardwareHeader;

-/**Dump hardware entry, contains length of strings that follow, in the same order as the length, this structure */
-typedef struct DumpHardwareEntry {
-    /**Length of UTF-8 manufacturer string */
-    uint32_t manufacturerLength;
-    /**Length of UTF-8 model string */
-    uint32_t modelLength;
-    /**Length of UTF-8 revision string */
-    uint32_t revisionLength;
-    /**Length of UTF-8 firmware version string */
-    uint32_t firmwareLength;
-    /**Length of UTF-8 serial string */
-    uint32_t serialLength;
-    /**Length of UTF-8 software name string */
-    uint32_t softwareNameLength;
-    /**Length of UTF-8 software version string */
-    uint32_t softwareVersionLength;
-    /**Length of UTF-8 software operating system string */
-    uint32_t softwareOperatingSystemLength;
-    /**How many extents are after the strings */
-    uint32_t extents;
+/** \struct DumpHardwareEntry
+ *  \brief Per-environment length table describing subsequent UTF-8 strings and optional extent array.
+ *
+ *  Immediately after this structure the variable-length UTF‑8 strings appear in the documented order, each
+ *  present only if its corresponding length is non-zero. No padding is present between strings. When all
+ *  strings are consumed, an array of \ref DumpExtent follows if \ref extents > 0.
+ *
+ *  All length fields measure bytes (not characters) and exclude any in-memory NUL terminator added by the reader.
+ *
+ *  Typical semantics:
+ *   - manufacturer/model/revision/firmware/serial identify the hardware device.
+ *   - softwareName/softwareVersion/softwareOperatingSystem identify the acquisition software environment.
+ *   - extents list which logical ranges this environment actually dumped (useful for multi-device composites).
+ */
+typedef struct DumpHardwareEntry
+{
+    uint32_t manufacturerLength;             ///< Length in bytes of manufacturer UTF-8 string.
+    uint32_t modelLength;                    ///< Length in bytes of model UTF-8 string.
+    uint32_t revisionLength;                 ///< Length in bytes of revision / hardware revision string.
+    uint32_t firmwareLength;                 ///< Length in bytes of firmware version string.
+    uint32_t serialLength;                   ///< Length in bytes of device serial number string.
+    uint32_t softwareNameLength;             ///< Length in bytes of dumping software name string.
+    uint32_t softwareVersionLength;          ///< Length in bytes of dumping software version string.
+    uint32_t softwareOperatingSystemLength;  ///< Length in bytes of host operating system string.
+    uint32_t extents;                        ///< Number of DumpExtent records following the strings (0 = none).
 } DumpHardwareEntry;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_DUMP_H
+#endif  // LIBAARUFORMAT_DUMP_H
--- a/include/aaruformat/structs/header.h
+++ b/include/aaruformat/structs/header.h
@@ -19,73 +19,111 @@
 #ifndef LIBAARUFORMAT_HEADER_H
 #define LIBAARUFORMAT_HEADER_H

-#define AARU_HEADER_APP_NAME_LEN 64
-#define GUID_SIZE 16
+/** \file aaruformat/structs/header.h
+ *  \brief On-disk container header structures (v1 and v2) for Aaru images.
+ *
+ *  These packed headers appear at the very beginning (offset 0) of every Aaru image file and
+ *  advertise container format version, creator application, indexing offset and optional extended
+ *  feature capability bitfields (v2+). All multi-byte integers are little-endian. Strings stored
+ *  in the fixed-size application field are UTF‑16LE and zero padded (not necessarily NUL-terminated
+ *  if fully filled). The GUID field (v2) allows derivative / child images to reference an origin.
+ *
+ *  Version progression:
+ *   - v1: \ref AaruHeader (no GUID, no alignment or shift metadata, no feature bitfields).
+ *   - v2: \ref AaruHeaderV2 introduces GUID, block/data/table shift hints (mirroring DDT metadata),
+ *         and three 64‑bit feature bitmaps to negotiate reader/writer compatibility.
+ *
+ *  Compatibility handling (recommended logic for consumers):
+ *   1. If any bit set in featureIncompatible is not implemented by the reader: abort (cannot safely read/write).
+ *   2. Else if any bit set in featureCompatibleRo is not implemented: allow read‑only operations.
+ *   3. Bits only present in featureCompatible but not implemented MAY be ignored for both read/write while
+ *      still preserving round‑trip capability (writer should not clear unknown bits when re‑saving).
+ *
+ *  Alignment & shift semantics (duplicated here for quick reference, see DdtHeader2 for full details):
+ *   - blockAlignmentShift: underlying blocks are aligned to 2^blockAlignmentShift bytes.
+ *   - dataShift: data pointer / DDT entry low bits encode offsets modulo 2^dataShift sectors/items.
+ *   - tableShift: primary DDT entries span 2^tableShift logical sectors (0 implies single-level tables).
+ *
+ *  Invariants:
+ *   - identifier == AARU_MAGIC (external constant; not defined here).
+ *   - For v1: sizeof(AaruHeader) exact and indexOffset > 0 (indexOffset == 0 => corrupt/unreadable image).
+ *   - For v2: sizeof(AaruHeaderV2) exact; indexOffset > 0; blockAlignmentShift, dataShift, tableShift within
+ *             sane bounds (e.g. < 63). Zero is permissible only for the shift fields (not for indexOffset).
+ *
+ *  Security / robustness considerations:
+ *   - Always bounds-check indexOffset against file size before seeking.
+ *   - Treat application field as untrusted UTF‑16LE; validate surrogate pairs if necessary.
+ *   - Unknown feature bits MUST be preserved if a file is rewritten to avoid capability loss.
+ */
+
+#define AARU_HEADER_APP_NAME_LEN 64 /**< Size in bytes (UTF-16LE) of application name field (32 UTF-16 code units). */
+#define GUID_SIZE                16 /**< Size in bytes of GUID / UUID-like binary identifier. */

 #pragma pack(push, 1)

-/**Header, at start of file */
-typedef struct AaruHeader {
-    /**Header identifier, <see cref="AARU_MAGIC" /> */
-    uint64_t identifier;
-    /**UTF-16LE name of the application that created the image */
-    uint8_t application[AARU_HEADER_APP_NAME_LEN];
-    /**Image format major version. A new major version means a possibly incompatible change of format */
-    uint8_t imageMajorVersion;
-    /**Image format minor version. A new minor version indicates a compatible change of format */
-    uint8_t imageMinorVersion;
-    /**Major version of the application that created the image */
-    uint8_t applicationMajorVersion;
-    /**Minor version of the application that created the image */
-    uint8_t applicationMinorVersion;
-    /**Type of media contained on image */
-    uint32_t mediaType;
-    /**Offset to index */
-    uint64_t indexOffset;
-    /**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
-    int64_t creationTime;
-    /**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
-    int64_t lastWrittenTime;
+/** \struct AaruHeader
+ *  \brief Version 1 container header placed at offset 0 for legacy / initial format.
+ *
+ *  Field summary:
+ *   - identifier: magic signature (AARU_MAGIC) identifying the container.
+ *   - application: UTF‑16LE creator application name (fixed 64 bytes, zero padded).
+ *   - imageMajorVersion / imageMinorVersion: container format version of the file itself (not the app).
+ *   - applicationMajorVersion / applicationMinorVersion: version of the creating application.
+ *   - mediaType: media type enumeration (\ref MediaType).
+ *   - indexOffset: byte offset to the first index block (must be > 0).
+ *   - creationTime / lastWrittenTime: 64-bit Windows FILETIME timestamps (100 ns intervals since 1601-01-01 UTC).
+ */
+typedef struct AaruHeader
+{
+    uint64_t identifier;                             ///< File magic (AARU_MAGIC).
+    uint8_t  application[AARU_HEADER_APP_NAME_LEN];  ///< UTF-16LE creator application name (fixed-size buffer).
+    uint8_t  imageMajorVersion;        ///< Container format major version (incompatible changes when incremented).
+    uint8_t  imageMinorVersion;        ///< Container format minor version (backward compatible evolutions).
+    uint8_t  applicationMajorVersion;  ///< Creator application major version.
+    uint8_t  applicationMinorVersion;  ///< Creator application minor / patch version.
+    uint32_t mediaType;                ///< Media type enumeration (value from \ref MediaType).
+    uint64_t indexOffset;      ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
+    int64_t  creationTime;     ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
+    int64_t  lastWrittenTime;  ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
 } AaruHeader;

-/**Header, at start of file */
-typedef struct AaruHeaderV2 {
-    /**Header identifier, see AARU_MAGIC */
-    uint64_t identifier;
-    /**UTF-16LE name of the application that created the image */
-    uint8_t application[AARU_HEADER_APP_NAME_LEN];
-    /**Image format major version. A new major version means a possibly incompatible change of format */
-    uint8_t imageMajorVersion;
-    /**Image format minor version. A new minor version indicates a compatible change of format */
-    uint8_t imageMinorVersion;
-    /**Major version of the application that created the image */
-    uint8_t applicationMajorVersion;
-    /**Minor version of the application that created the image */
-    uint8_t applicationMinorVersion;
-    /**Type of media contained on image */
-    uint32_t mediaType;
-    /**Offset to index */
-    uint64_t indexOffset;
-    /**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
-    int64_t creationTime;
-    /**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
-    int64_t lastWrittenTime;
-    /**Unique identifier that allows children images to recognize and find this image.*/
-    uint8_t guid[GUID_SIZE];
-    /**Block alignment shift. All blocks in the image are aligned at 2 << blockAlignmentShift bytes */
-    uint8_t blockAlignmentShift;
-    /**Data shift. All data blocks in the image contain 2 << dataShift items at most */
-    uint8_t dataShift;
-    /**Table shift. All deduplication tables in the image use this shift to calculate the position of an item */
-    uint8_t tableShift;
-    /**Features used in this image that if unsupported are still compatible for reading and writing implementations */
-    uint64_t featureCompatible;
-    /**Features used in this image that if unsupported are still compatible for reading implementations but not for writing */
-    uint64_t featureCompatibleRo;
-    /**Featured used in this image that if unsupported prevent reading or writing the image*/
-    uint64_t featureIncompatible;
+/** \struct AaruHeaderV2
+ *  \brief Version 2 container header with GUID, alignment shifts, and feature negotiation bitmaps.
+ *
+ *  Additions over v1:
+ *   - guid: stable 128-bit identifier enabling linkage by derivative images.
+ *   - blockAlignmentShift / dataShift / tableShift: global structural hints copied into data & DDT blocks.
+ *   - featureCompatible / featureCompatibleRo / featureIncompatible: capability bitmasks.
+ *
+ *  Feature bitmask semantics:
+ *   - featureCompatible: Optional features; absence of implementation should not impact R/W correctness.
+ *   - featureCompatibleRo: If unimplemented, image MAY be opened read-only.
+ *   - featureIncompatible: If any bit unimplemented, image MUST NOT be opened (prevent misinterpretation).
+ *
+ *  Readers should check each header mask for bits outside their supported set (mask & ~supported) to decide the
+ *  access level (see file documentation). Writers must preserve unknown bits when saving an existing image.
+ */
+typedef struct AaruHeaderV2
+{
+    uint64_t identifier;                             ///< File magic (AARU_MAGIC).
+    uint8_t  application[AARU_HEADER_APP_NAME_LEN];  ///< UTF-16LE creator application name (fixed 64 bytes).
+    uint8_t  imageMajorVersion;                      ///< Container format major version.
+    uint8_t  imageMinorVersion;                      ///< Container format minor version.
+    uint8_t  applicationMajorVersion;                ///< Creator application major version.
+    uint8_t  applicationMinorVersion;                ///< Creator application minor / patch version.
+    uint32_t mediaType;                              ///< Media type enumeration (value from \ref MediaType).
+    uint64_t indexOffset;      ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
+    int64_t  creationTime;     ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
+    int64_t  lastWrittenTime;  ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
+    uint8_t  guid[GUID_SIZE];  ///< 128-bit image GUID (binary, not text); stable across children.
+    uint8_t  blockAlignmentShift;  ///< log2 block alignment (block size alignment = 2^blockAlignmentShift bytes).
+    uint8_t  dataShift;            ///< log2 sectors/items per block-index increment in DDT entries (2^dataShift).
+    uint8_t  tableShift;           ///< log2 sectors spanned by each primary DDT entry (0 = single-level).
+    uint64_t featureCompatible;    ///< Feature bits: unimplemented bits are ignorable (still R/W safe).
+    uint64_t featureCompatibleRo;  ///< Feature bits: unimplemented -> degrade to read-only access.
+    uint64_t featureIncompatible;  ///< Feature bits: any unimplemented -> abort (cannot open safely).
 } AaruHeaderV2;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_HEADER_H
+#endif  // LIBAARUFORMAT_HEADER_H
--- a/include/aaruformat/structs/index.h
+++ b/include/aaruformat/structs/index.h
@@ -21,50 +21,95 @@

 #pragma pack(push, 1)

-/**Header for the index, followed by entries */
+/** \file aaruformat/structs/index.h
+ *  \brief On‑disk index block header and entry structures (versions 1, 2 and 3).
+ *
+ *  The index provides a directory of all blocks contained in an Aaru image. Each index block starts with
+ *  a versioned header (IndexHeader / IndexHeader2 / IndexHeader3) followed by a contiguous array of
+ *  fixed‑size \ref IndexEntry records. Version 3 adds support for hierarchical (chained / nested) subindexes.
+ *
+ *  Version mapping by block identifier (see \ref BlockType):
+ *   - IndexBlock  (v1) -> \ref IndexHeader  (16‑bit entry count) followed by the \ref IndexEntry array.
+ *   - IndexBlock2 (v2) -> \ref IndexHeader2 (64‑bit entry count) followed by the \ref IndexEntry array.
+ *   - IndexBlock3 (v3) -> \ref IndexHeader3 with optional hierarchical subindex references.
+ *
+ *  CRC coverage & endianness:
+ *   - The crc64 field stores a CRC64-ECMA over the entries array ONLY (header bytes are excluded).
+ *   - For images with imageMajorVersion <= AARUF_VERSION_V1 a legacy writer byte-swapped the CRC; readers
+ *     compensate (see verify_index_v1/v2/v3). The value in the header remains whatever was originally written.
+ *
+ *  Hierarchical (v3) behavior:
+ *   - Entries whose blockType == IndexBlock3 refer to subindex blocks; readers recursively load and flatten.
+ *   - IndexHeader3::previous can point to a preceding index segment (for append / incremental scenarios) or 0.
+ *   - CRC of the main index does NOT cover subindex contents; each subindex has its own header + CRC.
+ *
+ *  Invariants / validation recommendations:
+ *   - identifier must equal the expected BlockType variant for that version.
+ *   - entries > 0 implies the entries array byte size == entries * sizeof(IndexEntry).
+ *   - crc64 must match recomputed CRC64( entries array ) (after legacy byte swap handling if required).
+ *   - For v3, if previous != 0 it should point to another IndexBlock3 header (optional best‑effort check).
+ *
+ *  Notes:
+ *   - Structures are packed (1‑byte alignment). All multi-byte integers are little‑endian on disk.
+ *   - The index does not store per-entry CRC; integrity relies on each individual block's own CRC plus the index CRC.
+ *   - dataType in \ref IndexEntry is meaningful only for block types that carry typed data (e.g. DataBlock,
+ * DumpHardwareBlock, etc.).
+ *
+ *  See also: verify_index_v1(), verify_index_v2(), verify_index_v3() for integrity procedures.
+ */
+
+/** \struct IndexHeader
+ *  \brief Index header (version 1) for legacy images (identifier == IndexBlock).
+ *
+ *  Uses a 16‑bit entry counter limiting the number of indexable blocks in v1.
+ */
 typedef struct IndexHeader
 {
-    /**Identifier, <see cref="BlockType.Index" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint16_t entries;
-    /**CRC64-ECMA of the index */
-    uint64_t crc64;
+    uint32_t identifier;  ///< Block identifier (must be BlockType::IndexBlock).
+    uint16_t entries;     ///< Number of \ref IndexEntry records that follow immediately.
+    uint64_t crc64;       ///< CRC64-ECMA of the entries array (legacy byte-swapped for early images).
 } IndexHeader;

-/**Header for the index, followed by entries */
+/** \struct IndexHeader2
+ *  \brief Index header (version 2) with 64‑bit entry counter (identifier == IndexBlock2).
+ *
+ *  Enlarges the entry count field to 64 bits for large images; otherwise structurally identical to v1.
+ */
 typedef struct IndexHeader2
 {
-    /**Identifier, <see cref="BlockType.Index" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint64_t entries;
-    /**CRC64-ECMA of the index */
-    uint64_t crc64;
+    uint32_t identifier;  ///< Block identifier (must be BlockType::IndexBlock2).
+    uint64_t entries;     ///< Number of \ref IndexEntry records that follow immediately.
+    uint64_t crc64;  ///< CRC64-ECMA of the entries array (legacy byte-swapped rule still applies for old versions).
 } IndexHeader2;

-/**Header for the index, followed by entries */
+/** \struct IndexHeader3
+ *  \brief Index header (version 3) adding hierarchical chaining (identifier == IndexBlock3).
+ *
+ *  Supports flattened hierarchical indexes: entries referencing additional IndexBlock3 subindexes.
+ *  The 'previous' pointer allows chaining earlier index segments (e.g., incremental append) enabling
+ *  cumulative discovery without rewriting earlier headers.
+ */
 typedef struct IndexHeader3
 {
-    /**Identifier, <see cref="BlockType.Index" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint64_t entries;
-    /**CRC64-ECMA of the index */
-    uint64_t crc64;
-    /**Pointer to the previous index header */
-    uint64_t previous;
+    uint32_t identifier;  ///< Block identifier (must be BlockType::IndexBlock3).
+    uint64_t entries;     ///< Number of \ref IndexEntry records that follow in this (sub)index block.
+    uint64_t crc64;       ///< CRC64-ECMA of the local entries array (does NOT cover subindexes or previous chains).
+    uint64_t previous;    ///< File offset of a previous IndexBlock3 header (0 if none / root segment).
 } IndexHeader3;

-/**Index entry */
+/** \struct IndexEntry
+ *  \brief Single index entry describing a block's type, (optional) data classification, and file offset.
+ *
+ *  Semantics by blockType (see \ref BlockType):
+ *   - DataBlock / GeometryBlock / ChecksumBlock / etc.: dataType conveys specific stored data category (\ref DataType).
+ *   - Deduplication (DDT) or Index blocks: dataType may be ignored or set to a sentinel.
+ *   - IndexBlock3: this entry refers to a subindex; offset points to another IndexHeader3.
+ */
 typedef struct IndexEntry
 {
-    /**Type of item pointed by this entry */
-    uint32_t blockType;
-    /**Type of data contained by the block pointed by this entry */
-    uint16_t dataType;
-    /**Offset in file where item is stored */
-    uint64_t offset;
+    uint32_t blockType;  ///< Block identifier of the referenced block (value from \ref BlockType).
+    uint16_t dataType;   ///< Data classification (value from \ref DataType) or unused for untyped blocks.
+    uint64_t offset;     ///< Absolute byte offset in the image where the referenced block header begins.
 } IndexEntry;

 #pragma pack(pop)
--- a/include/aaruformat/structs/metadata.h
+++ b/include/aaruformat/structs/metadata.h
@@ -21,73 +21,95 @@

 #pragma pack(push, 1)

-/**Metadata block, contains metadata */
-typedef struct MetadataBlockHeader {
-    /**Identifier, <see cref="BlockType.MetadataBlock" /> */
-    uint32_t identifier;
-    /**Size in uint8_ts of this whole metadata block */
-    uint32_t blockSize;
-    /**Sequence of media set this media belongs to */
-    int32_t mediaSequence;
-    /**Total number of media on the media set this media belongs to */
-    int32_t lastMediaSequence;
-    /**Offset to start of creator string from start of this block */
-    uint32_t creatorOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t creatorLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t commentsOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t commentsLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaTitleOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaTitleLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaManufacturerOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaManufacturerLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaModelOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaModelLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaSerialNumberOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaSerialNumberLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaBarcodeOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaBarcodeLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t mediaPartNumberOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t mediaPartNumberLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t driveManufacturerOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t driveManufacturerLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t driveModelOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t driveModelLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t driveSerialNumberOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t driveSerialNumberLength;
-    /**Offset to start of creator string from start of this block */
-    uint32_t driveFirmwareRevisionOffset;
-    /**Length in uint8_ts of the null-terminated UTF-16LE creator string */
-    uint32_t driveFirmwareRevisionLength;
+/** \file aaruformat/structs/metadata.h
+ *  \brief Packed on-disk metadata block headers for descriptive strings and CICM XML (if present).
+ *
+ *  Two metadata-related block header layouts are defined:
+ *   - \ref MetadataBlockHeader (BlockType::MetadataBlock): offsets + lengths for several UTF-16LE strings.
+ *   - \ref CicmMetadataBlock (BlockType::CicmBlock): length of embedded CICM XML metadata payload.
+ *
+ *  All multi-byte integers are little-endian. Structures are packed (1-byte alignment). All textual fields
+ *  referenced by offsets are UTF-16LE, null-terminated (0x0000). Non-zero length fields include the terminating
+ *  null (i.e. length >= 2 and an even number). Offsets are relative to the start of the corresponding block
+ *  header (byte 0 = first byte of the header). No padding is implicitly added between strings; producers
+ *  may pack them tightly or align them manually (alignment not required by the specification).
+ *
+ *  Metadata block layout (conceptual):
+ *    MetadataBlockHeader (fixed size)
+ *    <variable region holding each present UTF-16LE string in any order chosen by the writer>
+ *
+ *  Invariants / validation recommendations for MetadataBlockHeader:
+ *   - identifier == BlockType::MetadataBlock
+ *   - blockSize >= sizeof(MetadataBlockHeader)
+ *   - For every (offset,length) pair where length > 0:
+ *       * offset >= sizeof(MetadataBlockHeader)
+ *       * offset + length <= blockSize
+ *       * length % 2 == 0
+ *       * The 16-bit code unit at (offset + length - 2) == 0x0000 (null terminator)
+ *   - mediaSequence >= 0 and lastMediaSequence >= 0; if lastMediaSequence > 0 then mediaSequence <=
+ *     lastMediaSequence (the sequence number may be 0-based or 1-based, as the producer defines)
+ *
+ *  CICM metadata block layout:
+ *    CicmMetadataBlock (header)
+ *    <length bytes of CICM XML text payload (encoding implementation-defined, typically UTF-8; not null-terminated)>
+ *
+ *  NOTE: The library code reading these blocks must not assume strings are present; a zero length means the
+ *  corresponding field is omitted. Offsets for omitted fields MAY be zero or arbitrary; readers should skip them
+ *  whenever length == 0.
+ */
+
+/** \struct MetadataBlockHeader
+ *  \brief Header for a metadata block containing offsets and lengths to UTF-16LE descriptive strings.
+ *
+ *  Descriptive fields (all optional): creator, comments, media title/manufacturer/model/serial/barcode/part number,
+ *  drive manufacturer/model/serial/firmware revision. Strings can be used to describe both physical medium and
+ *  acquisition hardware. Length values include the UTF-16LE null terminator (two zero bytes).
+ */
+typedef struct MetadataBlockHeader
+{
+    uint32_t identifier;         ///< Block identifier, must be BlockType::MetadataBlock.
+    uint32_t blockSize;          ///< Total size in bytes of the entire metadata block (header + strings).
+    int32_t  mediaSequence;      ///< Sequence number within a multi-disc / multi-volume set (0-based or 1-based as
+                                 ///< producer defines).
+    int32_t  lastMediaSequence;  ///< Total number of media in the set; 0 or 1 if single item.
+    uint32_t creatorOffset;      ///< Offset to UTF-16LE creator string (or undefined if creatorLength==0).
+    uint32_t creatorLength;      ///< Length in bytes (including null) of creator string (0 if absent).
+    uint32_t commentsOffset;     ///< Offset to UTF-16LE comments string.
+    uint32_t commentsLength;     ///< Length in bytes (including null) of comments string.
+    uint32_t mediaTitleOffset;   ///< Offset to UTF-16LE media title string.
+    uint32_t mediaTitleLength;   ///< Length in bytes (including null) of media title string.
+    uint32_t mediaManufacturerOffset;      ///< Offset to UTF-16LE media manufacturer string.
+    uint32_t mediaManufacturerLength;      ///< Length in bytes (including null) of media manufacturer string.
+    uint32_t mediaModelOffset;             ///< Offset to UTF-16LE media model string.
+    uint32_t mediaModelLength;             ///< Length in bytes (including null) of media model string.
+    uint32_t mediaSerialNumberOffset;      ///< Offset to UTF-16LE media serial number string.
+    uint32_t mediaSerialNumberLength;      ///< Length in bytes (including null) of media serial number string.
+    uint32_t mediaBarcodeOffset;           ///< Offset to UTF-16LE media barcode string.
+    uint32_t mediaBarcodeLength;           ///< Length in bytes (including null) of media barcode string.
+    uint32_t mediaPartNumberOffset;        ///< Offset to UTF-16LE media part number string.
+    uint32_t mediaPartNumberLength;        ///< Length in bytes (including null) of media part number string.
+    uint32_t driveManufacturerOffset;      ///< Offset to UTF-16LE drive manufacturer string.
+    uint32_t driveManufacturerLength;      ///< Length in bytes (including null) of drive manufacturer string.
+    uint32_t driveModelOffset;             ///< Offset to UTF-16LE drive model string.
+    uint32_t driveModelLength;             ///< Length in bytes (including null) of drive model string.
+    uint32_t driveSerialNumberOffset;      ///< Offset to UTF-16LE drive serial number string.
+    uint32_t driveSerialNumberLength;      ///< Length in bytes (including null) of drive serial number string.
+    uint32_t driveFirmwareRevisionOffset;  ///< Offset to UTF-16LE drive firmware revision string.
+    uint32_t driveFirmwareRevisionLength;  ///< Length in bytes (including null) of drive firmware revision string.
 } MetadataBlockHeader;

-/**Geometry block, contains physical geometry information */
-typedef struct CicmMetadataBlock {
-    /**Identifier, <see cref="BlockType.CicmBlock" /> */
-    uint32_t identifier;
-    uint32_t length;
+/** \struct CicmMetadataBlock
+ *  \brief Header for a CICM XML metadata block (identifier == BlockType::CicmBlock).
+ *
+ *  The following 'length' bytes immediately after the header contain the CICM XML payload. Encoding is typically
+ *  UTF-8; the payload is not required to be null-terminated.
+ */
+typedef struct CicmMetadataBlock
+{
+    uint32_t identifier;  ///< Block identifier, must be BlockType::CicmBlock.
+    uint32_t length;      ///< Length in bytes of the CICM metadata payload that follows.
 } CicmMetadataBlock;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_METADATA_H
+#endif  // LIBAARUFORMAT_METADATA_H
--- a/include/aaruformat/structs/optical.h
+++ b/include/aaruformat/structs/optical.h
@@ -21,36 +21,65 @@

 #pragma pack(push, 1)

-/**Contains list of optical disc tracks */
-typedef struct TracksHeader {
-    /**Identifier, <see cref="BlockType.TracksBlock" /> */
-    uint32_t identifier;
-    /**How many entries follow this header */
-    uint16_t entries;
-    /**CRC64-ECMA of the block */
-    uint64_t crc64;
+/** \file aaruformat/structs/optical.h
+ *  \brief On-disk structures describing optical disc tracks (Track list block).
+ *
+ *  An optical tracks block (identifier == BlockType::TracksBlock) stores a list of \ref TrackEntry
+ *  records describing the logical layout of tracks and sessions for CD/DVD/BD and similar media.
+ *
+ *  Layout:
+ *    TracksHeader (fixed)
+ *    TrackEntry[ entries ] (array, packed)
+ *
+ *  CRC semantics:
+ *   - TracksHeader::crc64 is a CRC64-ECMA over the contiguous TrackEntry array ONLY (header excluded).
+ *   - For legacy images (imageMajorVersion <= AARUF_VERSION_V1) a byte swap is applied when verifying.
+ *
+ *  Field semantics (TrackEntry):
+ *   - sequence: Logical track number (1..99 typical for CD). Values outside that range may encode extras.
+ *   - type: Value from \ref TrackType (Audio, Data, Mode variants, etc.).
+ *   - start / end: Inclusive Logical Block Address (LBA) bounds for the track. end >= start.
+ *   - pregap: Number of sectors of pre-gap *preceding* the track's first user-accessible sector (can be 0 or negative
+ *             if representing lead-in semantics; negative interpretation is implementation-defined).
+ *   - session: Session number starting at 1 for multi-session discs (1 for single session).
+ *   - isrc: 13-byte ISRC (raw code, no terminating null). If fewer significant characters, remaining bytes are 0.
+ *   - flags: Bitmask of track/control flags. Unless otherwise specified, recommended mapping (mirrors CD subchannel Q
+ *            control bits) is: bit0 Pre-emphasis, bit1 Copy permitted, bit2 Data track, bit3 Four-channel audio,
+ *            bits4-7 reserved. Actual semantics may be extended by the format specification.
+ *
+ *  Invariants / validation recommendations:
+ *   - identifier == BlockType::TracksBlock
+ *   - entries * sizeof(TrackEntry) bytes are present after the header in the block image.
+ *   - 1 <= sequence <= 99 for standard CD tracks (non-conforming values allowed but should be documented).
+ *   - start <= end; pregap >= 0 (if negative pregaps unsupported in implementation).
+ *   - ISRC bytes either all zero (no ISRC) or printable ASCII (A-Z 0-9) per ISO 3901 (stored without hyphen formatting).
+ */
+
+/** \struct TracksHeader
+ *  \brief Header for an optical tracks block; 'entries' packed TrackEntry records follow it immediately.
+ */
+typedef struct TracksHeader
+{
+    uint32_t identifier;  ///< Block identifier (must be BlockType::TracksBlock).
+    uint16_t entries;     ///< Number of TrackEntry records following this header.
+    uint64_t crc64;  ///< CRC64-ECMA of the TrackEntry array only (header excluded); byte-swapped for V1 images.
 } TracksHeader;

-/**Optical disc track */
-typedef struct TrackEntry {
-    /**Track sequence */
-    uint8_t sequence;
-    /**Track type */
-    uint8_t type;
-    /**Track starting LBA */
-    int64_t start;
-    /**Track last LBA */
-    int64_t end;
-    /**Track pregap in sectors */
-    int64_t pregap;
-    /**Track session */
-    uint8_t session;
-    /**Track's ISRC in ASCII */
-    uint8_t isrc[13];
-    /**Track flags */
-    uint8_t flags;
+/** \struct TrackEntry
+ *  \brief One optical disc track record: sequence, type, inclusive LBA bounds, pregap, session, ISRC and flags.
+ */
+typedef struct TrackEntry
+{
+    uint8_t sequence;  ///< Track number (1..99 typical for CD audio/data). 0 may indicate placeholder/non-standard.
+    uint8_t type;      ///< Track type (value from \ref TrackType).
+    int64_t start;     ///< Inclusive starting LBA of the track.
+    int64_t end;       ///< Inclusive ending LBA of the track (end >= start).
+    int64_t pregap;    ///< Pre-gap sectors preceding track start (0 if none; negative is implementation-defined).
+    uint8_t session;   ///< Session number (1-based). 1 for single-session discs.
+    uint8_t isrc[13];  ///< ISRC raw 13-byte code (no null terminator). All zeros if not present.
+    uint8_t flags;     ///< Control / attribute bitfield (see file documentation for suggested bit mapping).
 } TrackEntry;

 #pragma pack(pop)

-#endif //LIBAARUFORMAT_OPTICAL_H
+#endif  // LIBAARUFORMAT_OPTICAL_H
--- a/include/aaruformat/structs/options.h
+++ b/include/aaruformat/structs/options.h
@@ -19,19 +19,214 @@
 #ifndef LIBAARUFORMAT_OPTIONS_H
 #define LIBAARUFORMAT_OPTIONS_H

+#include <stdbool.h>  // For bool type used in aaru_options.
+#include <stdint.h>   // For fixed-width integer types.
+
+/** \file aaruformat/structs/options.h
+ *  \brief Image creation / open tuning options structure and related semantics.
+ *
+ *  The library accepts a semicolon-delimited key=value options string (see parse_options()). Recognized keys:
+ *    compress=true|false          Enable/disable block compression (LZMA for data blocks, FLAC for audio tracks).
+ *    deduplicate=true|false       If true, identical (duplicate) sectors are stored once (DDT entries point to same
+ *                                 physical block). If false, duplicates are still tracked in DDT but each occurrence
+ *                                 is stored independently (no storage savings). DDT itself is always present.
+ *    dictionary=<bytes>           LZMA dictionary size in bytes (fallback default 33554432 if 0 or invalid).
+ *    table_shift=<n>              DDT v2 table shift (default 9) (items per primary entry = 2^n when multi-level).
+ *    data_shift=<n>               Global data shift (default 12). Defines per-block address granularity: the low
+ *                                 n bits (2^n span) encode the sector/unit offset within a block; higher bits combine
+ *                                 with block_alignment to derive block file offsets. Used by DDT but not limited to it.
+ *    block_alignment=<n>          log2 alignment of underlying data blocks (default 9 => 512 bytes) (block size = 2^n).
+ *    md5=true|false               Generate MD5 checksum (stored in checksum block if true).
+ *    sha1=true|false              Generate SHA-1 checksum.
+ *    sha256=true|false            Generate SHA-256 checksum.
+ *    blake3=true|false            Generate BLAKE3 checksum (may require build-time support; ignored if unsupported).
+ *    spamsum=true|false           Generate SpamSum fuzzy hash.
+ *
+ *  Defaults (when option string NULL or key omitted):
+ *    compress=true, deduplicate=true, dictionary=33554432, table_shift=9, data_shift=12,
+ *    block_alignment=9, md5=false, sha1=false, sha256=false, blake3=false, spamsum=false.
+ *
+ *  Validation / normalization done in parse_options():
+ *   - Zero / missing dictionary resets to default 33554432.
+ *   - Zero table_shift resets to 9.
+ *   - Zero data_shift resets to 12.
+ *   - Zero block_alignment resets to 9.
+ *
+ *  Rationale:
+ *   - table_shift, data_shift and block_alignment mirror fields stored in on-disk headers (see AaruHeaderV2 &
+ * DdtHeader2); data_shift is a global per-block granularity exponent (not DDT-specific) governing how in-block offsets
+ * are encoded.
+ *   - compress selects adaptive codec usage: LZMA applied to generic/data blocks, FLAC applied to audio track payloads.
+ *   - deduplicate toggles storage optimization only: the DDT directory is always built for addressing; disabling simply
+ *     forces each sector's content to be written even if already present (useful for forensic byte-for-byte
+ * duplication).
+ *   - dictionary tunes compression ratio/memory use; large values increase memory footprint.
+ *   - Checksums are optional; enabling multiple increases CPU time at write finalization.
+ *
+ *  Performance / space trade-offs (deduplicate=false):
+ *   - Significantly larger image size: every repeated sector payload is written again.
+ *   - Higher write I/O and longer creation time for highly redundant sources (e.g., zero-filled regions) compared to
+ *     deduplicate=true, although CPU time spent on duplicate detection/hash lookups is reduced.
+ *   - Potentially simpler post-process forensic validation (physical ordering preserved without logical coalescing).
+ *   - Use when exact physical repetition is more critical than storage efficiency, or to benchmark raw device
+ * throughput.
+ *   - For typical archival use-cases with large zero / repeated patterns, deduplicate=true markedly reduces footprint.
+ *
+ *  Approximate in-RAM hash map usage for deduplication (deduplicate=true):
+ *   The on-disk DDT can span many secondary tables, but only the primary table plus a currently loaded secondary (and
+ *   possibly a small cache) reside in memory; their footprint is typically <<5% of total indexed media space and is
+ * often negligible compared to the hash map used to detect duplicate sectors. Therefore we focus here on the hash /
+ * lookup structure ("hash_map") memory, not the entire DDT on-disk size.
+ *
+ *   Worst-case (all sectors unique) per 1 GiB of user data:
+ *     sectors_per_GiB = 2^30 / sector_size
+ *     hash_bytes ≈ sectors_per_GiB * H   (H ≈ 16 bytes: 8-byte fingerprint + ~8 bytes map overhead)
+ *
+ *   Resulting hash_map RAM per GiB (unique sectors):
+ *     +--------------+------------------+------------------------------+
+ *     | Sector size  | Sectors / GiB    | Hash map (~16 B / sector)    |
+ *     +--------------+------------------+------------------------------+
+ *     |   512 bytes  | 2,097,152        | ~32.0 MiB  (≈30.0–34.0 MiB)  |
+ *     |  2048 bytes  |   524,288        | ~ 8.0 MiB  (≈7.5–8.5  MiB)   |
+ *     |  4096 bytes  |   262,144        | ~ 4.0 MiB  (≈3.8–4.3  MiB)   |
+ *     +--------------+------------------+------------------------------+
+ *
+ *   (Range reflects allocator + load factor variation.)
+ *
+ *   Targeted projections (hash map only, R=1):
+ *     2048‑byte sectors (~8 MiB per GiB unique)
+ *       Capacity | Hash map (MiB) | Hash map (GiB)
+ *       ---------+---------------+----------------
+ *         25 GiB |     ~200       |   0.20
+ *         50 GiB |     ~400       |   0.39
+ *
+ *     512‑byte sectors (~32 MiB per GiB unique)
+ *       Capacity | Hash map (MiB) | Hash map (GiB)
+ *       ---------+---------------+----------------
+ *        128 GiB |   ~4096        |   4.00
+ *        500 GiB |  ~16000        |  15.63
+ *      1   TiB*  |  ~32768        |  32.00
+ *      2   TiB*  |  ~65536        |  64.00
+ *
+ *     *TiB = 1024 GiB binary. For decimal TB reduce by ~9% (×0.91).
+ *
+ *   Duplicate ratio scaling:
+ *     Effective hash RAM ≈ table_value * R, where R = unique_sectors / total_sectors.
+ *     Example: 500 GiB @512 B, R=0.4 ⇒ ~16000 MiB * 0.4 ≈ 6400 MiB (~6.25 GiB).
+ *
+ *   Quick rule of thumb (hash only):
+ *     hash_bytes_per_GiB ≈ 16 * (2^30 / sector_size) ≈ (17.1799e9 / sector_size) bytes
+ *       → ≈ 32 MiB (512 B), 8 MiB (2048 B), 4 MiB (4096 B) per GiB unique (33.6 / 8.4 / 4.2 decimal MB).
+ *
+ *   Memory planning tip:
+ *     If projected hash_map usage risks exceeding available RAM, consider:
+ *       - Increasing table_shift (reduces simultaneous secondary loads / contention)
+ *       - Lowering data_shift (if practical) to encourage earlier big DDT adoption with fewer unique blocks
+ *       - Segmenting the dump into phases (if workflow permits)
+ *       - Accepting higher duplicate ratio by pre-zero detection or sparse treatment externally.
+ *       - Resuming the dump in multiple passes: each resume rebuilds the hash_map from scratch, so peak RAM still
+ *         matches a single-pass estimate, but average RAM over total wall time can drop if you unload between passes.
+ *
+ *   NOTE: DDT in-RAM portion (primary + one secondary) usually adds only a few additional MiB even for very large
+ * images, hence omitted from sizing tables. Include +5% safety margin if extremely tight on memory.
+ *
+ *  Guidance for table_shift / data_shift selection:
+ *   Let:
+ *     S = total logical sectors expected in image (estimate if unknown).
+ *     T = table_shift (items per primary DDT entry = 2^T when multi-level; 0 => single-level).
+ *     D = data_shift (in-block sector offset span = 2^D).
+ *     BA = block_alignment (bytes) = 2^block_alignment.
+ *     SS = sector size (bytes).
+ *
+ *   1. data_shift constraints:
+ *      - For SMALL DDT entries (12 payload bits after status): D must satisfy 0 < D < 12 and (12 - D) >= 1 so that at
+ *        least one bit remains for block index. Practical range for small DDT: 6..10 (leaves 2+ bits for block index).
+ *      - For BIG DDT entries (28 payload bits after status): D may be larger (up to 27) but values >16 rarely useful.
+ *      - Effective address granularity inside a block = min(2^D * SS, physical block span implied by BA).
+ *      - Choosing D too large wastes bits (larger offset range than block actually contains) and reduces the number of
+ *        block index bits within a small entry, potentially forcing upgrade to big DDT earlier.
+ *
+ *      Recommended starting points:
+ *        * 512‑byte sectors, 512‑byte block alignment: D=9 (512 offsets) or D=8 (256 offsets) keeps small DDT viable.
+ *        * 2048‑byte optical sectors, 2048‑byte alignment: D=8 (256 offsets) typically sufficient.
+ *        * Mixed / large logical block sizes: keep D so that (2^D * SS) ≈ typical dedup block region you want
+ * addressable.
+ *
+ *   2. block capacity within an entry:
+ *      - SMALL DDT: usable block index bits = 12 - D.
+ *        Max representable block index (small) = 2^(12-D) - 1.
+ *      - BIG DDT: usable block index bits = 28 - D.
+ *        Max representable block index (big)   = 2^(28-D) - 1.
+ *      - If (requiredBlockIndex > max) you must either reduce D or rely on big DDT.
+ *
+ *      Approximate requiredBlockIndex ≈ (TotalUniqueBlocks) where
+ *        TotalUniqueBlocks ≈ (S * SS) / (BA * (2^D * SS / (SS))) = S / (2^D * (BA / SS))
+ *        Simplified (assuming BA = SS): TotalUniqueBlocks ≈ S / 2^D.
+ *
+ *   3. table_shift considerations (multi-level DDT):
+ *      - Primary entries count ≈ ceil(S / 2^T). Choose T so this count fits memory and keeps lookup fast.
+ *      - Larger T reduces primary table size, increasing secondary table dereferences.
+ *      - Typical balanced values: T in [8..12] (256..4096 sectors per primary entry).
+ *      - T=0 means single-level (small S, all entries fit in memory); note parse_options() resets a zero table_shift to 9.
+ *
+ *      Memory rough estimate for single-level SMALL DDT:
+ *        bytes ≈ S * 2  (each small entry 2 bytes). For BIG DDT: bytes ≈ S * 4.
+ *      Multi-level: primary table bytes ≈ (S / 2^T) * entrySize + sum(secondary tables).
+ *
+ *   4. Example scenarios:
+ *      - 50M sectors (≈24 GiB @512B), want small DDT: pick D=8 (256); block index bits=4 (max 16 blocks) insufficient.
+ *        Even D=6 yields only 6 block index bits (max 64 blocks); BIG DDT at D=8 has 28-8=20 bits (million+ blocks).
+ *        So prefer BIG DDT here.
+ *      - 2M sectors, 2048B alignment, optical: D=8 gives S/2^D ≈ 7812 blocks; small DDT block index bits=4 (max 16)
+ *        inadequate, and D=6 (offset span 64 sectors) gives only 6 block index bits (max 64) → use big DDT.
+ *
+ *   5. Practical recommendations:
+ *      - If unsure and image > ~1M sectors: keep defaults (data_shift=12, table_shift=9) and allow big DDT.
+ *      - For small archival (<100k sectors): T=0 (single-level), D≈8..10 to keep small DDT feasible.
+ *      - Benchmark before lowering D purely to stay in small DDT; increased secondary lookups or larger primary tables
+ * can offset saved space.
+ *
+ *   Recommended presets (approximate bands):
+ *     +----------------------+----------------------+---------------------------+-------------------------------+
+ *     | Total logical sectors | table_shift (T)      | data_shift (D)            | Notes                         |
+ *     +----------------------+----------------------+---------------------------+-------------------------------+
+ *     |   <   50,000          | 0                    | 8 – 10                    | Single-level small DDT likely |
+ *     | 50K –   1,000,000     | 8 – 9                | 9 – 10                    | Still feasible small DDT      |
+ *     | 1M  –  10,000,000     | 9 – 10               | 10 – 12                   | Borderline small -> big DDT   |
+ *     | 10M – 100,000,000     | 10 – 11              | 11 – 12                   | Prefer big DDT; tune T for mem|
+ *     |   > 100,000,000       | 11 – 12              | 12                        | Big DDT; higher T saves memory|
+ *     +----------------------+----------------------+---------------------------+-------------------------------+
+ *     Ranges show typical stable regions; pick the lower end of table_shift if memory is ample, higher if minimizing
+ *     primary table size. Always validate actual unique block count vs payload bits.
+ *
+ *   NOTE: The library will automatically fall back to BIG DDT where needed; these settings bias structure, they do not
+ *         guarantee small DDT retention.
+ *
+ *  Thread-safety: aaru_options is a plain POD struct; caller may copy freely. parse_options() returns by value.
+ *
+ *  Future compatibility: unknown keys are ignored by current parser; consumers should preserve original option
+ *  strings if round-tripping is required.
+ */
+
+/** \struct aaru_options
+ *  \brief Parsed user-specified tunables controlling compression, deduplication, hashing and DDT geometry.
+ *
+ *  All shifts are exponents of two.
+ */
 typedef struct
 {
-    bool     compress;
-    bool     deduplicate;
-    uint32_t dictionary;
-    uint8_t  table_shift;
-    uint8_t  data_shift;
-    uint8_t  block_alignment;
-    bool     md5;
-    bool     sha1;
-    bool     sha256;
-    bool     blake3;
-    bool     spamsum;
+    bool     compress;     ///< Enable adaptive compression (LZMA for data blocks, FLAC for audio). Default: true.
+    bool     deduplicate;  ///< Storage dedup flag (DDT always exists). true=share identical sector content, false=store
+                           ///< each instance independently. Default: true.
+    uint32_t dictionary;   ///< LZMA dictionary size in bytes (>= 4096 recommended). Default: 33554432 (32 MiB).
+    uint8_t  table_shift;  ///< DDT table shift (multi-level fan-out exponent). Default: 9.
+    uint8_t  data_shift;   ///< Global data shift: low bits hold the in-block sector offset (2^data_shift span). Default: 12.
+    uint8_t  block_alignment;  ///< log2 underlying block alignment (2^n bytes). Default: 9 (512 bytes).
+    bool     md5;              ///< Generate MD5 checksum (ChecksumAlgorithm::Md5) when finalizing image. Default: false.
+    bool     sha1;             ///< Generate SHA-1 checksum (ChecksumAlgorithm::Sha1) when finalizing image. Default: false.
+    bool     sha256;           ///< Generate SHA-256 checksum (ChecksumAlgorithm::Sha256) when finalizing image. Default: false.
+    bool     blake3;           ///< Generate BLAKE3 checksum if supported (not stored if algorithm unavailable). Default: false.
+    bool     spamsum;          ///< Generate SpamSum fuzzy hash (ChecksumAlgorithm::SpamSum). Default: false.
 } aaru_options;

 #endif  // LIBAARUFORMAT_OPTIONS_H