Enhance documentation for various structures with detailed descriptions and formatting improvements

This commit is contained in:
2025-10-01 05:35:39 +01:00
parent 1f91ad1e08
commit 41aee42c53
16 changed files with 1935 additions and 1273 deletions

View File

@@ -19,29 +19,80 @@
#ifndef LIBAARUFORMAT_CHECKSUM_H
#define LIBAARUFORMAT_CHECKSUM_H
#include <stdint.h> // Fixed-width integer types for on-disk structures.
#pragma pack(push, 1)
/**
* Checksum block, contains a checksum of all user data sectors (except for optical discs that is 2352 uint8_ts raw
* sector if available
* */
typedef struct ChecksumHeader {
/**Identifier, <see cref="BlockType.ChecksumBlock" /> */
uint32_t identifier;
/**Length in uint8_ts of the block */
uint32_t length;
/**How many checksums follow */
uint8_t entries;
* \file aaruformat/structs/checksum.h
* \brief On-disk layout definitions for the checksum block (BlockType::ChecksumBlock).
*
* A checksum block stores one or more whole-image (user data) checksums. For optical media the
* user data definition follows the format's raw sector rules (e.g. 2352-byte raw sector when available).
*
* Binary layout (all integers are little-endian, structure is packed):
*
* +------------------------------+-------------------------------+
* | Field | Size (bytes) |
* +==============================+===============================+
* | ChecksumHeader | sizeof(ChecksumHeader)=9 |
* | identifier | 4 (BlockType::ChecksumBlock) |
* | length | 4 (payload bytes that follow)|
* | entries | 1 (number of checksum entries)|
* +------------------------------+-------------------------------+
* | Repeated for each entry: |
* | ChecksumEntry | sizeof(ChecksumEntry)=5 |
* | type | 1 (ChecksumAlgorithm) |
* | length | 4 (digest length) |
* | digest bytes | length |
* +------------------------------+-------------------------------+
*
* Thus, the payload size (ChecksumHeader.length) MUST equal the sum over all entries of:
* sizeof(ChecksumEntry) + entry.length.
*
* Typical digest lengths:
* - Md5: 16 bytes
* - Sha1: 20 bytes
* - Sha256: 32 bytes
* - SpamSum: variable length ASCII, NOT null-terminated on disk (a terminating '\0' may be appended in memory).
*
* \warning The structures are packed; never rely on host compiler default padding or directly casting from a buffer
* without ensuring correct endianness if porting to big-endian systems (current implementation assumes LE).
*
* \see BlockType
* \see ChecksumAlgorithm
*/
/**
* \struct ChecksumHeader
* \brief Header that precedes the sequence of checksum entries for a checksum block.
*
* After this header, exactly \ref ChecksumHeader::length bytes follow containing \ref ChecksumHeader::entries
* consecutive \ref ChecksumEntry records, each immediately followed by its digest payload.
*
* Packed size is 9 bytes (4 + 4 + 1); this only holds while the definition stays inside the surrounding
* `#pragma pack(push, 1)` / `#pragma pack(pop)` region of this header.
*/
typedef struct ChecksumHeader
{
uint32_t identifier; ///< Block identifier, must be BlockType::ChecksumBlock.
uint32_t length; ///< Length in bytes of the payload (all entries + their digest data, excluding this header).
uint8_t entries; ///< Number of checksum entries that follow in the payload.
} ChecksumHeader;
/**Checksum entry, followed by checksum data itself */
typedef struct ChecksumEntry {
/**Checksum algorithm */
uint8_t type;
/**Length in uint8_ts of checksum that follows this structure */
uint32_t length;
/**
* \struct ChecksumEntry
* \brief Per-checksum metadata immediately followed by the digest / signature bytes.
*
* For fixed-length algorithms the \ref length MUST match the known digest size. For SpamSum it is variable.
* The bytes immediately following this structure (not null-terminated) constitute the digest and are exactly
* \ref length bytes long.
*
* Order of entries is not mandated; readers should scan all entries and match by \ref type.
*
* Packed size is 5 bytes (1 + 4). Because \ref type occupies a single byte, \ref length starts at offset 1
* and is therefore not naturally aligned; access it through the struct (or memcpy), not through a cast
* `uint32_t *`.
*/
typedef struct ChecksumEntry
{
uint8_t type; ///< Algorithm used (value from \ref ChecksumAlgorithm).
uint32_t length; ///< Length in bytes of the digest that immediately follows this structure.
} ChecksumEntry;
#pragma pack(pop)
#endif //LIBAARUFORMAT_CHECKSUM_H
#endif // LIBAARUFORMAT_CHECKSUM_H

View File

@@ -19,37 +19,82 @@
#ifndef LIBAARUFORMAT_DATA_H
#define LIBAARUFORMAT_DATA_H
#include <stdint.h> // Fixed width integer types used in on-disk packed structs.
#pragma pack(push, 1)
/**Block header, precedes block data */
typedef struct BlockHeader {
/**Identifier, <see cref="BlockType.DataBlock" /> */
uint32_t identifier;
/**Type of data contained by this block */
uint16_t type;
/**Compression algorithm used to compress the block */
uint16_t compression;
/**Size in uint8_ts of each sector contained in this block */
uint32_t sectorSize;
/**Compressed length for the block */
uint32_t cmpLength;
/**Uncompressed length for the block */
uint32_t length;
/**CRC64-ECMA of the compressed block */
uint64_t cmpCrc64;
/**CRC64-ECMA of the uncompressed block */
uint64_t crc64;
/**
* \file aaruformat/structs/data.h
* \brief On-disk layout structures for data-bearing and geometry blocks.
*
* These packed structures describe the headers that precede variable-length payloads
* inside blocks whose identifiers are enumerated in \ref BlockType.
* All integer fields are stored little-endian on disk. The library currently assumes a
* little-endian host; if ported to a big-endian architecture explicit byte swapping will be required.
*
* Layout of a data block (BlockType::DataBlock):
* BlockHeader (sizeof(BlockHeader) bytes)
* Compressed payload (cmpLength bytes)
*
* Payload decoding:
* - Apply the algorithm indicated by \ref BlockHeader::compression (\ref CompressionType) to the
* cmpLength bytes following the header to obtain exactly \ref BlockHeader::length bytes.
* - The uncompressed data MUST be an integer multiple of \ref BlockHeader::sectorSize.
* - A CRC64-ECMA is provided for both compressed (cmpCrc64) and uncompressed (crc64) forms to allow
* validation at either stage of the pipeline.
*
* Geometry block (BlockType::GeometryBlock) has a \ref GeometryBlockHeader followed by no additional
* fixed payload in the current format version; it conveys legacy CHS-style logical geometry metadata.
*
* \warning These structs are packed; do not take their address and assume natural alignment.
* \see BlockType
* \see DataType
* \see CompressionType
*/
/**
* \struct BlockHeader
* \brief Header preceding the compressed data payload of a data block (BlockType::DataBlock).
*
* Packed size is 36 bytes (4 + 2 + 2 + 4 + 4 + 4 + 8 + 8) inside the surrounding `#pragma pack(push, 1)` region.
*
* Invariants:
* - cmpLength > 0 unless length == 0 (empty block)
* - length == 0 implies cmpLength == 0
* - If compression == CompressionType::None then cmpLength == length
* - length % sectorSize == 0
*
* Validation strategy (recommended for readers):
* 1. Verify identifier == BlockType::DataBlock.
* 2. Verify sectorSize is non-zero and a power-of-two or a commonly used size (512/1024/2048/4096/2352).
*    This must happen before evaluating `length % sectorSize`, since a zero divisor is undefined behavior in C.
* 3. Verify invariants above and CRCs after (de)compression.
*/
typedef struct BlockHeader
{
uint32_t identifier; ///< Block identifier, must be BlockType::DataBlock.
uint16_t type; ///< Logical data classification (value from \ref DataType).
uint16_t compression; ///< Compression algorithm used (value from \ref CompressionType).
uint32_t sectorSize; ///< Size in bytes of each logical sector represented in this block.
uint32_t cmpLength; ///< Size in bytes of the compressed payload immediately following this header.
uint32_t length; ///< Size in bytes of the uncompressed payload resulting after decompression.
uint64_t cmpCrc64; ///< CRC64-ECMA of the compressed payload (cmpLength bytes).
uint64_t crc64; ///< CRC64-ECMA of the uncompressed payload (length bytes).
} BlockHeader;
/**Geometry block, contains physical geometry information */
typedef struct GeometryBlockHeader {
/**Identifier, <see cref="BlockType.GeometryBlock" /> */
uint32_t identifier;
uint32_t cylinders;
uint32_t heads;
uint32_t sectorsPerTrack;
/**
* \struct GeometryBlockHeader
* \brief Legacy CHS style logical geometry metadata (BlockType::GeometryBlock).
*
* Total logical sectors implied by this header is cylinders * heads * sectorsPerTrack.
* All three factors are 32-bit, so widen to uint64_t before multiplying; with untrusted values the product
* can still exceed 64 bits and should be range-checked.
* Sector size is not included here and must be derived from context (e.g., accompanying metadata
* or defaulting to 512 for many block devices).
*
* Packed size is 16 bytes (4 x uint32_t).
*/
typedef struct GeometryBlockHeader
{
uint32_t identifier; ///< Block identifier, must be BlockType::GeometryBlock.
uint32_t cylinders; ///< Number of cylinders.
uint32_t heads; ///< Number of heads (tracks per cylinder).
uint32_t sectorsPerTrack; ///< Number of sectors per track.
} GeometryBlockHeader;
#pragma pack(pop)
#endif //LIBAARUFORMAT_DATA_H
#endif // LIBAARUFORMAT_DATA_H

View File

@@ -19,71 +19,149 @@
#ifndef LIBAARUFORMAT_DDT_H
#define LIBAARUFORMAT_DDT_H
#include <stdint.h> // fixed-width types for on-disk layout
#pragma pack(push, 1)
/**Header for a deduplication table. Table follows it */
/** \file aaruformat/structs/ddt.h
* \brief On-disk headers for Deduplication Data Tables (DDT) versions 1 and 2.
*
* A DDT maps logical sector indices (LBAs within an image's logical address space) to (block, sector)
* pairs plus a base file offset, enabling content de-duplication inside the container. Two generations
* exist:
* - DdtHeader ("version 1") flat table.
* - DdtHeader2 ("version 2") hierarchical, multi-level subtables for scalability.
*
* All integers are little-endian. Structures are packed (1-byte alignment). When porting to a big-endian
* architecture callers must perform byte swapping. Do not rely on compiler-introduced padding.
*
* Compression of the table body (entries array) follows the same conventions as data blocks: first
* decompress according to the compression enum, then validate CRC64 for uncompressed contents.
*
* Related enumerations:
* - BlockType::DeDuplicationTable / BlockType::DeDuplicationTable2
* - CompressionType
* - DataType
* - DdtSizeType (for DdtHeader2::sizeType)
*/
/**
* \struct DdtHeader
* \brief Header preceding a version 1 (flat) deduplication table body.
*
* Immediately after this header there are \ref entries table records (compressed if \ref compression != None).
* Each table record encodes a pointer using a file byte-offset component and a sector offset inside a block:
* logicalEntryValue = ((uint64_t)fileByteOffset << shift) + sectorOffsetWithinBlock
* where fileByteOffset is measured in bytes (granularity depends on shift) and sectorOffsetWithinBlock is
* relative to the start of the referenced data block. The sector size must be taken from the corresponding
* data block(s) (see BlockHeader::sectorSize) or higher-level metadata.
*
* Invariants:
* - cmpLength == length if compression == CompressionType::None
* - length % (entrySize) == 0 after decompression (implementation-defined entry size)
* - entries * entrySize == length
* - entries > 0 implies length > 0
*/
typedef struct DdtHeader
{
/**Identifier, <see cref="BlockType.DeDuplicationTable" /> */
uint32_t identifier;
/**Type of data pointed by this DDT */
uint16_t type;
/**Compression algorithm used to compress the DDT */
uint16_t compression;
/**Each entry is ((uint8_t offset in file) &lt;&lt; shift) + (sector offset in block) */
uint8_t shift;
/**How many entries are in the table */
uint64_t entries;
/**Compressed length for the DDT */
uint64_t cmpLength;
/**Uncompressed length for the DDT */
uint64_t length;
/**CRC64-ECMA of the compressed DDT */
uint64_t cmpCrc64;
/**CRC64-ECMA of the uncompressed DDT */
uint64_t crc64;
uint32_t identifier; ///< Block identifier, must be BlockType::DeDuplicationTable.
uint16_t type; ///< Data classification (\ref DataType) for sectors referenced by this table.
uint16_t compression; ///< Compression algorithm for the table body (\ref CompressionType).
uint8_t shift; ///< Left shift applied to per-entry file offset component forming logicalEntryValue.
uint64_t entries; ///< Number of deduplication entries contained in (uncompressed) table.
uint64_t cmpLength; ///< Size in bytes of compressed entries payload.
uint64_t length; ///< Size in bytes of uncompressed entries payload.
uint64_t cmpCrc64; ///< CRC64-ECMA of the compressed payload.
uint64_t crc64; ///< CRC64-ECMA of the uncompressed payload.
} DdtHeader;
/**
* \struct DdtHeader2
* \brief Header preceding a version 2 hierarchical deduplication table.
*
* Version 2 introduces multi-level tables to efficiently address very large images by subdividing
* the logical address space. Tables at higher levels partition regions; leaves contain direct
* (block, sector) entry mappings. Navigation uses \ref tableLevel (0 = root) and \ref levels (total depth).
*
* Logical sector (LBA) mapping (actual implementation in decode_ddt_{single,multi}_level_v2):
* 1. Let L be the requested logical sector (can be negative externally). Internal index I = L + negative.
* Valid range: 0 <= I < blocks. (Total user-data sectors often = blocks - negative - overflow.)
* 2. If tableShift == 0 (single-level): entryIndex = I.
* Else (multi-level):
* itemsPerPrimaryEntry = 1 << tableShift
* primaryIndex = I / itemsPerPrimaryEntry
* secondaryIndex = I % itemsPerPrimaryEntry
* The primary table entry at primaryIndex yields a secondary DDT file offset (scaled by 2^blockAlignmentShift),
* whose table entries are then indexed by secondaryIndex.
* 3. Read raw DDT entry value E (16-bit if sizeType == SmallDdtSizeType, 32-bit if BigDdtSizeType).
* 4. If E == 0: sector_status = SectorStatusNotDumped; offset=block_offset=0.
* Otherwise extract:
* statusBits = E >> 12 (small) or E >> 28 (big)
* baseBits = E & 0x0FFF (small) or E & 0x0FFFFFFF (big)
* sectorOffsetWithinBlock = baseBits & ((1 << dataShift) - 1)
* blockIndex = baseBits >> dataShift
* block_offset (bytes) = blockIndex << blockAlignmentShift
* offset (sector units inside block) = sectorOffsetWithinBlock
* 5. The consumer combines block_offset, offset, and the (external) logical sector size to locate data.
*
* Field roles:
* - negative: Count of leading negative LBAs supported; added to L to form internal index.
* - overflow: Count of trailing LBAs beyond the user area upper bound that are still dumped and have
* normal DDT entries (e.g. optical disc lead-out). Symmetrical to 'negative' on the high end.
* - start: For secondary tables, base internal index covered (written when creating new tables). Current decoding
* logic does not consult this field (future-proof placeholder).
* - blockAlignmentShift: log2 alignment of stored data blocks (byte granularity of block_offset).
* - dataShift: log2 of the number of addressable sectors per increment of blockIndex bitfield unit.
* - tableShift: log2 of number of logical sectors covered by a single primary-table pointer (multi-level only).
* - sizeType: Selects entry width (small=16b, big=32b) impacting available bits for blockIndex+offset.
*
* Notes & current limitations:
* - User area sector count = blocks - negative - overflow.
* - Valid external LBA range exposed by the image = [-negative, (blocks - negative - 1)].
* * Negative range: [-negative, -1]
* * User area range: [0, (blocks - negative - overflow - 1)]
* * Overflow range: [(blocks - negative - overflow), (blocks - negative - 1)]
* - Both negative and overflow ranges are stored with normal DDT entries (if present), enabling complete
* reproduction of lead-in / lead-out or similar padding regions.
* - start is presently ignored during decoding; integrity checks against it may be added in future revisions.
* - No masking is applied to I besides array bounds; callers must ensure L is within representable range.
*
* Example (Compact Disc):
* Disc has 360000 user sectors. Lead-in captured as 15000 negative sectors and lead-out as 15000 overflow sectors.
* negative = 15000
* overflow = 15000
* user sectors = 360000
* blocks (internal span) = negative + user + overflow = 390000
* External LBA spans: -15000 .. 374999
* * Negative: -15000 .. -1 (15000 sectors)
* * User: 0 .. 359999 (360000 sectors)
* * Overflow: 360000 .. 374999 (15000 sectors)
* Internal index I for any external L is I = L + negative.
* User area sector count reported to callers (ctx->imageInfo.Sectors) = blocks - negative - overflow = 360000.
*/
typedef struct DdtHeader2
{
/**Identifier, <see cref="BlockType.DeDuplicationTable" /> */
uint32_t identifier;
/**Type of data pointed by this DDT */
uint16_t type;
/**Compression algorithm used to compress the DDT */
uint16_t compression;
/**How many levels of subtables are present */
uint8_t levels;
/**Which level this table belongs to */
uint8_t tableLevel;
/**Pointer to absolute byte offset in file where the previous level table is located */
uint64_t previousLevelOffset;
/**Negative displacement of LBAs */
uint16_t negative;
/**Number of blocks in media */
uint64_t blocks;
/**Positive overflow displacement of LBAs */
uint16_t overflow;
/**First LBA contained in this table */
uint64_t start;
/**Block alignment boundaries */
uint8_t blockAlignmentShift;
/**Data shift */
uint8_t dataShift;
/**Table shift */
uint8_t tableShift;
/**Size type */
uint8_t sizeType;
/**Entries in this table */
uint64_t entries;
/**Compressed length for the DDT */
uint64_t cmpLength;
/**Uncompressed length for the DDT */
uint64_t length;
/**CRC64-ECMA of the compressed DDT */
uint64_t cmpCrc64;
/**CRC64-ECMA of the uncompressed DDT */
uint64_t crc64;
uint32_t identifier; ///< Block identifier, must be BlockType::DeDuplicationTable2.
uint16_t type; ///< Data classification (\ref DataType) for sectors referenced by this table.
uint16_t compression; ///< Compression algorithm for this table body (\ref CompressionType).
uint8_t levels; ///< Total number of hierarchy levels (root depth); > 0.
uint8_t tableLevel; ///< Zero-based level index of this table (0 = root, increases downward).
uint64_t previousLevelOffset; ///< Absolute byte offset of the parent (previous) level table; 0 if root.
uint16_t negative; ///< Leading negative LBA count; added to external L to build internal index.
uint64_t blocks; ///< Total internal span (negative + usable + overflow) in logical sectors.
uint16_t overflow; ///< Trailing dumped sectors beyond user area (overflow range), still mapped with entries.
uint64_t
start; ///< Base internal index covered by this table (used for secondary tables; currently informational).
uint8_t blockAlignmentShift; ///< 2^blockAlignmentShift = block alignment boundary in bytes.
uint8_t dataShift; ///< 2^dataShift = sectors represented per increment in blockIndex field.
uint8_t tableShift; ///< 2^tableShift = number of logical sectors per primary entry (multi-level only; 0 for
///< single-level or secondary tables).
uint8_t sizeType; ///< Entry size variant (\ref DdtSizeType) controlling width of E.
uint64_t entries; ///< Number of entries contained in (uncompressed) table payload.
uint64_t cmpLength; ///< Compressed payload size in bytes.
uint64_t length; ///< Uncompressed payload size in bytes.
uint64_t cmpCrc64; ///< CRC64-ECMA of compressed table payload.
uint64_t crc64; ///< CRC64-ECMA of uncompressed table payload.
} DdtHeader2;
#pragma pack(pop)

View File

@@ -19,42 +19,109 @@
#ifndef LIBAARUFORMAT_DUMP_H
#define LIBAARUFORMAT_DUMP_H
#include <stdint.h> /* Fixed-width integer types for ondisk packed structures */
#pragma pack(push, 1)
/**Dump hardware block, contains a list of hardware used to dump the media on this image */
typedef struct DumpHardwareHeader {
/**Identifier, <see cref="BlockType.DumpHardwareBlock" /> */
uint32_t identifier;
/**How many entries follow this header */
uint16_t entries;
/**Size of the whole block, not including this header, in uint8_ts */
uint32_t length;
/**CRC64-ECMA of the block */
uint64_t crc64;
/** \file aaruformat/structs/dump.h
* \brief Packed on-disk structures describing hardware and software used during image acquisition.
*
* A Dump Hardware block (identifier = BlockType::DumpHardwareBlock) records one or more dump "environments"
* typically combinations of a physical device (drive, controller, adapter) and the software stack that
* performed the read operation. Each environment is represented by a \ref DumpHardwareEntry followed by a
* sequence of UTF-8 strings and an optional array of extent ranges (\ref DumpExtent, defined in context.h) that
* delimit portions of the medium this environment contributed to.
*
* Binary layout (little-endian, packed, all multi-byte integers LE):
*
* DumpHardwareHeader (sizeof = 18 bytes)
* identifier (4) -> BlockType::DumpHardwareBlock
* entries (2) -> number of following hardware entries
* length (4) -> total bytes of payload that follow this header
* crc64 (8) -> CRC64-ECMA of the payload bytes
*
* Repeated for i in [0, entries):
* DumpHardwareEntry (36 bytes)
* manufacturerLength (4)
* modelLength (4)
* revisionLength (4)
* firmwareLength (4)
* serialLength (4)
* softwareNameLength (4)
* softwareVersionLength (4)
* softwareOperatingSystemLength (4)
* extents (4) -> number of DumpExtent structs after the strings
*
* Variable-length UTF-8 strings (not NUL-terminated on disk) appear immediately after the entry, in the
* exact order of the length fields above; each string is present only if its length > 0. The reader allocates
* an extra byte to append '\0' for in-memory convenience.
*
* Array of 'extents' DumpExtent structures (each 16 bytes: start, end) follows the strings if extents > 0.
* The semantic of each extent is an inclusive [start, end] logical sector (or unit) range contributed by
* this hardware/software combination.
*
* CRC semantics:
* - crc64 covers exactly 'length' bytes immediately following the header.
* - For legacy images with header.imageMajorVersion <= AARUF_VERSION_V1 the original C# writer produced a
* byte-swapped CRC; the library compensates internally (see process_dumphw_block()).
*
* Invariants / validation recommendations:
* - identifier == BlockType::DumpHardwareBlock
* - Accumulated size of all (entry + strings + extents arrays) == length
* - All length fields are trusted only after bounds checking against remaining payload bytes
* - Strings are raw UTF-8 data with no implicit terminator
* - extents * sizeof(DumpExtent) fits inside remaining payload
*
* Memory management notes (runtime library):
* - Each string is malloc'ed with +1 byte for terminator during processing.
* - Extents array is malloc'ed per entry when extents > 0.
* - See aaruformatContext::dumpHardwareEntriesWithData for owning pointers.
*
* \warning Structures are packed; never rely on natural alignment when mapping from a byte buffer.
* \see DumpHardwareHeader
* \see DumpHardwareEntry
* \see DumpExtent (in context.h)
* \see BlockType
*/
/** \struct DumpHardwareHeader
* \brief Header that precedes a sequence of dump hardware entries and their variable-length payload.
*
* Packed size is 18 bytes (4 + 2 + 4 + 8) inside the surrounding `#pragma pack(push, 1)` region; the 16-bit
* \ref entries field leaves \ref length and \ref crc64 at offsets 6 and 10, i.e. not naturally aligned.
*/
typedef struct DumpHardwareHeader
{
uint32_t identifier; ///< Block identifier, must be BlockType::DumpHardwareBlock.
uint16_t entries; ///< Number of DumpHardwareEntry records that follow.
uint32_t length; ///< Total payload bytes after this header (sum of entries, strings, and extents arrays).
uint64_t crc64; ///< CRC64-ECMA of the payload (byte-swapped for legacy v1 images, handled automatically).
} DumpHardwareHeader;
/**Dump hardware entry, contains length of strings that follow, in the same order as the length, this structure */
typedef struct DumpHardwareEntry {
/**Length of UTF-8 manufacturer string */
uint32_t manufacturerLength;
/**Length of UTF-8 model string */
uint32_t modelLength;
/**Length of UTF-8 revision string */
uint32_t revisionLength;
/**Length of UTF-8 firmware version string */
uint32_t firmwareLength;
/**Length of UTF-8 serial string */
uint32_t serialLength;
/**Length of UTF-8 software name string */
uint32_t softwareNameLength;
/**Length of UTF-8 software version string */
uint32_t softwareVersionLength;
/**Length of UTF-8 software operating system string */
uint32_t softwareOperatingSystemLength;
/**How many extents are after the strings */
uint32_t extents;
/** \struct DumpHardwareEntry
* \brief Per-environment length table describing subsequent UTF-8 strings and optional extent array.
*
* Immediately after this structure the variable-length UTF-8 strings appear in the documented order, each
* present only if its corresponding length is non-zero. No padding is present between strings. When all
* strings are consumed, an array of \ref DumpExtent follows if \ref extents > 0.
*
* All length fields measure bytes (not characters) and exclude any in-memory NUL terminator added by the reader.
*
* Packed size is 36 bytes (9 x uint32_t).
*
* Typical semantics:
* - manufacturer/model/revision/firmware/serial identify the hardware device.
* - softwareName/softwareVersion/softwareOperatingSystem identify the acquisition software environment.
* - extents list which logical ranges this environment actually dumped (useful for multi-device composites).
*/
typedef struct DumpHardwareEntry
{
uint32_t manufacturerLength; ///< Length in bytes of manufacturer UTF-8 string.
uint32_t modelLength; ///< Length in bytes of model UTF-8 string.
uint32_t revisionLength; ///< Length in bytes of revision / hardware revision string.
uint32_t firmwareLength; ///< Length in bytes of firmware version string.
uint32_t serialLength; ///< Length in bytes of device serial number string.
uint32_t softwareNameLength; ///< Length in bytes of dumping software name string.
uint32_t softwareVersionLength; ///< Length in bytes of dumping software version string.
uint32_t softwareOperatingSystemLength; ///< Length in bytes of host operating system string.
uint32_t extents; ///< Number of DumpExtent records following the strings (0 = none).
} DumpHardwareEntry;
#pragma pack(pop)
#endif //LIBAARUFORMAT_DUMP_H
#endif // LIBAARUFORMAT_DUMP_H

View File

@@ -19,73 +19,111 @@
#ifndef LIBAARUFORMAT_HEADER_H
#define LIBAARUFORMAT_HEADER_H
#define AARU_HEADER_APP_NAME_LEN 64
#define GUID_SIZE 16
/** \file aaruformat/structs/header.h
* \brief On-disk container header structures (v1 and v2) for Aaru images.
*
* These packed headers appear at the very beginning (offset 0) of every Aaru image file and
* advertise container format version, creator application, indexing offset and optional extended
* feature capability bitfields (v2+). All multi-byte integers are little-endian. Strings stored
* in the fixed-size application field are UTF-16LE and zero padded (not necessarily NUL-terminated
* if fully filled). The GUID field (v2) allows derivative / child images to reference an origin.
*
* Version progression:
* - v1: \ref AaruHeader (no GUID, no alignment or shift metadata, no feature bitfields).
* - v2: \ref AaruHeaderV2 introduces GUID, block/data/table shift hints (mirroring DDT metadata),
* and three 64-bit feature bitmaps to negotiate reader/writer compatibility.
*
* Compatibility handling (recommended logic for consumers):
* 1. If any bit set in featureIncompatible is not implemented by the reader: abort (cannot safely read/write).
* 2. Else if any bit set in featureCompatibleRo is not implemented: allow read-only operations.
* 3. Bits only present in featureCompatible but not implemented MAY be ignored for both read/write while
* still preserving round-trip capability (writer should not clear unknown bits when re-saving).
*
* Alignment & shift semantics (duplicated here for quick reference, see DdtHeader2 for full details):
* - blockAlignmentShift: underlying blocks are aligned to 2^blockAlignmentShift bytes.
* - dataShift: data pointer / DDT entry low bits encode offsets modulo 2^dataShift sectors/items.
* - tableShift: primary DDT entries span 2^tableShift logical sectors (0 implies single-level tables).
*
* Invariants:
* - identifier == AARU_MAGIC (external constant; not defined here).
* - For v1: sizeof(AaruHeader) exact and indexOffset > 0 (indexOffset == 0 => corrupt/unreadable image).
* - For v2: sizeof(AaruHeaderV2) exact; indexOffset > 0; blockAlignmentShift, dataShift, tableShift within
* sane bounds (e.g. < 63). Zero is permissible only for the shift fields (not for indexOffset).
*
* Security / robustness considerations:
* - Always bounds-check indexOffset against file size before seeking.
* - Treat application field as untrusted UTF-16LE; validate surrogate pairs if necessary.
* - Unknown feature bits MUST be preserved if a file is rewritten to avoid capability loss.
*/
#define AARU_HEADER_APP_NAME_LEN 64 /**< Size in bytes (UTF-16LE) of application name field (32 UTF-16 code units). */
#define GUID_SIZE 16 /**< Size in bytes of GUID / UUID-like binary identifier. */
#pragma pack(push, 1)
/**Header, at start of file */
typedef struct AaruHeader {
/**Header identifier, <see cref="AARU_MAGIC" /> */
uint64_t identifier;
/**UTF-16LE name of the application that created the image */
uint8_t application[AARU_HEADER_APP_NAME_LEN];
/**Image format major version. A new major version means a possibly incompatible change of format */
uint8_t imageMajorVersion;
/**Image format minor version. A new minor version indicates a compatible change of format */
uint8_t imageMinorVersion;
/**Major version of the application that created the image */
uint8_t applicationMajorVersion;
/**Minor version of the application that created the image */
uint8_t applicationMinorVersion;
/**Type of media contained on image */
uint32_t mediaType;
/**Offset to index */
uint64_t indexOffset;
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
int64_t creationTime;
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
int64_t lastWrittenTime;
/** \struct AaruHeader
* \brief Version 1 container header placed at offset 0 for legacy / initial format.
*
* Packed size is 104 bytes (8 + 64 + 4 x 1 + 4 + 8 + 8 + 8) inside the surrounding `#pragma pack(push, 1)` region.
*
* Field summary:
* - identifier: magic signature (AARU_MAGIC) identifying the container.
* - application: UTF-16LE creator application name (fixed 64 bytes, zero padded).
* - imageMajorVersion / imageMinorVersion: container format version of the file itself (not the app).
* - applicationMajorVersion / applicationMinorVersion: version of the creating application.
* - mediaType: media type enumeration (\ref MediaType).
* - indexOffset: byte offset to the first index block (must be > 0).
* - creationTime / lastWrittenTime: 64-bit Windows FILETIME timestamps (100 ns intervals since 1601-01-01 UTC).
*/
typedef struct AaruHeader
{
uint64_t identifier; ///< File magic (AARU_MAGIC).
uint8_t application[AARU_HEADER_APP_NAME_LEN]; ///< UTF-16LE creator application name (fixed-size buffer).
uint8_t imageMajorVersion; ///< Container format major version (incompatible changes when incremented).
uint8_t imageMinorVersion; ///< Container format minor version (backward compatible evolutions).
uint8_t applicationMajorVersion; ///< Creator application major version.
uint8_t applicationMinorVersion; ///< Creator application minor / patch version.
uint32_t mediaType; ///< Media type enumeration (value from \ref MediaType).
uint64_t indexOffset; ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
int64_t creationTime; ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
int64_t lastWrittenTime; ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
} AaruHeader;
/**Header, at start of file */
typedef struct AaruHeaderV2 {
/**Header identifier, see AARU_MAGIC */
uint64_t identifier;
/**UTF-16LE name of the application that created the image */
uint8_t application[AARU_HEADER_APP_NAME_LEN];
/**Image format major version. A new major version means a possibly incompatible change of format */
uint8_t imageMajorVersion;
/**Image format minor version. A new minor version indicates a compatible change of format */
uint8_t imageMinorVersion;
/**Major version of the application that created the image */
uint8_t applicationMajorVersion;
/**Minor version of the application that created the image */
uint8_t applicationMinorVersion;
/**Type of media contained on image */
uint32_t mediaType;
/**Offset to index */
uint64_t indexOffset;
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
int64_t creationTime;
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
int64_t lastWrittenTime;
/**Unique identifier that allows children images to recognize and find this image.*/
uint8_t guid[GUID_SIZE];
/**Block alignment shift. All blocks in the image are aligned at 2 << blockAlignmentShift bytes */
uint8_t blockAlignmentShift;
/**Data shift. All data blocks in the image contain 2 << dataShift items at most */
uint8_t dataShift;
/**Table shift. All deduplication tables in the image use this shift to calculate the position of an item */
uint8_t tableShift;
/**Features used in this image that if unsupported are still compatible for reading and writing implementations */
uint64_t featureCompatible;
/**Features used in this image that if unsupported are still compatible for reading implementations but not for writing */
uint64_t featureCompatibleRo;
/**Featured used in this image that if unsupported prevent reading or writing the image*/
uint64_t featureIncompatible;
/** \struct AaruHeaderV2
* \brief Version 2 container header with GUID, alignment shifts, and feature negotiation bitmaps.
*
* The first ten fields (identifier through lastWrittenTime, 104 bytes) have the same types and order as
* \ref AaruHeader; the v2 additions follow them. Packed size is 147 bytes (104 + 16 + 3 x 1 + 3 x 8) inside
* the surrounding `#pragma pack(push, 1)` region.
*
* Additions over v1:
* - guid: stable 128-bit identifier enabling linkage by derivative images.
* - blockAlignmentShift / dataShift / tableShift: global structural hints copied into data & DDT blocks.
* - featureCompatible / featureCompatibleRo / featureIncompatible: capability bitmasks.
*
* Feature bitmask semantics:
* - featureCompatible: Optional features; absence of implementation should not impact R/W correctness.
* - featureCompatibleRo: If unimplemented, image MAY be opened read-only.
* - featureIncompatible: If any bit unimplemented, image MUST NOT be opened (prevent misinterpretation).
*
* Readers should AND their supported bit set with the header masks to decide access level (see file
* documentation). Writers must preserve unknown bits when saving an existing image.
*/
typedef struct AaruHeaderV2
{
uint64_t identifier; ///< File magic (AARU_MAGIC).
uint8_t application[AARU_HEADER_APP_NAME_LEN]; ///< UTF-16LE creator application name (fixed 64 bytes).
uint8_t imageMajorVersion; ///< Container format major version.
uint8_t imageMinorVersion; ///< Container format minor version.
uint8_t applicationMajorVersion; ///< Creator application major version.
uint8_t applicationMinorVersion; ///< Creator application minor / patch version.
uint32_t mediaType; ///< Media type enumeration (value from \ref MediaType).
uint64_t indexOffset; ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
int64_t creationTime; ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
int64_t lastWrittenTime; ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
uint8_t guid[GUID_SIZE]; ///< 128-bit image GUID (binary, not text); stable across children.
uint8_t blockAlignmentShift; ///< log2 block alignment (block size alignment = 2^blockAlignmentShift bytes).
uint8_t dataShift; ///< log2 sectors/items per block-index increment in DDT entries (2^dataShift).
uint8_t tableShift; ///< log2 sectors spanned by each primary DDT entry (0 = single-level).
uint64_t featureCompatible; ///< Feature bits: unimplemented bits are ignorable (still R/W safe).
uint64_t featureCompatibleRo; ///< Feature bits: unimplemented -> degrade to read-only access.
uint64_t featureIncompatible; ///< Feature bits: any unimplemented -> abort (cannot open safely).
} AaruHeaderV2;
#pragma pack(pop)
#endif //LIBAARUFORMAT_HEADER_H
#endif // LIBAARUFORMAT_HEADER_H

View File

@@ -21,50 +21,95 @@
#pragma pack(push, 1)
/**Header for the index, followed by entries */
/** \file aaruformat/structs/index.h
* \brief On-disk index block header and entry structures (versions 1, 2 and 3).
*
* The index provides a directory of all blocks contained in an Aaru image. Each index block starts with
* a versioned header (IndexHeader / IndexHeader2 / IndexHeader3) followed by a contiguous array of
* fixed-size \ref IndexEntry records. Version 3 adds support for hierarchical (chained / nested) subindexes.
*
* Version mapping by block identifier (see \ref BlockType):
* - IndexBlock (v1) -> \ref IndexHeader with a 16-bit entry count, followed by the entries.
* - IndexBlock2 (v2) -> \ref IndexHeader2 with a 64-bit entry count, followed by the entries.
* - IndexBlock3 (v3) -> \ref IndexHeader3 with optional hierarchical subindex references.
*
* CRC coverage & endianness:
* - The crc64 field stores a CRC64-ECMA over the entries array ONLY (header bytes are excluded).
* - For images with imageMajorVersion <= AARUF_VERSION_V1 a legacy writer byte-swapped the CRC; readers
* compensate (see verify_index_v1/v2/v3). The value in the header remains whatever was originally written.
*
* Hierarchical (v3) behavior:
* - Entries whose blockType == IndexBlock3 refer to subindex blocks; readers recursively load and flatten.
* - IndexHeader3::previous can point to a preceding index segment (for append / incremental scenarios) or 0.
* - CRC of the main index does NOT cover subindex contents; each subindex has its own header + CRC.
*
* Invariants / validation recommendations:
* - identifier must equal the expected BlockType variant for that version.
* - entries > 0 implies the entries array byte size == entries * sizeof(IndexEntry).
* - crc64 must match recomputed CRC64( entries array ) (after legacy byte swap handling if required).
* - For v3, if previous != 0 it should point to another IndexBlock3 header (optional best-effort check).
*
* Notes:
* - Structures are packed (1-byte alignment). All multi-byte integers are little-endian on disk.
* - The index does not store per-entry CRC; integrity relies on each individual block's own CRC plus the index CRC.
* - dataType in \ref IndexEntry is meaningful only for block types that carry typed data (e.g. DataBlock,
* DumpHardwareBlock, etc.).
*
* See also: verify_index_v1(), verify_index_v2(), verify_index_v3() for integrity procedures.
*/
/** \struct IndexHeader
* \brief Index header (version 1) for legacy images (identifier == IndexBlock).
*
* Uses a 16-bit entry counter limiting the number of indexable blocks in v1 (at most 65,535 entries).
* Packed size of the three-member header (identifier, entries, crc64): 14 bytes (4 + 2 + 8) under the
* enclosing #pragma pack(push, 1).
*/
typedef struct IndexHeader
{
/**Identifier, <see cref="BlockType.Index" /> */
uint32_t identifier;
/**How many entries follow this header */
uint16_t entries;
/**CRC64-ECMA of the index */
uint64_t crc64;
uint32_t identifier; ///< Block identifier (must be BlockType::IndexBlock).
uint16_t entries; ///< Number of \ref IndexEntry records that follow immediately.
uint64_t crc64; ///< CRC64-ECMA of the entries array (legacy byte-swapped for early images).
} IndexHeader;
/**Header for the index, followed by entries */
/** \struct IndexHeader2
* \brief Index header (version 2) with 64-bit entry counter (identifier == IndexBlock2).
*
* Enlarges the entry count field to 64 bits for large images; otherwise structurally identical to v1.
* Packed size of the three-member header (identifier, entries, crc64): 20 bytes (4 + 8 + 8) under the
* enclosing #pragma pack(push, 1).
*/
typedef struct IndexHeader2
{
/**Identifier, <see cref="BlockType.Index" /> */
uint32_t identifier;
/**How many entries follow this header */
uint64_t entries;
/**CRC64-ECMA of the index */
uint64_t crc64;
uint32_t identifier; ///< Block identifier (must be BlockType::IndexBlock2).
uint64_t entries; ///< Number of \ref IndexEntry records that follow immediately.
uint64_t crc64; ///< CRC64-ECMA of the entries array (legacy byte-swapped rule still applies for old versions).
} IndexHeader2;
/**Header for the index, followed by entries */
/** \struct IndexHeader3
* \brief Index header (version 3) adding hierarchical chaining (identifier == IndexBlock3).
*
* Supports flattened hierarchical indexes: entries referencing additional IndexBlock3 subindexes.
* The 'previous' pointer allows chaining earlier index segments (e.g., incremental append) enabling
* cumulative discovery without rewriting earlier headers.
*
* Packed size of the four-member header (identifier, entries, crc64, previous): 28 bytes (4 + 8 + 8 + 8)
* under the enclosing #pragma pack(push, 1).
*/
typedef struct IndexHeader3
{
/**Identifier, <see cref="BlockType.Index" /> */
uint32_t identifier;
/**How many entries follow this header */
uint64_t entries;
/**CRC64-ECMA of the index */
uint64_t crc64;
/**Pointer to the previous index header */
uint64_t previous;
uint32_t identifier; ///< Block identifier (must be BlockType::IndexBlock3).
uint64_t entries; ///< Number of \ref IndexEntry records that follow in this (sub)index block.
uint64_t crc64; ///< CRC64-ECMA of the local entries array (does NOT cover subindexes or previous chains).
uint64_t previous; ///< File offset of a previous IndexBlock3 header (0 if none / root segment).
} IndexHeader3;
/**Index entry */
/** \struct IndexEntry
* \brief Single index entry describing a block's type, (optional) data classification, and file offset.
*
* Semantics by blockType (see \ref BlockType):
* - DataBlock / GeometryBlock / ChecksumBlock / etc.: dataType conveys specific stored data category (\ref DataType).
* - Deduplication (DDT) or Index blocks: dataType may be ignored or set to a sentinel.
* - IndexBlock3: this entry refers to a subindex; offset points to another IndexHeader3.
*
* Packed size of one entry (blockType, dataType, offset): 14 bytes (4 + 2 + 8) under the enclosing
* #pragma pack(push, 1), so an index with N entries carries N * 14 bytes after its header.
*/
typedef struct IndexEntry
{
/**Type of item pointed by this entry */
uint32_t blockType;
/**Type of data contained by the block pointed by this entry */
uint16_t dataType;
/**Offset in file where item is stored */
uint64_t offset;
uint32_t blockType; ///< Block identifier of the referenced block (value from \ref BlockType).
uint16_t dataType; ///< Data classification (value from \ref DataType) or unused for untyped blocks.
uint64_t offset; ///< Absolute byte offset in the image where the referenced block header begins.
} IndexEntry;
#pragma pack(pop)

View File

@@ -21,73 +21,95 @@
#pragma pack(push, 1)
/**Metadata block, contains metadata */
typedef struct MetadataBlockHeader {
/**Identifier, <see cref="BlockType.MetadataBlock" /> */
uint32_t identifier;
/**Size in uint8_ts of this whole metadata block */
uint32_t blockSize;
/**Sequence of media set this media belongs to */
int32_t mediaSequence;
/**Total number of media on the media set this media belongs to */
int32_t lastMediaSequence;
/**Offset to start of creator string from start of this block */
uint32_t creatorOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t creatorLength;
/**Offset to start of creator string from start of this block */
uint32_t commentsOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t commentsLength;
/**Offset to start of creator string from start of this block */
uint32_t mediaTitleOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t mediaTitleLength;
/**Offset to start of creator string from start of this block */
uint32_t mediaManufacturerOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t mediaManufacturerLength;
/**Offset to start of creator string from start of this block */
uint32_t mediaModelOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t mediaModelLength;
/**Offset to start of creator string from start of this block */
uint32_t mediaSerialNumberOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t mediaSerialNumberLength;
/**Offset to start of creator string from start of this block */
uint32_t mediaBarcodeOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t mediaBarcodeLength;
/**Offset to start of creator string from start of this block */
uint32_t mediaPartNumberOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t mediaPartNumberLength;
/**Offset to start of creator string from start of this block */
uint32_t driveManufacturerOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t driveManufacturerLength;
/**Offset to start of creator string from start of this block */
uint32_t driveModelOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t driveModelLength;
/**Offset to start of creator string from start of this block */
uint32_t driveSerialNumberOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t driveSerialNumberLength;
/**Offset to start of creator string from start of this block */
uint32_t driveFirmwareRevisionOffset;
/**Length in uint8_ts of the null-terminated UTF-16LE creator string */
uint32_t driveFirmwareRevisionLength;
/** \file aaruformat/structs/metadata.h
* \brief Packed on-disk metadata block headers for descriptive strings and CICM XML (if present).
*
* Two metadata-related block header layouts are defined:
* - \ref MetadataBlockHeader (BlockType::MetadataBlock): offsets + lengths for several UTF-16LE strings.
* - \ref CicmMetadataBlock (BlockType::CicmBlock): length of embedded CICM XML metadata payload.
*
* All multi-byte integers are little-endian. Structures are packed (1-byte alignment). All textual fields
* referenced by offsets are UTF-16LE, null-terminated (0x0000). Length fields include the terminating
* null (i.e. length >= 2 and an even number). Offsets are relative to the start of the corresponding block
* header (byte 0 = first byte of the header). No padding is implicitly added between strings; producers
* may pack them tightly or align them manually (alignment not required by the specification).
*
* Metadata block layout (conceptual):
* MetadataBlockHeader (fixed size)
* <variable region holding each present UTF-16LE string in any order chosen by the writer>
*
* Invariants / validation recommendations for MetadataBlockHeader:
* - identifier == BlockType::MetadataBlock
* - blockSize >= sizeof(MetadataBlockHeader)
* - For every (offset,length) pair where length > 0:
* * offset >= sizeof(MetadataBlockHeader)
* * offset + length <= blockSize
* * length % 2 == 0
* * The 16-bit code unit at (offset + length - 2) == 0x0000 (null terminator)
* - mediaSequence >= 0 and lastMediaSequence >= 0; if lastMediaSequence > 0 then 0 <= mediaSequence <=
* lastMediaSequence (producers may number media 0-based or 1-based, so equality is valid in a 1-based set)
*
* CICM metadata block layout:
* CicmMetadataBlock (header)
* <length bytes of XML text payload (typically UTF-8 encoded; not null-terminated)>
*
* NOTE: The library code reading these blocks must not assume strings are present; a zero length means the
* corresponding field is omitted. Offsets for omitted fields MAY be zero or arbitrary; readers should skip them
* whenever length == 0.
*/
/** \struct MetadataBlockHeader
* \brief Header for a metadata block containing offsets and lengths to UTF-16LE descriptive strings.
*
* Descriptive fields (all optional): creator, comments, media title/manufacturer/model/serial/barcode/part number,
* drive manufacturer/model/serial/firmware revision. Strings can be used to describe both physical medium and
* acquisition hardware. Length values include the UTF-16LE null terminator (two zero bytes).
*
* Packed size: 112 bytes (four 32-bit leading fields + 12 offset/length pairs of 32 bits each) under the
* enclosing #pragma pack(push, 1).
*/
typedef struct MetadataBlockHeader
{
uint32_t identifier; ///< Block identifier, must be BlockType::MetadataBlock.
uint32_t blockSize; ///< Total size in bytes of the entire metadata block (header + strings).
int32_t mediaSequence; ///< Sequence number within a multi-disc / multi-volume set (0-based or 1-based as
///< producer defines).
int32_t lastMediaSequence; ///< Total number of media in the set; 0 or 1 if single item.
uint32_t creatorOffset; ///< Offset to UTF-16LE creator string (or undefined if creatorLength==0).
uint32_t creatorLength; ///< Length in bytes (including null) of creator string (0 if absent).
uint32_t commentsOffset; ///< Offset to UTF-16LE comments string.
uint32_t commentsLength; ///< Length in bytes (including null) of comments string.
uint32_t mediaTitleOffset; ///< Offset to UTF-16LE media title string.
uint32_t mediaTitleLength; ///< Length in bytes (including null) of media title string.
uint32_t mediaManufacturerOffset; ///< Offset to UTF-16LE media manufacturer string.
uint32_t mediaManufacturerLength; ///< Length in bytes (including null) of media manufacturer string.
uint32_t mediaModelOffset; ///< Offset to UTF-16LE media model string.
uint32_t mediaModelLength; ///< Length in bytes (including null) of media model string.
uint32_t mediaSerialNumberOffset; ///< Offset to UTF-16LE media serial number string.
uint32_t mediaSerialNumberLength; ///< Length in bytes (including null) of media serial number string.
uint32_t mediaBarcodeOffset; ///< Offset to UTF-16LE media barcode string.
uint32_t mediaBarcodeLength; ///< Length in bytes (including null) of media barcode string.
uint32_t mediaPartNumberOffset; ///< Offset to UTF-16LE media part number string.
uint32_t mediaPartNumberLength; ///< Length in bytes (including null) of media part number string.
uint32_t driveManufacturerOffset; ///< Offset to UTF-16LE drive manufacturer string.
uint32_t driveManufacturerLength; ///< Length in bytes (including null) of drive manufacturer string.
uint32_t driveModelOffset; ///< Offset to UTF-16LE drive model string.
uint32_t driveModelLength; ///< Length in bytes (including null) of drive model string.
uint32_t driveSerialNumberOffset; ///< Offset to UTF-16LE drive serial number string.
uint32_t driveSerialNumberLength; ///< Length in bytes (including null) of drive serial number string.
uint32_t driveFirmwareRevisionOffset; ///< Offset to UTF-16LE drive firmware revision string.
uint32_t driveFirmwareRevisionLength; ///< Length in bytes (including null) of drive firmware revision string.
} MetadataBlockHeader;
/**Geometry block, contains physical geometry information */
typedef struct CicmMetadataBlock {
/**Identifier, <see cref="BlockType.CicmBlock" /> */
uint32_t identifier;
uint32_t length;
/** \struct CicmMetadataBlock
* \brief Header for a CICM XML metadata block (identifier == BlockType::CicmBlock).
*
* The following 'length' bytes immediately after the header contain the CICM XML payload. Encoding is typically
* UTF-8; the payload is not required to be null-terminated.
*
* Packed size: 8 bytes (4 + 4) under the enclosing #pragma pack(push, 1).
*/
typedef struct CicmMetadataBlock
{
uint32_t identifier; ///< Block identifier, must be BlockType::CicmBlock.
uint32_t length; ///< Length in bytes of the CICM metadata payload that follows.
} CicmMetadataBlock;
#pragma pack(pop)
#endif //LIBAARUFORMAT_METADATA_H
#endif // LIBAARUFORMAT_METADATA_H

View File

@@ -21,36 +21,65 @@
#pragma pack(push, 1)
/**Contains list of optical disc tracks */
typedef struct TracksHeader {
/**Identifier, <see cref="BlockType.TracksBlock" /> */
uint32_t identifier;
/**How many entries follow this header */
uint16_t entries;
/**CRC64-ECMA of the block */
uint64_t crc64;
/** \file aaruformat/structs/optical.h
* \brief On-disk structures describing optical disc tracks (Track list block).
*
* An optical tracks block (identifier == BlockType::TracksBlock) stores a list of \ref TrackEntry
* records describing the logical layout of tracks and sessions for CD/DVD/BD and similar media.
*
* Layout:
* TracksHeader (fixed)
* TrackEntry[ entries ] (array, packed)
*
* CRC semantics:
* - TracksHeader::crc64 is a CRC64-ECMA over the contiguous TrackEntry array ONLY (header excluded).
* - For legacy images (imageMajorVersion <= AARUF_VERSION_V1) a byte swap is applied when verifying.
*
* Field semantics (TrackEntry):
* - sequence: Logical track number (1..99 typical for CD). Values outside that range may encode extras.
* - type: Value from \ref TrackType (Audio, Data, Mode variants, etc.).
* - start / end: Inclusive Logical Block Address (LBA) bounds for the track. end >= start.
* - pregap: Number of sectors of pre-gap *preceding* the track's first user-accessible sector (can be 0 or negative
* if representing lead-in semantics; negative interpretation is implementation-defined).
* - session: Session number starting at 1 for multi-session discs (1 for single session).
* - isrc: 13-byte ISRC (raw code, no terminating null). If fewer significant characters, remaining bytes are 0.
* - flags: Bitmask of track/control flags. Unless otherwise specified, recommended mapping (mirrors CD subchannel Q
* control bits) is: bit0 Pre-emphasis, bit1 Copy permitted, bit2 Data track, bit3 Four-channel audio,
* bits4-7 reserved. Actual semantics may be extended by the format specification.
*
* Invariants / validation recommendations:
* - identifier == BlockType::TracksBlock
* - entries * sizeof(TrackEntry) bytes are present after the header in the block image.
* - 1 <= sequence <= 99 for standard CD tracks (non-conforming values allowed but should be documented).
* - start <= end; pregap >= 0 (if negative pregaps unsupported in implementation).
* - ISRC bytes either all zero (no ISRC) or printable ASCII (A-Z 0-9 -) per ISO 3901 (without hyphen formatting).
*/
/** \struct TracksHeader
* \brief Header for an optical tracks block listing track entries.
*
* Packed size: 14 bytes (4 + 2 + 8) under the enclosing #pragma pack(push, 1). The 16-bit counter caps a
* block at 65,535 TrackEntry records.
*/
typedef struct TracksHeader
{
uint32_t identifier; ///< Block identifier (must be BlockType::TracksBlock).
uint16_t entries; ///< Number of TrackEntry records following this header.
uint64_t crc64; ///< CRC64-ECMA of the TrackEntry array (header excluded, legacy byte-swap for early versions).
} TracksHeader;
/**Optical disc track */
typedef struct TrackEntry {
/**Track sequence */
uint8_t sequence;
/**Track type */
uint8_t type;
/**Track starting LBA */
int64_t start;
/**Track last LBA */
int64_t end;
/**Track pregap in sectors */
int64_t pregap;
/**Track session */
uint8_t session;
/**Track's ISRC in ASCII */
uint8_t isrc[13];
/**Track flags */
uint8_t flags;
/** \struct TrackEntry
* \brief Single optical disc track descriptor (sequence, type, LBAs, session, ISRC, flags).
*
* Packed size: 41 bytes (1 + 1 + 8 + 8 + 8 + 1 + 13 + 1) under the enclosing #pragma pack(push, 1); the
* 64-bit LBA fields therefore sit at unaligned offsets (2, 10, 18).
*/
typedef struct TrackEntry
{
uint8_t sequence; ///< Track number (1..99 typical for CD audio/data). 0 may indicate placeholder/non-standard.
uint8_t type; ///< Track type (value from \ref TrackType).
int64_t start; ///< Inclusive starting LBA of the track.
int64_t end; ///< Inclusive ending LBA of the track.
int64_t pregap; ///< Pre-gap length in sectors preceding track start (0 if none).
uint8_t session; ///< Session number (1-based). 1 for single-session discs.
uint8_t isrc[13]; ///< ISRC raw 13-byte code (no null terminator). All zeros if not present.
uint8_t flags; ///< Control / attribute bitfield (see file documentation for suggested bit mapping).
} TrackEntry;
#pragma pack(pop)
#endif //LIBAARUFORMAT_OPTICAL_H
#endif // LIBAARUFORMAT_OPTICAL_H

View File

@@ -19,19 +19,214 @@
#ifndef LIBAARUFORMAT_OPTIONS_H
#define LIBAARUFORMAT_OPTIONS_H
#include <stdbool.h> // For bool type used in aaru_options.
#include <stdint.h> // For fixed-width integer types.
/** \file aaruformat/structs/options.h
* \brief Image creation / open tuning options structure and related semantics.
*
* The library accepts a semicolon-delimited key=value options string (see parse_options()). Recognized keys:
* compress=true|false Enable/disable block compression (LZMA for data blocks, FLAC for audio tracks).
* deduplicate=true|false If true, identical (duplicate) sectors are stored once (DDT entries point to same
* physical block). If false, duplicates are still tracked in DDT but each occurrence
* is stored independently (no storage savings). DDT itself is always present.
* dictionary=<bytes> LZMA dictionary size in bytes (fallback default 33554432 if 0 or invalid).
* table_shift=<n> DDT v2 table shift (default 9) (items per primary entry = 2^n when multi-level).
* data_shift=<n> Global data shift (default 12). Defines per-block address granularity: the low
* 2^n range encodes the sector (or unit) offset within a block; higher bits combine
* with block_alignment to derive block file offsets. Used by DDT but not limited to it.
* block_alignment=<n> log2 alignment of underlying data blocks (default 9 => 512 bytes) (block size = 2^n).
* md5=true|false Generate MD5 checksum (stored in checksum block if true).
* sha1=true|false Generate SHA-1 checksum.
* sha256=true|false Generate SHA-256 checksum.
* blake3=true|false Generate BLAKE3 checksum (may require build-time support; ignored if unsupported).
* spamsum=true|false Generate SpamSum fuzzy hash.
*
* Defaults (when option string NULL or key omitted):
* compress=true, deduplicate=true, dictionary=33554432, table_shift=9, data_shift=12,
* block_alignment=9, md5=false, sha1=false, sha256=false, blake3=false, spamsum=false.
*
* Validation / normalization done in parse_options():
* - Zero / missing dictionary resets to default 33554432.
* - Zero table_shift resets to 9.
* - Zero data_shift resets to 12.
* - Zero block_alignment resets to 9.
*
* Rationale:
* - table_shift, data_shift and block_alignment mirror fields stored in on-disk headers (see AaruHeaderV2 &
* DdtHeader2); data_shift is a global per-block granularity exponent (not DDT-specific) governing how in-block offsets
* are encoded.
* - compress selects adaptive codec usage: LZMA applied to generic/data blocks, FLAC applied to audio track payloads.
* - deduplicate toggles storage optimization only: the DDT directory is always built for addressing; disabling simply
* forces each sector's content to be written even if already present (useful for forensic byte-for-byte
* duplication).
* - dictionary tunes compression ratio/memory use; large values increase memory footprint.
* - Checksums are optional; enabling multiple increases CPU time at write finalization.
*
* Performance / space trade-offs (deduplicate=false):
* - Significantly larger image size: every repeated sector payload is written again.
* - Higher write I/O and longer creation time for highly redundant sources (e.g., zero-filled regions) compared to
* deduplicate=true, although CPU time spent on duplicate detection/hash lookups is reduced.
* - Potentially simpler post-process forensic validation (physical ordering preserved without logical coalescing).
* - Use when exact physical repetition is more critical than storage efficiency, or to benchmark raw device
* throughput.
* - For typical archival use-cases with large zero / repeated patterns, deduplicate=true markedly reduces footprint.
*
* Approximate in-RAM hash map usage for deduplication (deduplicate=true):
* The on-disk DDT can span many secondary tables, but only the primary table plus a currently loaded secondary (and
* possibly a small cache) reside in memory; their footprint is typically <<5% of total indexed media space and is
* often negligible compared to the hash map used to detect duplicate sectors. Therefore we focus here on the hash /
* lookup structure ("hash_map") memory, not the entire DDT on-disk size.
*
* Worst-case (all sectors unique) per 1 GiB of user data:
* sectors_per_GiB = 2^30 / sector_size
* hash_bytes ≈ sectors_per_GiB * H (H ≈ 16 bytes: 8-byte fingerprint + ~8 bytes map overhead)
*
* Resulting hash_map RAM per GiB (unique sectors):
* +--------------+------------------+------------------------------+
* | Sector size | Sectors / GiB | Hash map (~16 B / sector) |
* +--------------+------------------+------------------------------+
* | 512 bytes | 2,097,152 | ~33.5 MiB (≈32.0-36.0 MiB) |
* | 2048 bytes | 524,288 | ~ 8.0 MiB (≈7.5-8.5 MiB) |
* | 4096 bytes | 262,144 | ~ 4.0 MiB (≈3.8-4.3 MiB) |
* +--------------+------------------+------------------------------+
*
* (Range reflects allocator + load factor variation.)
*
* Targeted projections (hash map only, R=1):
* 2048-byte sectors (~8 MiB per GiB unique)
* Capacity | Hash map (MiB) | Hash map (GiB)
* ---------+---------------+----------------
* 25 GiB | ~200 | 0.20
* 50 GiB | ~400 | 0.39
*
* 512-byte sectors (~34 MiB per GiB unique; using 33.5 MiB for calc)
* Capacity | Hash map (MiB) | Hash map (GiB)
* ---------+---------------+----------------
* 128 GiB | ~4288 | 4.19
* 500 GiB | ~16750 | 16.36
* 1 TiB* | ~34304 | 33.50
* 2 TiB* | ~68608 | 67.00
*
* *TiB = 1024 GiB binary. For decimal TB reduce by ~9% (×0.91), since 10^12 / 2^40 ≈ 0.909.
*
* Duplicate ratio scaling:
* Effective hash RAM ≈ table_value * R, where R = unique_sectors / total_sectors.
* Example: 500 GiB @512 B, R=0.4 ⇒ ~16750 MiB * 0.4 ≈ 6700 MiB (~6.54 GiB).
*
* Quick rule of thumb (hash only):
* hash_bytes_per_GiB ≈ 16 * (2^30 / sector_size) ≈ (17.1799e9 / sector_size) bytes
* → ≈ 33.6 MB = 32 MiB (512 B), 8.4 MB = 8 MiB (2048 B), 4.2 MB = 4 MiB (4096 B) per GiB unique.
*
* Memory planning tip:
* If projected hash_map usage risks exceeding available RAM, consider:
* - Increasing table_shift (reduces simultaneous secondary loads / contention)
* - Lowering data_shift (if practical) to encourage earlier big DDT adoption with fewer unique blocks
* - Segmenting the dump into phases (if workflow permits)
* - Accepting higher duplicate ratio by pre-zero detection or sparse treatment externally.
* - Resuming the dump in multiple passes: each resume rebuilds the hash_map from scratch, so peak RAM still
* matches a single-pass estimate, but average RAM over total wall time can drop if you unload between passes.
*
* NOTE: DDT in-RAM portion (primary + one secondary) usually adds only a few additional MiB even for very large
* images, hence omitted from sizing tables. Include +5% safety margin if extremely tight on memory.
*
* Guidance for table_shift / data_shift selection:
* Let:
* S = total logical sectors expected in image (estimate if unknown).
* T = table_shift (items per primary DDT entry = 2^T when multi-level; 0 => single-level).
* D = data_shift (in-block sector offset span = 2^D).
* BA = block_alignment (bytes) = 2^block_alignment.
* SS = sector size (bytes).
*
* 1. data_shift constraints:
* - For SMALL DDT entries (12 payload bits after status): D must satisfy 0 < D < 12 and (12 - D) >= 1 so that at
* least one bit remains for block index. Practical range for small DDT: 6..10 (leaves 2+ bits for block index).
* - For BIG DDT entries (28 payload bits after status): D may be larger (up to 27) but values >16 rarely useful.
* - Effective address granularity inside a block = min(2^D * SS, physical block span implied by BA).
* - Choosing D too large wastes bits (larger offset range than block actually contains) and reduces the number of
* block index bits within a small entry, potentially forcing upgrade to big DDT earlier.
*
* Recommended starting points:
* * 512-byte sectors, 512-byte block alignment: D=9 (512 offsets) or D=8 (256 offsets) keeps small DDT viable.
* * 2048-byte optical sectors, 2048-byte alignment: D=8 (256 offsets) typically sufficient.
* * Mixed / large logical block sizes: keep D so that (2^D * SS) ≈ typical dedup block region you want
* addressable.
*
* 2. block capacity within an entry:
* - SMALL DDT: usable block index bits = 12 - D.
* Max representable block index (small) = 2^(12-D) - 1.
* - BIG DDT: usable block index bits = 28 - D.
* Max representable block index (big) = 2^(28-D) - 1.
* - If (requiredBlockIndex > max) you must either reduce D or rely on big DDT.
*
* Approximate requiredBlockIndex ≈ (TotalUniqueBlocks) where
* TotalUniqueBlocks ≈ (S * SS) / (BA * (2^D * SS / (SS))) = S / (2^D * (BA / SS))
* Simplified (assuming BA = SS): TotalUniqueBlocks ≈ S / 2^D.
*
* 3. table_shift considerations (multi-level DDT):
* - Primary entries count ≈ ceil(S / 2^T). Choose T so this count fits memory and keeps lookup fast.
* - Larger T reduces primary table size, increasing secondary table dereferences.
* - Typical balanced values: T in [8..12] (256..4096 sectors per primary entry).
* - Set T=0 for single-level when S is small enough that all entries fit comfortably in memory.
*
* Memory rough estimate for single-level SMALL DDT:
* bytes ≈ S * 2 (each small entry 2 bytes). For BIG DDT: bytes ≈ S * 4.
* Multi-level: primary table bytes ≈ (S / 2^T) * entrySize + sum(secondary tables).
*
* 4. Example scenarios:
* - 50M sectors (≈25 GiB @512B), want small DDT: pick D=8 (256); block index bits=4 (max 16 blocks) insufficient.
* Even D=6 leaves only 6 block index bits (64 block indices), far fewer than needed, so accept BIG DDT
* (28-8=20 bits => million+ blocks). So prefer BIG DDT here.
* - 2M sectors, 2048B alignment, optical: D=8 gives S/2^D ≈ 7812 blocks; small DDT block index bits=4 (max
* 16) inadequate; D=6 (offset span 64 sectors) gives only 6 block index bits (max 64), still short of the
* S/2^6 ≈ 31250 blocks required → use big DDT.
*
* 5. Practical recommendations:
* - If unsure and image > ~1M sectors: keep defaults (data_shift=12, table_shift=9) and allow big DDT.
* - For small archival (<100k sectors): T=0 (single-level), D≈8..10 to keep small DDT feasible.
* - Benchmark before lowering D purely to stay in small DDT; increased secondary lookups or larger primary tables
* can offset saved space.
*
* Recommended presets (approximate bands):
* +----------------------+----------------------+---------------------------+-------------------------------+
* | Total logical sectors | table_shift (T) | data_shift (D) | Notes |
* +----------------------+----------------------+---------------------------+-------------------------------+
* | < 50,000 | 0 | 8 - 10 | Single-level small DDT likely |
* | 50K - 1,000,000 | 8 - 9 | 9 - 10 | Still feasible small DDT |
* | 1M - 10,000,000 | 9 - 10 | 10 - 12 | Borderline small -> big DDT |
* | 10M - 100,000,000 | 10 - 11 | 11 - 12 | Prefer big DDT; tune T for mem|
* | > 100,000,000 | 11 - 12 | 12 | Big DDT; higher T saves memory|
* +----------------------+----------------------+---------------------------+-------------------------------+
* Ranges show typical stable regions; pick the lower end of table_shift if memory is ample, higher if minimizing
* primary table size. Always validate actual unique block count vs payload bits.
*
* NOTE: The library will automatically fall back to BIG DDT where needed; these settings bias structure, they do not
* guarantee small DDT retention.
*
* Thread-safety: aaru_options is a plain POD struct; caller may copy freely. parse_options() returns by value.
*
* Future compatibility: unknown keys are ignored by current parser; consumers should preserve original option
* strings if round-tripping is required.
*/
/** \struct aaru_options
* \brief Parsed user-specified tunables controlling compression, deduplication, hashing and DDT geometry.
*
* All shifts are exponents of two. Defaults quoted on each member are the ones listed in the file-level
* documentation for a NULL option string or an omitted key.
*/
typedef struct
{
bool compress;
bool deduplicate;
uint32_t dictionary;
uint8_t table_shift;
uint8_t data_shift;
uint8_t block_alignment;
bool md5;
bool sha1;
bool sha256;
bool blake3;
bool spamsum;
bool compress; ///< Enable adaptive compression (LZMA for data blocks, FLAC for audio). Default: true.
bool deduplicate; ///< Storage dedup flag (DDT always exists). true=share identical sector content, false=store
///< each instance. Default: true.
uint32_t dictionary; ///< LZMA dictionary size in bytes (>= 4096 recommended). Default: 33554432 (32 MiB).
uint8_t table_shift; ///< DDT table shift (multi-level fan-out exponent). Default: 9.
uint8_t data_shift; ///< Global data shift: low bits encode sector offset inside a block (2^data_shift span).
///< Default: 12.
uint8_t block_alignment; ///< log2 underlying block alignment (2^n bytes). Default: 9 (512 bytes).
bool md5; ///< Generate MD5 checksum (ChecksumAlgorithm::Md5) when finalizing image. Default: false.
bool sha1; ///< Generate SHA-1 checksum (ChecksumAlgorithm::Sha1) when finalizing image. Default: false.
bool sha256; ///< Generate SHA-256 checksum (ChecksumAlgorithm::Sha256) when finalizing image. Default: false.
bool blake3; ///< Generate BLAKE3 checksum if supported (not stored if algorithm unavailable). Default: false.
bool spamsum; ///< Generate SpamSum fuzzy hash (ChecksumAlgorithm::SpamSum) if enabled. Default: false.
} aaru_options;
#endif // LIBAARUFORMAT_OPTIONS_H