mirror of
https://github.com/aaru-dps/libaaruformat.git
synced 2025-12-16 19:24:40 +00:00
Enhance documentation for various structures with detailed descriptions and formatting improvements
This commit is contained in:
@@ -19,29 +19,80 @@
|
||||
#ifndef LIBAARUFORMAT_CHECKSUM_H
|
||||
#define LIBAARUFORMAT_CHECKSUM_H
|
||||
|
||||
#include <stdint.h> // Fixed-width integer types for on-disk structures.
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**
|
||||
* Checksum block, contains a checksum of all user data sectors (except for optical discs, where it covers the 2352-byte raw
|
||||
* sector if available).
|
||||
* */
|
||||
typedef struct ChecksumHeader {
|
||||
/**Identifier, <see cref="BlockType.ChecksumBlock" /> */
|
||||
uint32_t identifier;
|
||||
/**Length in bytes of the block */
|
||||
uint32_t length;
|
||||
/**How many checksums follow */
|
||||
uint8_t entries;
|
||||
* \file aaruformat/structs/checksum.h
|
||||
* \brief On-disk layout definitions for the checksum block (BlockType::ChecksumBlock).
|
||||
*
|
||||
* A checksum block stores one or more whole-image (user data) checksums. For optical media the
|
||||
* user data definition follows the format's raw sector rules (e.g. 2352-byte raw sector when available).
|
||||
*
|
||||
* Binary layout (all integers are little-endian, structure is packed):
|
||||
*
|
||||
* +------------------------------+-------------------------------+
|
||||
* | Field | Size (bytes) |
|
||||
* +==============================+===============================+
|
||||
* | ChecksumHeader | sizeof(ChecksumHeader)=9 |
|
||||
* | identifier | 4 (BlockType::ChecksumBlock) |
|
||||
* | length | 4 (payload bytes that follow)|
|
||||
* | entries | 1 (number of checksum entries)|
|
||||
* +------------------------------+-------------------------------+
|
||||
* | Repeated for each entry: |
|
||||
* | ChecksumEntry | sizeof(ChecksumEntry)=5 |
|
||||
* | type | 1 (ChecksumAlgorithm) |
|
||||
* | length | 4 (digest length) |
|
||||
* | digest bytes | length |
|
||||
* +------------------------------+-------------------------------+
|
||||
*
|
||||
* Thus, the payload size (ChecksumHeader.length) MUST equal the sum over all entries of:
|
||||
* sizeof(ChecksumEntry) + entry.length.
|
||||
*
|
||||
* Typical digest lengths:
|
||||
* - Md5: 16 bytes
|
||||
* - Sha1: 20 bytes
|
||||
* - Sha256: 32 bytes
|
||||
* - SpamSum: variable length ASCII, NOT null-terminated on disk (a terminating '\0' may be appended in memory).
|
||||
*
|
||||
* \warning The structures are packed; never rely on host compiler default padding or directly casting from a buffer
|
||||
* without ensuring correct endianness if porting to big-endian systems (current implementation assumes LE).
|
||||
*
|
||||
* \see BlockType
|
||||
* \see ChecksumAlgorithm
|
||||
*/
|
||||
|
||||
/**
|
||||
* \struct ChecksumHeader
|
||||
* \brief Header that precedes the sequence of checksum entries for a checksum block.
|
||||
*
|
||||
* After this header, exactly \ref ChecksumHeader::length bytes follow containing \ref ChecksumHeader::entries
|
||||
* consecutive \ref ChecksumEntry records, each immediately followed by its digest payload.
|
||||
*/
|
||||
typedef struct ChecksumHeader
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::ChecksumBlock.
|
||||
uint32_t length; ///< Length in bytes of the payload (all entries + their digest data, excluding this header).
|
||||
uint8_t entries; ///< Number of checksum entries that follow in the payload.
|
||||
} ChecksumHeader;
|
||||
|
||||
/**Checksum entry, followed by checksum data itself */
|
||||
typedef struct ChecksumEntry {
|
||||
/**Checksum algorithm */
|
||||
uint8_t type;
|
||||
/**Length in bytes of the checksum that follows this structure */
|
||||
uint32_t length;
|
||||
/**
|
||||
* \struct ChecksumEntry
|
||||
* \brief Per-checksum metadata immediately followed by the digest / signature bytes.
|
||||
*
|
||||
* For fixed-length algorithms the \ref length MUST match the known digest size. For SpamSum it is variable.
|
||||
* The bytes immediately following this structure (not null-terminated) constitute the digest and are exactly
|
||||
* \ref length bytes long.
|
||||
*
|
||||
* Order of entries is not mandated; readers should scan all entries and match by \ref type.
|
||||
*/
|
||||
typedef struct ChecksumEntry
|
||||
{
|
||||
uint8_t type; ///< Algorithm used (value from \ref ChecksumAlgorithm).
|
||||
uint32_t length; ///< Length in bytes of the digest that immediately follows this structure.
|
||||
} ChecksumEntry;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_CHECKSUM_H
|
||||
#endif // LIBAARUFORMAT_CHECKSUM_H
|
||||
|
||||
@@ -19,37 +19,82 @@
|
||||
#ifndef LIBAARUFORMAT_DATA_H
|
||||
#define LIBAARUFORMAT_DATA_H
|
||||
|
||||
#include <stdint.h> // Fixed width integer types used in on-disk packed structs.
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Block header, precedes block data */
|
||||
typedef struct BlockHeader {
|
||||
/**Identifier, <see cref="BlockType.DataBlock" /> */
|
||||
uint32_t identifier;
|
||||
/**Type of data contained by this block */
|
||||
uint16_t type;
|
||||
/**Compression algorithm used to compress the block */
|
||||
uint16_t compression;
|
||||
/**Size in bytes of each sector contained in this block */
|
||||
uint32_t sectorSize;
|
||||
/**Compressed length for the block */
|
||||
uint32_t cmpLength;
|
||||
/**Uncompressed length for the block */
|
||||
uint32_t length;
|
||||
/**CRC64-ECMA of the compressed block */
|
||||
uint64_t cmpCrc64;
|
||||
/**CRC64-ECMA of the uncompressed block */
|
||||
uint64_t crc64;
|
||||
/**
|
||||
* \file aaruformat/structs/data.h
|
||||
* \brief On-disk layout structures for data-bearing and geometry blocks.
|
||||
*
|
||||
* These packed structures describe the headers that precede variable-length payloads
|
||||
* inside blocks whose identifiers are enumerated in \ref BlockType.
|
||||
* All integer fields are stored little-endian on disk. The library currently assumes a
|
||||
* little-endian host; if ported to a big-endian architecture explicit byte swapping will be required.
|
||||
*
|
||||
* Layout of a data block (BlockType::DataBlock):
|
||||
* BlockHeader (sizeof(BlockHeader) bytes)
|
||||
* Compressed payload (cmpLength bytes)
|
||||
*
|
||||
* Payload decoding:
|
||||
* - Apply the algorithm indicated by \ref BlockHeader::compression (\ref CompressionType) to the
|
||||
* cmpLength bytes following the header to obtain exactly \ref BlockHeader::length bytes.
|
||||
* - The uncompressed data MUST be an integer multiple of \ref BlockHeader::sectorSize.
|
||||
* - A CRC64-ECMA is provided for both compressed (cmpCrc64) and uncompressed (crc64) forms to allow
|
||||
* validation at either stage of the pipeline.
|
||||
*
|
||||
* Geometry block (BlockType::GeometryBlock) has a \ref GeometryBlockHeader followed by no additional
|
||||
* fixed payload in the current format version; it conveys legacy CHS-style logical geometry metadata.
|
||||
*
|
||||
* \warning These structs are packed; do not take their address and assume natural alignment.
|
||||
* \see BlockType
|
||||
* \see DataType
|
||||
* \see CompressionType
|
||||
*/
|
||||
|
||||
/**
|
||||
* \struct BlockHeader
|
||||
* \brief Header preceding the compressed data payload of a data block (BlockType::DataBlock).
|
||||
*
|
||||
* Invariants:
|
||||
* - cmpLength > 0 unless length == 0 (empty block)
|
||||
* - length == 0 implies cmpLength == 0
|
||||
* - If compression == CompressionType::None then cmpLength == length
|
||||
* - length % sectorSize == 0
|
||||
*
|
||||
* Validation strategy (recommended for readers):
|
||||
* 1. Verify identifier == BlockType::DataBlock.
|
||||
* 2. Verify sectorSize is non-zero and a power-of-two or a commonly used size (512/1024/2048/4096/2352).
|
||||
* 3. Verify invariants above and CRCs after (de)compression.
|
||||
*/
|
||||
typedef struct BlockHeader
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::DataBlock.
|
||||
uint16_t type; ///< Logical data classification (value from \ref DataType).
|
||||
uint16_t compression; ///< Compression algorithm used (value from \ref CompressionType).
|
||||
uint32_t sectorSize; ///< Size in bytes of each logical sector represented in this block.
|
||||
uint32_t cmpLength; ///< Size in bytes of the compressed payload immediately following this header.
|
||||
uint32_t length; ///< Size in bytes of the uncompressed payload resulting after decompression.
|
||||
uint64_t cmpCrc64; ///< CRC64-ECMA of the compressed payload (cmpLength bytes).
|
||||
uint64_t crc64; ///< CRC64-ECMA of the uncompressed payload (length bytes).
|
||||
} BlockHeader;
|
||||
|
||||
/**Geometry block, contains physical geometry information */
|
||||
typedef struct GeometryBlockHeader {
|
||||
/**Identifier, <see cref="BlockType.GeometryBlock" /> */
|
||||
uint32_t identifier;
|
||||
uint32_t cylinders;
|
||||
uint32_t heads;
|
||||
uint32_t sectorsPerTrack;
|
||||
/**
|
||||
* \struct GeometryBlockHeader
|
||||
* \brief Legacy CHS style logical geometry metadata (BlockType::GeometryBlock).
|
||||
*
|
||||
* Total logical sectors implied by this header is cylinders * heads * sectorsPerTrack.
|
||||
* Sector size is not included here and must be derived from context (e.g., accompanying metadata
|
||||
* or defaulting to 512 for many block devices).
|
||||
*/
|
||||
typedef struct GeometryBlockHeader
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::GeometryBlock.
|
||||
uint32_t cylinders; ///< Number of cylinders.
|
||||
uint32_t heads; ///< Number of heads (tracks per cylinder).
|
||||
uint32_t sectorsPerTrack; ///< Number of sectors per track.
|
||||
} GeometryBlockHeader;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_DATA_H
|
||||
#endif // LIBAARUFORMAT_DATA_H
|
||||
|
||||
@@ -19,71 +19,149 @@
|
||||
#ifndef LIBAARUFORMAT_DDT_H
|
||||
#define LIBAARUFORMAT_DDT_H
|
||||
|
||||
#include <stdint.h> // fixed-width types for on-disk layout
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Header for a deduplication table. Table follows it */
|
||||
/** \file aaruformat/structs/ddt.h
|
||||
* \brief On-disk headers for Deduplication Data Tables (DDT) versions 1 and 2.
|
||||
*
|
||||
* A DDT maps logical sector indices (LBAs within an image's logical address space) to (block, sector)
|
||||
* pairs plus a base file offset, enabling content de-duplication inside the container. Two generations
|
||||
* exist:
|
||||
* - DdtHeader ("version 1") flat table.
|
||||
* - DdtHeader2 ("version 2") hierarchical, multi-level subtables for scalability.
|
||||
*
|
||||
* All integers are little-endian. Structures are packed (1-byte alignment). When porting to a big-endian
|
||||
* architecture callers must perform byte swapping. Do not rely on compiler-introduced padding.
|
||||
*
|
||||
* Compression of the table body (entries array) follows the same conventions as data blocks: first
|
||||
* decompress according to the compression enum, then validate CRC64 for uncompressed contents.
|
||||
*
|
||||
* Related enumerations:
|
||||
* - BlockType::DeDuplicationTable / BlockType::DeDuplicationTable2
|
||||
* - CompressionType
|
||||
* - DataType
|
||||
* - DdtSizeType (for DdtHeader2::sizeType)
|
||||
*/
|
||||
|
||||
/**
|
||||
* \struct DdtHeader
|
||||
* \brief Header preceding a version 1 (flat) deduplication table body.
|
||||
*
|
||||
* Immediately after this header there are \ref entries table records (compressed if \ref compression != None).
|
||||
* Each table record encodes a pointer using a file byte offset component and a sector offset inside a block:
|
||||
* logicalEntryValue = ((uint64_t)fileByteOffset << shift) + sectorOffsetWithinBlock
|
||||
* where fileByteOffset is measured in bytes (granularity depends on shift) and sectorOffsetWithinBlock is
|
||||
* relative to the start of the referenced data block. The sector size must be taken from the corresponding
|
||||
* data block(s) (see BlockHeader::sectorSize) or higher-level metadata.
|
||||
*
|
||||
* Invariants:
|
||||
* - cmpLength == length if compression == CompressionType::None
|
||||
* - length % (entrySize) == 0 after decompression (implementation-defined entry size)
|
||||
* - entries * entrySize == length
|
||||
* - entries > 0 implies length > 0
|
||||
*/
|
||||
typedef struct DdtHeader
|
||||
{
|
||||
/**Identifier, <see cref="BlockType.DeDuplicationTable" /> */
|
||||
uint32_t identifier;
|
||||
/**Type of data pointed by this DDT */
|
||||
uint16_t type;
|
||||
/**Compression algorithm used to compress the DDT */
|
||||
uint16_t compression;
|
||||
/**Each entry is ((byte offset in file) << shift) + (sector offset in block) */
|
||||
uint8_t shift;
|
||||
/**How many entries are in the table */
|
||||
uint64_t entries;
|
||||
/**Compressed length for the DDT */
|
||||
uint64_t cmpLength;
|
||||
/**Uncompressed length for the DDT */
|
||||
uint64_t length;
|
||||
/**CRC64-ECMA of the compressed DDT */
|
||||
uint64_t cmpCrc64;
|
||||
/**CRC64-ECMA of the uncompressed DDT */
|
||||
uint64_t crc64;
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::DeDuplicationTable.
|
||||
uint16_t type; ///< Data classification (\ref DataType) for sectors referenced by this table.
|
||||
uint16_t compression; ///< Compression algorithm for the table body (\ref CompressionType).
|
||||
uint8_t shift; ///< Left shift applied to per-entry file offset component forming logicalEntryValue.
|
||||
uint64_t entries; ///< Number of deduplication entries contained in (uncompressed) table.
|
||||
uint64_t cmpLength; ///< Size in bytes of compressed entries payload.
|
||||
uint64_t length; ///< Size in bytes of uncompressed entries payload.
|
||||
uint64_t cmpCrc64; ///< CRC64-ECMA of the compressed payload.
|
||||
uint64_t crc64; ///< CRC64-ECMA of the uncompressed payload.
|
||||
} DdtHeader;
|
||||
|
||||
/**
|
||||
* \struct DdtHeader2
|
||||
* \brief Header preceding a version 2 hierarchical deduplication table.
|
||||
*
|
||||
* Version 2 introduces multi-level tables to efficiently address very large images by subdividing
|
||||
* the logical address space. Tables at higher levels partition regions; leaves contain direct
|
||||
* (block, sector) entry mappings. Navigation uses \ref tableLevel (0 = root) and \ref levels (total depth).
|
||||
*
|
||||
* Logical sector (LBA) mapping (actual implementation in decode_ddt_{single,multi}_level_v2):
|
||||
* 1. Let L be the requested logical sector (can be negative externally). Internal index I = L + negative.
|
||||
* Valid range: 0 <= I < blocks. (Total user-data sectors often = blocks - negative - overflow.)
|
||||
* 2. If tableShift == 0 (single-level): entryIndex = I.
|
||||
* Else (multi-level):
|
||||
* itemsPerPrimaryEntry = 1 << tableShift
|
||||
* primaryIndex = I / itemsPerPrimaryEntry
|
||||
* secondaryIndex = I % itemsPerPrimaryEntry
|
||||
* The primary table entry at primaryIndex yields a secondary DDT file offset (scaled by 2^blockAlignmentShift),
|
||||
* whose table entries are then indexed by secondaryIndex.
|
||||
* 3. Read raw DDT entry value E (16-bit if sizeType == SmallDdtSizeType, 32-bit if BigDdtSizeType).
|
||||
* 4. If E == 0: sector_status = SectorStatusNotDumped; offset=block_offset=0.
|
||||
* Otherwise extract:
|
||||
* statusBits = E >> 12 (small) or E >> 28 (big)
|
||||
* baseBits = E & 0x0FFF (small) or E & 0x0FFFFFFF (big)
|
||||
* sectorOffsetWithinBlock = baseBits & ((1 << dataShift) - 1)
|
||||
* blockIndex = baseBits >> dataShift
|
||||
* block_offset (bytes) = blockIndex << blockAlignmentShift
|
||||
* offset (sector units inside block) = sectorOffsetWithinBlock
|
||||
* 5. The consumer combines block_offset, offset, and the (external) logical sector size to locate data.
|
||||
*
|
||||
* Field roles:
|
||||
* - negative: Count of leading negative LBAs supported; added to L to form internal index.
|
||||
* - overflow: Count of trailing LBAs beyond the user area upper bound that are still dumped and have
|
||||
* normal DDT entries (e.g. optical disc lead-out). Symmetrical to 'negative' on the high end.
|
||||
* - start: For secondary tables, base internal index covered (written when creating new tables). Current decoding
|
||||
* logic does not consult this field (future-proof placeholder).
|
||||
* - blockAlignmentShift: log2 alignment of stored data blocks (byte granularity of block_offset).
|
||||
* - dataShift: number of low entry bits holding the sector offset inside a block (2^dataShift sectors per block).
|
||||
* - tableShift: log2 of number of logical sectors covered by a single primary-table pointer (multi-level only).
|
||||
* - sizeType: Selects entry width (small=16b, big=32b) impacting available bits for blockIndex+offset.
|
||||
*
|
||||
* Notes & current limitations:
|
||||
* - User area sector count = blocks - negative - overflow.
|
||||
* - Valid external LBA range exposed by the image = [-negative, (blocks - negative - 1)].
|
||||
* * Negative range: [-negative, -1]
|
||||
* * User area range: [0, (blocks - negative - overflow - 1)]
|
||||
* * Overflow range: [(blocks - negative - overflow), (blocks - negative - 1)]
|
||||
* - Both negative and overflow ranges are stored with normal DDT entries (if present), enabling complete
|
||||
* reproduction of lead-in / lead-out or similar padding regions.
|
||||
* - start is presently ignored during decoding; integrity checks against it may be added in future revisions.
|
||||
* - No masking is applied to I besides array bounds; callers must ensure L is within representable range.
|
||||
*
|
||||
* Example (Compact Disc):
|
||||
* Disc has 360000 user sectors. Lead-in captured as 15000 negative sectors and lead-out as 15000 overflow sectors.
|
||||
* negative = 15000
|
||||
* overflow = 15000
|
||||
* user sectors = 360000
|
||||
* blocks (internal span) = negative + user + overflow = 390000
|
||||
* External LBA spans: -15000 .. 374999
|
||||
* * Negative: -15000 .. -1 (15000 sectors)
|
||||
* * User: 0 .. 359999 (360000 sectors)
|
||||
* * Overflow: 360000 .. 374999 (15000 sectors)
|
||||
* Internal index I for any external L is I = L + negative.
|
||||
* User area sector count reported to callers (ctx->imageInfo.Sectors) = blocks - negative - overflow = 360000.
|
||||
*/
|
||||
typedef struct DdtHeader2
|
||||
{
|
||||
/**Identifier, <see cref="BlockType.DeDuplicationTable" /> */
|
||||
uint32_t identifier;
|
||||
/**Type of data pointed by this DDT */
|
||||
uint16_t type;
|
||||
/**Compression algorithm used to compress the DDT */
|
||||
uint16_t compression;
|
||||
/**How many levels of subtables are present */
|
||||
uint8_t levels;
|
||||
/**Which level this table belongs to */
|
||||
uint8_t tableLevel;
|
||||
/**Pointer to absolute byte offset in file where the previous level table is located */
|
||||
uint64_t previousLevelOffset;
|
||||
/**Negative displacement of LBAs */
|
||||
uint16_t negative;
|
||||
/**Number of blocks in media */
|
||||
uint64_t blocks;
|
||||
/**Positive overflow displacement of LBAs */
|
||||
uint16_t overflow;
|
||||
/**First LBA contained in this table */
|
||||
uint64_t start;
|
||||
/**Block alignment boundaries */
|
||||
uint8_t blockAlignmentShift;
|
||||
/**Data shift */
|
||||
uint8_t dataShift;
|
||||
/**Table shift */
|
||||
uint8_t tableShift;
|
||||
/**Size type */
|
||||
uint8_t sizeType;
|
||||
/**Entries in this table */
|
||||
uint64_t entries;
|
||||
/**Compressed length for the DDT */
|
||||
uint64_t cmpLength;
|
||||
/**Uncompressed length for the DDT */
|
||||
uint64_t length;
|
||||
/**CRC64-ECMA of the compressed DDT */
|
||||
uint64_t cmpCrc64;
|
||||
/**CRC64-ECMA of the uncompressed DDT */
|
||||
uint64_t crc64;
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::DeDuplicationTable2.
|
||||
uint16_t type; ///< Data classification (\ref DataType) for sectors referenced by this table.
|
||||
uint16_t compression; ///< Compression algorithm for this table body (\ref CompressionType).
|
||||
uint8_t levels; ///< Total number of hierarchy levels (root depth); > 0.
|
||||
uint8_t tableLevel; ///< Zero-based level index of this table (0 = root, increases downward).
|
||||
uint64_t previousLevelOffset; ///< Absolute byte offset of the parent (previous) level table; 0 if root.
|
||||
uint16_t negative; ///< Leading negative LBA count; added to external L to build internal index.
|
||||
uint64_t blocks; ///< Total internal span (negative + usable + overflow) in logical sectors.
|
||||
uint16_t overflow; ///< Trailing dumped sectors beyond user area (overflow range), still mapped with entries.
|
||||
uint64_t
|
||||
start; ///< Base internal index covered by this table (used for secondary tables; currently informational).
|
||||
uint8_t blockAlignmentShift; ///< 2^blockAlignmentShift = block alignment boundary in bytes.
|
||||
uint8_t dataShift; ///< 2^dataShift = sectors represented per increment in blockIndex field.
|
||||
uint8_t tableShift; ///< 2^tableShift = number of logical sectors per primary entry (multi-level only; 0 for
|
||||
///< single-level or secondary tables).
|
||||
uint8_t sizeType; ///< Entry size variant (\ref DdtSizeType) controlling width of E.
|
||||
uint64_t entries; ///< Number of entries contained in (uncompressed) table payload.
|
||||
uint64_t cmpLength; ///< Compressed payload size in bytes.
|
||||
uint64_t length; ///< Uncompressed payload size in bytes.
|
||||
uint64_t cmpCrc64; ///< CRC64-ECMA of compressed table payload.
|
||||
uint64_t crc64; ///< CRC64-ECMA of uncompressed table payload.
|
||||
} DdtHeader2;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -19,42 +19,109 @@
|
||||
#ifndef LIBAARUFORMAT_DUMP_H
|
||||
#define LIBAARUFORMAT_DUMP_H
|
||||
|
||||
#include <stdint.h> /* Fixed-width integer types for on‑disk packed structures */
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Dump hardware block, contains a list of hardware used to dump the media on this image */
|
||||
typedef struct DumpHardwareHeader {
|
||||
/**Identifier, <see cref="BlockType.DumpHardwareBlock" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint16_t entries;
|
||||
/**Size of the whole block, not including this header, in bytes */
|
||||
uint32_t length;
|
||||
/**CRC64-ECMA of the block */
|
||||
uint64_t crc64;
|
||||
/** \file aaruformat/structs/dump.h
|
||||
* \brief Packed on-disk structures describing hardware and software used during image acquisition.
|
||||
*
|
||||
* A Dump Hardware block (identifier = BlockType::DumpHardwareBlock) records one or more dump "environments" –
|
||||
* typically combinations of a physical device (drive, controller, adapter) and the software stack that
|
||||
* performed the read operation. Each environment is represented by a \ref DumpHardwareEntry followed by a
|
||||
* sequence of UTF‑8 strings and an optional array of extent ranges (\ref DumpExtent, defined in context.h) that
|
||||
* delimit portions of the medium this environment contributed to.
|
||||
*
|
||||
* Binary layout (little-endian, packed, all multi-byte integers LE):
|
||||
*
|
||||
* DumpHardwareHeader (sizeof = 18 bytes)
|
||||
* identifier (4) -> BlockType::DumpHardwareBlock
|
||||
* entries (2) -> number of following hardware entries
|
||||
* length (4) -> total bytes of payload that follow this header
|
||||
* crc64 (8) -> CRC64-ECMA of the payload bytes
|
||||
*
|
||||
* Repeated for i in [0, entries):
|
||||
* DumpHardwareEntry (36 bytes)
|
||||
* manufacturerLength (4)
|
||||
* modelLength (4)
|
||||
* revisionLength (4)
|
||||
* firmwareLength (4)
|
||||
* serialLength (4)
|
||||
* softwareNameLength (4)
|
||||
* softwareVersionLength (4)
|
||||
* softwareOperatingSystemLength (4)
|
||||
* extents (4) -> number of DumpExtent structs after the strings
|
||||
*
|
||||
* Variable-length UTF-8 strings (not NUL-terminated on disk) appear immediately after the entry, in the
|
||||
* exact order of the length fields above; each string is present only if its length > 0. The reader allocates
|
||||
* an extra byte to append '\0' for in-memory convenience.
|
||||
*
|
||||
* Array of 'extents' DumpExtent structures (each 16 bytes: start, end) follows the strings if extents > 0.
|
||||
* Each extent describes an inclusive [start, end] logical sector (or unit) range contributed by
|
||||
* this hardware/software combination.
|
||||
*
|
||||
* CRC semantics:
|
||||
* - crc64 covers exactly 'length' bytes immediately following the header.
|
||||
* - For legacy images with header.imageMajorVersion <= AARUF_VERSION_V1 the original C# writer produced a
|
||||
* byte-swapped CRC; the library compensates internally (see process_dumphw_block()).
|
||||
*
|
||||
* Invariants / validation recommendations:
|
||||
* - identifier == BlockType::DumpHardwareBlock
|
||||
* - Accumulated size of all (entry + strings + extents arrays) == length
|
||||
* - All length fields are trusted only after bounds checking against remaining payload bytes
|
||||
* - Strings are raw UTF-8 data with no implicit terminator
|
||||
* - extents * sizeof(DumpExtent) fits inside remaining payload
|
||||
*
|
||||
* Memory management notes (runtime library):
|
||||
* - Each string is malloc'ed with +1 byte for terminator during processing.
|
||||
* - Extents array is malloc'ed per entry when extents > 0.
|
||||
* - See aaruformatContext::dumpHardwareEntriesWithData for owning pointers.
|
||||
*
|
||||
* \warning Structures are packed; never rely on natural alignment when mapping from a byte buffer.
|
||||
* \see DumpHardwareHeader
|
||||
* \see DumpHardwareEntry
|
||||
* \see DumpExtent (in context.h)
|
||||
* \see BlockType
|
||||
*/
|
||||
|
||||
/** \struct DumpHardwareHeader
|
||||
* \brief Header that precedes a sequence of dump hardware entries and their variable-length payload.
|
||||
*/
|
||||
typedef struct DumpHardwareHeader
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::DumpHardwareBlock.
|
||||
uint16_t entries; ///< Number of DumpHardwareEntry records that follow.
|
||||
uint32_t length; ///< Total payload bytes after this header (sum of entries, strings, and extents arrays).
|
||||
uint64_t crc64; ///< CRC64-ECMA of the payload (byte-swapped for legacy v1 images, handled automatically).
|
||||
} DumpHardwareHeader;
|
||||
|
||||
/**Dump hardware entry, contains the lengths of the strings that follow, in the same order as the length fields of this structure */
|
||||
typedef struct DumpHardwareEntry {
|
||||
/**Length of UTF-8 manufacturer string */
|
||||
uint32_t manufacturerLength;
|
||||
/**Length of UTF-8 model string */
|
||||
uint32_t modelLength;
|
||||
/**Length of UTF-8 revision string */
|
||||
uint32_t revisionLength;
|
||||
/**Length of UTF-8 firmware version string */
|
||||
uint32_t firmwareLength;
|
||||
/**Length of UTF-8 serial string */
|
||||
uint32_t serialLength;
|
||||
/**Length of UTF-8 software name string */
|
||||
uint32_t softwareNameLength;
|
||||
/**Length of UTF-8 software version string */
|
||||
uint32_t softwareVersionLength;
|
||||
/**Length of UTF-8 software operating system string */
|
||||
uint32_t softwareOperatingSystemLength;
|
||||
/**How many extents are after the strings */
|
||||
uint32_t extents;
|
||||
/** \struct DumpHardwareEntry
|
||||
* \brief Per-environment length table describing subsequent UTF-8 strings and optional extent array.
|
||||
*
|
||||
* Immediately after this structure the variable-length UTF‑8 strings appear in the documented order, each
|
||||
* present only if its corresponding length is non-zero. No padding is present between strings. When all
|
||||
* strings are consumed, an array of \ref DumpExtent follows if \ref extents > 0.
|
||||
*
|
||||
* All length fields measure bytes (not characters) and exclude any in-memory NUL terminator added by the reader.
|
||||
*
|
||||
* Typical semantics:
|
||||
* - manufacturer/model/revision/firmware/serial identify the hardware device.
|
||||
* - softwareName/softwareVersion/softwareOperatingSystem identify the acquisition software environment.
|
||||
* - extents list which logical ranges this environment actually dumped (useful for multi-device composites).
|
||||
*/
|
||||
typedef struct DumpHardwareEntry
|
||||
{
|
||||
uint32_t manufacturerLength; ///< Length in bytes of manufacturer UTF-8 string.
|
||||
uint32_t modelLength; ///< Length in bytes of model UTF-8 string.
|
||||
uint32_t revisionLength; ///< Length in bytes of revision / hardware revision string.
|
||||
uint32_t firmwareLength; ///< Length in bytes of firmware version string.
|
||||
uint32_t serialLength; ///< Length in bytes of device serial number string.
|
||||
uint32_t softwareNameLength; ///< Length in bytes of dumping software name string.
|
||||
uint32_t softwareVersionLength; ///< Length in bytes of dumping software version string.
|
||||
uint32_t softwareOperatingSystemLength; ///< Length in bytes of host operating system string.
|
||||
uint32_t extents; ///< Number of DumpExtent records following the strings (0 = none).
|
||||
} DumpHardwareEntry;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_DUMP_H
|
||||
#endif // LIBAARUFORMAT_DUMP_H
|
||||
|
||||
@@ -19,73 +19,111 @@
|
||||
#ifndef LIBAARUFORMAT_HEADER_H
|
||||
#define LIBAARUFORMAT_HEADER_H
|
||||
|
||||
#define AARU_HEADER_APP_NAME_LEN 64
|
||||
#define GUID_SIZE 16
|
||||
/** \file aaruformat/structs/header.h
|
||||
* \brief On-disk container header structures (v1 and v2) for Aaru images.
|
||||
*
|
||||
* These packed headers appear at the very beginning (offset 0) of every Aaru image file and
|
||||
* advertise container format version, creator application, indexing offset and optional extended
|
||||
* feature capability bitfields (v2+). All multi-byte integers are little-endian. Strings stored
|
||||
* in the fixed-size application field are UTF‑16LE and zero padded (not necessarily NUL-terminated
|
||||
* if fully filled). The GUID field (v2) allows derivative / child images to reference an origin.
|
||||
*
|
||||
* Version progression:
|
||||
* - v1: \ref AaruHeader (no GUID, no alignment or shift metadata, no feature bitfields).
|
||||
* - v2: \ref AaruHeaderV2 introduces GUID, block/data/table shift hints (mirroring DDT metadata),
|
||||
* and three 64‑bit feature bitmaps to negotiate reader/writer compatibility.
|
||||
*
|
||||
* Compatibility handling (recommended logic for consumers):
|
||||
* 1. If any bit set in featureIncompatible is not implemented by the reader: abort (cannot safely read/write).
|
||||
* 2. Else if any bit set in featureCompatibleRo is not implemented: allow read‑only operations.
|
||||
* 3. Bits only present in featureCompatible but not implemented MAY be ignored for both read/write while
|
||||
* still preserving round‑trip capability (writer should not clear unknown bits when re‑saving).
|
||||
*
|
||||
* Alignment & shift semantics (duplicated here for quick reference, see DdtHeader2 for full details):
|
||||
* - blockAlignmentShift: underlying blocks are aligned to 2^blockAlignmentShift bytes.
|
||||
* - dataShift: data pointer / DDT entry low bits encode offsets modulo 2^dataShift sectors/items.
|
||||
* - tableShift: primary DDT entries span 2^tableShift logical sectors (0 implies single-level tables).
|
||||
*
|
||||
* Invariants:
|
||||
* - identifier == AARU_MAGIC (external constant; not defined here).
|
||||
* - For v1: sizeof(AaruHeader) exact and indexOffset > 0 (indexOffset == 0 => corrupt/unreadable image).
|
||||
* - For v2: sizeof(AaruHeaderV2) exact; indexOffset > 0; blockAlignmentShift, dataShift, tableShift within
|
||||
* sane bounds (e.g. < 63). Zero is permissible only for the shift fields (not for indexOffset).
|
||||
*
|
||||
* Security / robustness considerations:
|
||||
* - Always bounds-check indexOffset against file size before seeking.
|
||||
* - Treat application field as untrusted UTF‑16LE; validate surrogate pairs if necessary.
|
||||
* - Unknown feature bits MUST be preserved if a file is rewritten to avoid capability loss.
|
||||
*/
|
||||
|
||||
#define AARU_HEADER_APP_NAME_LEN 64 /**< Size in bytes (UTF-16LE) of application name field (32 UTF-16 code units). */
|
||||
#define GUID_SIZE 16 /**< Size in bytes of GUID / UUID-like binary identifier. */
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Header, at start of file */
|
||||
typedef struct AaruHeader {
|
||||
/**Header identifier, <see cref="AARU_MAGIC" /> */
|
||||
uint64_t identifier;
|
||||
/**UTF-16LE name of the application that created the image */
|
||||
uint8_t application[AARU_HEADER_APP_NAME_LEN];
|
||||
/**Image format major version. A new major version means a possibly incompatible change of format */
|
||||
uint8_t imageMajorVersion;
|
||||
/**Image format minor version. A new minor version indicates a compatible change of format */
|
||||
uint8_t imageMinorVersion;
|
||||
/**Major version of the application that created the image */
|
||||
uint8_t applicationMajorVersion;
|
||||
/**Minor version of the application that created the image */
|
||||
uint8_t applicationMinorVersion;
|
||||
/**Type of media contained on image */
|
||||
uint32_t mediaType;
|
||||
/**Offset to index */
|
||||
uint64_t indexOffset;
|
||||
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
|
||||
int64_t creationTime;
|
||||
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
|
||||
int64_t lastWrittenTime;
|
||||
/** \struct AaruHeader
|
||||
* \brief Version 1 container header placed at offset 0 for legacy / initial format.
|
||||
*
|
||||
* Field summary:
|
||||
* - identifier: magic signature (AARU_MAGIC) identifying the container.
|
||||
* - application: UTF‑16LE creator application name (fixed 64 bytes, zero padded).
|
||||
* - imageMajorVersion / imageMinorVersion: container format version of the file itself (not the app).
|
||||
* - applicationMajorVersion / applicationMinorVersion: version of the creating application.
|
||||
* - mediaType: media type enumeration (\ref MediaType).
|
||||
* - indexOffset: byte offset to the first index block (must be > 0).
|
||||
* - creationTime / lastWrittenTime: 64-bit Windows FILETIME timestamps (100 ns intervals since 1601-01-01 UTC).
|
||||
*/
|
||||
typedef struct AaruHeader
|
||||
{
|
||||
uint64_t identifier; ///< File magic (AARU_MAGIC).
|
||||
uint8_t application[AARU_HEADER_APP_NAME_LEN]; ///< UTF-16LE creator application name (fixed-size buffer).
|
||||
uint8_t imageMajorVersion; ///< Container format major version (incompatible changes when incremented).
|
||||
uint8_t imageMinorVersion; ///< Container format minor version (backward compatible evolutions).
|
||||
uint8_t applicationMajorVersion; ///< Creator application major version.
|
||||
uint8_t applicationMinorVersion; ///< Creator application minor / patch version.
|
||||
uint32_t mediaType; ///< Media type enumeration (value from \ref MediaType).
|
||||
uint64_t indexOffset; ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
|
||||
int64_t creationTime; ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
|
||||
int64_t lastWrittenTime; ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
|
||||
} AaruHeader;
|
||||
|
||||
/**Header, at start of file */
|
||||
typedef struct AaruHeaderV2 {
|
||||
/**Header identifier, see AARU_MAGIC */
|
||||
uint64_t identifier;
|
||||
/**UTF-16LE name of the application that created the image */
|
||||
uint8_t application[AARU_HEADER_APP_NAME_LEN];
|
||||
/**Image format major version. A new major version means a possibly incompatible change of format */
|
||||
uint8_t imageMajorVersion;
|
||||
/**Image format minor version. A new minor version indicates a compatible change of format */
|
||||
uint8_t imageMinorVersion;
|
||||
/**Major version of the application that created the image */
|
||||
uint8_t applicationMajorVersion;
|
||||
/**Minor version of the application that created the image */
|
||||
uint8_t applicationMinorVersion;
|
||||
/**Type of media contained on image */
|
||||
uint32_t mediaType;
|
||||
/**Offset to index */
|
||||
uint64_t indexOffset;
|
||||
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
|
||||
int64_t creationTime;
|
||||
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
|
||||
int64_t lastWrittenTime;
|
||||
/**Unique identifier that allows children images to recognize and find this image.*/
|
||||
uint8_t guid[GUID_SIZE];
|
||||
/**Block alignment shift. All blocks in the image are aligned at 1 << blockAlignmentShift bytes */
|
||||
uint8_t blockAlignmentShift;
|
||||
/**Data shift. All data blocks in the image contain 1 << dataShift items at most */
|
||||
uint8_t dataShift;
|
||||
/**Table shift. All deduplication tables in the image use this shift to calculate the position of an item */
|
||||
uint8_t tableShift;
|
||||
/**Features used in this image that if unsupported are still compatible for reading and writing implementations */
|
||||
uint64_t featureCompatible;
|
||||
/**Features used in this image that if unsupported are still compatible for reading implementations but not for writing */
|
||||
uint64_t featureCompatibleRo;
|
||||
/**Features used in this image that if unsupported prevent reading or writing the image */
|
||||
uint64_t featureIncompatible;
|
||||
/** \struct AaruHeaderV2
|
||||
* \brief Version 2 container header with GUID, alignment shifts, and feature negotiation bitmaps.
|
||||
*
|
||||
* Additions over v1:
|
||||
* - guid: stable 128-bit identifier enabling linkage by derivative images.
|
||||
* - blockAlignmentShift / dataShift / tableShift: global structural hints copied into data & DDT blocks.
|
||||
* - featureCompatible / featureCompatibleRo / featureIncompatible: capability bitmasks.
|
||||
*
|
||||
* Feature bitmask semantics:
|
||||
* - featureCompatible: Optional features; absence of implementation should not impact R/W correctness.
|
||||
* - featureCompatibleRo: If unimplemented, image MAY be opened read-only.
|
||||
* - featureIncompatible: If any bit unimplemented, image MUST NOT be opened (prevent misinterpretation).
|
||||
*
|
||||
* Readers should AND each header mask with the complement of their supported bit set (mask & ~supported) to decide access level (see file
|
||||
* documentation). Writers must preserve unknown bits when saving an existing image.
|
||||
*/
|
||||
typedef struct AaruHeaderV2
|
||||
{
|
||||
uint64_t identifier; ///< File magic (AARU_MAGIC).
|
||||
uint8_t application[AARU_HEADER_APP_NAME_LEN]; ///< UTF-16LE creator application name (fixed 64 bytes).
|
||||
uint8_t imageMajorVersion; ///< Container format major version.
|
||||
uint8_t imageMinorVersion; ///< Container format minor version.
|
||||
uint8_t applicationMajorVersion; ///< Creator application major version.
|
||||
uint8_t applicationMinorVersion; ///< Creator application minor / patch version.
|
||||
uint32_t mediaType; ///< Media type enumeration (value from \ref MediaType).
|
||||
uint64_t indexOffset; ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
|
||||
int64_t creationTime; ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
|
||||
int64_t lastWrittenTime; ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
|
||||
uint8_t guid[GUID_SIZE]; ///< 128-bit image GUID (binary, not text); stable across children.
|
||||
uint8_t blockAlignmentShift; ///< log2 block alignment (block size alignment = 2^blockAlignmentShift bytes).
|
||||
uint8_t dataShift; ///< log2 sectors/items per block-index increment in DDT entries (2^dataShift).
|
||||
uint8_t tableShift; ///< log2 sectors spanned by each primary DDT entry (0 = single-level).
|
||||
uint64_t featureCompatible; ///< Feature bits: unimplemented bits are ignorable (still R/W safe).
|
||||
uint64_t featureCompatibleRo; ///< Feature bits: unimplemented -> degrade to read-only access.
|
||||
uint64_t featureIncompatible; ///< Feature bits: any unimplemented -> abort (cannot open safely).
|
||||
} AaruHeaderV2;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_HEADER_H
|
||||
#endif // LIBAARUFORMAT_HEADER_H
|
||||
|
||||
@@ -21,50 +21,95 @@
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Header for the index, followed by entries */
|
||||
/** \file aaruformat/structs/index.h
|
||||
* \brief On‑disk index block header and entry structures (versions 1, 2 and 3).
|
||||
*
|
||||
* The index provides a directory of all blocks contained in an Aaru image. Each index block starts with
|
||||
* a versioned header (IndexHeader / IndexHeader2 / IndexHeader3) followed by a contiguous array of
|
||||
* fixed‑size \ref IndexEntry records. Version 3 adds support for hierarchical (chained / nested) subindexes.
|
||||
*
|
||||
* Version mapping by block identifier (see \ref BlockType):
|
||||
* - IndexBlock (v1) -> \ref IndexHeader (16‑bit entry count) followed by its \ref IndexEntry records.
|
||||
* - IndexBlock2 (v2) -> \ref IndexHeader2 (64‑bit entry count) followed by its \ref IndexEntry records.
|
||||
* - IndexBlock3 (v3) -> \ref IndexHeader3 with optional hierarchical subindex references.
|
||||
*
|
||||
* CRC coverage & endianness:
|
||||
* - The crc64 field stores a CRC64-ECMA over the entries array ONLY (header bytes are excluded).
|
||||
* - For images with imageMajorVersion <= AARUF_VERSION_V1 a legacy writer byte-swapped the CRC; readers
|
||||
* compensate (see verify_index_v1/v2/v3). The value in the header remains whatever was originally written.
|
||||
*
|
||||
* Hierarchical (v3) behavior:
|
||||
* - Entries whose blockType == IndexBlock3 refer to subindex blocks; readers recursively load and flatten.
|
||||
* - IndexHeader3::previous can point to a preceding index segment (for append / incremental scenarios) or 0.
|
||||
* - CRC of the main index does NOT cover subindex contents; each subindex has its own header + CRC.
|
||||
*
|
||||
* Invariants / validation recommendations:
|
||||
* - identifier must equal the expected BlockType variant for that version.
|
||||
* - entries > 0 implies the entries array byte size == entries * sizeof(IndexEntry).
|
||||
* - crc64 must match recomputed CRC64( entries array ) (after legacy byte swap handling if required).
|
||||
* - For v3, if previous != 0 it should point to another IndexBlock3 header (optional best‑effort check).
|
||||
*
|
||||
* Notes:
|
||||
* - Structures are packed (1‑byte alignment). All multi-byte integers are little‑endian on disk.
|
||||
* - The index does not store per-entry CRC; integrity relies on each individual block's own CRC plus the index CRC.
|
||||
* - dataType in \ref IndexEntry is meaningful only for block types that carry typed data (e.g. DataBlock,
|
||||
* DumpHardwareBlock, etc.).
|
||||
*
|
||||
* See also: verify_index_v1(), verify_index_v2(), verify_index_v3() for integrity procedures.
|
||||
*/
|
||||
|
||||
/** \struct IndexHeader
|
||||
* \brief Index header (version 1) for legacy images (identifier == IndexBlock).
|
||||
*
|
||||
* Uses a 16‑bit entry counter limiting the number of indexable blocks in v1.
|
||||
*/
|
||||
typedef struct IndexHeader
|
||||
{
|
||||
/**Identifier, <see cref="BlockType.Index" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint16_t entries;
|
||||
/**CRC64-ECMA of the index */
|
||||
uint64_t crc64;
|
||||
uint32_t identifier; ///< Block identifier (must be BlockType::IndexBlock).
|
||||
uint16_t entries; ///< Number of \ref IndexEntry records that follow immediately.
|
||||
uint64_t crc64; ///< CRC64-ECMA of the entries array (legacy byte-swapped for early images).
|
||||
} IndexHeader;
|
||||
|
||||
/**Header for the index, followed by entries */
|
||||
/** \struct IndexHeader2
|
||||
* \brief Index header (version 2) with 64‑bit entry counter (identifier == IndexBlock2).
|
||||
*
|
||||
* Enlarges the entry count field to 64 bits for large images; otherwise structurally identical to v1.
|
||||
*/
|
||||
typedef struct IndexHeader2
|
||||
{
|
||||
/**Identifier, <see cref="BlockType.Index" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint64_t entries;
|
||||
/**CRC64-ECMA of the index */
|
||||
uint64_t crc64;
|
||||
uint32_t identifier; ///< Block identifier (must be BlockType::IndexBlock2).
|
||||
uint64_t entries; ///< Number of \ref IndexEntry records that follow immediately.
|
||||
uint64_t crc64; ///< CRC64-ECMA of the entries array (legacy byte-swapped rule still applies for old versions).
|
||||
} IndexHeader2;
|
||||
|
||||
/**Header for the index, followed by entries */
|
||||
/** \struct IndexHeader3
|
||||
* \brief Index header (version 3) adding hierarchical chaining (identifier == IndexBlock3).
|
||||
*
|
||||
* Supports flattened hierarchical indexes: entries referencing additional IndexBlock3 subindexes.
|
||||
* The 'previous' pointer allows chaining earlier index segments (e.g., incremental append) enabling
|
||||
* cumulative discovery without rewriting earlier headers.
|
||||
*/
|
||||
typedef struct IndexHeader3
|
||||
{
|
||||
/**Identifier, <see cref="BlockType.Index" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint64_t entries;
|
||||
/**CRC64-ECMA of the index */
|
||||
uint64_t crc64;
|
||||
/**Pointer to the previous index header */
|
||||
uint64_t previous;
|
||||
uint32_t identifier; ///< Block identifier (must be BlockType::IndexBlock3).
|
||||
uint64_t entries; ///< Number of \ref IndexEntry records that follow in this (sub)index block.
|
||||
uint64_t crc64; ///< CRC64-ECMA of the local entries array (does NOT cover subindexes or previous chains).
|
||||
uint64_t previous; ///< File offset of a previous IndexBlock3 header (0 if none / root segment).
|
||||
} IndexHeader3;
|
||||
|
||||
/**Index entry */
|
||||
/** \struct IndexEntry
|
||||
* \brief Single index entry describing a block's type, (optional) data classification, and file offset.
|
||||
*
|
||||
* Semantics by blockType (see \ref BlockType):
|
||||
* - DataBlock / GeometryBlock / ChecksumBlock / etc.: dataType conveys specific stored data category (\ref DataType).
|
||||
* - Deduplication (DDT) or Index blocks: dataType may be ignored or set to a sentinel.
|
||||
* - IndexBlock3: this entry refers to a subindex; offset points to another IndexHeader3.
|
||||
*/
|
||||
typedef struct IndexEntry
|
||||
{
|
||||
/**Type of item pointed by this entry */
|
||||
uint32_t blockType;
|
||||
/**Type of data contained by the block pointed by this entry */
|
||||
uint16_t dataType;
|
||||
/**Offset in file where item is stored */
|
||||
uint64_t offset;
|
||||
uint32_t blockType; ///< Block identifier of the referenced block (value from \ref BlockType).
|
||||
uint16_t dataType; ///< Data classification (value from \ref DataType) or unused for untyped blocks.
|
||||
uint64_t offset; ///< Absolute byte offset in the image where the referenced block header begins.
|
||||
} IndexEntry;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -21,73 +21,95 @@
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Metadata block, contains metadata */
|
||||
typedef struct MetadataBlockHeader {
|
||||
/**Identifier, <see cref="BlockType.MetadataBlock" /> */
|
||||
uint32_t identifier;
|
||||
/**Size in bytes of this whole metadata block */
|
||||
uint32_t blockSize;
|
||||
/**Sequence of media set this media belongs to */
|
||||
int32_t mediaSequence;
|
||||
/**Total number of media on the media set this media belongs to */
|
||||
int32_t lastMediaSequence;
|
||||
/**Offset to start of creator string from start of this block */
|
||||
uint32_t creatorOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE creator string */
|
||||
uint32_t creatorLength;
|
||||
/**Offset to start of comments string from start of this block */
|
||||
uint32_t commentsOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE comments string */
|
||||
uint32_t commentsLength;
|
||||
/**Offset to start of media title string from start of this block */
|
||||
uint32_t mediaTitleOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE media title string */
|
||||
uint32_t mediaTitleLength;
|
||||
/**Offset to start of media manufacturer string from start of this block */
|
||||
uint32_t mediaManufacturerOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE media manufacturer string */
|
||||
uint32_t mediaManufacturerLength;
|
||||
/**Offset to start of media model string from start of this block */
|
||||
uint32_t mediaModelOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE media model string */
|
||||
uint32_t mediaModelLength;
|
||||
/**Offset to start of media serial number string from start of this block */
|
||||
uint32_t mediaSerialNumberOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE media serial number string */
|
||||
uint32_t mediaSerialNumberLength;
|
||||
/**Offset to start of media barcode string from start of this block */
|
||||
uint32_t mediaBarcodeOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE media barcode string */
|
||||
uint32_t mediaBarcodeLength;
|
||||
/**Offset to start of media part number string from start of this block */
|
||||
uint32_t mediaPartNumberOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE media part number string */
|
||||
uint32_t mediaPartNumberLength;
|
||||
/**Offset to start of drive manufacturer string from start of this block */
|
||||
uint32_t driveManufacturerOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE drive manufacturer string */
|
||||
uint32_t driveManufacturerLength;
|
||||
/**Offset to start of drive model string from start of this block */
|
||||
uint32_t driveModelOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE drive model string */
|
||||
uint32_t driveModelLength;
|
||||
/**Offset to start of drive serial number string from start of this block */
|
||||
uint32_t driveSerialNumberOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE drive serial number string */
|
||||
uint32_t driveSerialNumberLength;
|
||||
/**Offset to start of drive firmware revision string from start of this block */
|
||||
uint32_t driveFirmwareRevisionOffset;
|
||||
/**Length in bytes of the null-terminated UTF-16LE drive firmware revision string */
|
||||
uint32_t driveFirmwareRevisionLength;
|
||||
/** \file aaruformat/structs/metadata.h
|
||||
* \brief Packed on-disk metadata block headers for descriptive strings and CICM XML (if present).
|
||||
*
|
||||
* Two metadata-related block header layouts are defined:
|
||||
* - \ref MetadataBlockHeader (BlockType::MetadataBlock): offsets + lengths for several UTF-16LE strings.
|
||||
* - \ref CicmMetadataBlock (BlockType::CicmBlock): length of embedded CICM XML metadata payload.
|
||||
*
|
||||
* All multi-byte integers are little-endian. Structures are packed (1-byte alignment). All textual fields
|
||||
* referenced by offsets are UTF-16LE, null-terminated (0x0000). Length fields include the terminating
|
||||
* null (i.e. a present string has an even length >= 2; length 0 means the string is absent). Offsets are relative to the start of the corresponding block
|
||||
* header (byte 0 = first byte of the header). No padding is implicitly added between strings; producers
|
||||
* may pack them tightly or align them manually (alignment not required by the specification).
|
||||
*
|
||||
* Metadata block layout (conceptual):
|
||||
* MetadataBlockHeader (fixed size)
|
||||
* <variable region holding each present UTF-16LE string in any order chosen by the writer>
|
||||
*
|
||||
* Invariants / validation recommendations for MetadataBlockHeader:
|
||||
* - identifier == BlockType::MetadataBlock
|
||||
* - blockSize >= sizeof(MetadataBlockHeader)
|
||||
* - For every (offset,length) pair where length > 0:
|
||||
* * offset >= sizeof(MetadataBlockHeader)
|
||||
* * offset + length <= blockSize
|
||||
* * length % 2 == 0
|
||||
* * The 16-bit code unit at (offset + length - 2) == 0x0000 (null terminator)
|
||||
* - mediaSequence >= 0 and lastMediaSequence >= 0; if lastMediaSequence > 0 then 0 <= mediaSequence <=
|
||||
* lastMediaSequence
|
||||
*
|
||||
* CICM metadata block layout:
|
||||
* CicmMetadataBlock (header)
|
||||
* <length bytes of CICM XML text payload (typically UTF-8, not null-terminated)>
|
||||
*
|
||||
* NOTE: The library code reading these blocks must not assume strings are present; a zero length means the
|
||||
* corresponding field is omitted. Offsets for omitted fields MAY be zero or arbitrary; readers should skip them
|
||||
* whenever length == 0.
|
||||
*/
|
||||
|
||||
/** \struct MetadataBlockHeader
|
||||
* \brief Header for a metadata block containing offsets and lengths to UTF-16LE descriptive strings.
|
||||
*
|
||||
* Descriptive fields (all optional): creator, comments, media title/manufacturer/model/serial/barcode/part number,
|
||||
* drive manufacturer/model/serial/firmware revision. Strings can be used to describe both physical medium and
|
||||
* acquisition hardware. Length values include the UTF-16LE null terminator (two zero bytes).
|
||||
*/
|
||||
typedef struct MetadataBlockHeader
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::MetadataBlock.
|
||||
uint32_t blockSize; ///< Total size in bytes of the entire metadata block (header + strings).
|
||||
int32_t mediaSequence; ///< Sequence number within a multi-disc / multi-volume set (0-based or 1-based as
|
||||
///< producer defines).
|
||||
int32_t lastMediaSequence; ///< Total number of media in the set; 0 or 1 if single item.
|
||||
uint32_t creatorOffset; ///< Offset to UTF-16LE creator string (or undefined if creatorLength==0).
|
||||
uint32_t creatorLength; ///< Length in bytes (including null) of creator string (0 if absent).
|
||||
uint32_t commentsOffset; ///< Offset to UTF-16LE comments string.
|
||||
uint32_t commentsLength; ///< Length in bytes (including null) of comments string.
|
||||
uint32_t mediaTitleOffset; ///< Offset to UTF-16LE media title string.
|
||||
uint32_t mediaTitleLength; ///< Length in bytes (including null) of media title string.
|
||||
uint32_t mediaManufacturerOffset; ///< Offset to UTF-16LE media manufacturer string.
|
||||
uint32_t mediaManufacturerLength; ///< Length in bytes (including null) of media manufacturer string.
|
||||
uint32_t mediaModelOffset; ///< Offset to UTF-16LE media model string.
|
||||
uint32_t mediaModelLength; ///< Length in bytes (including null) of media model string.
|
||||
uint32_t mediaSerialNumberOffset; ///< Offset to UTF-16LE media serial number string.
|
||||
uint32_t mediaSerialNumberLength; ///< Length in bytes (including null) of media serial number string.
|
||||
uint32_t mediaBarcodeOffset; ///< Offset to UTF-16LE media barcode string.
|
||||
uint32_t mediaBarcodeLength; ///< Length in bytes (including null) of media barcode string.
|
||||
uint32_t mediaPartNumberOffset; ///< Offset to UTF-16LE media part number string.
|
||||
uint32_t mediaPartNumberLength; ///< Length in bytes (including null) of media part number string.
|
||||
uint32_t driveManufacturerOffset; ///< Offset to UTF-16LE drive manufacturer string.
|
||||
uint32_t driveManufacturerLength; ///< Length in bytes (including null) of drive manufacturer string.
|
||||
uint32_t driveModelOffset; ///< Offset to UTF-16LE drive model string.
|
||||
uint32_t driveModelLength; ///< Length in bytes (including null) of drive model string.
|
||||
uint32_t driveSerialNumberOffset; ///< Offset to UTF-16LE drive serial number string.
|
||||
uint32_t driveSerialNumberLength; ///< Length in bytes (including null) of drive serial number string.
|
||||
uint32_t driveFirmwareRevisionOffset; ///< Offset to UTF-16LE drive firmware revision string.
|
||||
uint32_t driveFirmwareRevisionLength; ///< Length in bytes (including null) of drive firmware revision string.
|
||||
} MetadataBlockHeader;
|
||||
|
||||
/**CICM metadata block, contains embedded CICM XML metadata */
|
||||
typedef struct CicmMetadataBlock {
|
||||
/**Identifier, <see cref="BlockType.CicmBlock" /> */
|
||||
uint32_t identifier;
|
||||
uint32_t length;
|
||||
/** \struct CicmMetadataBlock
|
||||
* \brief Header for a CICM XML metadata block (identifier == BlockType::CicmBlock).
|
||||
*
|
||||
* The following 'length' bytes immediately after the header contain the CICM XML payload. Encoding is typically
|
||||
* UTF-8; the payload is not required to be null-terminated.
|
||||
*/
|
||||
typedef struct CicmMetadataBlock
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::CicmBlock.
|
||||
uint32_t length; ///< Length in bytes of the CICM metadata payload that follows.
|
||||
} CicmMetadataBlock;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_METADATA_H
|
||||
#endif // LIBAARUFORMAT_METADATA_H
|
||||
|
||||
@@ -21,36 +21,65 @@
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Contains list of optical disc tracks */
|
||||
typedef struct TracksHeader {
|
||||
/**Identifier, <see cref="BlockType.TracksBlock" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint16_t entries;
|
||||
/**CRC64-ECMA of the block */
|
||||
uint64_t crc64;
|
||||
/** \file aaruformat/structs/optical.h
|
||||
* \brief On-disk structures describing optical disc tracks (Track list block).
|
||||
*
|
||||
* An optical tracks block (identifier == BlockType::TracksBlock) stores a list of \ref TrackEntry
|
||||
* records describing the logical layout of tracks and sessions for CD/DVD/BD and similar media.
|
||||
*
|
||||
* Layout:
|
||||
* TracksHeader (fixed)
|
||||
* TrackEntry[ entries ] (array, packed)
|
||||
*
|
||||
* CRC semantics:
|
||||
* - TracksHeader::crc64 is a CRC64-ECMA over the contiguous TrackEntry array ONLY (header excluded).
|
||||
* - For legacy images (imageMajorVersion <= AARUF_VERSION_V1) a byte swap is applied when verifying.
|
||||
*
|
||||
* Field semantics (TrackEntry):
|
||||
* - sequence: Logical track number (1..99 typical for CD). Values outside that range may encode extras.
|
||||
* - type: Value from \ref TrackType (Audio, Data, Mode variants, etc.).
|
||||
* - start / end: Inclusive Logical Block Address (LBA) bounds for the track. end >= start.
|
||||
* - pregap: Number of sectors of pre-gap *preceding* the track's first user-accessible sector (can be 0 or negative
|
||||
* if representing lead-in semantics; negative interpretation is implementation-defined).
|
||||
* - session: Session number starting at 1 for multi-session discs (1 for single session).
|
||||
* - isrc: 13-byte ISRC (raw code, no terminating null). If fewer significant characters, remaining bytes are 0.
|
||||
* - flags: Bitmask of track/control flags. Unless otherwise specified, recommended mapping (mirrors CD subchannel Q
|
||||
* control bits) is: bit0 Pre-emphasis, bit1 Copy permitted, bit2 Data track, bit3 Four-channel audio,
|
||||
* bits4-7 reserved. Actual semantics may be extended by the format specification.
|
||||
*
|
||||
* Invariants / validation recommendations:
|
||||
* - identifier == BlockType::TracksBlock
|
||||
* - entries * sizeof(TrackEntry) bytes are present after the header in the block image.
|
||||
* - 1 <= sequence <= 99 for standard CD tracks (non-conforming values allowed but should be documented).
|
||||
* - start <= end; pregap >= 0 (if negative pregaps unsupported in implementation).
|
||||
* - ISRC bytes either all zero (no ISRC) or printable ASCII (A-Z, 0-9) per ISO 3901 (without hyphen formatting).
|
||||
*/
|
||||
|
||||
/** \struct TracksHeader
|
||||
* \brief Header for an optical tracks block listing track entries.
|
||||
*/
|
||||
typedef struct TracksHeader
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier (must be BlockType::TracksBlock).
|
||||
uint16_t entries; ///< Number of TrackEntry records following this header.
|
||||
uint64_t crc64; ///< CRC64-ECMA of the TrackEntry array (header excluded, legacy byte-swap for early versions).
|
||||
} TracksHeader;
|
||||
|
||||
/**Optical disc track */
|
||||
typedef struct TrackEntry {
|
||||
/**Track sequence */
|
||||
uint8_t sequence;
|
||||
/**Track type */
|
||||
uint8_t type;
|
||||
/**Track starting LBA */
|
||||
int64_t start;
|
||||
/**Track last LBA */
|
||||
int64_t end;
|
||||
/**Track pregap in sectors */
|
||||
int64_t pregap;
|
||||
/**Track session */
|
||||
uint8_t session;
|
||||
/**Track's ISRC in ASCII */
|
||||
uint8_t isrc[13];
|
||||
/**Track flags */
|
||||
uint8_t flags;
|
||||
/** \struct TrackEntry
|
||||
* \brief Single optical disc track descriptor (sequence, type, LBAs, session, ISRC, flags).
|
||||
*/
|
||||
typedef struct TrackEntry
|
||||
{
|
||||
uint8_t sequence; ///< Track number (1..99 typical for CD audio/data). 0 may indicate placeholder/non-standard.
|
||||
uint8_t type; ///< Track type (value from \ref TrackType).
|
||||
int64_t start; ///< Inclusive starting LBA of the track.
|
||||
int64_t end; ///< Inclusive ending LBA of the track.
|
||||
int64_t pregap; ///< Pre-gap length in sectors preceding track start (0 if none).
|
||||
uint8_t session; ///< Session number (1-based). 1 for single-session discs.
|
||||
uint8_t isrc[13]; ///< ISRC raw 13-byte code (no null terminator). All zeros if not present.
|
||||
uint8_t flags; ///< Control / attribute bitfield (see file documentation for suggested bit mapping).
|
||||
} TrackEntry;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_OPTICAL_H
|
||||
#endif // LIBAARUFORMAT_OPTICAL_H
|
||||
|
||||
@@ -19,19 +19,214 @@
|
||||
#ifndef LIBAARUFORMAT_OPTIONS_H
|
||||
#define LIBAARUFORMAT_OPTIONS_H
|
||||
|
||||
#include <stdbool.h> // For bool type used in aaru_options.
|
||||
#include <stdint.h> // For fixed-width integer types.
|
||||
|
||||
/** \file aaruformat/structs/options.h
|
||||
* \brief Image creation / open tuning options structure and related semantics.
|
||||
*
|
||||
* The library accepts a semicolon-delimited key=value options string (see parse_options()). Recognized keys:
|
||||
* compress=true|false Enable/disable block compression (LZMA for data blocks, FLAC for audio tracks).
|
||||
* deduplicate=true|false If true, identical (duplicate) sectors are stored once (DDT entries point to same
|
||||
* physical block). If false, duplicates are still tracked in DDT but each occurrence
|
||||
* is stored independently (no storage savings). DDT itself is always present.
|
||||
* dictionary=<bytes> LZMA dictionary size in bytes (fallback default 33554432 if 0 or invalid).
|
||||
* table_shift=<n> DDT v2 table shift (default 9) (items per primary entry = 2^n when multi-level).
|
||||
* data_shift=<n> Global data shift (default 12). Defines per-block address granularity: the low
|
||||
* 2^n range encodes the sector (or unit) offset within a block; higher bits combine
|
||||
* with block_alignment to derive block file offsets. Used by DDT but not limited to it.
|
||||
* block_alignment=<n> log2 alignment of underlying data blocks (default 9 => 512 bytes) (block size = 2^n).
|
||||
* md5=true|false Generate MD5 checksum (stored in checksum block if true).
|
||||
* sha1=true|false Generate SHA-1 checksum.
|
||||
* sha256=true|false Generate SHA-256 checksum.
|
||||
* blake3=true|false Generate BLAKE3 checksum (may require build-time support; ignored if unsupported).
|
||||
* spamsum=true|false Generate SpamSum fuzzy hash.
|
||||
*
|
||||
* Defaults (when option string NULL or key omitted):
|
||||
* compress=true, deduplicate=true, dictionary=33554432, table_shift=9, data_shift=12,
|
||||
* block_alignment=9, md5=false, sha1=false, sha256=false, blake3=false, spamsum=false.
|
||||
*
|
||||
* Validation / normalization done in parse_options():
|
||||
* - Zero / missing dictionary resets to default 33554432.
|
||||
* - Zero table_shift resets to 9.
|
||||
* - Zero data_shift resets to 12.
|
||||
* - Zero block_alignment resets to 9.
|
||||
*
|
||||
* Rationale:
|
||||
* - table_shift, data_shift and block_alignment mirror fields stored in on-disk headers (see AaruHeaderV2 &
|
||||
* DdtHeader2); data_shift is a global per-block granularity exponent (not DDT-specific) governing how in-block offsets
|
||||
* are encoded.
|
||||
* - compress selects adaptive codec usage: LZMA applied to generic/data blocks, FLAC applied to audio track payloads.
|
||||
* - deduplicate toggles storage optimization only: the DDT directory is always built for addressing; disabling simply
|
||||
* forces each sector's content to be written even if already present (useful for forensic byte-for-byte
|
||||
* duplication).
|
||||
* - dictionary tunes compression ratio/memory use; large values increase memory footprint.
|
||||
* - Checksums are optional; enabling multiple increases CPU time at write finalization.
|
||||
*
|
||||
* Performance / space trade-offs (deduplicate=false):
|
||||
* - Significantly larger image size: every repeated sector payload is written again.
|
||||
* - Higher write I/O and longer creation time for highly redundant sources (e.g., zero-filled regions) compared to
|
||||
* deduplicate=true, although CPU time spent on duplicate detection/hash lookups is reduced.
|
||||
* - Potentially simpler post-process forensic validation (physical ordering preserved without logical coalescing).
|
||||
* - Use when exact physical repetition is more critical than storage efficiency, or to benchmark raw device
|
||||
* throughput.
|
||||
* - For typical archival use-cases with large zero / repeated patterns, deduplicate=true markedly reduces footprint.
|
||||
*
|
||||
* Approximate in-RAM hash map usage for deduplication (deduplicate=true):
|
||||
* The on-disk DDT can span many secondary tables, but only the primary table plus a currently loaded secondary (and
|
||||
* possibly a small cache) reside in memory; their footprint is typically <<5% of total indexed media space and is
|
||||
* often negligible compared to the hash map used to detect duplicate sectors. Therefore we focus here on the hash /
|
||||
* lookup structure ("hash_map") memory, not the entire DDT on-disk size.
|
||||
*
|
||||
* Worst-case (all sectors unique) per 1 GiB of user data:
|
||||
* sectors_per_GiB = 2^30 / sector_size
|
||||
* hash_bytes ≈ sectors_per_GiB * H (H ≈ 16 bytes: 8-byte fingerprint + ~8 bytes map overhead)
|
||||
*
|
||||
* Resulting hash_map RAM per GiB (unique sectors):
|
||||
* +--------------+------------------+------------------------------+
|
||||
* | Sector size | Sectors / GiB | Hash map (~16 B / sector) |
|
||||
* +--------------+------------------+------------------------------+
|
||||
* | 512 bytes | 2,097,152 | ~33.5 MiB (≈32.0–36.0 MiB) |
|
||||
* | 2048 bytes | 524,288 | ~ 8.0 MiB (≈7.5–8.5 MiB) |
|
||||
* | 4096 bytes | 262,144 | ~ 4.0 MiB (≈3.8–4.3 MiB) |
|
||||
* +--------------+------------------+------------------------------+
|
||||
*
|
||||
* (Range reflects allocator + load factor variation.)
|
||||
*
|
||||
* Targeted projections (hash map only, R=1):
|
||||
* 2048‑byte sectors (~8 MiB per GiB unique)
|
||||
* Capacity | Hash map (MiB) | Hash map (GiB)
|
||||
* ---------+---------------+----------------
|
||||
* 25 GiB | ~200 | 0.20
|
||||
* 50 GiB | ~400 | 0.39
|
||||
*
|
||||
* 512‑byte sectors (~34 MiB per GiB unique; using 33.5 MiB for calc)
|
||||
* Capacity | Hash map (MiB) | Hash map (GiB)
|
||||
* ---------+---------------+----------------
|
||||
* 128 GiB | ~4288 | 4.19
|
||||
* 500 GiB | ~16750 | 16.36
|
||||
* 1 TiB* | ~34304 | 33.50
|
||||
* 2 TiB* | ~68608 | 67.00
|
||||
*
|
||||
* *TiB = 1024 GiB binary. For decimal TB reduce by ~9% (×0.91).
|
||||
*
|
||||
* Duplicate ratio scaling:
|
||||
* Effective hash RAM ≈ table_value * R, where R = unique_sectors / total_sectors.
|
||||
* Example: 500 GiB @512 B, R=0.4 ⇒ ~16750 MiB * 0.4 ≈ 6700 MiB (~6.54 GiB).
|
||||
*
|
||||
* Quick rule of thumb (hash only):
|
||||
* hash_bytes_per_GiB ≈ 16 * (2^30 / sector_size) ≈ (17.1799e9 / sector_size) bytes
|
||||
* → ≈ 33.6 MB (512 B), 8.4 MB (2048 B), 4.2 MB (4096 B) per GiB unique, i.e. 32 / 8 / 4 MiB before map overhead.
|
||||
*
|
||||
* Memory planning tip:
|
||||
* If projected hash_map usage risks exceeding available RAM, consider:
|
||||
* - Increasing table_shift (reduces simultaneous secondary loads / contention)
|
||||
* - Lowering data_shift (if practical) to encourage earlier big DDT adoption with fewer unique blocks
|
||||
* - Segmenting the dump into phases (if workflow permits)
|
||||
* - Accepting higher duplicate ratio by pre-zero detection or sparse treatment externally.
|
||||
* - Resuming the dump in multiple passes: each resume rebuilds the hash_map from scratch, so peak RAM still
|
||||
* matches a single-pass estimate, but average RAM over total wall time can drop if you unload between passes.
|
||||
*
|
||||
* NOTE: DDT in-RAM portion (primary + one secondary) usually adds only a few additional MiB even for very large
|
||||
* images, hence omitted from sizing tables. Include +5% safety margin if extremely tight on memory.
|
||||
*
|
||||
* Guidance for table_shift / data_shift selection:
|
||||
* Let:
|
||||
* S = total logical sectors expected in image (estimate if unknown).
|
||||
* T = table_shift (items per primary DDT entry = 2^T when multi-level; 0 => single-level).
|
||||
* D = data_shift (in-block sector offset span = 2^D).
|
||||
* BA = block_alignment (bytes) = 2^block_alignment.
|
||||
* SS = sector size (bytes).
|
||||
*
|
||||
* 1. data_shift constraints:
|
||||
* - For SMALL DDT entries (12 payload bits after status): D must satisfy 0 < D < 12 and (12 - D) >= 1 so that at
|
||||
* least one bit remains for block index. Practical range for small DDT: 6..10 (leaves 2+ bits for block index).
|
||||
* - For BIG DDT entries (28 payload bits after status): D may be larger (up to 27) but values >16 rarely useful.
|
||||
* - Effective address granularity inside a block = min(2^D * SS, physical block span implied by BA).
|
||||
* - Choosing D too large wastes bits (larger offset range than block actually contains) and reduces the number of
|
||||
* block index bits within a small entry, potentially forcing upgrade to big DDT earlier.
|
||||
*
|
||||
* Recommended starting points:
|
||||
* * 512‑byte sectors, 512‑byte block alignment: D=9 (512 offsets) or D=8 (256 offsets) keeps small DDT viable.
|
||||
* * 2048‑byte optical sectors, 2048‑byte alignment: D=8 (256 offsets) typically sufficient.
|
||||
* * Mixed / large logical block sizes: keep D so that (2^D * SS) ≈ typical dedup block region you want
|
||||
* addressable.
|
||||
*
|
||||
* 2. block capacity within an entry:
|
||||
* - SMALL DDT: usable block index bits = 12 - D.
|
||||
* Max representable block index (small) = 2^(12-D) - 1.
|
||||
* - BIG DDT: usable block index bits = 28 - D.
|
||||
* Max representable block index (big) = 2^(28-D) - 1.
|
||||
* - If (requiredBlockIndex > max) you must either reduce D or rely on big DDT.
|
||||
*
|
||||
* Approximate requiredBlockIndex ≈ (TotalUniqueBlocks) where
|
||||
* TotalUniqueBlocks ≈ (S * SS) / (BA * (2^D * SS / (SS))) = S / (2^D * (BA / SS))
|
||||
* Simplified (assuming BA = SS): TotalUniqueBlocks ≈ S / 2^D.
|
||||
*
|
||||
* 3. table_shift considerations (multi-level DDT):
|
||||
* - Primary entries count ≈ ceil(S / 2^T). Choose T so this count fits memory and keeps lookup fast.
|
||||
* - Larger T reduces primary table size, increasing secondary table dereferences.
|
||||
* - Typical balanced values: T in [8..12] (256..4096 sectors per primary entry).
|
||||
* - Set T=0 for single-level when S is small enough that all entries fit comfortably in memory.
|
||||
*
|
||||
* Memory rough estimate for single-level SMALL DDT:
|
||||
* bytes ≈ S * 2 (each small entry 2 bytes). For BIG DDT: bytes ≈ S * 4.
|
||||
* Multi-level: primary table bytes ≈ (S / 2^T) * entrySize + sum(secondary tables).
|
||||
*
|
||||
* 4. Example scenarios:
|
||||
* - 50M sectors (≈25 GiB @512B), want small DDT: pick D=8 (256); block index bits=4 (max 16 blocks) insufficient.
|
||||
* Need either D=6 (64 block indices) or accept BIG DDT (28-8=20 bits => million+ blocks). So prefer BIG DDT
|
||||
* here.
|
||||
* - 2M sectors, 2048B alignment, optical: D=8 gives S/2^D ≈ 7812 unique offsets; small DDT block index bits=4 (max
|
||||
* 16) inadequate → choose D=6 (offset span 64 sectors) giving 6 block index bits (max 64) or just use big DDT.
|
||||
*
|
||||
* 5. Practical recommendations:
|
||||
* - If unsure and image > ~1M sectors: keep defaults (data_shift=12, table_shift=9) and allow big DDT.
|
||||
* - For small archival (<100k sectors): T=0 (single-level), D≈8..10 to keep small DDT feasible.
|
||||
* - Benchmark before lowering D purely to stay in small DDT; increased secondary lookups or larger primary tables
|
||||
* can offset saved space.
|
||||
*
|
||||
* Recommended presets (approximate bands):
|
||||
* +----------------------+----------------------+---------------------------+-------------------------------+
|
||||
* | Total logical sectors | table_shift (T) | data_shift (D) | Notes |
|
||||
* +----------------------+----------------------+---------------------------+-------------------------------+
|
||||
* | < 50,000 | 0 | 8 – 10 | Single-level small DDT likely |
|
||||
* | 50K – 1,000,000 | 8 – 9 | 9 – 10 | Still feasible small DDT |
|
||||
* | 1M – 10,000,000 | 9 – 10 | 10 – 12 | Borderline small -> big DDT |
|
||||
* | 10M – 100,000,000 | 10 – 11 | 11 – 12 | Prefer big DDT; tune T for mem|
|
||||
* | > 100,000,000 | 11 – 12 | 12 | Big DDT; higher T saves memory|
|
||||
* +----------------------+----------------------+---------------------------+-------------------------------+
|
||||
* Ranges show typical stable regions; pick the lower end of table_shift if memory is ample, higher if minimizing
|
||||
* primary table size. Always validate actual unique block count vs payload bits.
|
||||
*
|
||||
* NOTE: The library will automatically fall back to BIG DDT where needed; these settings bias structure, they do not
|
||||
* guarantee small DDT retention.
|
||||
*
|
||||
* Thread-safety: aaru_options is a plain POD struct; caller may copy freely. parse_options() returns by value.
|
||||
*
|
||||
* Future compatibility: unknown keys are ignored by current parser; consumers should preserve original option
|
||||
* strings if round-tripping is required.
|
||||
*/
|
||||
|
||||
/** \struct aaru_options
 *  \brief Parsed user-specified tunables controlling compression, deduplication, hashing and DDT geometry.
 *
 *  All shifts are exponents of two. Each member is declared exactly once; the order below is the
 *  structure's layout order.
 */
typedef struct
{
    bool     compress;        ///< Enable adaptive compression (LZMA for data blocks, FLAC for audio). Default: true.
    bool     deduplicate;     ///< Storage dedup flag (DDT always exists). true=share identical sector content,
                              ///< false=store each instance. Default: true.
    uint32_t dictionary;      ///< LZMA dictionary size in bytes (>= 4096 recommended). Default: 33554432 (32 MiB).
    uint8_t  table_shift;     ///< DDT table shift (multi-level fan-out exponent). Default: 9.
    uint8_t  data_shift;      ///< Global data shift: low bits encode sector offset inside a block
                              ///< (2^data_shift span). Default: 12.
    uint8_t  block_alignment; ///< log2 underlying block alignment (2^n bytes). Default: 9 (512 bytes).
    bool     md5;             ///< Generate MD5 checksum (ChecksumAlgorithm::Md5) when finalizing image.
    bool     sha1;            ///< Generate SHA-1 checksum (ChecksumAlgorithm::Sha1) when finalizing image.
    bool     sha256;          ///< Generate SHA-256 checksum (ChecksumAlgorithm::Sha256) when finalizing image.
    bool     blake3;          ///< Generate BLAKE3 checksum if supported (not stored if algorithm unavailable).
    bool     spamsum;         ///< Generate SpamSum fuzzy hash (ChecksumAlgorithm::SpamSum) if enabled.
} aaru_options;
|
||||
|
||||
#endif // LIBAARUFORMAT_OPTIONS_H
|
||||
|
||||
Reference in New Issue
Block a user