mirror of
https://github.com/aaru-dps/libaaruformat.git
synced 2025-12-16 19:24:40 +00:00
Enhance documentation for various structures with detailed descriptions and formatting improvements
This commit is contained in:
1296
include/aaru.h
1296
include/aaru.h
File diff suppressed because it is too large
Load Diff
@@ -24,43 +24,90 @@
|
||||
#pragma ide diagnostic ignored "OCUnusedMacroInspection"
|
||||
#endif
|
||||
|
||||
/** Magic identifier = "DICMFRMT". */
|
||||
#define DIC_MAGIC 0x544D52464D434944
|
||||
/** Magic identifier = "AARUFRMT". */
|
||||
#define AARU_MAGIC 0x544D524655524141
|
||||
/** Image format version. A change in this number indicates an incompatible change to the format that prevents older
|
||||
* implementations from reading it correctly, if at all. */
|
||||
#define AARUF_VERSION 2
|
||||
/** First version of AaruFormat, created in C#.
|
||||
* CRC64 was byte-swapped
|
||||
/** \file aaruformat/consts.h
|
||||
* \brief Core public constants and compile‑time limits for the Aaru container format implementation.
|
||||
*
|
||||
* This header exposes magic identifiers, format version selectors, resource limits, codec parameter bounds,
|
||||
* and bit masks used across libaaruformat. All values are immutable interface contracts; changing them breaks
|
||||
* backward compatibility unless a new format version is declared.
|
||||
*
|
||||
* Summary:
|
||||
* - Magic numbers (DIC_MAGIC, AARU_MAGIC) identify container families (legacy DiscImageChef vs AaruFormat).
|
||||
* - Version macros distinguish format generations (V1 C# / legacy CRC endianness, V2 current C implementation).
|
||||
* - Cache and table size limits provide protective upper bounds against runaway memory consumption.
|
||||
* - Audio constants (SAMPLES_PER_SECTOR, MIN/MAX_FLAKE_BLOCK) align with Red Book (CD‑DA) and FLAC encoding best
|
||||
* practices.
|
||||
* - CD_* masks assist with extracting flags / positional subfields in deduplicated Compact Disc sector tables.
|
||||
* - CRC64 constants implement ECMA‑182 polynomial and standard seed, enabling deterministic end‑to‑end block
|
||||
* integrity.
|
||||
*
|
||||
* Notes:
|
||||
* - Magic values are stored little‑endian on disk when written as 64‑bit integers; when inspecting raw bytes make
|
||||
* sure to account for host endianness.
|
||||
* - AARUF_VERSION must be incremented only when an incompatible on‑disk layout change is introduced.
|
||||
* - MAX_DDT_ENTRY_CACHE is a soft upper bound sized to balance deduplication hit rate vs RAM; tune in future builds
|
||||
* via configuration if adaptive heuristics are introduced.
|
||||
* - The LZMA properties length (5) derives from the standard LZMA header (lc/lp/pb + dict size) and is constant for
|
||||
* raw LZMA streams used here.
|
||||
* - FLAC sample block guidance: empirical evaluation shows >4608 samples per block does not yield meaningful ratio
|
||||
* gains for typical optical audio captures while increasing decode buffer size.
|
||||
*
|
||||
* Thread safety: All macros are compile‑time constants; no synchronization required.
|
||||
* Portability: Constants chosen to fit within 64‑bit targets; arithmetic assumes two's complement.
|
||||
*/
|
||||
#define AARUF_VERSION_V1 1
|
||||
/** Second version of AaruFormat, created in C.
|
||||
* Introduced new header, many new features, and blocks.
|
||||
*/
|
||||
#define AARUF_VERSION_V2 2
|
||||
/** Maximum read cache size, 512MiB. */
|
||||
#define MAX_CACHE_SIZE 536870912
|
||||
/** Size in bytes of LZMA properties. */
|
||||
#define LZMA_PROPERTIES_LENGTH 5
|
||||
/** Maximum number of entries for the DDT cache. */
|
||||
#define MAX_DDT_ENTRY_CACHE 16000000
|
||||
/** How many samples are contained in a RedBook sector. */
|
||||
#define SAMPLES_PER_SECTOR 588
|
||||
/** Maximum number of samples for a FLAC block. Bigger than 4608 gives no benefit. */
|
||||
#define MAX_FLAKE_BLOCK 4608
|
||||
/** Minimum number of samples for a FLAC block. CUETools.Codecs.FLAKE does not support it to be smaller than 256. */
|
||||
#define MIN_FLAKE_BLOCK 256
|
||||
/** This mask is to check for flags in CompactDisc suffix/prefix DDT */
|
||||
#define CD_XFIX_MASK 0xFF000000
|
||||
/** This mask is to check for position in CompactDisc suffix/prefix deduplicated block */
|
||||
#define CD_DFIX_MASK 0x00FFFFFF
|
||||
|
||||
#define CRC64_ECMA_POLY 0xC96C5795D7870F42
|
||||
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF
|
||||
/** Magic identifier for legacy DiscImageChef container (ASCII "DICMFRMT").
|
||||
* Retained for backward compatibility / migration tooling. */
|
||||
#define DIC_MAGIC 0x544D52464D434944ULL
|
||||
/** Magic identifier for AaruFormat container (ASCII "AARUFRMT").
|
||||
* Used in the primary header to assert correct file type. */
|
||||
#define AARU_MAGIC 0x544D524655524141ULL
|
||||
|
||||
/** Current image format major version (incompatible changes bump this).
|
||||
* Readers should reject headers with a higher number unless explicitly forward compatible. */
|
||||
#define AARUF_VERSION 2
|
||||
/** First on‑disk version (C# implementation).
|
||||
* Quirk: CRC64 values were stored byte‑swapped relative to ECMA‑182 canonical output. */
|
||||
#define AARUF_VERSION_V1 1
|
||||
/** Second on‑disk version (C implementation).
|
||||
* Introduced: extended header (GUID, feature bitmaps), hierarchical DDT v2, improved index (v2/v3),
|
||||
* multi‑codec compression, refined metadata blocks. */
|
||||
#define AARUF_VERSION_V2 2
|
||||
|
||||
/** Maximum read cache size (bytes). 512 MiB chosen to prevent excessive resident memory while
|
||||
* still enabling efficient sequential and moderate random access patterns. */
|
||||
#define MAX_CACHE_SIZE 536870912ULL
|
||||
|
||||
/** Size in bytes of the fixed LZMA properties header (lc/lp/pb + dictionary size). */
|
||||
#define LZMA_PROPERTIES_LENGTH 5
|
||||
|
||||
/** Maximum number of cached DDT entry descriptors retained in memory for fast duplicate detection.
|
||||
* At 16,000,000 entries with a compact structure, this caps hash_map overhead while covering large images.
|
||||
* (Approx memory just for lookup bookkeeping: ~16 bytes * N ≈ 256 MB worst case; typical effective <50% of cap.) */
|
||||
#define MAX_DDT_ENTRY_CACHE 16000000
|
||||
|
||||
/** Red Book (CD‑DA) PCM samples per 2352‑byte sector: 44,100 Hz / 75 sectors per second = 588 samples. */
|
||||
#define SAMPLES_PER_SECTOR 588
|
||||
|
||||
/** FLAC maximum block size used for encoding audio sectors.
|
||||
* Empirically >4608 samples yields diminishing compression returns and higher decode latency. */
|
||||
#define MAX_FLAKE_BLOCK 4608
|
||||
/** FLAC minimum block size. CUETools.Codecs.FLAKE does not accept blocks smaller than 256 samples. */
|
||||
#define MIN_FLAKE_BLOCK 256
|
||||
|
||||
/** Mask for extracting correction / fix flags in Compact Disc suffix/prefix DDT entries.
|
||||
* High 8 bits store status (see SectorStatus / CdFixFlags relationships). */
|
||||
#define CD_XFIX_MASK 0xFF000000U
|
||||
/** Mask for extracting positional index (lower 24 bits) in Compact Disc suffix/prefix deduplicated block entries. */
|
||||
#define CD_DFIX_MASK 0x00FFFFFFU
|
||||
|
||||
/** ECMA‑182 CRC64 polynomial (reflected form used in standard implementations). */
|
||||
#define CRC64_ECMA_POLY 0xC96C5795D7870F42ULL
|
||||
/** Initial seed value for CRC64 computations (all bits set). */
|
||||
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFFULL
|
||||
|
||||
#ifndef _MSC_VER
|
||||
#pragma clang diagnostic pop
|
||||
#endif
|
||||
|
||||
#endif // LIBAARUFORMAT_CONSTS_H
|
||||
#endif // LIBAARUFORMAT_CONSTS_H
|
||||
|
||||
@@ -25,6 +25,41 @@
|
||||
#include "structs.h"
|
||||
#include "utarray.h"
|
||||
|
||||
/** \file aaruformat/context.h
|
||||
* \brief Central runtime context structures for libaaruformat (image state, caches, checksum buffers).
|
||||
*
|
||||
* The principal structure, \ref aaruformatContext, aggregates: header metadata, open stream handle, deduplication
|
||||
* tables (DDT) currently in memory, optical disc auxiliary data (sector prefix/suffix/subchannel), track listings,
|
||||
* geometry & metadata blocks, checksum accumulators, CRC & ECC helper contexts, hash map for deduplication, and
|
||||
* transient write buffers.
|
||||
*
|
||||
* Memory ownership model (unless otherwise stated): if a pointer field is non-NULL it is owned by the context and
|
||||
* will be freed (or otherwise released) during context close / destruction. Callers must not free or reallocate
|
||||
* these pointers directly. External callers should treat all internal buffers as read‑only unless explicitly writing.
|
||||
*
|
||||
* Threading: a single context instance is NOT thread-safe; serialize access if used across threads.
|
||||
* Lifetime: allocate, initialize/open, perform read/write/verify operations, then close/free.
|
||||
*
|
||||
* Deduplication tables (DDT): only a subset (primary table + an active secondary + optional cache) is retained in RAM;
|
||||
* large images may rely on lazy loading of secondary tables. Flags (inMemoryDdt, userDataDdt*, cachedSecondary*)
|
||||
* indicate what is currently resident.
|
||||
*
|
||||
* Optical auxiliary buffers (sectorPrefix / sectorSuffix / subchannel / corrected variants) are populated only for
|
||||
* images where those components exist (e.g., raw CD dumps). They may be NULL for block devices / non‑optical media.
|
||||
*
|
||||
* Index handling: indexEntries (UT_array) holds a flattened list of \ref IndexEntry structures (regardless of
|
||||
* v1/v2/v3). hash_map_t *sectorHashMap provides fast duplicate detection keyed by content fingerprint / sparse sector
|
||||
* key.
|
||||
*
|
||||
* Invariants / sanity expectations (not strictly enforced everywhere):
|
||||
* - magic == AARU_MAGIC after successful open/create.
|
||||
* - header.imageMajorVersion <= AARUF_VERSION.
|
||||
* - imageStream != NULL when any I/O method is in progress.
|
||||
* - If deduplicate == false, sectorHashMap may still be populated for bookkeeping but duplicates are stored
|
||||
* independently.
|
||||
* - If userDataDdtMini != NULL then userDataDdtBig == NULL (and vice versa) for a given level.
|
||||
*/
|
||||
|
||||
#ifndef MD5_DIGEST_LENGTH
|
||||
#define MD5_DIGEST_LENGTH 16
|
||||
#endif
|
||||
@@ -37,121 +72,186 @@
|
||||
#define SHA256_DIGEST_LENGTH 32
|
||||
#endif
|
||||
|
||||
/** \struct Crc64Context
|
||||
* \brief Internal (legacy) CRC64 computation context (superseded by the current \ref crc64_ctx usage).
|
||||
*
|
||||
* Kept for compatibility with earlier code paths; new code should prefer the opaque crc64_ctx API.
|
||||
*/
|
||||
typedef struct Crc64Context
|
||||
{
|
||||
uint64_t finalSeed;
|
||||
uint64_t table[256];
|
||||
uint64_t hashInt;
|
||||
uint64_t finalSeed; ///< Final CRC value (post processing) or running seed.
|
||||
uint64_t table[256]; ///< Precomputed 256-entry lookup table for the ECMA polynomial.
|
||||
uint64_t hashInt; ///< Intermediate accumulator.
|
||||
} Crc64Context;
|
||||
|
||||
/** \struct CdEccContext
|
||||
* \brief Lookup tables and state for Compact Disc EDC/ECC (P/Q) regeneration / verification.
|
||||
*
|
||||
* Fields may be lazily allocated; inited_edc indicates tables are ready.
|
||||
*/
|
||||
typedef struct CdEccContext
|
||||
{
|
||||
bool inited_edc;
|
||||
uint8_t *ecc_b_table;
|
||||
uint8_t *ecc_f_table;
|
||||
uint32_t *edc_table;
|
||||
bool inited_edc; ///< True once EDC/ECC tables have been initialized.
|
||||
uint8_t *ecc_b_table; ///< Backward (B) ECC table (allocated, size implementation-defined).
|
||||
uint8_t *ecc_f_table; ///< Forward (F) ECC table.
|
||||
uint32_t *edc_table; ///< EDC (CRC) lookup table.
|
||||
} CdEccContext;
|
||||
|
||||
/** \struct Checksums
|
||||
* \brief Collected whole‑image checksums / hashes present in a checksum block.
|
||||
*
|
||||
* Only hash arrays with corresponding has* flags set contain valid data. spamsum is a dynamically allocated
|
||||
* NUL‑terminated buffer (original SpamSum signature bytes followed by appended '\0').
|
||||
*/
|
||||
typedef struct Checksums
|
||||
{
|
||||
bool hasMd5;
|
||||
bool hasSha1;
|
||||
bool hasSha256;
|
||||
bool hasSpamSum;
|
||||
uint8_t md5[MD5_DIGEST_LENGTH];
|
||||
uint8_t sha1[SHA1_DIGEST_LENGTH];
|
||||
uint8_t sha256[SHA256_DIGEST_LENGTH];
|
||||
uint8_t *spamsum;
|
||||
bool hasMd5; ///< True if md5[] buffer populated.
|
||||
bool hasSha1; ///< True if sha1[] buffer populated.
|
||||
bool hasSha256; ///< True if sha256[] buffer populated.
|
||||
bool hasSpamSum; ///< True if spamsum pointer allocated and signature read.
|
||||
uint8_t md5[MD5_DIGEST_LENGTH]; ///< MD5 digest (16 bytes).
|
||||
uint8_t sha1[SHA1_DIGEST_LENGTH]; ///< SHA-1 digest (20 bytes).
|
||||
uint8_t sha256[SHA256_DIGEST_LENGTH]; ///< SHA-256 digest (32 bytes).
|
||||
uint8_t *spamsum; ///< SpamSum fuzzy hash (ASCII), allocated length+1 with trailing 0.
|
||||
} Checksums;
|
||||
|
||||
/** \struct mediaTagEntry
|
||||
* \brief Hash table entry for an arbitrary media tag (e.g., proprietary drive/medium descriptor).
|
||||
*
|
||||
* Stored via uthash (hh handle). Type is a format‑specific integer identifier mapping to external interpretation.
|
||||
*/
|
||||
typedef struct mediaTagEntry
|
||||
{
|
||||
uint8_t *data;
|
||||
int32_t type;
|
||||
uint32_t length;
|
||||
UT_hash_handle hh;
|
||||
uint8_t *data; ///< Tag data blob (opaque to library core); length bytes long.
|
||||
int32_t type; ///< Numeric type identifier.
|
||||
uint32_t length; ///< Length in bytes of data.
|
||||
UT_hash_handle hh; ///< uthash linkage.
|
||||
} mediaTagEntry;
|
||||
|
||||
/** \struct aaruformatContext
|
||||
* \brief Master context representing an open or in‑creation Aaru image.
|
||||
*
|
||||
* Contains stream handle, parsed headers, deduplication structures, optical extras, metadata blocks, checksum
|
||||
* information, caches, and write-state. Allocate with library factory (or zero‑init + explicit open) and destroy
|
||||
* with corresponding close/free routine.
|
||||
*
|
||||
* Field grouping:
|
||||
* - Core & header: magic, library*Version, imageStream, header.
|
||||
* - Optical sector adjuncts: sectorPrefix/sectorSuffix/subchannel plus corrected variants & mode2Subheaders.
|
||||
* - Deduplication: inMemoryDdt, userDataDdt*, userDataDdtHeader, mini/big/cached secondary arrays, version tags.
|
||||
* - Metadata & geometry: geometryBlock, metadataBlockHeader+metadataBlock, cicmBlockHeader+cicmBlock, tracksHeader.
|
||||
* - Tracks & hardware: trackEntries, dataTracks, dumpHardwareHeader, dumpHardwareEntriesWithData.
|
||||
* - Integrity & ECC: checksums, eccCdContext, crc64Context.
|
||||
* - Index & dedup lookup: indexEntries (UT_array of IndexEntry), sectorHashMap (duplicate detection), deduplicate
|
||||
* flag.
|
||||
* - Write path: isWriting, currentBlockHeader, writingBuffer(+position/offset), nextBlockPosition.
|
||||
*
|
||||
* Notes:
|
||||
* - userDataDdt points to memory-mapped or fully loaded DDT (legacy path); userDataDdtMini / userDataDdtBig
|
||||
* supersede.
|
||||
* - shift retained for backward compatibility with earlier single‑level address shift semantics.
|
||||
* - mappedMemoryDdtSize is meaningful only if userDataDdt references an mmapped region.
|
||||
*/
|
||||
typedef struct aaruformatContext
|
||||
{
|
||||
uint64_t magic;
|
||||
uint8_t libraryMajorVersion;
|
||||
uint8_t libraryMinorVersion;
|
||||
FILE *imageStream;
|
||||
AaruHeaderV2 header;
|
||||
uint8_t *sectorPrefix;
|
||||
uint8_t *sectorPrefixCorrected;
|
||||
uint8_t *sectorSuffix;
|
||||
uint8_t *sectorSuffixCorrected;
|
||||
uint8_t *sectorSubchannel;
|
||||
uint8_t *mode2Subheaders;
|
||||
uint8_t shift;
|
||||
bool inMemoryDdt;
|
||||
uint64_t *userDataDdt;
|
||||
size_t mappedMemoryDdtSize;
|
||||
uint32_t *sectorPrefixDdt;
|
||||
uint32_t *sectorSuffixDdt;
|
||||
GeometryBlockHeader geometryBlock;
|
||||
MetadataBlockHeader metadataBlockHeader;
|
||||
uint8_t *metadataBlock;
|
||||
TracksHeader tracksHeader;
|
||||
TrackEntry *trackEntries;
|
||||
CicmMetadataBlock cicmBlockHeader;
|
||||
uint8_t *cicmBlock;
|
||||
DumpHardwareHeader dumpHardwareHeader;
|
||||
struct DumpHardwareEntriesWithData *dumpHardwareEntriesWithData;
|
||||
ImageInfo imageInfo;
|
||||
CdEccContext *eccCdContext;
|
||||
uint8_t numberOfDataTracks;
|
||||
TrackEntry *dataTracks;
|
||||
bool *readableSectorTags;
|
||||
struct CacheHeader blockHeaderCache;
|
||||
struct CacheHeader blockCache;
|
||||
Checksums checksums;
|
||||
mediaTagEntry *mediaTags;
|
||||
DdtHeader2 userDataDdtHeader;
|
||||
int ddtVersion;
|
||||
uint16_t *userDataDdtMini;
|
||||
uint32_t *userDataDdtBig;
|
||||
uint16_t *sectorPrefixDdtMini;
|
||||
uint16_t *sectorSuffixDdtMini;
|
||||
uint64_t cachedDdtOffset;
|
||||
uint64_t cachedDdtPosition;
|
||||
uint64_t primaryDdtOffset;
|
||||
uint16_t *cachedSecondaryDdtSmall;
|
||||
uint32_t *cachedSecondaryDdtBig;
|
||||
bool isWriting;
|
||||
BlockHeader currentBlockHeader;
|
||||
uint8_t *writingBuffer;
|
||||
int currentBlockOffset;
|
||||
crc64_ctx *crc64Context;
|
||||
int writingBufferPosition;
|
||||
uint64_t nextBlockPosition;
|
||||
UT_array *indexEntries;
|
||||
hash_map_t *sectorHashMap;
|
||||
bool deduplicate;
|
||||
uint64_t magic; ///< File magic (AARU_MAGIC) post-open.
|
||||
uint8_t libraryMajorVersion; ///< Linked library major version.
|
||||
uint8_t libraryMinorVersion; ///< Linked library minor version.
|
||||
FILE *imageStream; ///< Underlying FILE* stream (binary mode).
|
||||
AaruHeaderV2 header; ///< Parsed container header (v2).
|
||||
|
||||
/* Optical auxiliary buffers (NULL if not present) */
|
||||
uint8_t *sectorPrefix; ///< Raw per-sector prefix (e.g., sync+header) uncorrected.
|
||||
uint8_t *sectorPrefixCorrected; ///< Corrected variant (post error correction) if stored.
|
||||
uint8_t *sectorSuffix; ///< Raw per-sector suffix (EDC/ECC) uncorrected.
|
||||
uint8_t *sectorSuffixCorrected; ///< Corrected suffix if stored separately.
|
||||
uint8_t *sectorSubchannel; ///< Raw 96-byte subchannel (if captured).
|
||||
uint8_t *mode2Subheaders; ///< MODE2 Form1/Form2 8-byte subheaders (concatenated).
|
||||
|
||||
uint8_t shift; ///< Legacy overall shift (deprecated by data_shift/table_shift).
|
||||
bool inMemoryDdt; ///< True if primary (and possibly secondary) DDT loaded.
|
||||
uint64_t *userDataDdt; ///< Legacy flat DDT pointer (NULL when using v2 mini/big arrays).
|
||||
size_t mappedMemoryDdtSize; ///< Length of mmapped DDT if userDataDdt is mmapped.
|
||||
uint32_t *sectorPrefixDdt; ///< Legacy CD sector prefix DDT (deprecated by *_Mini/Big).
|
||||
uint32_t *sectorSuffixDdt; ///< Legacy CD sector suffix DDT.
|
||||
|
||||
GeometryBlockHeader geometryBlock; ///< Logical geometry block (if present).
|
||||
MetadataBlockHeader metadataBlockHeader; ///< Metadata block header.
|
||||
uint8_t *metadataBlock; ///< Raw metadata UTF-16LE concatenated strings.
|
||||
TracksHeader tracksHeader; ///< Tracks header (optical) if present.
|
||||
TrackEntry *trackEntries; ///< Full track list (tracksHeader.entries elements).
|
||||
CicmMetadataBlock cicmBlockHeader; ///< CICM metadata header (if present).
|
||||
uint8_t *cicmBlock; ///< CICM XML payload.
|
||||
DumpHardwareHeader dumpHardwareHeader; ///< Dump hardware header.
|
||||
struct DumpHardwareEntriesWithData *dumpHardwareEntriesWithData; ///< Array of dump hardware entries + strings.
|
||||
ImageInfo imageInfo; ///< Exposed high-level image info summary.
|
||||
|
||||
CdEccContext *eccCdContext; ///< CD ECC/EDC helper tables (allocated on demand).
|
||||
uint8_t numberOfDataTracks; ///< Count of tracks considered "data" (sequence 1..99 heuristics).
|
||||
TrackEntry *dataTracks; ///< Filtered list of data tracks (subset of trackEntries).
|
||||
bool *readableSectorTags; ///< Per-sector boolean array (optical tags read successfully?).
|
||||
|
||||
struct CacheHeader blockHeaderCache; ///< LRU/Cache header for block headers.
|
||||
struct CacheHeader blockCache; ///< LRU/Cache header for block payloads.
|
||||
|
||||
Checksums checksums; ///< Whole-image checksums discovered.
|
||||
mediaTagEntry *mediaTags; ///< Hash table of extra media tags (uthash root).
|
||||
|
||||
DdtHeader2 userDataDdtHeader; ///< Active user data DDT v2 header (primary table meta).
|
||||
int ddtVersion; ///< DDT version in use (1=legacy, 2=v2 hierarchical).
|
||||
uint16_t *userDataDdtMini; ///< DDT entries (small variant) primary/secondary current.
|
||||
uint32_t *userDataDdtBig; ///< DDT entries (big variant) primary/secondary current.
|
||||
uint16_t *sectorPrefixDdtMini; ///< CD sector prefix corrected DDT (small) if present.
|
||||
uint16_t *sectorSuffixDdtMini; ///< CD sector suffix corrected DDT (small) if present.
|
||||
|
||||
uint64_t cachedDdtOffset; ///< File offset of currently cached secondary DDT (0=none).
|
||||
uint64_t cachedDdtPosition; ///< Position index of cached secondary DDT.
|
||||
uint64_t primaryDdtOffset; ///< File offset of the primary DDT v2 table.
|
||||
uint16_t *cachedSecondaryDdtSmall; ///< Cached secondary table (small entries) or NULL.
|
||||
uint32_t *cachedSecondaryDdtBig; ///< Cached secondary table (big entries) or NULL.
|
||||
|
||||
bool isWriting; ///< True if context opened/created for writing.
|
||||
BlockHeader currentBlockHeader; ///< Header for block currently being assembled (write path).
|
||||
uint8_t *writingBuffer; ///< Accumulation buffer for current block data.
|
||||
int currentBlockOffset; ///< Logical offset inside block (units: bytes or sectors depending on path).
|
||||
crc64_ctx *crc64Context; ///< Opaque CRC64 context for streaming updates.
|
||||
int writingBufferPosition; ///< Current size / position within writingBuffer.
|
||||
uint64_t nextBlockPosition; ///< Absolute file offset where next block will be written.
|
||||
|
||||
UT_array *indexEntries; ///< Flattened index entries (UT_array of IndexEntry).
|
||||
hash_map_t *sectorHashMap; ///< Deduplication hash map (fingerprint->entry mapping).
|
||||
bool deduplicate; ///< Storage deduplication active (duplicates coalesce).
|
||||
} aaruformatContext;
|
||||
|
||||
/** \struct DumpHardwareEntriesWithData
|
||||
* \brief In-memory representation of a dump hardware entry plus decoded variable-length fields & extents.
|
||||
*
|
||||
* All string pointers are NUL-terminated UTF-8 copies of on-disk data (or NULL if absent). extents array may be NULL
|
||||
* when no ranges were recorded. Freed during context teardown.
|
||||
*/
|
||||
typedef struct DumpHardwareEntriesWithData
|
||||
{
|
||||
DumpHardwareEntry entry;
|
||||
struct DumpExtent *extents;
|
||||
uint8_t *manufacturer;
|
||||
uint8_t *model;
|
||||
uint8_t *revision;
|
||||
uint8_t *firmware;
|
||||
uint8_t *serial;
|
||||
uint8_t *softwareName;
|
||||
uint8_t *softwareVersion;
|
||||
uint8_t *softwareOperatingSystem;
|
||||
DumpHardwareEntry entry; ///< Fixed-size header with lengths & counts.
|
||||
struct DumpExtent *extents; ///< Array of extents (entry.extents elements) or NULL.
|
||||
uint8_t *manufacturer; ///< Manufacturer string (UTF-8) or NULL.
|
||||
uint8_t *model; ///< Model string or NULL.
|
||||
uint8_t *revision; ///< Hardware revision string or NULL.
|
||||
uint8_t *firmware; ///< Firmware version string or NULL.
|
||||
uint8_t *serial; ///< Serial number string or NULL.
|
||||
uint8_t *softwareName; ///< Dump software name or NULL.
|
||||
uint8_t *softwareVersion; ///< Dump software version or NULL.
|
||||
uint8_t *softwareOperatingSystem; ///< Host operating system string or NULL.
|
||||
} DumpHardwareEntriesWithData;
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/** \struct DumpExtent
|
||||
* \brief Inclusive [start,end] logical sector range contributed by a single hardware environment.
|
||||
*/
|
||||
typedef struct DumpExtent
|
||||
{
|
||||
uint64_t start;
|
||||
uint64_t end;
|
||||
uint64_t start; ///< Starting LBA (inclusive).
|
||||
uint64_t end; ///< Ending LBA (inclusive); >= start.
|
||||
} DumpExtent;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -20,11 +20,49 @@
|
||||
#define LIBAARUFORMAT_CRC64_H
|
||||
#include <stdint.h>
|
||||
|
||||
/** \file aaruformat/crc64.h
|
||||
* \brief CRC64 (ECMA-182) core context and precomputed slicing-by-4 tables.
|
||||
*
|
||||
* Exposes:
|
||||
* - \ref crc64_ctx: minimal incremental state (initialize crc to CRC64_ECMA_SEED).
|
||||
* - crc64_table[4][256]: 4-way (slicing-by-4) lookup tables for high-throughput updates.
|
||||
* - CRC64_ECMA_POLY / CRC64_ECMA_SEED macros matching ECMA-182 (reflected polynomial, all-bits-set seed).
|
||||
*
|
||||
* Algorithm characteristics:
|
||||
* - Polynomial: 0xC96C5795D7870F42 (reflected form).
|
||||
* - Seed / initial value: 0xFFFFFFFFFFFFFFFFULL.
|
||||
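* - Final XOR: none applied to the running accumulator (NOTE(review): CRC-64/XZ finalizes with XOR 0xFFFFFFFFFFFFFFFF; confirm against the implementation).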
|
||||
* - Bit order: reflected; least significant bit processed first.
|
||||
*
|
||||
* Table layout & optimization:
|
||||
* Four 256-entry tables are used (slicing-by-4) allowing 4-byte chunks to be folded per iteration, reducing data
|
||||
* dependency chains compared to a single-table approach. This improves throughput on modern CPUs with abundant ILP.
|
||||
*
|
||||
* Incremental usage (pseudo-code):
|
||||
* \code{.c}
|
||||
* crc64_ctx ctx = { .crc = CRC64_ECMA_SEED };
|
||||
* ctx.crc = crc64_update(ctx.crc, buf, len); // internal helper using crc64_table
|
||||
* // ctx.crc now holds ECMA-182 CRC64 value.
|
||||
* \endcode
|
||||
*
|
||||
* Thread safety: The table is read-only; each thread must use its own crc64_ctx.
|
||||
* Endianness: Table values are host-endian 64-bit constants; algorithm result is endianness-agnostic.
|
||||
*/
|
||||
|
||||
/** \struct crc64_ctx
|
||||
* \brief Minimal ECMA-182 CRC64 incremental state container (running value only).
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
uint64_t crc;
|
||||
uint64_t crc; ///< Running CRC value (initialize to CRC64_ECMA_SEED before first update).
|
||||
} crc64_ctx;
|
||||
|
||||
/** \var crc64_table
|
||||
* \brief Precomputed slicing-by-4 ECMA-182 CRC64 lookup tables (4 * 256 * 8 = 8192 bytes).
|
||||
*
|
||||
* Each row corresponds to one byte lane in a 4-byte block update; actual folding logic resides in the implementation.
|
||||
* Content generated offline; do not modify manually.
|
||||
*/
|
||||
const static uint64_t crc64_table[4][256] = {
|
||||
{0x0000000000000000, 0xB32E4CBE03A75F6F, 0xF4843657A840A05B, 0x47AA7AE9ABE7FF34, 0x7BD0C384FF8F5E33,
|
||||
0xC8FE8F3AFC28015C, 0x8F54F5D357CFFE68, 0x3C7AB96D5468A107, 0xF7A18709FF1EBC66, 0x448FCBB7FCB9E309,
|
||||
@@ -236,7 +274,9 @@ const static uint64_t crc64_table[4][256] = {
|
||||
0x1E5CD90C6EC2440D}
|
||||
};
|
||||
|
||||
#define CRC64_ECMA_POLY 0xC96C5795D7870F42
|
||||
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFF
|
||||
/** ECMA-182 reflected polynomial constant. */
|
||||
#define CRC64_ECMA_POLY 0xC96C5795D7870F42ULL
|
||||
/** ECMA-182 initial seed (all bits set). */
|
||||
#define CRC64_ECMA_SEED 0xFFFFFFFFFFFFFFFFULL
|
||||
|
||||
#endif // LIBAARUFORMAT_CRC64_H
|
||||
|
||||
@@ -19,35 +19,136 @@
|
||||
#ifndef LIBAARUFORMAT_ERRORS_H
|
||||
#define LIBAARUFORMAT_ERRORS_H
|
||||
|
||||
#define AARUF_ERROR_NOT_AARUFORMAT (-1)
|
||||
#define AARUF_ERROR_FILE_TOO_SMALL (-2)
|
||||
#define AARUF_ERROR_INCOMPATIBLE_VERSION (-3)
|
||||
#define AARUF_ERROR_CANNOT_READ_INDEX (-4)
|
||||
#define AARUF_ERROR_SECTOR_OUT_OF_BOUNDS (-5)
|
||||
#define AARUF_ERROR_CANNOT_READ_HEADER (-6)
|
||||
#define AARUF_ERROR_CANNOT_READ_BLOCK (-7)
|
||||
#define AARUF_ERROR_UNSUPPORTED_COMPRESSION (-8)
|
||||
#define AARUF_ERROR_NOT_ENOUGH_MEMORY (-9)
|
||||
#define AARUF_ERROR_BUFFER_TOO_SMALL (-10)
|
||||
#define AARUF_ERROR_MEDIA_TAG_NOT_PRESENT (-11)
|
||||
#define AARUF_ERROR_INCORRECT_MEDIA_TYPE (-12)
|
||||
#define AARUF_ERROR_TRACK_NOT_FOUND (-13)
|
||||
#define AARUF_ERROR_REACHED_UNREACHABLE_CODE (-14)
|
||||
#define AARUF_ERROR_INVALID_TRACK_FORMAT (-15)
|
||||
#define AARUF_ERROR_SECTOR_TAG_NOT_PRESENT (-16)
|
||||
#define AARUF_ERROR_CANNOT_DECOMPRESS_BLOCK (-17)
|
||||
#define AARUF_ERROR_INVALID_BLOCK_CRC (-18)
|
||||
#define AARUF_ERROR_CANNOT_CREATE_FILE (-19)
|
||||
#define AARUF_ERROR_INVALID_APP_NAME_LENGTH (-20)
|
||||
#define AARUF_ERROR_CANNOT_WRITE_HEADER (-21)
|
||||
#define AARUF_READ_ONLY (-22)
|
||||
#define AARUF_ERROR_CANNOT_WRITE_BLOCK_HEADER (-23)
|
||||
#define AARUF_ERROR_CANNOT_WRITE_BLOCK_DATA (-24)
|
||||
#define AARUF_ERROR_CANNOT_SET_DDT_ENTRY (-25)
|
||||
/** \file aaruformat/errors.h
|
||||
* \brief Public error and status code definitions for libaaruformat.
|
||||
*
|
||||
* Negative values represent fatal / non-recoverable error conditions returned by library functions.
|
||||
* Non-negative values (>=0) are either success (0) or sector-level status annotations used when
|
||||
* decoding per-sector metadata (e.g. a sector not dumped or with corrected/unrecoverable errors).
|
||||
*
|
||||
* Usage guidelines:
|
||||
* - Always test for < 0 to check generic failure without enumerating all codes.
|
||||
* - Use exact comparisons for caller-specific handling (e.g. retry on AARUF_ERROR_CANNOT_READ_BLOCK).
|
||||
* - Sector status codes are never returned as fatal function results; they appear in output parameters
|
||||
* populated by read/identify routines.
|
||||
*
|
||||
* Helper: see aaruformat_error_string() for a human-readable textual description suitable for logs.
|
||||
*/
|
||||
|
||||
#define AARUF_STATUS_OK 0
|
||||
#define AARUF_STATUS_SECTOR_NOT_DUMPED 1
|
||||
#define AARUF_STATUS_SECTOR_WITH_ERRORS 2
|
||||
#define AARUF_STATUS_SECTOR_DELETED 3
|
||||
/** \name Fatal / library-level error codes (negative)
|
||||
* @{ */
|
||||
#define AARUF_ERROR_NOT_AARUFORMAT (-1) ///< Input file/stream failed magic or structural validation.
|
||||
#define AARUF_ERROR_FILE_TOO_SMALL (-2) ///< File size insufficient for mandatory header / structures.
|
||||
#define AARUF_ERROR_INCOMPATIBLE_VERSION (-3) ///< Image uses a newer incompatible on-disk version.
|
||||
#define AARUF_ERROR_CANNOT_READ_INDEX (-4) ///< Index block unreadable / truncated / bad identifier.
|
||||
#define AARUF_ERROR_SECTOR_OUT_OF_BOUNDS (-5) ///< Requested logical sector outside media bounds.
|
||||
#define AARUF_ERROR_CANNOT_READ_HEADER (-6) ///< Failed to read container header.
|
||||
#define AARUF_ERROR_CANNOT_READ_BLOCK (-7) ///< Generic block read failure (seek/read error).
|
||||
#define AARUF_ERROR_UNSUPPORTED_COMPRESSION (-8) ///< Block marked with unsupported compression algorithm.
|
||||
#define AARUF_ERROR_NOT_ENOUGH_MEMORY (-9) ///< Memory allocation failure (critical).
|
||||
#define AARUF_ERROR_BUFFER_TOO_SMALL (-10) ///< Caller-supplied buffer insufficient for data.
|
||||
#define AARUF_ERROR_MEDIA_TAG_NOT_PRESENT (-11) ///< Requested media tag absent.
|
||||
#define AARUF_ERROR_INCORRECT_MEDIA_TYPE (-12) ///< Operation incompatible with image media type.
|
||||
#define AARUF_ERROR_TRACK_NOT_FOUND (-13) ///< Referenced track number not present.
|
||||
#define AARUF_ERROR_REACHED_UNREACHABLE_CODE (-14) ///< Internal logic assertion hit unexpected path.
|
||||
#define AARUF_ERROR_INVALID_TRACK_FORMAT (-15) ///< Track metadata internally inconsistent or malformed.
|
||||
#define AARUF_ERROR_SECTOR_TAG_NOT_PRESENT (-16) ///< Requested sector tag (e.g. subchannel/prefix) not stored.
|
||||
#define AARUF_ERROR_CANNOT_DECOMPRESS_BLOCK (-17) ///< Decompression routine failed or size mismatch.
|
||||
#define AARUF_ERROR_INVALID_BLOCK_CRC (-18) ///< CRC64 mismatch indicating corruption.
|
||||
#define AARUF_ERROR_CANNOT_CREATE_FILE (-19) ///< Output file could not be created / opened for write.
|
||||
#define AARUF_ERROR_INVALID_APP_NAME_LENGTH (-20) ///< Application name field length invalid (sanity limit).
|
||||
#define AARUF_ERROR_CANNOT_WRITE_HEADER (-21) ///< Failure writing container header.
|
||||
#define AARUF_READ_ONLY (-22) ///< Operation requires write mode but context is read-only.
|
||||
#define AARUF_ERROR_CANNOT_WRITE_BLOCK_HEADER (-23) ///< Failure writing block header.
|
||||
#define AARUF_ERROR_CANNOT_WRITE_BLOCK_DATA (-24) ///< Failure writing block payload.
|
||||
#define AARUF_ERROR_CANNOT_SET_DDT_ENTRY (-25) ///< Failed to encode/store a DDT entry (overflow or IO).
|
||||
/** @} */
|
||||
|
||||
/** \name Non-fatal sector status codes (non-negative)
 *  Returned through output parameters to describe individual sector state.
 *  @{ */
#define AARUF_STATUS_OK                 0 ///< Sector present and read without uncorrectable errors.
#define AARUF_STATUS_SECTOR_NOT_DUMPED  1 ///< Sector not captured (gap / missing / intentionally skipped).
#define AARUF_STATUS_SECTOR_WITH_ERRORS 2 ///< Sector present but with unrecoverable or flagged errors.
#define AARUF_STATUS_SECTOR_DELETED     3 ///< Sector logically marked deleted (e.g. filesystem deleted area).

/** @} */
|
||||
|
||||
/** \brief Convert an AaruFormat error or status code to a static human-readable string.
|
||||
*
|
||||
* Designed for diagnostics / logging; returns a constant string literal. Unknown codes yield
|
||||
* "Unknown error/status". This helper is inline to avoid adding a separate translation unit.
|
||||
*
|
||||
* \param code Error (<0) or status (>=0) numeric code.
|
||||
* \return Constant C string describing the code.
|
||||
*/
|
||||
static inline const char *aaruformat_error_string(int code)
|
||||
{
|
||||
switch(code)
|
||||
{
|
||||
/* Errors */
|
||||
case AARUF_ERROR_NOT_AARUFORMAT:
|
||||
return "Not an AaruFormat image";
|
||||
case AARUF_ERROR_FILE_TOO_SMALL:
|
||||
return "File too small";
|
||||
case AARUF_ERROR_INCOMPATIBLE_VERSION:
|
||||
return "Incompatible image version";
|
||||
case AARUF_ERROR_CANNOT_READ_INDEX:
|
||||
return "Cannot read index";
|
||||
case AARUF_ERROR_SECTOR_OUT_OF_BOUNDS:
|
||||
return "Sector out of bounds";
|
||||
case AARUF_ERROR_CANNOT_READ_HEADER:
|
||||
return "Cannot read header";
|
||||
case AARUF_ERROR_CANNOT_READ_BLOCK:
|
||||
return "Cannot read block";
|
||||
case AARUF_ERROR_UNSUPPORTED_COMPRESSION:
|
||||
return "Unsupported compression";
|
||||
case AARUF_ERROR_NOT_ENOUGH_MEMORY:
|
||||
return "Not enough memory";
|
||||
case AARUF_ERROR_BUFFER_TOO_SMALL:
|
||||
return "Buffer too small";
|
||||
case AARUF_ERROR_MEDIA_TAG_NOT_PRESENT:
|
||||
return "Media tag not present";
|
||||
case AARUF_ERROR_INCORRECT_MEDIA_TYPE:
|
||||
return "Incorrect media type";
|
||||
case AARUF_ERROR_TRACK_NOT_FOUND:
|
||||
return "Track not found";
|
||||
case AARUF_ERROR_REACHED_UNREACHABLE_CODE:
|
||||
return "Internal unreachable code reached";
|
||||
case AARUF_ERROR_INVALID_TRACK_FORMAT:
|
||||
return "Invalid track format";
|
||||
case AARUF_ERROR_SECTOR_TAG_NOT_PRESENT:
|
||||
return "Sector tag not present";
|
||||
case AARUF_ERROR_CANNOT_DECOMPRESS_BLOCK:
|
||||
return "Cannot decompress block";
|
||||
case AARUF_ERROR_INVALID_BLOCK_CRC:
|
||||
return "Invalid block CRC";
|
||||
case AARUF_ERROR_CANNOT_CREATE_FILE:
|
||||
return "Cannot create file";
|
||||
case AARUF_ERROR_INVALID_APP_NAME_LENGTH:
|
||||
return "Invalid application name length";
|
||||
case AARUF_ERROR_CANNOT_WRITE_HEADER:
|
||||
return "Cannot write header";
|
||||
case AARUF_READ_ONLY:
|
||||
return "Read-only context";
|
||||
case AARUF_ERROR_CANNOT_WRITE_BLOCK_HEADER:
|
||||
return "Cannot write block header";
|
||||
case AARUF_ERROR_CANNOT_WRITE_BLOCK_DATA:
|
||||
return "Cannot write block data";
|
||||
case AARUF_ERROR_CANNOT_SET_DDT_ENTRY:
|
||||
return "Cannot set DDT entry";
|
||||
|
||||
/* Status */
|
||||
case AARUF_STATUS_OK:
|
||||
return "OK";
|
||||
case AARUF_STATUS_SECTOR_NOT_DUMPED:
|
||||
return "Sector not dumped";
|
||||
case AARUF_STATUS_SECTOR_WITH_ERRORS:
|
||||
return "Sector with errors";
|
||||
case AARUF_STATUS_SECTOR_DELETED:
|
||||
return "Sector deleted";
|
||||
}
|
||||
return "Unknown error/status";
|
||||
}
|
||||
|
||||
#endif // LIBAARUFORMAT_ERRORS_H
|
||||
|
||||
@@ -22,22 +22,40 @@
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
/** \struct kv_pair_t
 * \brief Single key/value slot used internally by the open-addressing hash map.
 *
 * Collision resolution strategy (implementation detail): linear or quadratic probing (see source). An empty
 * slot is typically represented by a key sentinel (e.g. 0 or another reserved value) - callers never interact
 * with individual kv_pair_t entries directly; they are managed through the map API.
 */
typedef struct
{
    uint64_t key;   ///< Stored key (64-bit). May use a reserved sentinel to denote an empty slot.
    uint64_t value; ///< Associated value payload (64-bit) stored alongside the key.
} kv_pair_t;
|
||||
|
||||
/** \struct hash_map_t
|
||||
* \brief Minimal open-addressing hash map for 64-bit key/value pairs used in deduplication lookup.
|
||||
*
|
||||
* Fields:
|
||||
* - table: Pointer to contiguous array of kv_pair_t entries (capacity == size).
|
||||
* - size: Total number of slots allocated in table (must be >= 1).
|
||||
* - count: Number of occupied (non-empty) slots currently in use.
|
||||
*
|
||||
* Load factor guidance: insert performance degrades as count approaches size; callers may rebuild with a larger
|
||||
* size when (count * 10 / size) exceeds a chosen threshold (e.g. 70 – 80%). No automatic resizing is performed.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
kv_pair_t *table;
|
||||
size_t size;
|
||||
size_t count;
|
||||
kv_pair_t *table; ///< Array of key/value slots of length == size.
|
||||
size_t size; ///< Allocated slot capacity of table.
|
||||
size_t count; ///< Number of active (filled) entries.
|
||||
} hash_map_t;
|
||||
|
||||
/* Hash map API. NOTE(review): the implementation is not visible from this header; the one-line summaries
 * below are inferred from the names and signatures only and should be confirmed against the source file. */
hash_map_t *create_map(size_t size);                                            ///< Presumably creates a map with `size` slots.
void        free_map(hash_map_t *map);                                          ///< Presumably releases a map made by create_map().
bool        insert_map(hash_map_t *map, uint64_t key, uint64_t value);          ///< Presumably stores key -> value; bool result.
bool        lookup_map(const hash_map_t *map, uint64_t key, uint64_t *out_value); ///< Presumably fetches the value for key into *out_value.
|
||||
|
||||
#endif // LIBAARUFORMAT_HASH_MAP_H
|
||||
|
||||
@@ -8,49 +8,49 @@
|
||||
#include <stdint.h>
|
||||
#include <uthash.h>
|
||||
|
||||
/** \struct CacheEntry
|
||||
* \brief Single hash entry in the in-memory cache.
|
||||
*
|
||||
* This structure is managed by uthash (open addressing with chaining semantics provided by macros).
|
||||
* It represents one key/value association tracked by the cache. The cache implementation supports
|
||||
* both string keys (null-terminated) and 64-bit numeric keys; numeric keys are stored by casting
|
||||
* to a temporary string buffer upstream (see implementation). Callers do not allocate or free
|
||||
* individual entries directly; use the cache API helpers.
|
||||
*
|
||||
* Lifetime & ownership:
|
||||
* - key points either to a heap-allocated C string owned by the cache or to a short-lived buffer
|
||||
* duplicated internally; callers must not free it after insertion.
|
||||
* - value is an opaque pointer supplied by caller; the cache does not take ownership of the pointee
|
||||
* (caller remains responsible for the underlying object unless documented otherwise).
|
||||
*/
|
||||
struct CacheEntry
|
||||
{
|
||||
char *key;
|
||||
void *value;
|
||||
UT_hash_handle hh;
|
||||
char *key; ///< Null-terminated key string (unique within the cache). May encode numeric keys.
|
||||
void *value; ///< Opaque value pointer associated with key (not freed automatically on eviction/clear).
|
||||
UT_hash_handle hh; ///< uthash handle linking this entry into the hash table (must remain last or per uthash docs).
|
||||
};
|
||||
|
||||
/** \struct CacheHeader
 * \brief Cache top-level descriptor encapsulating the hash table root and capacity limit.
 *
 * The cache enforces an upper bound (max_items) on the number of tracked entries. Insert helpers are expected
 * to evict (or refuse) when the limit is exceeded (strategy defined in implementation; current behavior may be
 * simple non-evicting if not yet implemented as a true LRU). The cache pointer holds the uthash root (NULL when
 * empty).
 *
 * Fields:
 *  - max_items: Maximum number of entries allowed; 0 means "no explicit limit" if accepted by implementation.
 *  - cache:     uthash root pointer; NULL when the cache is empty.
 */
struct CacheHeader
{
    uint64_t           max_items; ///< Hard limit for number of entries (policy: enforce/ignore depends on implementation).
    struct CacheEntry *cache;     ///< Hash root (uthash). NULL when empty.
};
|
||||
|
||||
/**
 * Finds an item in the specified cache.
 * @param cache Pointer to the cache header
 * @param key   Key
 * @return Value if found, NULL if not
 */
void *find_in_cache(struct CacheHeader *cache, const char *key);
|
||||
|
||||
/**
 * Adds an item to the specified cache.
 * @param cache Pointer to the cache header
 * @param key   Key
 * @param value Value
 */
void add_to_cache(struct CacheHeader *cache, const char *key, void *value);
|
||||
|
||||
/**
 * Finds an item in the specified cache using a 64-bit integer key.
 * @param cache Pointer to the cache header
 * @param key   Key
 * @return Value if found, NULL if not
 */
void *find_in_cache_uint64(struct CacheHeader *cache, uint64_t key);
|
||||
|
||||
/**
 * Adds an item to the specified cache using a 64-bit integer key.
 * @param cache Pointer to the cache header
 * @param key   Key
 * @param value Value
 */
void add_to_cache_uint64(struct CacheHeader *cache, uint64_t key, void *value);
|
||||
|
||||
#endif // LIBAARUFORMAT_LRU_H
|
||||
|
||||
@@ -19,29 +19,80 @@
|
||||
#ifndef LIBAARUFORMAT_CHECKSUM_H
|
||||
#define LIBAARUFORMAT_CHECKSUM_H
|
||||
|
||||
#include <stdint.h> // Fixed-width integer types for on-disk structures.
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**
 * \file aaruformat/structs/checksum.h
 * \brief On-disk layout definitions for the checksum block (BlockType::ChecksumBlock).
 *
 * A checksum block stores one or more whole-image (user data) checksums. For optical media the
 * user data definition follows the format's raw sector rules (e.g. 2352-byte raw sector when available).
 *
 * Binary layout (all integers are little-endian, structure is packed):
 *
 *   +------------------------------+--------------------------------+
 *   | Field                        | Size (bytes)                   |
 *   +==============================+================================+
 *   | ChecksumHeader               | sizeof(ChecksumHeader)=9       |
 *   |   identifier                 | 4 (BlockType::ChecksumBlock)   |
 *   |   length                     | 4 (payload bytes that follow)  |
 *   |   entries                    | 1 (number of checksum entries) |
 *   +------------------------------+--------------------------------+
 *   | Repeated for each entry:                                      |
 *   |   ChecksumEntry              | sizeof(ChecksumEntry)=5        |
 *   |     type                     | 1 (ChecksumAlgorithm)          |
 *   |     length                   | 4 (digest length)              |
 *   |     digest bytes             | length                         |
 *   +------------------------------+--------------------------------+
 *
 * Thus, the payload size (ChecksumHeader.length) MUST equal the sum over all entries of:
 *   sizeof(ChecksumEntry) + entry.length.
 *
 * Typical digest lengths:
 *  - Md5:     16 bytes
 *  - Sha1:    20 bytes
 *  - Sha256:  32 bytes
 *  - SpamSum: variable length ASCII, NOT null-terminated on disk (a terminating '\0' may be appended in memory).
 *
 * \warning The structures are packed; never rely on host compiler default padding or directly casting from a buffer
 *          without ensuring correct endianness if porting to big-endian systems (current implementation assumes LE).
 *
 * \see BlockType
 * \see ChecksumAlgorithm
 */

/**
 * \struct ChecksumHeader
 * \brief Header that precedes the sequence of checksum entries for a checksum block.
 *
 * After this header, exactly \ref ChecksumHeader::length bytes follow containing \ref ChecksumHeader::entries
 * consecutive \ref ChecksumEntry records, each immediately followed by its digest payload.
 */
typedef struct ChecksumHeader
{
    uint32_t identifier; ///< Block identifier, must be BlockType::ChecksumBlock.
    uint32_t length;     ///< Length in bytes of the payload (all entries + their digest data, excluding this header).
    uint8_t  entries;    ///< Number of checksum entries that follow in the payload.
} ChecksumHeader;
|
||||
|
||||
/**
 * \struct ChecksumEntry
 * \brief Per-checksum metadata immediately followed by the digest / signature bytes.
 *
 * For fixed-length algorithms the \ref length MUST match the known digest size. For SpamSum it is variable.
 * The bytes immediately following this structure (not null-terminated) constitute the digest and are exactly
 * \ref length bytes long.
 *
 * Order of entries is not mandated; readers should scan all entries and match by \ref type.
 */
typedef struct ChecksumEntry
{
    uint8_t  type;   ///< Algorithm used (value from \ref ChecksumAlgorithm).
    uint32_t length; ///< Length in bytes of the digest that immediately follows this structure.
} ChecksumEntry;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_CHECKSUM_H
|
||||
#endif // LIBAARUFORMAT_CHECKSUM_H
|
||||
|
||||
@@ -19,37 +19,82 @@
|
||||
#ifndef LIBAARUFORMAT_DATA_H
|
||||
#define LIBAARUFORMAT_DATA_H
|
||||
|
||||
#include <stdint.h> // Fixed width integer types used in on-disk packed structs.
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**
 * \file aaruformat/structs/data.h
 * \brief On-disk layout structures for data-bearing and geometry blocks.
 *
 * These packed structures describe the headers that precede variable-length payloads
 * inside blocks whose identifiers are enumerated in \ref BlockType.
 * All integer fields are stored little-endian on disk. The library currently assumes a
 * little-endian host; if ported to a big-endian architecture explicit byte swapping will be required.
 *
 * Layout of a data block (BlockType::DataBlock):
 *   BlockHeader (sizeof(BlockHeader) bytes)
 *   Compressed payload (cmpLength bytes)
 *
 * Payload decoding:
 *  - Apply the algorithm indicated by \ref BlockHeader::compression (\ref CompressionType) to the
 *    cmpLength bytes following the header to obtain exactly \ref BlockHeader::length bytes.
 *  - The uncompressed data MUST be an integer multiple of \ref BlockHeader::sectorSize.
 *  - A CRC64-ECMA is provided for both compressed (cmpCrc64) and uncompressed (crc64) forms to allow
 *    validation at either stage of the pipeline.
 *
 * Geometry block (BlockType::GeometryBlock) has a \ref GeometryBlockHeader followed by no additional
 * fixed payload in the current format version; it conveys legacy CHS-style logical geometry metadata.
 *
 * \warning These structs are packed; do not take their address and assume natural alignment.
 * \see BlockType
 * \see DataType
 * \see CompressionType
 */

/**
 * \struct BlockHeader
 * \brief Header preceding the compressed data payload of a data block (BlockType::DataBlock).
 *
 * Invariants:
 *  - cmpLength > 0 unless length == 0 (empty block)
 *  - length == 0 implies cmpLength == 0
 *  - If compression == CompressionType::None then cmpLength == length
 *  - length % sectorSize == 0
 *
 * Validation strategy (recommended for readers):
 *  1. Verify identifier == BlockType::DataBlock.
 *  2. Verify sectorSize is non-zero and a power-of-two or a commonly used size (512/1024/2048/4096/2352).
 *  3. Verify invariants above and CRCs after (de)compression.
 */
typedef struct BlockHeader
{
    uint32_t identifier;  ///< Block identifier, must be BlockType::DataBlock.
    uint16_t type;        ///< Logical data classification (value from \ref DataType).
    uint16_t compression; ///< Compression algorithm used (value from \ref CompressionType).
    uint32_t sectorSize;  ///< Size in bytes of each logical sector represented in this block.
    uint32_t cmpLength;   ///< Size in bytes of the compressed payload immediately following this header.
    uint32_t length;      ///< Size in bytes of the uncompressed payload resulting after decompression.
    uint64_t cmpCrc64;    ///< CRC64-ECMA of the compressed payload (cmpLength bytes).
    uint64_t crc64;       ///< CRC64-ECMA of the uncompressed payload (length bytes).
} BlockHeader;
|
||||
|
||||
/**
 * \struct GeometryBlockHeader
 * \brief Legacy CHS style logical geometry metadata (BlockType::GeometryBlock).
 *
 * Total logical sectors implied by this header is cylinders * heads * sectorsPerTrack.
 * Sector size is not included here and must be derived from context (e.g., accompanying metadata
 * or defaulting to 512 for many block devices).
 */
typedef struct GeometryBlockHeader
{
    uint32_t identifier;      ///< Block identifier, must be BlockType::GeometryBlock.
    uint32_t cylinders;       ///< Number of cylinders.
    uint32_t heads;           ///< Number of heads (tracks per cylinder).
    uint32_t sectorsPerTrack; ///< Number of sectors per track.
} GeometryBlockHeader;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_DATA_H
|
||||
#endif // LIBAARUFORMAT_DATA_H
|
||||
|
||||
@@ -19,71 +19,149 @@
|
||||
#ifndef LIBAARUFORMAT_DDT_H
|
||||
#define LIBAARUFORMAT_DDT_H
|
||||
|
||||
#include <stdint.h> // fixed-width types for on-disk layout
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/** \file aaruformat/structs/ddt.h
 * \brief On-disk headers for Deduplication Data Tables (DDT) versions 1 and 2.
 *
 * A DDT maps logical sector indices (LBAs within an image's logical address space) to (block, sector)
 * pairs plus a base file offset, enabling content de-duplication inside the container. Two generations
 * exist:
 *  - DdtHeader ("version 1") flat table.
 *  - DdtHeader2 ("version 2") hierarchical, multi-level subtables for scalability.
 *
 * All integers are little-endian. Structures are packed (1-byte alignment). When porting to a big-endian
 * architecture callers must perform byte swapping. Do not rely on compiler-introduced padding.
 *
 * Compression of the table body (entries array) follows the same conventions as data blocks: first
 * decompress according to the compression enum, then validate CRC64 for uncompressed contents.
 *
 * Related enumerations:
 *  - BlockType::DeDuplicationTable / BlockType::DeDuplicationTable2
 *  - CompressionType
 *  - DataType
 *  - DdtSizeType (for DdtHeader2::sizeType)
 */

/**
 * \struct DdtHeader
 * \brief Header preceding a version 1 (flat) deduplication table body.
 *
 * Immediately after this header there are \ref entries table records (compressed if \ref compression != None).
 * Each table record encodes a pointer using a file byte-offset component and a sector offset inside a block:
 *   logicalEntryValue = ((uint64_t)fileByteOffset << shift) + sectorOffsetWithinBlock
 * where fileByteOffset is measured in bytes (granularity depends on shift) and sectorOffsetWithinBlock is
 * relative to the start of the referenced data block. The sector size must be taken from the corresponding
 * data block(s) (see BlockHeader::sectorSize) or higher-level metadata.
 *
 * Invariants:
 *  - cmpLength == length if compression == CompressionType::None
 *  - length % (entrySize) == 0 after decompression (implementation-defined entry size)
 *  - entries * entrySize == length
 *  - entries > 0 implies length > 0
 */
typedef struct DdtHeader
{
    uint32_t identifier;  ///< Block identifier, must be BlockType::DeDuplicationTable.
    uint16_t type;        ///< Data classification (\ref DataType) for sectors referenced by this table.
    uint16_t compression; ///< Compression algorithm for the table body (\ref CompressionType).
    uint8_t  shift;       ///< Left shift applied to per-entry file offset component forming logicalEntryValue.
    uint64_t entries;     ///< Number of deduplication entries contained in (uncompressed) table.
    uint64_t cmpLength;   ///< Size in bytes of compressed entries payload.
    uint64_t length;      ///< Size in bytes of uncompressed entries payload.
    uint64_t cmpCrc64;    ///< CRC64-ECMA of the compressed payload.
    uint64_t crc64;       ///< CRC64-ECMA of the uncompressed payload.
} DdtHeader;
|
||||
|
||||
/**
 * \struct DdtHeader2
 * \brief Header preceding a version 2 hierarchical deduplication table.
 *
 * Version 2 introduces multi-level tables to efficiently address very large images by subdividing
 * the logical address space. Tables at higher levels partition regions; leaves contain direct
 * (block, sector) entry mappings. Navigation uses \ref tableLevel (0 = root) and \ref levels (total depth).
 *
 * Logical sector (LBA) mapping (actual implementation in decode_ddt_{single,multi}_level_v2):
 *  1. Let L be the requested logical sector (can be negative externally). Internal index I = L + negative.
 *     Valid range: 0 <= I < blocks. (Total user-data sectors often = blocks - negative - overflow.)
 *  2. If tableShift == 0 (single-level): entryIndex = I.
 *     Else (multi-level):
 *        itemsPerPrimaryEntry = 1 << tableShift
 *        primaryIndex         = I / itemsPerPrimaryEntry
 *        secondaryIndex       = I % itemsPerPrimaryEntry
 *     The primary table entry at primaryIndex yields a secondary DDT file offset (scaled by 2^blockAlignmentShift),
 *     whose table entries are then indexed by secondaryIndex.
 *  3. Read raw DDT entry value E (16-bit if sizeType == SmallDdtSizeType, 32-bit if BigDdtSizeType).
 *  4. If E == 0: sector_status = SectorStatusNotDumped; offset=block_offset=0.
 *     Otherwise extract:
 *        statusBits = E >> 12 (small) or E >> 28 (big)
 *        baseBits   = E & 0x0FFF (small) or E & 0x0FFFFFFF (big)
 *        sectorOffsetWithinBlock = baseBits & ((1 << dataShift) - 1)
 *        blockIndex = baseBits >> dataShift
 *        block_offset (bytes) = blockIndex << blockAlignmentShift
 *        offset (sector units inside block) = sectorOffsetWithinBlock
 *  5. The consumer combines block_offset, offset, and the (external) logical sector size to locate data.
 *
 * Field roles:
 *  - negative: Count of leading negative LBAs supported; added to L to form internal index.
 *  - overflow: Count of trailing LBAs beyond the user area upper bound that are still dumped and have
 *              normal DDT entries (e.g. optical disc lead-out). Symmetrical to 'negative' on the high end.
 *  - start: For secondary tables, base internal index covered (written when creating new tables). Current decoding
 *           logic does not consult this field (future-proof placeholder).
 *  - blockAlignmentShift: log2 alignment of stored data blocks (byte granularity of block_offset).
 *  - dataShift: log2 of the number of addressable sectors per increment of blockIndex bitfield unit.
 *  - tableShift: log2 of number of logical sectors covered by a single primary-table pointer (multi-level only).
 *  - sizeType: Selects entry width (small=16b, big=32b) impacting available bits for blockIndex+offset.
 *
 * Notes & current limitations:
 *  - User area sector count = blocks - negative - overflow.
 *  - Valid external LBA range exposed by the image = [-negative, (blocks - negative - 1)].
 *      * Negative range: [-negative, -1]
 *      * User area range: [0, (blocks - negative - overflow - 1)]
 *      * Overflow range: [(blocks - negative - overflow), (blocks - negative - 1)]
 *  - Both negative and overflow ranges are stored with normal DDT entries (if present), enabling complete
 *    reproduction of lead-in / lead-out or similar padding regions.
 *  - start is presently ignored during decoding; integrity checks against it may be added in future revisions.
 *  - No masking is applied to I besides array bounds; callers must ensure L is within representable range.
 *
 * Example (Compact Disc):
 *  Disc has 360000 user sectors. Lead-in captured as 15000 negative sectors and lead-out as 15000 overflow sectors.
 *    negative = 15000
 *    overflow = 15000
 *    user sectors = 360000
 *    blocks (internal span) = negative + user + overflow = 390000
 *    External LBA spans: -15000 .. 374999
 *      * Negative: -15000 .. -1 (15000 sectors)
 *      * User:     0 .. 359999 (360000 sectors)
 *      * Overflow: 360000 .. 374999 (15000 sectors)
 *    Internal index I for any external L is I = L + negative.
 *    User area sector count reported to callers (ctx->imageInfo.Sectors) = blocks - negative - overflow = 360000.
 */
typedef struct DdtHeader2
{
    uint32_t identifier;          ///< Block identifier, must be BlockType::DeDuplicationTable2.
    uint16_t type;                ///< Data classification (\ref DataType) for sectors referenced by this table.
    uint16_t compression;         ///< Compression algorithm for this table body (\ref CompressionType).
    uint8_t  levels;              ///< Total number of hierarchy levels (root depth); > 0.
    uint8_t  tableLevel;          ///< Zero-based level index of this table (0 = root, increases downward).
    uint64_t previousLevelOffset; ///< Absolute byte offset of the parent (previous) level table; 0 if root.
    uint16_t negative;            ///< Leading negative LBA count; added to external L to build internal index.
    uint64_t blocks;              ///< Total internal span (negative + usable + overflow) in logical sectors.
    uint16_t overflow;            ///< Trailing dumped sectors beyond user area (overflow range), still mapped with entries.
    uint64_t start;               ///< Base internal index covered by this table (used for secondary tables; currently
                                  ///< informational).
    uint8_t  blockAlignmentShift; ///< 2^blockAlignmentShift = block alignment boundary in bytes.
    uint8_t  dataShift;           ///< 2^dataShift = sectors represented per increment in blockIndex field.
    uint8_t  tableShift;          ///< 2^tableShift = number of logical sectors per primary entry (multi-level only; 0 for
                                  ///< single-level or secondary tables).
    uint8_t  sizeType;            ///< Entry size variant (\ref DdtSizeType) controlling width of E.
    uint64_t entries;             ///< Number of entries contained in (uncompressed) table payload.
    uint64_t cmpLength;           ///< Compressed payload size in bytes.
    uint64_t length;              ///< Uncompressed payload size in bytes.
    uint64_t cmpCrc64;            ///< CRC64-ECMA of compressed table payload.
    uint64_t crc64;               ///< CRC64-ECMA of uncompressed table payload.
} DdtHeader2;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -19,42 +19,109 @@
|
||||
#ifndef LIBAARUFORMAT_DUMP_H
|
||||
#define LIBAARUFORMAT_DUMP_H
|
||||
|
||||
#include <stdint.h> /* Fixed-width integer types for on‑disk packed structures */
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Dump hardware block, contains a list of hardware used to dump the media on this image */
|
||||
typedef struct DumpHardwareHeader {
|
||||
/**Identifier, <see cref="BlockType.DumpHardwareBlock" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint16_t entries;
|
||||
    /**Size of the whole block, not including this header, in bytes */
|
||||
uint32_t length;
|
||||
/**CRC64-ECMA of the block */
|
||||
uint64_t crc64;
|
||||
/** \file aaruformat/structs/dump.h
|
||||
* \brief Packed on-disk structures describing hardware and software used during image acquisition.
|
||||
*
|
||||
* A Dump Hardware block (identifier = BlockType::DumpHardwareBlock) records one or more dump "environments" –
|
||||
* typically combinations of a physical device (drive, controller, adapter) and the software stack that
|
||||
* performed the read operation. Each environment is represented by a \ref DumpHardwareEntry followed by a
|
||||
* sequence of UTF‑8 strings and an optional array of extent ranges (\ref DumpExtent, defined in context.h) that
|
||||
* delimit portions of the medium this environment contributed to.
|
||||
*
|
||||
* Binary layout (little-endian, packed, all multi-byte integers LE):
|
||||
*
|
||||
 *     DumpHardwareHeader (sizeof = 18 bytes)
|
||||
* identifier (4) -> BlockType::DumpHardwareBlock
|
||||
* entries (2) -> number of following hardware entries
|
||||
* length (4) -> total bytes of payload that follow this header
|
||||
* crc64 (8) -> CRC64-ECMA of the payload bytes
|
||||
*
|
||||
* Repeated for i in [0, entries):
|
||||
* DumpHardwareEntry (36 bytes)
|
||||
* manufacturerLength (4)
|
||||
* modelLength (4)
|
||||
* revisionLength (4)
|
||||
* firmwareLength (4)
|
||||
* serialLength (4)
|
||||
* softwareNameLength (4)
|
||||
* softwareVersionLength (4)
|
||||
* softwareOperatingSystemLength (4)
|
||||
* extents (4) -> number of DumpExtent structs after the strings
|
||||
*
|
||||
* Variable-length UTF-8 strings (not NUL-terminated on disk) appear immediately after the entry, in the
|
||||
* exact order of the length fields above; each string is present only if its length > 0. The reader allocates
|
||||
* an extra byte to append '\0' for in-memory convenience.
|
||||
*
|
||||
* Array of 'extents' DumpExtent structures (each 16 bytes: start, end) follows the strings if extents > 0.
|
||||
* The semantic of each extent is an inclusive [start, end] logical sector (or unit) range contributed by
|
||||
* this hardware/software combination.
|
||||
*
|
||||
* CRC semantics:
|
||||
* - crc64 covers exactly 'length' bytes immediately following the header.
|
||||
* - For legacy images with header.imageMajorVersion <= AARUF_VERSION_V1 the original C# writer produced a
|
||||
* byte-swapped CRC; the library compensates internally (see process_dumphw_block()).
|
||||
*
|
||||
* Invariants / validation recommendations:
|
||||
* - identifier == BlockType::DumpHardwareBlock
|
||||
* - Accumulated size of all (entry + strings + extents arrays) == length
|
||||
* - All length fields are trusted only after bounds checking against remaining payload bytes
|
||||
* - Strings are raw UTF-8 data with no implicit terminator
|
||||
* - extents * sizeof(DumpExtent) fits inside remaining payload
|
||||
*
|
||||
* Memory management notes (runtime library):
|
||||
* - Each string is malloc'ed with +1 byte for terminator during processing.
|
||||
* - Extents array is malloc'ed per entry when extents > 0.
|
||||
* - See aaruformatContext::dumpHardwareEntriesWithData for owning pointers.
|
||||
*
|
||||
* \warning Structures are packed; never rely on natural alignment when mapping from a byte buffer.
|
||||
* \see DumpHardwareHeader
|
||||
* \see DumpHardwareEntry
|
||||
* \see DumpExtent (in context.h)
|
||||
* \see BlockType
|
||||
*/
|
||||
|
||||
/** \struct DumpHardwareHeader
|
||||
* \brief Header that precedes a sequence of dump hardware entries and their variable-length payload.
|
||||
*/
|
||||
typedef struct DumpHardwareHeader
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::DumpHardwareBlock.
|
||||
uint16_t entries; ///< Number of DumpHardwareEntry records that follow.
|
||||
uint32_t length; ///< Total payload bytes after this header (sum of entries, strings, and extents arrays).
|
||||
uint64_t crc64; ///< CRC64-ECMA of the payload (byte-swapped for legacy v1 images, handled automatically).
|
||||
} DumpHardwareHeader;
|
||||
|
||||
/**Dump hardware entry, contains the lengths of the strings that follow this structure, in the same order as the length fields */
|
||||
typedef struct DumpHardwareEntry {
|
||||
/**Length of UTF-8 manufacturer string */
|
||||
uint32_t manufacturerLength;
|
||||
/**Length of UTF-8 model string */
|
||||
uint32_t modelLength;
|
||||
/**Length of UTF-8 revision string */
|
||||
uint32_t revisionLength;
|
||||
/**Length of UTF-8 firmware version string */
|
||||
uint32_t firmwareLength;
|
||||
/**Length of UTF-8 serial string */
|
||||
uint32_t serialLength;
|
||||
/**Length of UTF-8 software name string */
|
||||
uint32_t softwareNameLength;
|
||||
/**Length of UTF-8 software version string */
|
||||
uint32_t softwareVersionLength;
|
||||
/**Length of UTF-8 software operating system string */
|
||||
uint32_t softwareOperatingSystemLength;
|
||||
/**How many extents are after the strings */
|
||||
uint32_t extents;
|
||||
/** \struct DumpHardwareEntry
|
||||
* \brief Per-environment length table describing subsequent UTF-8 strings and optional extent array.
|
||||
*
|
||||
* Immediately after this structure the variable-length UTF‑8 strings appear in the documented order, each
|
||||
* present only if its corresponding length is non-zero. No padding is present between strings. When all
|
||||
* strings are consumed, an array of \ref DumpExtent follows if \ref extents > 0.
|
||||
*
|
||||
* All length fields measure bytes (not characters) and exclude any in-memory NUL terminator added by the reader.
|
||||
*
|
||||
* Typical semantics:
|
||||
* - manufacturer/model/revision/firmware/serial identify the hardware device.
|
||||
* - softwareName/softwareVersion/softwareOperatingSystem identify the acquisition software environment.
|
||||
* - extents list which logical ranges this environment actually dumped (useful for multi-device composites).
|
||||
*/
|
||||
typedef struct DumpHardwareEntry
|
||||
{
|
||||
uint32_t manufacturerLength; ///< Length in bytes of manufacturer UTF-8 string.
|
||||
uint32_t modelLength; ///< Length in bytes of model UTF-8 string.
|
||||
uint32_t revisionLength; ///< Length in bytes of revision / hardware revision string.
|
||||
uint32_t firmwareLength; ///< Length in bytes of firmware version string.
|
||||
uint32_t serialLength; ///< Length in bytes of device serial number string.
|
||||
uint32_t softwareNameLength; ///< Length in bytes of dumping software name string.
|
||||
uint32_t softwareVersionLength; ///< Length in bytes of dumping software version string.
|
||||
uint32_t softwareOperatingSystemLength; ///< Length in bytes of host operating system string.
|
||||
uint32_t extents; ///< Number of DumpExtent records following the strings (0 = none).
|
||||
} DumpHardwareEntry;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_DUMP_H
|
||||
#endif // LIBAARUFORMAT_DUMP_H
|
||||
|
||||
@@ -19,73 +19,111 @@
|
||||
#ifndef LIBAARUFORMAT_HEADER_H
|
||||
#define LIBAARUFORMAT_HEADER_H
|
||||
|
||||
#define AARU_HEADER_APP_NAME_LEN 64
|
||||
#define GUID_SIZE 16
|
||||
/** \file aaruformat/structs/header.h
|
||||
* \brief On-disk container header structures (v1 and v2) for Aaru images.
|
||||
*
|
||||
* These packed headers appear at the very beginning (offset 0) of every Aaru image file and
|
||||
* advertise container format version, creator application, indexing offset and optional extended
|
||||
* feature capability bitfields (v2+). All multi-byte integers are little-endian. Strings stored
|
||||
* in the fixed-size application field are UTF‑16LE and zero padded (not necessarily NUL-terminated
|
||||
* if fully filled). The GUID field (v2) allows derivative / child images to reference an origin.
|
||||
*
|
||||
* Version progression:
|
||||
* - v1: \ref AaruHeader (no GUID, no alignment or shift metadata, no feature bitfields).
|
||||
* - v2: \ref AaruHeaderV2 introduces GUID, block/data/table shift hints (mirroring DDT metadata),
|
||||
* and three 64‑bit feature bitmaps to negotiate reader/writer compatibility.
|
||||
*
|
||||
* Compatibility handling (recommended logic for consumers):
|
||||
* 1. If any bit set in featureIncompatible is not implemented by the reader: abort (cannot safely read/write).
|
||||
* 2. Else if any bit set in featureCompatibleRo is not implemented: allow read‑only operations.
|
||||
* 3. Bits only present in featureCompatible but not implemented MAY be ignored for both read/write while
|
||||
* still preserving round‑trip capability (writer should not clear unknown bits when re‑saving).
|
||||
*
|
||||
* Alignment & shift semantics (duplicated here for quick reference, see DdtHeader2 for full details):
|
||||
* - blockAlignmentShift: underlying blocks are aligned to 2^blockAlignmentShift bytes.
|
||||
* - dataShift: data pointer / DDT entry low bits encode offsets modulo 2^dataShift sectors/items.
|
||||
* - tableShift: primary DDT entries span 2^tableShift logical sectors (0 implies single-level tables).
|
||||
*
|
||||
* Invariants:
|
||||
* - identifier == AARU_MAGIC (external constant; not defined here).
|
||||
* - For v1: sizeof(AaruHeader) exact and indexOffset > 0 (indexOffset == 0 => corrupt/unreadable image).
|
||||
* - For v2: sizeof(AaruHeaderV2) exact; indexOffset > 0; blockAlignmentShift, dataShift, tableShift within
|
||||
* sane bounds (e.g. < 63). Zero is permissible only for the shift fields (not for indexOffset).
|
||||
*
|
||||
* Security / robustness considerations:
|
||||
* - Always bounds-check indexOffset against file size before seeking.
|
||||
* - Treat application field as untrusted UTF‑16LE; validate surrogate pairs if necessary.
|
||||
* - Unknown feature bits MUST be preserved if a file is rewritten to avoid capability loss.
|
||||
*/
|
||||
|
||||
#define AARU_HEADER_APP_NAME_LEN 64 /**< Size in bytes (UTF-16LE) of application name field (32 UTF-16 code units). */
|
||||
#define GUID_SIZE 16 /**< Size in bytes of GUID / UUID-like binary identifier. */
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Header, at start of file */
|
||||
typedef struct AaruHeader {
|
||||
/**Header identifier, <see cref="AARU_MAGIC" /> */
|
||||
uint64_t identifier;
|
||||
/**UTF-16LE name of the application that created the image */
|
||||
uint8_t application[AARU_HEADER_APP_NAME_LEN];
|
||||
/**Image format major version. A new major version means a possibly incompatible change of format */
|
||||
uint8_t imageMajorVersion;
|
||||
/**Image format minor version. A new minor version indicates a compatible change of format */
|
||||
uint8_t imageMinorVersion;
|
||||
/**Major version of the application that created the image */
|
||||
uint8_t applicationMajorVersion;
|
||||
/**Minor version of the application that created the image */
|
||||
uint8_t applicationMinorVersion;
|
||||
/**Type of media contained on image */
|
||||
uint32_t mediaType;
|
||||
/**Offset to index */
|
||||
uint64_t indexOffset;
|
||||
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
|
||||
int64_t creationTime;
|
||||
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
|
||||
int64_t lastWrittenTime;
|
||||
/** \struct AaruHeader
|
||||
* \brief Version 1 container header placed at offset 0 for legacy / initial format.
|
||||
*
|
||||
* Field summary:
|
||||
* - identifier: magic signature (AARU_MAGIC) identifying the container.
|
||||
* - application: UTF‑16LE creator application name (fixed 64 bytes, zero padded).
|
||||
* - imageMajorVersion / imageMinorVersion: container format version of the file itself (not the app).
|
||||
* - applicationMajorVersion / applicationMinorVersion: version of the creating application.
|
||||
* - mediaType: media type enumeration (\ref MediaType).
|
||||
* - indexOffset: byte offset to the first index block (must be > 0).
|
||||
* - creationTime / lastWrittenTime: 64-bit Windows FILETIME timestamps (100 ns intervals since 1601-01-01 UTC).
|
||||
*/
|
||||
typedef struct AaruHeader
|
||||
{
|
||||
uint64_t identifier; ///< File magic (AARU_MAGIC).
|
||||
uint8_t application[AARU_HEADER_APP_NAME_LEN]; ///< UTF-16LE creator application name (fixed-size buffer).
|
||||
uint8_t imageMajorVersion; ///< Container format major version (incompatible changes when incremented).
|
||||
uint8_t imageMinorVersion; ///< Container format minor version (backward compatible evolutions).
|
||||
uint8_t applicationMajorVersion; ///< Creator application major version.
|
||||
uint8_t applicationMinorVersion; ///< Creator application minor / patch version.
|
||||
uint32_t mediaType; ///< Media type enumeration (value from \ref MediaType).
|
||||
uint64_t indexOffset; ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
|
||||
int64_t creationTime; ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
|
||||
int64_t lastWrittenTime; ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
|
||||
} AaruHeader;
|
||||
|
||||
/**Header, at start of file */
|
||||
typedef struct AaruHeaderV2 {
|
||||
/**Header identifier, see AARU_MAGIC */
|
||||
uint64_t identifier;
|
||||
/**UTF-16LE name of the application that created the image */
|
||||
uint8_t application[AARU_HEADER_APP_NAME_LEN];
|
||||
/**Image format major version. A new major version means a possibly incompatible change of format */
|
||||
uint8_t imageMajorVersion;
|
||||
/**Image format minor version. A new minor version indicates a compatible change of format */
|
||||
uint8_t imageMinorVersion;
|
||||
/**Major version of the application that created the image */
|
||||
uint8_t applicationMajorVersion;
|
||||
/**Minor version of the application that created the image */
|
||||
uint8_t applicationMinorVersion;
|
||||
/**Type of media contained on image */
|
||||
uint32_t mediaType;
|
||||
/**Offset to index */
|
||||
uint64_t indexOffset;
|
||||
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image creation time */
|
||||
int64_t creationTime;
|
||||
/**Windows filetime (100 nanoseconds since 1601/01/01 00:00:00 UTC) of image last written time */
|
||||
int64_t lastWrittenTime;
|
||||
/**Unique identifier that allows children images to recognize and find this image.*/
|
||||
uint8_t guid[GUID_SIZE];
|
||||
    /**Block alignment shift. All blocks in the image are aligned at 1 << blockAlignmentShift bytes */
|
||||
uint8_t blockAlignmentShift;
|
||||
    /**Data shift. All data blocks in the image contain 1 << dataShift items at most */
|
||||
uint8_t dataShift;
|
||||
/**Table shift. All deduplication tables in the image use this shift to calculate the position of an item */
|
||||
uint8_t tableShift;
|
||||
/**Features used in this image that if unsupported are still compatible for reading and writing implementations */
|
||||
uint64_t featureCompatible;
|
||||
/**Features used in this image that if unsupported are still compatible for reading implementations but not for writing */
|
||||
uint64_t featureCompatibleRo;
|
||||
    /**Features used in this image that if unsupported prevent reading or writing the image*/
|
||||
uint64_t featureIncompatible;
|
||||
/** \struct AaruHeaderV2
|
||||
* \brief Version 2 container header with GUID, alignment shifts, and feature negotiation bitmaps.
|
||||
*
|
||||
* Additions over v1:
|
||||
* - guid: stable 128-bit identifier enabling linkage by derivative images.
|
||||
* - blockAlignmentShift / dataShift / tableShift: global structural hints copied into data & DDT blocks.
|
||||
* - featureCompatible / featureCompatibleRo / featureIncompatible: capability bitmasks.
|
||||
*
|
||||
* Feature bitmask semantics:
|
||||
* - featureCompatible: Optional features; absence of implementation should not impact R/W correctness.
|
||||
* - featureCompatibleRo: If unimplemented, image MAY be opened read-only.
|
||||
* - featureIncompatible: If any bit unimplemented, image MUST NOT be opened (prevent misinterpretation).
|
||||
*
|
||||
* Readers should AND their supported bit set with the header masks to decide access level (see file
|
||||
* documentation). Writers must preserve unknown bits when saving an existing image.
|
||||
*/
|
||||
typedef struct AaruHeaderV2
|
||||
{
|
||||
uint64_t identifier; ///< File magic (AARU_MAGIC).
|
||||
uint8_t application[AARU_HEADER_APP_NAME_LEN]; ///< UTF-16LE creator application name (fixed 64 bytes).
|
||||
uint8_t imageMajorVersion; ///< Container format major version.
|
||||
uint8_t imageMinorVersion; ///< Container format minor version.
|
||||
uint8_t applicationMajorVersion; ///< Creator application major version.
|
||||
uint8_t applicationMinorVersion; ///< Creator application minor / patch version.
|
||||
uint32_t mediaType; ///< Media type enumeration (value from \ref MediaType).
|
||||
uint64_t indexOffset; ///< Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
|
||||
int64_t creationTime; ///< Creation FILETIME (100 ns since 1601-01-01 UTC).
|
||||
int64_t lastWrittenTime; ///< Last modification FILETIME (100 ns since 1601-01-01 UTC).
|
||||
uint8_t guid[GUID_SIZE]; ///< 128-bit image GUID (binary, not text); stable across children.
|
||||
uint8_t blockAlignmentShift; ///< log2 block alignment (block size alignment = 2^blockAlignmentShift bytes).
|
||||
uint8_t dataShift; ///< log2 sectors/items per block-index increment in DDT entries (2^dataShift).
|
||||
uint8_t tableShift; ///< log2 sectors spanned by each primary DDT entry (0 = single-level).
|
||||
uint64_t featureCompatible; ///< Feature bits: unimplemented bits are ignorable (still R/W safe).
|
||||
uint64_t featureCompatibleRo; ///< Feature bits: unimplemented -> degrade to read-only access.
|
||||
uint64_t featureIncompatible; ///< Feature bits: any unimplemented -> abort (cannot open safely).
|
||||
} AaruHeaderV2;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_HEADER_H
|
||||
#endif // LIBAARUFORMAT_HEADER_H
|
||||
|
||||
@@ -21,50 +21,95 @@
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Header for the index, followed by entries */
|
||||
/** \file aaruformat/structs/index.h
|
||||
* \brief On‑disk index block header and entry structures (versions 1, 2 and 3).
|
||||
*
|
||||
* The index provides a directory of all blocks contained in an Aaru image. Each index block starts with
|
||||
* a versioned header (IndexHeader / IndexHeader2 / IndexHeader3) followed by a contiguous array of
|
||||
* fixed‑size \ref IndexEntry records. Version 3 adds support for hierarchical (chained / nested) subindexes.
|
||||
*
|
||||
* Version mapping by block identifier (see \ref BlockType):
|
||||
* - IndexBlock (v1) -> \ref IndexHeader followed by 16‑bit entry count entries.
|
||||
* - IndexBlock2 (v2) -> \ref IndexHeader2 followed by 64‑bit entry count entries.
|
||||
* - IndexBlock3 (v3) -> \ref IndexHeader3 with optional hierarchical subindex references.
|
||||
*
|
||||
* CRC coverage & endianness:
|
||||
* - The crc64 field stores a CRC64-ECMA over the entries array ONLY (header bytes are excluded).
|
||||
* - For images with imageMajorVersion <= AARUF_VERSION_V1 a legacy writer byte-swapped the CRC; readers
|
||||
* compensate (see verify_index_v1/v2/v3). The value in the header remains whatever was originally written.
|
||||
*
|
||||
* Hierarchical (v3) behavior:
|
||||
* - Entries whose blockType == IndexBlock3 refer to subindex blocks; readers recursively load and flatten.
|
||||
* - IndexHeader3::previous can point to a preceding index segment (for append / incremental scenarios) or 0.
|
||||
* - CRC of the main index does NOT cover subindex contents; each subindex has its own header + CRC.
|
||||
*
|
||||
* Invariants / validation recommendations:
|
||||
* - identifier must equal the expected BlockType variant for that version.
|
||||
* - entries > 0 implies the entries array byte size == entries * sizeof(IndexEntry).
|
||||
* - crc64 must match recomputed CRC64( entries array ) (after legacy byte swap handling if required).
|
||||
* - For v3, if previous != 0 it should point to another IndexBlock3 header (optional best‑effort check).
|
||||
*
|
||||
* Notes:
|
||||
* - Structures are packed (1‑byte alignment). All multi-byte integers are little‑endian on disk.
|
||||
* - The index does not store per-entry CRC; integrity relies on each individual block's own CRC plus the index CRC.
|
||||
* - dataType in \ref IndexEntry is meaningful only for block types that carry typed data (e.g. DataBlock,
|
||||
* DumpHardwareBlock, etc.).
|
||||
*
|
||||
* See also: verify_index_v1(), verify_index_v2(), verify_index_v3() for integrity procedures.
|
||||
*/
|
||||
|
||||
/** \struct IndexHeader
|
||||
* \brief Index header (version 1) for legacy images (identifier == IndexBlock).
|
||||
*
|
||||
* Uses a 16‑bit entry counter limiting the number of indexable blocks in v1.
|
||||
*/
|
||||
typedef struct IndexHeader
|
||||
{
|
||||
/**Identifier, <see cref="BlockType.Index" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint16_t entries;
|
||||
/**CRC64-ECMA of the index */
|
||||
uint64_t crc64;
|
||||
uint32_t identifier; ///< Block identifier (must be BlockType::IndexBlock).
|
||||
uint16_t entries; ///< Number of \ref IndexEntry records that follow immediately.
|
||||
uint64_t crc64; ///< CRC64-ECMA of the entries array (legacy byte-swapped for early images).
|
||||
} IndexHeader;
|
||||
|
||||
/**Header for the index, followed by entries */
|
||||
/** \struct IndexHeader2
|
||||
* \brief Index header (version 2) with 64‑bit entry counter (identifier == IndexBlock2).
|
||||
*
|
||||
* Enlarges the entry count field to 64 bits for large images; otherwise structurally identical to v1.
|
||||
*/
|
||||
typedef struct IndexHeader2
|
||||
{
|
||||
/**Identifier, <see cref="BlockType.Index" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint64_t entries;
|
||||
/**CRC64-ECMA of the index */
|
||||
uint64_t crc64;
|
||||
uint32_t identifier; ///< Block identifier (must be BlockType::IndexBlock2).
|
||||
uint64_t entries; ///< Number of \ref IndexEntry records that follow immediately.
|
||||
uint64_t crc64; ///< CRC64-ECMA of the entries array (legacy byte-swapped rule still applies for old versions).
|
||||
} IndexHeader2;
|
||||
|
||||
/**Header for the index, followed by entries */
|
||||
/** \struct IndexHeader3
|
||||
* \brief Index header (version 3) adding hierarchical chaining (identifier == IndexBlock3).
|
||||
*
|
||||
* Supports flattened hierarchical indexes: entries referencing additional IndexBlock3 subindexes.
|
||||
* The 'previous' pointer allows chaining earlier index segments (e.g., incremental append) enabling
|
||||
* cumulative discovery without rewriting earlier headers.
|
||||
*/
|
||||
typedef struct IndexHeader3
|
||||
{
|
||||
/**Identifier, <see cref="BlockType.Index" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint64_t entries;
|
||||
/**CRC64-ECMA of the index */
|
||||
uint64_t crc64;
|
||||
/**Pointer to the previous index header */
|
||||
uint64_t previous;
|
||||
uint32_t identifier; ///< Block identifier (must be BlockType::IndexBlock3).
|
||||
uint64_t entries; ///< Number of \ref IndexEntry records that follow in this (sub)index block.
|
||||
uint64_t crc64; ///< CRC64-ECMA of the local entries array (does NOT cover subindexes or previous chains).
|
||||
uint64_t previous; ///< File offset of a previous IndexBlock3 header (0 if none / root segment).
|
||||
} IndexHeader3;
|
||||
|
||||
/**Index entry */
|
||||
/** \struct IndexEntry
|
||||
* \brief Single index entry describing a block's type, (optional) data classification, and file offset.
|
||||
*
|
||||
* Semantics by blockType (see \ref BlockType):
|
||||
* - DataBlock / GeometryBlock / ChecksumBlock / etc.: dataType conveys specific stored data category (\ref DataType).
|
||||
* - Deduplication (DDT) or Index blocks: dataType may be ignored or set to a sentinel.
|
||||
* - IndexBlock3: this entry refers to a subindex; offset points to another IndexHeader3.
|
||||
*/
|
||||
typedef struct IndexEntry
|
||||
{
|
||||
/**Type of item pointed by this entry */
|
||||
uint32_t blockType;
|
||||
/**Type of data contained by the block pointed by this entry */
|
||||
uint16_t dataType;
|
||||
/**Offset in file where item is stored */
|
||||
uint64_t offset;
|
||||
uint32_t blockType; ///< Block identifier of the referenced block (value from \ref BlockType).
|
||||
uint16_t dataType; ///< Data classification (value from \ref DataType) or unused for untyped blocks.
|
||||
uint64_t offset; ///< Absolute byte offset in the image where the referenced block header begins.
|
||||
} IndexEntry;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
@@ -21,73 +21,95 @@
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Metadata block, contains metadata */
|
||||
typedef struct MetadataBlockHeader {
|
||||
/**Identifier, <see cref="BlockType.MetadataBlock" /> */
|
||||
uint32_t identifier;
|
||||
    /**Size in bytes of this whole metadata block */
|
||||
uint32_t blockSize;
|
||||
/**Sequence of media set this media belongs to */
|
||||
int32_t mediaSequence;
|
||||
/**Total number of media on the media set this media belongs to */
|
||||
int32_t lastMediaSequence;
|
||||
/**Offset to start of creator string from start of this block */
|
||||
uint32_t creatorOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE creator string */
|
||||
uint32_t creatorLength;
|
||||
    /**Offset to start of comments string from start of this block */
|
||||
uint32_t commentsOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE comments string */
|
||||
uint32_t commentsLength;
|
||||
    /**Offset to start of media title string from start of this block */
|
||||
uint32_t mediaTitleOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE media title string */
|
||||
uint32_t mediaTitleLength;
|
||||
    /**Offset to start of media manufacturer string from start of this block */
|
||||
uint32_t mediaManufacturerOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE media manufacturer string */
|
||||
uint32_t mediaManufacturerLength;
|
||||
    /**Offset to start of media model string from start of this block */
|
||||
uint32_t mediaModelOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE media model string */
|
||||
uint32_t mediaModelLength;
|
||||
    /**Offset to start of media serial number string from start of this block */
|
||||
uint32_t mediaSerialNumberOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE media serial number string */
|
||||
uint32_t mediaSerialNumberLength;
|
||||
    /**Offset to start of media barcode string from start of this block */
|
||||
uint32_t mediaBarcodeOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE media barcode string */
|
||||
uint32_t mediaBarcodeLength;
|
||||
    /**Offset to start of media part number string from start of this block */
|
||||
uint32_t mediaPartNumberOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE media part number string */
|
||||
uint32_t mediaPartNumberLength;
|
||||
    /**Offset to start of drive manufacturer string from start of this block */
|
||||
uint32_t driveManufacturerOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE drive manufacturer string */
|
||||
uint32_t driveManufacturerLength;
|
||||
    /**Offset to start of drive model string from start of this block */
|
||||
uint32_t driveModelOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE drive model string */
|
||||
uint32_t driveModelLength;
|
||||
    /**Offset to start of drive serial number string from start of this block */
|
||||
uint32_t driveSerialNumberOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE drive serial number string */
|
||||
uint32_t driveSerialNumberLength;
|
||||
    /**Offset to start of drive firmware revision string from start of this block */
|
||||
uint32_t driveFirmwareRevisionOffset;
|
||||
    /**Length in bytes of the null-terminated UTF-16LE drive firmware revision string */
|
||||
uint32_t driveFirmwareRevisionLength;
|
||||
/** \file aaruformat/structs/metadata.h
|
||||
* \brief Packed on-disk metadata block headers for descriptive strings and CICM XML (if present).
|
||||
*
|
||||
* Two metadata-related block header layouts are defined:
|
||||
* - \ref MetadataBlockHeader (BlockType::MetadataBlock): offsets + lengths for several UTF-16LE strings.
|
||||
* - \ref CicmMetadataBlock (BlockType::CicmBlock): length of embedded CICM XML metadata payload.
|
||||
*
|
||||
* All multi-byte integers are little-endian. Structures are packed (1-byte alignment). All textual fields
|
||||
* referenced by offsets are UTF-16LE, null-terminated (0x0000). Length fields include the terminating
|
||||
* null (i.e. length >= 2 and an even number). Offsets are relative to the start of the corresponding block
|
||||
* header (byte 0 = first byte of the header). No padding is implicitly added between strings; producers
|
||||
* may pack them tightly or align them manually (alignment not required by the specification).
|
||||
*
|
||||
* Metadata block layout (conceptual):
|
||||
* MetadataBlockHeader (fixed size)
|
||||
* <variable region holding each present UTF-16LE string in any order chosen by the writer>
|
||||
*
|
||||
* Invariants / validation recommendations for MetadataBlockHeader:
|
||||
* - identifier == BlockType::MetadataBlock
|
||||
* - blockSize >= sizeof(MetadataBlockHeader)
|
||||
* - For every (offset,length) pair where length > 0:
|
||||
* * offset >= sizeof(MetadataBlockHeader)
|
||||
* * offset + length <= blockSize
|
||||
* * length % 2 == 0
|
||||
* * The 16-bit code unit at (offset + length - 2) == 0x0000 (null terminator)
|
||||
* - mediaSequence >= 0 and lastMediaSequence >= 0; if lastMediaSequence > 0 then 0 <= mediaSequence <
|
||||
* lastMediaSequence
|
||||
*
|
||||
* CICM metadata block layout:
|
||||
* CicmMetadataBlock (header)
|
||||
* <length bytes of UTF-8 or XML text payload (implementation-defined, not null-terminated)>
|
||||
*
|
||||
* NOTE: The library code reading these blocks must not assume strings are present; a zero length means the
|
||||
* corresponding field is omitted. Offsets for omitted fields MAY be zero or arbitrary; readers should skip them
|
||||
* whenever length == 0.
|
||||
*/
|
||||
|
||||
/** \struct MetadataBlockHeader
|
||||
* \brief Header for a metadata block containing offsets and lengths to UTF-16LE descriptive strings.
|
||||
*
|
||||
* Descriptive fields (all optional): creator, comments, media title/manufacturer/model/serial/barcode/part number,
|
||||
* drive manufacturer/model/serial/firmware revision. Strings can be used to describe both physical medium and
|
||||
* acquisition hardware. Length values include the UTF-16LE null terminator (two zero bytes).
|
||||
*/
|
||||
typedef struct MetadataBlockHeader
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::MetadataBlock.
|
||||
uint32_t blockSize; ///< Total size in bytes of the entire metadata block (header + strings).
|
||||
int32_t mediaSequence; ///< Sequence number within a multi-disc / multi-volume set (0-based or 1-based as
|
||||
///< producer defines).
|
||||
int32_t lastMediaSequence; ///< Total number of media in the set; 0 or 1 if single item.
|
||||
uint32_t creatorOffset; ///< Offset to UTF-16LE creator string (or undefined if creatorLength==0).
|
||||
uint32_t creatorLength; ///< Length in bytes (including null) of creator string (0 if absent).
|
||||
uint32_t commentsOffset; ///< Offset to UTF-16LE comments string.
|
||||
uint32_t commentsLength; ///< Length in bytes (including null) of comments string.
|
||||
uint32_t mediaTitleOffset; ///< Offset to UTF-16LE media title string.
|
||||
uint32_t mediaTitleLength; ///< Length in bytes (including null) of media title string.
|
||||
uint32_t mediaManufacturerOffset; ///< Offset to UTF-16LE media manufacturer string.
|
||||
uint32_t mediaManufacturerLength; ///< Length in bytes (including null) of media manufacturer string.
|
||||
uint32_t mediaModelOffset; ///< Offset to UTF-16LE media model string.
|
||||
uint32_t mediaModelLength; ///< Length in bytes (including null) of media model string.
|
||||
uint32_t mediaSerialNumberOffset; ///< Offset to UTF-16LE media serial number string.
|
||||
uint32_t mediaSerialNumberLength; ///< Length in bytes (including null) of media serial number string.
|
||||
uint32_t mediaBarcodeOffset; ///< Offset to UTF-16LE media barcode string.
|
||||
uint32_t mediaBarcodeLength; ///< Length in bytes (including null) of media barcode string.
|
||||
uint32_t mediaPartNumberOffset; ///< Offset to UTF-16LE media part number string.
|
||||
uint32_t mediaPartNumberLength; ///< Length in bytes (including null) of media part number string.
|
||||
uint32_t driveManufacturerOffset; ///< Offset to UTF-16LE drive manufacturer string.
|
||||
uint32_t driveManufacturerLength; ///< Length in bytes (including null) of drive manufacturer string.
|
||||
uint32_t driveModelOffset; ///< Offset to UTF-16LE drive model string.
|
||||
uint32_t driveModelLength; ///< Length in bytes (including null) of drive model string.
|
||||
uint32_t driveSerialNumberOffset; ///< Offset to UTF-16LE drive serial number string.
|
||||
uint32_t driveSerialNumberLength; ///< Length in bytes (including null) of drive serial number string.
|
||||
uint32_t driveFirmwareRevisionOffset; ///< Offset to UTF-16LE drive firmware revision string.
|
||||
uint32_t driveFirmwareRevisionLength; ///< Length in bytes (including null) of drive firmware revision string.
|
||||
} MetadataBlockHeader;
|
||||
|
||||
/**CICM metadata block, contains the CICM XML metadata payload */
|
||||
typedef struct CicmMetadataBlock {
|
||||
/**Identifier, <see cref="BlockType.CicmBlock" /> */
|
||||
uint32_t identifier;
|
||||
uint32_t length;
|
||||
/** \struct CicmMetadataBlock
|
||||
* \brief Header for a CICM XML metadata block (identifier == BlockType::CicmBlock).
|
||||
*
|
||||
* The following 'length' bytes immediately after the header contain the CICM XML payload. Encoding is typically
|
||||
* UTF-8; the payload is not required to be null-terminated.
|
||||
*/
|
||||
typedef struct CicmMetadataBlock
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier, must be BlockType::CicmBlock.
|
||||
uint32_t length; ///< Length in bytes of the CICM metadata payload that follows.
|
||||
} CicmMetadataBlock;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_METADATA_H
|
||||
#endif // LIBAARUFORMAT_METADATA_H
|
||||
|
||||
@@ -21,36 +21,65 @@
|
||||
|
||||
#pragma pack(push, 1)
|
||||
|
||||
/**Contains list of optical disc tracks */
|
||||
typedef struct TracksHeader {
|
||||
/**Identifier, <see cref="BlockType.TracksBlock" /> */
|
||||
uint32_t identifier;
|
||||
/**How many entries follow this header */
|
||||
uint16_t entries;
|
||||
/**CRC64-ECMA of the block */
|
||||
uint64_t crc64;
|
||||
/** \file aaruformat/structs/optical.h
|
||||
* \brief On-disk structures describing optical disc tracks (Track list block).
|
||||
*
|
||||
* An optical tracks block (identifier == BlockType::TracksBlock) stores a list of \ref TrackEntry
|
||||
* records describing the logical layout of tracks and sessions for CD/DVD/BD and similar media.
|
||||
*
|
||||
* Layout:
|
||||
* TracksHeader (fixed)
|
||||
* TrackEntry[ entries ] (array, packed)
|
||||
*
|
||||
* CRC semantics:
|
||||
* - TracksHeader::crc64 is a CRC64-ECMA over the contiguous TrackEntry array ONLY (header excluded).
|
||||
* - For legacy images (imageMajorVersion <= AARUF_VERSION_V1) a byte swap is applied when verifying.
|
||||
*
|
||||
* Field semantics (TrackEntry):
|
||||
* - sequence: Logical track number (1..99 typical for CD). Values outside that range may encode extras.
|
||||
* - type: Value from \ref TrackType (Audio, Data, Mode variants, etc.).
|
||||
* - start / end: Inclusive Logical Block Address (LBA) bounds for the track. end >= start.
|
||||
* - pregap: Number of sectors of pre-gap *preceding* the track's first user-accessible sector (can be 0 or negative
|
||||
* if representing lead-in semantics; negative interpretation is implementation-defined).
|
||||
* - session: Session number starting at 1 for multi-session discs (1 for single session).
|
||||
* - isrc: 13-byte ISRC (raw code, no terminating null). If fewer significant characters, remaining bytes are 0.
|
||||
* - flags: Bitmask of track/control flags. Unless otherwise specified, recommended mapping (mirrors CD subchannel Q
|
||||
* control bits) is: bit0 Pre-emphasis, bit1 Copy permitted, bit2 Data track, bit3 Four-channel audio,
|
||||
* bits4-7 reserved. Actual semantics may be extended by the format specification.
|
||||
*
|
||||
* Invariants / validation recommendations:
|
||||
* - identifier == BlockType::TracksBlock
|
||||
* - entries * sizeof(TrackEntry) bytes are present after the header in the block image.
|
||||
* - 1 <= sequence <= 99 for standard CD tracks (non-conforming values allowed but should be documented).
|
||||
* - start <= end; pregap >= 0 (if negative pregaps unsupported in implementation).
|
||||
* - ISRC bytes either all zero (no ISRC) or ASCII alphanumerics (A-Z 0-9) per ISO 3901 (without hyphen formatting).
|
||||
*/
|
||||
|
||||
/** \struct TracksHeader
|
||||
* \brief Header for an optical tracks block listing track entries.
|
||||
*/
|
||||
typedef struct TracksHeader
|
||||
{
|
||||
uint32_t identifier; ///< Block identifier (must be BlockType::TracksBlock).
|
||||
uint16_t entries; ///< Number of TrackEntry records following this header.
|
||||
uint64_t crc64; ///< CRC64-ECMA of the TrackEntry array (header excluded, legacy byte-swap for early versions).
|
||||
} TracksHeader;
|
||||
|
||||
/**Optical disc track */
|
||||
typedef struct TrackEntry {
|
||||
/**Track sequence */
|
||||
uint8_t sequence;
|
||||
/**Track type */
|
||||
uint8_t type;
|
||||
/**Track starting LBA */
|
||||
int64_t start;
|
||||
/**Track last LBA */
|
||||
int64_t end;
|
||||
/**Track pregap in sectors */
|
||||
int64_t pregap;
|
||||
/**Track session */
|
||||
uint8_t session;
|
||||
/**Track's ISRC in ASCII */
|
||||
uint8_t isrc[13];
|
||||
/**Track flags */
|
||||
uint8_t flags;
|
||||
/** \struct TrackEntry
|
||||
* \brief Single optical disc track descriptor (sequence, type, LBAs, session, ISRC, flags).
|
||||
*/
|
||||
typedef struct TrackEntry
|
||||
{
|
||||
uint8_t sequence; ///< Track number (1..99 typical for CD audio/data). 0 may indicate placeholder/non-standard.
|
||||
uint8_t type; ///< Track type (value from \ref TrackType).
|
||||
int64_t start; ///< Inclusive starting LBA of the track.
|
||||
int64_t end; ///< Inclusive ending LBA of the track.
|
||||
int64_t pregap; ///< Pre-gap length in sectors preceding track start (0 if none).
|
||||
uint8_t session; ///< Session number (1-based). 1 for single-session discs.
|
||||
uint8_t isrc[13]; ///< ISRC raw 13-byte code (no null terminator). All zeros if not present.
|
||||
uint8_t flags; ///< Control / attribute bitfield (see file documentation for suggested bit mapping).
|
||||
} TrackEntry;
|
||||
|
||||
#pragma pack(pop)
|
||||
|
||||
#endif //LIBAARUFORMAT_OPTICAL_H
|
||||
#endif // LIBAARUFORMAT_OPTICAL_H
|
||||
|
||||
@@ -19,19 +19,214 @@
|
||||
#ifndef LIBAARUFORMAT_OPTIONS_H
|
||||
#define LIBAARUFORMAT_OPTIONS_H
|
||||
|
||||
#include <stdbool.h> ///< For bool type used in aaru_options.
|
||||
#include <stdint.h> ///< For fixed-width integer types.
|
||||
|
||||
/** \file aaruformat/structs/options.h
|
||||
* \brief Image creation / open tuning options structure and related semantics.
|
||||
*
|
||||
* The library accepts a semicolon-delimited key=value options string (see parse_options()). Recognized keys:
|
||||
* compress=true|false Enable/disable block compression (LZMA for data blocks, FLAC for audio tracks).
|
||||
* deduplicate=true|false If true, identical (duplicate) sectors are stored once (DDT entries point to same
|
||||
* physical block). If false, duplicates are still tracked in DDT but each occurrence
|
||||
* is stored independently (no storage savings). DDT itself is always present.
|
||||
* dictionary=<bytes> LZMA dictionary size in bytes (fallback default 33554432 if 0 or invalid).
|
||||
* table_shift=<n> DDT v2 table shift (default 9) (items per primary entry = 2^n when multi-level).
|
||||
* data_shift=<n> Global data shift (default 12). Defines per-block address granularity: the low
|
||||
* 2^n range encodes the sector (or unit) offset within a block; higher bits combine
|
||||
* with block_alignment to derive block file offsets. Used by DDT but not limited to it.
|
||||
* block_alignment=<n> log2 alignment of underlying data blocks (default 9 => 512 bytes) (block size = 2^n).
|
||||
* md5=true|false Generate MD5 checksum (stored in checksum block if true).
|
||||
* sha1=true|false Generate SHA-1 checksum.
|
||||
* sha256=true|false Generate SHA-256 checksum.
|
||||
* blake3=true|false Generate BLAKE3 checksum (may require build-time support; ignored if unsupported).
|
||||
* spamsum=true|false Generate SpamSum fuzzy hash.
|
||||
*
|
||||
* Defaults (when option string NULL or key omitted):
|
||||
* compress=true, deduplicate=true, dictionary=33554432, table_shift=9, data_shift=12,
|
||||
* block_alignment=9, md5=false, sha1=false, sha256=false, blake3=false, spamsum=false.
|
||||
*
|
||||
* Validation / normalization done in parse_options():
|
||||
* - Zero / missing dictionary resets to default 33554432.
|
||||
* - Zero table_shift resets to 9.
|
||||
* - Zero data_shift resets to 12.
|
||||
* - Zero block_alignment resets to 9.
|
||||
*
|
||||
* Rationale:
|
||||
* - table_shift, data_shift and block_alignment mirror fields stored in on-disk headers (see AaruHeaderV2 &
|
||||
* DdtHeader2); data_shift is a global per-block granularity exponent (not DDT-specific) governing how in-block offsets
|
||||
* are encoded.
|
||||
* - compress selects adaptive codec usage: LZMA applied to generic/data blocks, FLAC applied to audio track payloads.
|
||||
* - deduplicate toggles storage optimization only: the DDT directory is always built for addressing; disabling simply
|
||||
* forces each sector's content to be written even if already present (useful for forensic byte-for-byte
|
||||
* duplication).
|
||||
* - dictionary tunes compression ratio/memory use; large values increase memory footprint.
|
||||
* - Checksums are optional; enabling multiple increases CPU time at write finalization.
|
||||
*
|
||||
* Performance / space trade-offs (deduplicate=false):
|
||||
* - Significantly larger image size: every repeated sector payload is written again.
|
||||
* - Higher write I/O and longer creation time for highly redundant sources (e.g., zero-filled regions) compared to
|
||||
* deduplicate=true, although CPU time spent on duplicate detection/hash lookups is reduced.
|
||||
* - Potentially simpler post-process forensic validation (physical ordering preserved without logical coalescing).
|
||||
* - Use when exact physical repetition is more critical than storage efficiency, or to benchmark raw device
|
||||
* throughput.
|
||||
* - For typical archival use-cases with large zero / repeated patterns, deduplicate=true markedly reduces footprint.
|
||||
*
|
||||
* Approximate in-RAM hash map usage for deduplication (deduplicate=true):
|
||||
* The on-disk DDT can span many secondary tables, but only the primary table plus a currently loaded secondary (and
|
||||
* possibly a small cache) reside in memory; their footprint is typically <<5% of total indexed media space and is
|
||||
* often negligible compared to the hash map used to detect duplicate sectors. Therefore we focus here on the hash /
|
||||
* lookup structure ("hash_map") memory, not the entire DDT on-disk size.
|
||||
*
|
||||
* Worst-case (all sectors unique) per 1 GiB of user data:
|
||||
* sectors_per_GiB = 2^30 / sector_size
|
||||
* hash_bytes ≈ sectors_per_GiB * H (H ≈ 16 bytes: 8-byte fingerprint + ~8 bytes map overhead)
|
||||
*
|
||||
* Resulting hash_map RAM per GiB (unique sectors):
|
||||
* +--------------+------------------+------------------------------+
|
||||
* | Sector size | Sectors / GiB | Hash map (~16 B / sector) |
|
||||
* +--------------+------------------+------------------------------+
|
||||
* | 512 bytes | 2,097,152 | ~32.0 MiB (≈30.0–34.0 MiB) |
|
||||
* | 2048 bytes | 524,288 | ~ 8.0 MiB (≈7.5–8.5 MiB) |
|
||||
* | 4096 bytes | 262,144 | ~ 4.0 MiB (≈3.8–4.3 MiB) |
|
||||
* +--------------+------------------+------------------------------+
|
||||
*
|
||||
* (Range reflects allocator + load factor variation.)
|
||||
*
|
||||
* Targeted projections (hash map only, R=1):
|
||||
* 2048‑byte sectors (~8 MiB per GiB unique)
|
||||
* Capacity | Hash map (MiB) | Hash map (GiB)
|
||||
* ---------+---------------+----------------
|
||||
* 25 GiB | ~200 | 0.20
|
||||
* 50 GiB | ~400 | 0.39
|
||||
*
|
||||
* 512‑byte sectors (32 MiB per GiB unique: 2,097,152 sectors × 16 B)
|
||||
* Capacity | Hash map (MiB) | Hash map (GiB)
|
||||
* ---------+---------------+----------------
|
||||
* 128 GiB | ~4096 | 4.00
|
||||
* 500 GiB | ~16000 | 15.63
|
||||
* 1 TiB* | ~32768 | 32.00
|
||||
* 2 TiB* | ~65536 | 64.00
|
||||
*
|
||||
* *TiB = 1024 GiB binary. For decimal TB reduce by ~9% (×0.91).
|
||||
*
|
||||
* Duplicate ratio scaling:
|
||||
* Effective hash RAM ≈ table_value * R, where R = unique_sectors / total_sectors.
|
||||
* Example: 500 GiB @512 B, R=0.4 ⇒ 500 × 32 MiB × 0.4 ≈ 6400 MiB (~6.25 GiB).
|
||||
*
|
||||
* Quick rule of thumb (hash only):
|
||||
* hash_bytes_per_GiB ≈ 16 * (2^30 / sector_size) ≈ (17.1799e9 / sector_size) bytes
|
||||
* → ≈ 32.0 MiB (512 B), 8.0 MiB (2048 B), 4.0 MiB (4096 B) per GiB unique (33.6 / 8.4 / 4.2 decimal MB).
|
||||
*
|
||||
* Memory planning tip:
|
||||
* If projected hash_map usage risks exceeding available RAM, consider:
|
||||
* - Increasing table_shift (reduces simultaneous secondary loads / contention)
|
||||
* - Lowering data_shift (if practical) to encourage earlier big DDT adoption with fewer unique blocks
|
||||
* - Segmenting the dump into phases (if workflow permits)
|
||||
* - Accepting higher duplicate ratio by pre-zero detection or sparse treatment externally.
|
||||
* - Resuming the dump in multiple passes: each resume rebuilds the hash_map from scratch, so peak RAM still
|
||||
* matches a single-pass estimate, but average RAM over total wall time can drop if you unload between passes.
|
||||
*
|
||||
* NOTE: DDT in-RAM portion (primary + one secondary) usually adds only a few additional MiB even for very large
|
||||
* images, hence omitted from sizing tables. Include +5% safety margin if extremely tight on memory.
|
||||
*
|
||||
* Guidance for table_shift / data_shift selection:
|
||||
* Let:
|
||||
* S = total logical sectors expected in image (estimate if unknown).
|
||||
* T = table_shift (items per primary DDT entry = 2^T when multi-level; 0 => single-level).
|
||||
* D = data_shift (in-block sector offset span = 2^D).
|
||||
* BA = block_alignment (bytes) = 2^block_alignment.
|
||||
* SS = sector size (bytes).
|
||||
*
|
||||
* 1. data_shift constraints:
|
||||
* - For SMALL DDT entries (12 payload bits after status): D must satisfy 0 < D < 12 and (12 - D) >= 1 so that at
|
||||
* least one bit remains for block index. Practical range for small DDT: 6..10 (leaves 2+ bits for block index).
|
||||
* - For BIG DDT entries (28 payload bits after status): D may be larger (up to 27) but values >16 rarely useful.
|
||||
* - Effective address granularity inside a block = min(2^D * SS, physical block span implied by BA).
|
||||
* - Choosing D too large wastes bits (larger offset range than block actually contains) and reduces the number of
|
||||
* block index bits within a small entry, potentially forcing upgrade to big DDT earlier.
|
||||
*
|
||||
* Recommended starting points:
|
||||
* * 512‑byte sectors, 512‑byte block alignment: D=9 (512 offsets) or D=8 (256 offsets) keeps small DDT viable.
|
||||
* * 2048‑byte optical sectors, 2048‑byte alignment: D=8 (256 offsets) typically sufficient.
|
||||
* * Mixed / large logical block sizes: keep D so that (2^D * SS) ≈ typical dedup block region you want
|
||||
* addressable.
|
||||
*
|
||||
* 2. block capacity within an entry:
|
||||
* - SMALL DDT: usable block index bits = 12 - D.
|
||||
* Max representable block index (small) = 2^(12-D) - 1.
|
||||
* - BIG DDT: usable block index bits = 28 - D.
|
||||
* Max representable block index (big) = 2^(28-D) - 1.
|
||||
* - If (requiredBlockIndex > max) you must either reduce D or rely on big DDT.
|
||||
*
|
||||
* Approximate requiredBlockIndex ≈ (TotalUniqueBlocks) where
|
||||
* TotalUniqueBlocks ≈ (S * SS) / (BA * (2^D * SS / (SS))) = S / (2^D * (BA / SS))
|
||||
* Simplified (assuming BA = SS): TotalUniqueBlocks ≈ S / 2^D.
|
||||
*
|
||||
* 3. table_shift considerations (multi-level DDT):
|
||||
* - Primary entries count ≈ ceil(S / 2^T). Choose T so this count fits memory and keeps lookup fast.
|
||||
* - Larger T reduces primary table size, increasing secondary table dereferences.
|
||||
* - Typical balanced values: T in [8..12] (256..4096 sectors per primary entry).
|
||||
* - Set T=0 for single-level when S is small enough that all entries fit comfortably in memory.
|
||||
*
|
||||
* Memory rough estimate for single-level SMALL DDT:
|
||||
* bytes ≈ S * 2 (each small entry 2 bytes). For BIG DDT: bytes ≈ S * 4.
|
||||
* Multi-level: primary table bytes ≈ (S / 2^T) * entrySize + sum(secondary tables).
|
||||
*
|
||||
* 4. Example scenarios:
|
||||
* - 50M sectors (≈25 GiB @512B), want small DDT: pick D=8 (256); block index bits=4 (max 16 blocks) insufficient.
|
||||
* Even D=6 leaves only 6 index bits (64 block indices), versus BIG DDT (28-8=20 bits => million+ blocks). So prefer BIG DDT
|
||||
* here.
|
||||
* - 2M sectors, 2048B alignment, optical: D=8 gives S/2^D ≈ 7812 unique offsets; small DDT block index bits=4 (max
|
||||
* 16) inadequate → choose D=6 (offset span 64 sectors) giving 6 block index bits (max 64) or just use big DDT.
|
||||
*
|
||||
* 5. Practical recommendations:
|
||||
* - If unsure and image > ~1M sectors: keep defaults (data_shift=12, table_shift=9) and allow big DDT.
|
||||
* - For small archival (<100k sectors): T=0 (single-level), D≈8..10 to keep small DDT feasible.
|
||||
* - Benchmark before lowering D purely to stay in small DDT; increased secondary lookups or larger primary tables
|
||||
* can offset saved space.
|
||||
*
|
||||
* Recommended presets (approximate bands):
|
||||
* +----------------------+----------------------+---------------------------+-------------------------------+
|
||||
* | Total logical sectors | table_shift (T) | data_shift (D) | Notes |
|
||||
* +----------------------+----------------------+---------------------------+-------------------------------+
|
||||
* | < 50,000 | 0 | 8 – 10 | Single-level small DDT likely |
|
||||
* | 50K – 1,000,000 | 8 – 9 | 9 – 10 | Still feasible small DDT |
|
||||
* | 1M – 10,000,000 | 9 – 10 | 10 – 12 | Borderline small -> big DDT |
|
||||
* | 10M – 100,000,000 | 10 – 11 | 11 – 12 | Prefer big DDT; tune T for mem|
|
||||
* | > 100,000,000 | 11 – 12 | 12 | Big DDT; higher T saves memory|
|
||||
* +----------------------+----------------------+---------------------------+-------------------------------+
|
||||
* Ranges show typical stable regions; pick the lower end of table_shift if memory is ample, higher if minimizing
|
||||
* primary table size. Always validate actual unique block count vs payload bits.
|
||||
*
|
||||
* NOTE: The library will automatically fall back to BIG DDT where needed; these settings bias structure, they do not
|
||||
* guarantee small DDT retention.
|
||||
*
|
||||
* Thread-safety: aaru_options is a plain POD struct; caller may copy freely. parse_options() returns by value.
|
||||
*
|
||||
* Future compatibility: unknown keys are ignored by current parser; consumers should preserve original option
|
||||
* strings if round-tripping is required.
|
||||
*/
|
||||
|
||||
/** \struct aaru_options
|
||||
* \brief Parsed user-specified tunables controlling compression, deduplication, hashing and DDT geometry.
|
||||
*
|
||||
* All shifts are exponents of two.
|
||||
*/
|
||||
typedef struct
|
||||
{
|
||||
bool compress;
|
||||
bool deduplicate;
|
||||
uint32_t dictionary;
|
||||
uint8_t table_shift;
|
||||
uint8_t data_shift;
|
||||
uint8_t block_alignment;
|
||||
bool md5;
|
||||
bool sha1;
|
||||
bool sha256;
|
||||
bool blake3;
|
||||
bool spamsum;
|
||||
bool compress; ///< Enable adaptive compression (LZMA for data blocks, FLAC for audio). Default: true.
|
||||
bool deduplicate; ///< Storage dedup flag (DDT always exists). true=share identical sector content, false=store
|
||||
///< each instance.
|
||||
uint32_t dictionary; ///< LZMA dictionary size in bytes (>= 4096 recommended). Default: 33554432 (32 MiB).
|
||||
uint8_t table_shift; ///< DDT table shift (multi-level fan-out exponent). Default: 9.
|
||||
uint8_t data_shift; ///< Global data shift: low bits encode sector offset inside a block (2^data_shift span).
|
||||
uint8_t block_alignment; ///< log2 underlying block alignment (2^n bytes). Default: 9 (512 bytes).
|
||||
bool md5; ///< Generate MD5 checksum (ChecksumAlgorithm::Md5) when finalizing image.
|
||||
bool sha1; ///< Generate SHA-1 checksum (ChecksumAlgorithm::Sha1) when finalizing image.
|
||||
bool sha256; ///< Generate SHA-256 checksum (ChecksumAlgorithm::Sha256) when finalizing image.
|
||||
bool blake3; ///< Generate BLAKE3 checksum if supported (not stored if algorithm unavailable).
|
||||
bool spamsum; ///< Generate SpamSum fuzzy hash (ChecksumAlgorithm::SpamSum) if enabled.
|
||||
} aaru_options;
|
||||
|
||||
#endif // LIBAARUFORMAT_OPTIONS_H
|
||||
|
||||
Reference in New Issue
Block a user