/* * This file is part of the Aaru Data Preservation Suite. * Copyright (c) 2019-2025 Natalia Portillo. * * This library is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser General Public License as * published by the Free Software Foundation; either version 2.1 of the * License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, but * WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, see . */ #ifndef LIBAARUFORMAT_CONTEXT_H #define LIBAARUFORMAT_CONTEXT_H #include "blake3.h" #include "crc64.h" #include "hash_map.h" #include "lru.h" #include "md5.h" #include "sha1.h" #include "sha256.h" #include "spamsum.h" #include "structs.h" #include "utarray.h" /** \file aaruformat/context.h * \brief Central runtime context structures for libaaruformat (image state, caches, checksum buffers). * * The principal structure, \ref aaruformat_context, aggregates: header metadata, open stream handle, deduplication * tables (DDT) currently in memory, optical disc auxiliary data (sector prefix/suffix/subchannel), track listings, * geometry & metadata blocks, checksum accumulators, CRC & ECC helper contexts, hash map for deduplication, and * transient write buffers. * * Memory ownership model (unless otherwise stated): if a pointer field is non-NULL it is owned by the context and * will be freed (or otherwise released) during context close / destruction. Callers must not free or reallocate * these pointers directly. External callers should treat all internal buffers as read‑only unless explicitly writing. * * Threading: a single context instance is NOT thread-safe; serialize access if used across threads. * Lifetime: allocate, initialize/open, perform read/write/verify operations, then close/free. * * Deduplication tables (DDT): only a subset (primary table + an active secondary + optional cache) is retained in RAM; * large images may rely on lazy loading of secondary tables. Flags (inMemoryDdt, userDataDdt*, cachedSecondary*) * indicate what is currently resident. * * Optical auxiliary buffers (sectorPrefix / sectorSuffix / subchannel / corrected variants) are populated only for * images where those components exist (e.g., raw CD dumps). They may be NULL for block devices / non‑optical media. * * Index handling: indexEntries (UT_array) holds a flattened list of \ref IndexEntry structures (regardless of * v1/v2/v3). hash_map_t *sectorHashMap provides fast duplicate detection keyed by content fingerprint / sparse sector * key. * * Invariants / sanity expectations (not strictly enforced everywhere): * - magic == AARU_MAGIC after successful open/create. * - header.imageMajorVersion <= AARUF_VERSION. * - imageStream != NULL when any I/O method is in progress. * - If deduplicate == false, sectorHashMap may still be populated for bookkeeping but duplicates are stored * independently. * - If userDataDdtMini != NULL then userDataDdtBig == NULL (and vice versa) for a given level. */ #ifndef MD5_DIGEST_LENGTH #define MD5_DIGEST_LENGTH 16 #endif #ifndef SHA1_DIGEST_LENGTH #define SHA1_DIGEST_LENGTH 20 #endif #ifndef SHA256_DIGEST_LENGTH #define SHA256_DIGEST_LENGTH 32 #endif /** \struct CdEccContext * \brief Lookup tables and state for Compact Disc EDC/ECC (P/Q) regeneration / verification. * * Fields may be lazily allocated; inited_edc indicates tables are ready. */ typedef struct CdEccContext { bool inited_edc; ///< True once EDC/ECC tables have been initialized. uint8_t *ecc_b_table; ///< Backward (B) ECC table (allocated, size implementation-defined). uint8_t *ecc_f_table; ///< Forward (F) ECC table. uint32_t *edc_table; ///< EDC (CRC) lookup table. } CdEccContext; /** \struct Checksums * \brief Collected whole‑image checksums / hashes present in a checksum block. * * Only hash arrays with corresponding has* flags set contain valid data. spamsum is a dynamically allocated * NUL‑terminated buffer (original SpamSum signature bytes followed by appended '\0'). */ typedef struct Checksums { bool hasMd5; ///< True if md5[] buffer populated. bool hasSha1; ///< True if sha1[] buffer populated. bool hasSha256; ///< True if sha256[] buffer populated. bool hasBlake3; ///< True if blake3[] buffer populated. bool hasSpamSum; ///< True if spamsum pointer allocated and signature read. uint8_t md5[MD5_DIGEST_LENGTH]; ///< MD5 digest (16 bytes). uint8_t sha1[SHA1_DIGEST_LENGTH]; ///< SHA-1 digest (20 bytes). uint8_t sha256[SHA256_DIGEST_LENGTH]; ///< SHA-256 digest (32 bytes). uint8_t blake3[BLAKE3_OUT_LEN]; ///< BLAKE3 digest (32 bytes). uint8_t *spamsum; ///< SpamSum fuzzy hash (ASCII), allocated length+1 with trailing 0. } Checksums; /** \struct mediaTagEntry * \brief Hash table entry for an arbitrary media tag (e.g., proprietary drive/medium descriptor). * * Stored via uthash (hh handle). Type is a format‑specific integer identifier mapping to external interpretation. */ typedef struct mediaTagEntry { uint8_t *data; ///< Tag data blob (opaque to library core); length bytes long. int32_t type; ///< Numeric type identifier. uint32_t length; ///< Length in bytes of data. UT_hash_handle hh; ///< uthash linkage. } mediaTagEntry; typedef struct TapeFileHashEntry { uint64_t key; ///< Composite key: partition << 32 | file TapeFileEntry fileEntry; ///< The actual tape file data UT_hash_handle hh; ///< UTHASH handle } tapeFileHashEntry; typedef struct TapePartitionHashEntry { uint8_t key; ///< Key: partition TapePartitionEntry partitionEntry; ///< The actual tape partition data UT_hash_handle hh; ///< UTHASH handle } TapePartitionHashEntry; typedef struct TapeDdtHashEntry { uint64_t key; ///< Key: sector address uint64_t value; ///< Value: DDT entry UT_hash_handle hh; ///< UTHASH handle } TapeDdtHashEntry; /** \struct aaruformat_context * \brief Master context representing an open or in‑creation Aaru image. * * Contains stream handle, parsed headers, deduplication structures, optical extras, metadata blocks, checksum * information, caches, and write-state. Allocate with library factory (or zero‑init + explicit open) and destroy * with corresponding close/free routine. * * Field grouping: * - Core & header: magic, library*Version, imageStream, header. * - Optical sector adjuncts: sectorPrefix/sectorSuffix/subchannel plus corrected variants & mode2_subheaders. * - Deduplication: inMemoryDdt, userDataDdt*, userDataDdtHeader, mini/big/cached secondary arrays, version tags. * - Metadata & geometry: geometryBlock, metadataBlockHeader+metadataBlock, cicmBlockHeader+cicmBlock, tracksHeader. * - Tracks & hardware: trackEntries, dataTracks, dumpHardwareHeader, dumpHardwareEntriesWithData. * - Integrity & ECC: checksums, eccCdContext, crc64Context. * - Index & dedup lookup: indexEntries (UT_array of IndexEntry), sectorHashMap (duplicate detection), deduplicate * flag. * - Write path: isWriting, currentBlockHeader, writingBuffer(+position/offset), nextBlockPosition. * * Notes: * - userDataDdt points to memory-mapped or fully loaded DDT (legacy path); userDataDdtMini / userDataDdtBig * supersede. * - shift retained for backward compatibility with earlier single‑level address shift semantics. * - mappedMemoryDdtSize is meaningful only if userDataDdt references an mmapped region. */ typedef struct aaruformat_context { /* Core & header */ uint64_t magic; ///< File magic (AARU_MAGIC) post-open. AaruHeaderV2 header; ///< Parsed container header (v2). FILE *imageStream; ///< Underlying FILE* stream (binary mode). uint8_t library_major_version; ///< Linked library major version. uint8_t library_minor_version; ///< Linked library minor version; /* Deduplication tables (DDT) */ uint64_t *user_data_ddt; ///< Legacy flat DDT pointer (NULL when using v2 mini/big arrays). TapeDdtHashEntry *tape_ddt; ///< Hash table root for tape DDT entries uint32_t *sector_prefix_ddt; ///< Legacy CD sector prefix DDT (deprecated by *2). uint32_t *sector_suffix_ddt; ///< Legacy CD sector suffix DDT. uint64_t *sector_prefix_ddt2; ///< CD sector prefix DDT V2. uint64_t *sector_suffix_ddt2; ///< CD sector suffix DDT V2. uint64_t *user_data_ddt2; ///< DDT entries (big variant) primary/secondary current. uint64_t *cached_secondary_ddt2; ///< Cached secondary table (big entries) or NULL. DdtHeader2 user_data_ddt_header; ///< Active user data DDT v2 header (primary table meta). uint64_t cached_ddt_offset; ///< File offset of currently cached secondary DDT (0=none). uint64_t cached_ddt_position; ///< Position index of cached secondary DDT. uint64_t primary_ddt_offset; ///< File offset of the primary DDT v2 table. size_t mapped_memory_ddt_size; ///< Length of mmapped DDT if userDataDdt is mmapped. int ddt_version; ///< DDT version in use (1=legacy, 2=v2 hierarchical). uint8_t shift; ///< Legacy overall shift (deprecated by data_shift/table_shift). bool in_memory_ddt; ///< True if primary (and possibly secondary) DDT loaded. /* Optical auxiliary buffers (NULL if not present) */ uint8_t *sector_prefix; ///< Raw per-sector prefix (e.g., sync+header) uncorrected. uint8_t *sector_prefix_corrected; ///< Corrected variant (post error correction) if stored. uint8_t *sector_suffix; ///< Raw per-sector suffix (EDC/ECC) uncorrected. uint8_t *sector_suffix_corrected; ///< Corrected suffix if stored separately. uint8_t *sector_subchannel; ///< Raw 96-byte subchannel (if captured). uint8_t *mode2_subheaders; ///< MODE2 Form1/Form2 8-byte subheaders (concatenated). uint8_t *sector_id; ///< DVD sector ID (4 bytes) if present. uint8_t *sector_ied; ///< DVD sector IED (2 bytes) if present. uint8_t *sector_cpr_mai; ///< DVD sector CPR_MAI (6 bytes) if present. uint8_t *sector_edc; ///< DVD sector EDC (4 bytes) if present. uint8_t *sector_decrypted_title_key; ///< DVD decrypted title key (5 bytes) if present. /* Metadata & geometry */ struct DumpHardwareEntriesWithData *dump_hardware_entries_with_data; ///< Array of dump hardware entries + strings. uint8_t *metadata_block; ///< Raw metadata UTF-16LE concatenated strings. uint8_t *cicm_block; ///< CICM XML payload. uint8_t *json_block; ///< JSON metadata block payload (UTF-8). uint8_t *creator; ///< Who (person) created the image? uint8_t *media_title; ///< Title of the media represented by the image uint8_t *comments; ///< Image comments uint8_t *media_manufacturer; ///< Manufacturer of the media represented by the image uint8_t *media_model; ///< Model of the media represented by the image uint8_t *media_serial_number; ///< Serial number of the media represented by the image uint8_t *media_barcode; ///< Barcode of the media represented by the image uint8_t *media_part_number; ///< Part number of the media represented by the image uint8_t *drive_manufacturer; ///< Manufacturer of the drive used to read the media represented by the image uint8_t *drive_model; ///< Model of the drive used to read the media represented by the image uint8_t *drive_serial_number; ///< Serial number of the drive used to read the media represented by the image uint8_t *drive_firmware_revision; ///< Firmware revision of the drive used to read the media represented by the image GeometryBlockHeader geometry_block; ///< Logical geometry block (if present). MetadataBlockHeader metadata_block_header; ///< Metadata block header. CicmMetadataBlock cicm_block_header; ///< CICM metadata header (if present). DumpHardwareHeader dump_hardware_header; ///< Dump hardware header. AaruMetadataJsonBlockHeader json_block_header; ///< JSON metadata block header (if present). uint32_t cylinders; ///< Cylinders of the media represented by the image uint32_t heads; ///< Heads of the media represented by the image uint32_t sectors_per_track; ///< Sectors per track of the media represented by the image (for variable image, the ///< smallest) int32_t media_sequence; ///< Number in sequence for the media represented by the image int32_t last_media_sequence; ///< Last media of the sequence the media represented by the image corresponds to /* Optical information */ TrackEntry *track_entries; ///< Full track list (tracksHeader.entries elements). TrackEntry *data_tracks; ///< Filtered list of data tracks (subset of trackEntries). TracksHeader tracks_header; ///< Tracks header (optical) if present. uint8_t number_of_data_tracks; ///< Count of tracks considered "data" (sequence 1..99 heuristics). /* Integrity & ECC */ CdEccContext *ecc_cd_context; ///< CD ECC/EDC helper tables (allocated on demand). crc64_ctx *crc64_context; ///< Opaque CRC64 context for streaming updates. /* Index & deduplication lookup */ UT_array *index_entries; ///< Flattened index entries (UT_array of IndexEntry). hash_map_t *sector_hash_map; ///< Deduplication hash map (fingerprint->entry mapping). /* Caches */ struct CacheHeader block_header_cache; ///< LRU/Cache header for block headers. struct CacheHeader block_cache; ///< LRU/Cache header for block payloads. /* High-level summary */ ImageInfo image_info; ///< Exposed high-level image info summary. /* Tags */ bool *readableSectorTags; ///< Per-sector boolean array (optical tags read successfully?). mediaTagEntry *mediaTags; ///< Hash table of extra media tags (uthash root). /* Checksums */ spamsum_ctx *spamsum_context; ///< Opaque SpamSum context for streaming updates blake3_hasher *blake3_context; ///< Opaque BLAKE3 context for streaming updates Checksums checksums; ///< Whole-image checksums discovered. md5_ctx md5_context; ///< Opaque MD5 context for streaming updates sha1_ctx sha1_context; ///< Opaque SHA-1 context for streaming updates sha256_ctx sha256_context; ///< Opaque SHA-256 context for streaming updates bool calculating_md5; ///< True if whole-image MD5 being calculated on-the-fly. bool calculating_sha1; ///< True if whole-image SHA-1 being calculated on-the-fly. bool calculating_sha256; ///< True if whole-image SHA-256 being calculated on-the-fly. bool calculating_spamsum; ///< True if whole-image SpamSum being calculated on-the-fly. bool calculating_blake3; ///< True if whole-image BLAKE3 being calculated on-the-fly. /* Write path */ uint8_t *writing_buffer; ///< Accumulation buffer for current block data. BlockHeader current_block_header; ///< Header for block currently being assembled (write path). uint64_t next_block_position; ///< Absolute file offset where next block will be written. uint64_t last_written_block; ///< Last written block number (write path). size_t sector_prefix_length; ///< Length of sector_prefix size_t sector_suffix_length; ///< Length of sector_suffix size_t sector_prefix_offset; ///< Current position in sector_prefix size_t sector_suffix_offset; ///< Current position in sector_suffix int current_block_offset; ///< Logical offset inside block (units: bytes or sectors depending on path). int writing_buffer_position; ///< Current size / position within writingBuffer. uint8_t current_track_type; ///< Current track type (when writing optical images with tracks, needed for block ///< compression type). bool is_writing; ///< True if context opened/created for writing. bool rewinded; ///< True if stream has been rewound after open (write path). bool writing_long; ///< True if writing long sectors bool block_zero_written; ///< True if block zero has been written (writing path). /* Options */ uint32_t lzma_dict_size; ///< LZMA dictionary size (writing path). bool deduplicate; ///< Storage deduplication active (duplicates coalesce). bool compression_enabled; ///< True if block compression enabled (writing path). /* Tape-specific structures */ tapeFileHashEntry *tape_files; ///< Hash table root for tape files TapePartitionHashEntry *tape_partitions; ///< Hash table root for tape partitions bool is_tape; ///< True if the image is a tape image } aaruformat_context; /** \struct DumpHardwareEntriesWithData * \brief In-memory representation of a dump hardware entry plus decoded variable-length fields & extents. * * All string pointers are NUL-terminated UTF-8 copies of on-disk data (or NULL if absent). extents array may be NULL * when no ranges were recorded. Freed during context teardown. */ typedef struct DumpHardwareEntriesWithData { DumpHardwareEntry entry; ///< Fixed-size header with lengths & counts. struct DumpExtent *extents; ///< Array of extents (entry.extents elements) or NULL. uint8_t *manufacturer; ///< Manufacturer string (UTF-8) or NULL. uint8_t *model; ///< Model string or NULL. uint8_t *revision; ///< Hardware revision string or NULL. uint8_t *firmware; ///< Firmware version string or NULL. uint8_t *serial; ///< Serial number string or NULL. uint8_t *softwareName; ///< Dump software name or NULL. uint8_t *softwareVersion; ///< Dump software version or NULL. uint8_t *softwareOperatingSystem; ///< Host operating system string or NULL. } DumpHardwareEntriesWithData; #pragma pack(push, 1) /** \struct DumpExtent * \brief Inclusive [start,end] logical sector range contributed by a single hardware environment. */ typedef struct DumpExtent { uint64_t start; ///< Starting LBA (inclusive). uint64_t end; ///< Ending LBA (inclusive); >= start. } DumpExtent; #pragma pack(pop) #endif // LIBAARUFORMAT_CONTEXT_H