Files
libaaruformat/include/aaruformat/context.h

367 lines
22 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LIBAARUFORMAT_CONTEXT_H
#define LIBAARUFORMAT_CONTEXT_H
#include "blake3.h"
#include "crc64.h"
#include "hash_map.h"
#include "lru.h"
#include "md5.h"
#include "sha1.h"
#include "sha256.h"
#include "spamsum.h"
#include "structs.h"
#include "utarray.h"
/** \file aaruformat/context.h
* \brief Central runtime context structures for libaaruformat (image state, caches, checksum buffers).
*
* The principal structure, \ref aaruformat_context, aggregates: header metadata, open stream handle, deduplication
* tables (DDT) currently in memory, optical disc auxiliary data (sector prefix/suffix/subchannel), track listings,
* geometry & metadata blocks, checksum accumulators, CRC & ECC helper contexts, hash map for deduplication, and
* transient write buffers.
*
* Memory ownership model (unless otherwise stated): if a pointer field is non-NULL it is owned by the context and
* will be freed (or otherwise released) during context close / destruction. Callers must not free or reallocate
* these pointers directly. External callers should treat all internal buffers as readonly unless explicitly writing.
*
* Threading: a single context instance is NOT thread-safe; serialize access if used across threads.
* Lifetime: allocate, initialize/open, perform read/write/verify operations, then close/free.
*
* Deduplication tables (DDT): only a subset (primary table + an active secondary + optional cache) is retained in RAM;
* large images may rely on lazy loading of secondary tables. The fields in_memory_ddt, user_data_ddt*,
* cached_secondary_ddt2 and cached_ddt_* indicate what is currently resident.
*
* Optical auxiliary buffers (sector_prefix / sector_suffix / sector_subchannel / corrected variants) are populated
* only for images where those components exist (e.g., raw CD dumps). They may be NULL for block devices /
* non-optical media.
*
* Index handling: index_entries (UT_array) holds a flattened list of \ref IndexEntry structures (regardless of
* v1/v2/v3). hash_map_t *sector_hash_map provides fast duplicate detection keyed by content fingerprint / sparse
* sector key.
*
* Invariants / sanity expectations (not strictly enforced everywhere):
* - magic == AARU_MAGIC after successful open/create.
* - header.imageMajorVersion <= AARUF_VERSION.
* - imageStream != NULL when any I/O method is in progress.
* - If deduplicate == false, sector_hash_map may still be populated for bookkeeping but duplicates are stored
*   independently.
* - If userDataDdtMini != NULL then userDataDdtBig == NULL (and vice versa) for a given level. NOTE(review): neither
*   name is a field of the context as currently declared (v2 entries are held in user_data_ddt2); confirm whether
*   this invariant still applies.
*/
/* Fallback digest sizes, in bytes. Each one is defined only when no earlier header has already provided it. */
#if !defined(MD5_DIGEST_LENGTH)
#define MD5_DIGEST_LENGTH 16
#endif /* MD5_DIGEST_LENGTH */

#if !defined(SHA1_DIGEST_LENGTH)
#define SHA1_DIGEST_LENGTH 20
#endif /* SHA1_DIGEST_LENGTH */

#if !defined(SHA256_DIGEST_LENGTH)
#define SHA256_DIGEST_LENGTH 32
#endif /* SHA256_DIGEST_LENGTH */
/** \struct CdEccContext
 * \brief Table pointers and readiness flag used for Compact Disc EDC/ECC (P/Q) regeneration / verification.
 *
 * The tables may be allocated lazily; inited_edc reports whether they are ready for use.
 */
struct CdEccContext
{
    bool      inited_edc;  ///< Set to true once the EDC/ECC tables have been initialized.
    uint8_t  *ecc_b_table; ///< Backward (B) ECC table (allocated; size is implementation-defined).
    uint8_t  *ecc_f_table; ///< Forward (F) ECC table.
    uint32_t *edc_table;   ///< EDC (CRC) lookup table.
};
typedef struct CdEccContext CdEccContext;
/** \struct Checksums
 * \brief Whole-image checksums / hashes gathered from a checksum block.
 *
 * A digest array holds valid data only when its matching has* flag is set. spamsum is a dynamically allocated,
 * NUL-terminated buffer (the original SpamSum signature bytes followed by an appended '\0').
 */
struct Checksums
{
    bool     hasMd5;                       ///< md5[] has been populated.
    bool     hasSha1;                      ///< sha1[] has been populated.
    bool     hasSha256;                    ///< sha256[] has been populated.
    bool     hasBlake3;                    ///< blake3[] has been populated.
    bool     hasSpamSum;                   ///< spamsum has been allocated and the signature read.
    uint8_t  md5[MD5_DIGEST_LENGTH];       ///< MD5 digest (16 bytes).
    uint8_t  sha1[SHA1_DIGEST_LENGTH];     ///< SHA-1 digest (20 bytes).
    uint8_t  sha256[SHA256_DIGEST_LENGTH]; ///< SHA-256 digest (32 bytes).
    uint8_t  blake3[BLAKE3_OUT_LEN];       ///< BLAKE3 digest (32 bytes).
    uint8_t *spamsum;                      ///< SpamSum fuzzy hash (ASCII); allocated as length+1 with a trailing 0.
};
typedef struct Checksums Checksums;
/** \struct mediaTagEntry
 * \brief Hash table node for one arbitrary media tag (e.g., a proprietary drive/medium descriptor).
 *
 * Kept in a uthash table through the hh handle. type is a format-specific integer identifier whose interpretation
 * lies outside this structure.
 */
struct mediaTagEntry
{
    uint8_t       *data;   ///< Tag payload (opaque to the library core); length bytes long.
    int32_t        type;   ///< Numeric type identifier.
    uint32_t       length; ///< Size of data, in bytes.
    UT_hash_handle hh;     ///< uthash linkage.
};
typedef struct mediaTagEntry mediaTagEntry;
/** \struct TapeFileHashEntry
 * \brief uthash node holding one tape file record (used as the element type of the context's tape_files table).
 *
 * The type is exported under two typedef names:
 * - \c tapeFileHashEntry: the legacy spelling, kept so existing code keeps compiling.
 * - \c TapeFileHashEntry: matches the struct tag and the naming of the sibling TapePartitionHashEntry and
 *   TapeDdtHashEntry types; prefer it in new code.
 */
typedef struct TapeFileHashEntry
{
    uint64_t       key;       ///< Composite key: partition << 32 | file
    TapeFileEntry  fileEntry; ///< The actual tape file data
    UT_hash_handle hh;        ///< UTHASH handle
} tapeFileHashEntry, TapeFileHashEntry;
/** \struct TapePartitionHashEntry
 * \brief uthash node holding one tape partition record, keyed by partition.
 */
struct TapePartitionHashEntry
{
    uint8_t            key;            ///< Hash key: the partition.
    TapePartitionEntry partitionEntry; ///< Tape partition data carried by this node.
    UT_hash_handle     hh;             ///< uthash linkage handle.
};
typedef struct TapePartitionHashEntry TapePartitionHashEntry;
/** \struct TapeDdtHashEntry
 * \brief uthash node mapping a sector address to its DDT entry for tape images.
 */
struct TapeDdtHashEntry
{
    uint64_t       key;   ///< Hash key: sector address.
    uint64_t       value; ///< Stored value: the DDT entry.
    UT_hash_handle hh;    ///< uthash linkage handle.
};
typedef struct TapeDdtHashEntry TapeDdtHashEntry;
/** \struct aaruformat_context
 * \brief Master context representing an open or in-creation Aaru image.
 *
 * Contains stream handle, parsed headers, deduplication structures, optical extras, metadata blocks, checksum
 * information, caches, and write-state. Allocate with library factory (or zero-init + explicit open) and destroy
 * with corresponding close/free routine.
 *
 * Field grouping (names exactly as declared below, in declaration order):
 * - Core & header: magic, header, imageStream, library_major_version, library_minor_version.
 * - Deduplication tables: user_data_ddt, tape_ddt, sector_prefix_ddt / sector_suffix_ddt (and their *2 variants),
 *   user_data_ddt2, cached_secondary_ddt2, user_data_ddt_header, cached_ddt_offset, cached_ddt_position,
 *   primary_ddt_offset, mapped_memory_ddt_size, ddt_version, shift, in_memory_ddt.
 * - Optical auxiliary buffers: sector_prefix / sector_suffix (plus corrected variants), sector_subchannel,
 *   mode2_subheaders, and the DVD per-sector buffers (sector_id, sector_ied, sector_cpr_mai, sector_edc,
 *   sector_decrypted_title_key).
 * - Metadata & geometry: dump_hardware_entries_with_data, metadata_block, cicm_block, json_block, the descriptive
 *   strings (creator, comments, media_*, drive_*), the block headers (geometry_block, metadata_block_header,
 *   cicm_block_header, dump_hardware_header, json_block_header), cylinders / heads / sectors_per_track,
 *   media_sequence, last_media_sequence.
 * - Optical information: track_entries, data_tracks, tracks_header, number_of_data_tracks.
 * - Integrity & ECC: ecc_cd_context, crc64_context.
 * - Index & dedup lookup: index_entries (UT_array of IndexEntry), sector_hash_map (duplicate detection).
 * - Caches: block_header_cache, block_cache.
 * - Summary & tags: image_info, readableSectorTags, mediaTags.
 * - Checksums: checksums, the streaming hash contexts (*_context) and the calculating_* flags.
 * - Write path: writing_buffer, current_block_header, next_block_position, last_written_block, sector prefix/suffix
 *   lengths and offsets, current_block_offset, writing_buffer_position, current_track_type, is_writing, rewinded,
 *   writing_long, block_zero_written.
 * - Options: lzma_dict_size, deduplicate, compression_enabled.
 * - Tape: tape_files, tape_partitions, is_tape.
 * - Dirty flags: dirty_* (one per block kind that may need to be written during close).
 *
 * Notes:
 * - user_data_ddt points to memory-mapped or fully loaded DDT (legacy path). Earlier revisions of this comment
 *   named userDataDdtMini / userDataDdtBig as its successors; neither is declared here, and v2 entries are held in
 *   user_data_ddt2.
 * - shift retained for backward compatibility with earlier single-level address shift semantics.
 * - mapped_memory_ddt_size is meaningful only if user_data_ddt references an mmapped region.
 * - NOTE(review): imageStream, readableSectorTags and mediaTags keep camelCase names while every other field is
 *   snake_case; they are left as-is because renaming a field would break code that uses this struct.
 */
typedef struct aaruformat_context
{
/* Core & header */
uint64_t magic; ///< File magic (AARU_MAGIC) post-open.
AaruHeaderV2 header; ///< Parsed container header (v2).
FILE *imageStream; ///< Underlying FILE* stream (binary mode).
uint8_t library_major_version; ///< Linked library major version.
uint8_t library_minor_version; ///< Linked library minor version.
/* Deduplication tables (DDT) */
uint64_t *user_data_ddt; ///< Legacy flat DDT pointer (NULL when the v2 DDT is in use).
TapeDdtHashEntry *tape_ddt; ///< Hash table root for tape DDT entries
uint32_t *sector_prefix_ddt; ///< Legacy CD sector prefix DDT (superseded by sector_prefix_ddt2).
uint32_t *sector_suffix_ddt; ///< Legacy CD sector suffix DDT (superseded by sector_suffix_ddt2).
uint64_t *sector_prefix_ddt2; ///< CD sector prefix DDT V2.
uint64_t *sector_suffix_ddt2; ///< CD sector suffix DDT V2.
uint64_t *user_data_ddt2; ///< DDT entries (big variant) primary/secondary current.
uint64_t *cached_secondary_ddt2; ///< Cached secondary table (big entries) or NULL.
DdtHeader2 user_data_ddt_header; ///< Active user data DDT v2 header (primary table meta).
uint64_t cached_ddt_offset; ///< File offset of currently cached secondary DDT (0=none).
uint64_t cached_ddt_position; ///< Position index of cached secondary DDT.
uint64_t primary_ddt_offset; ///< File offset of the primary DDT v2 table.
size_t mapped_memory_ddt_size; ///< Length of mmapped DDT if user_data_ddt is mmapped.
int ddt_version; ///< DDT version in use (1=legacy, 2=v2 hierarchical).
uint8_t shift; ///< Legacy overall shift (deprecated by data_shift/table_shift).
bool in_memory_ddt; ///< True if primary (and possibly secondary) DDT loaded.
/* Optical auxiliary buffers (NULL if not present) */
uint8_t *sector_prefix; ///< Raw per-sector prefix (e.g., sync+header) uncorrected.
uint8_t *sector_prefix_corrected; ///< Corrected variant (post error correction) if stored.
uint8_t *sector_suffix; ///< Raw per-sector suffix (EDC/ECC) uncorrected.
uint8_t *sector_suffix_corrected; ///< Corrected suffix if stored separately.
uint8_t *sector_subchannel; ///< Raw 96-byte subchannel (if captured).
uint8_t *mode2_subheaders; ///< MODE2 Form1/Form2 8-byte subheaders (concatenated).
uint8_t *sector_id; ///< DVD sector ID (4 bytes) if present.
uint8_t *sector_ied; ///< DVD sector IED (2 bytes) if present.
uint8_t *sector_cpr_mai; ///< DVD sector CPR_MAI (6 bytes) if present.
uint8_t *sector_edc; ///< DVD sector EDC (4 bytes) if present.
uint8_t *sector_decrypted_title_key; ///< DVD decrypted title key (5 bytes) if present.
/* Metadata & geometry */
struct DumpHardwareEntriesWithData *dump_hardware_entries_with_data; ///< Array of dump hardware entries + strings.
uint8_t *metadata_block; ///< Raw metadata UTF-16LE concatenated strings.
uint8_t *cicm_block; ///< CICM XML payload.
uint8_t *json_block; ///< JSON metadata block payload (UTF-8).
uint8_t *creator; ///< Who (person) created the image?
uint8_t *media_title; ///< Title of the media represented by the image
uint8_t *comments; ///< Image comments
uint8_t *media_manufacturer; ///< Manufacturer of the media represented by the image
uint8_t *media_model; ///< Model of the media represented by the image
uint8_t *media_serial_number; ///< Serial number of the media represented by the image
uint8_t *media_barcode; ///< Barcode of the media represented by the image
uint8_t *media_part_number; ///< Part number of the media represented by the image
uint8_t *drive_manufacturer; ///< Manufacturer of the drive used to read the media represented by the image
uint8_t *drive_model; ///< Model of the drive used to read the media represented by the image
uint8_t *drive_serial_number; ///< Serial number of the drive used to read the media represented by the image
uint8_t
*drive_firmware_revision; ///< Firmware revision of the drive used to read the media represented by the image
GeometryBlockHeader geometry_block; ///< Logical geometry block (if present).
MetadataBlockHeader metadata_block_header; ///< Metadata block header.
CicmMetadataBlock cicm_block_header; ///< CICM metadata header (if present).
DumpHardwareHeader dump_hardware_header; ///< Dump hardware header.
AaruMetadataJsonBlockHeader json_block_header; ///< JSON metadata block header (if present).
uint32_t cylinders; ///< Cylinders of the media represented by the image
uint32_t heads; ///< Heads of the media represented by the image
uint32_t sectors_per_track; ///< Sectors per track of the media represented by the image (for variable image, the
///< smallest)
int32_t media_sequence; ///< Number in sequence for the media represented by the image
int32_t last_media_sequence; ///< Last media of the sequence the media represented by the image corresponds to
/* Optical information */
TrackEntry *track_entries; ///< Full track list (tracks_header.entries elements).
TrackEntry *data_tracks; ///< Filtered list of data tracks (subset of track_entries).
TracksHeader tracks_header; ///< Tracks header (optical) if present.
uint8_t number_of_data_tracks; ///< Count of tracks considered "data" (sequence 1..99 heuristics).
/* Integrity & ECC */
CdEccContext *ecc_cd_context; ///< CD ECC/EDC helper tables (allocated on demand).
crc64_ctx *crc64_context; ///< Opaque CRC64 context for streaming updates.
/* Index & deduplication lookup */
UT_array *index_entries; ///< Flattened index entries (UT_array of IndexEntry).
hash_map_t *sector_hash_map; ///< Deduplication hash map (fingerprint->entry mapping).
/* Caches */
struct CacheHeader block_header_cache; ///< LRU/Cache header for block headers.
struct CacheHeader block_cache; ///< LRU/Cache header for block payloads.
/* High-level summary */
ImageInfo image_info; ///< Exposed high-level image info summary.
/* Tags */
bool *readableSectorTags; ///< Per-sector boolean array (optical tags read successfully?).
mediaTagEntry *mediaTags; ///< Hash table of extra media tags (uthash root).
/* Checksums */
spamsum_ctx *spamsum_context; ///< Opaque SpamSum context for streaming updates
blake3_hasher *blake3_context; ///< Opaque BLAKE3 context for streaming updates
Checksums checksums; ///< Whole-image checksums discovered.
md5_ctx md5_context; ///< Opaque MD5 context for streaming updates
sha1_ctx sha1_context; ///< Opaque SHA-1 context for streaming updates
sha256_ctx sha256_context; ///< Opaque SHA-256 context for streaming updates
bool calculating_md5; ///< True if whole-image MD5 being calculated on-the-fly.
bool calculating_sha1; ///< True if whole-image SHA-1 being calculated on-the-fly.
bool calculating_sha256; ///< True if whole-image SHA-256 being calculated on-the-fly.
bool calculating_spamsum; ///< True if whole-image SpamSum being calculated on-the-fly.
bool calculating_blake3; ///< True if whole-image BLAKE3 being calculated on-the-fly.
/* Write path */
uint8_t *writing_buffer; ///< Accumulation buffer for current block data.
BlockHeader current_block_header; ///< Header for block currently being assembled (write path).
uint64_t next_block_position; ///< Absolute file offset where next block will be written.
uint64_t last_written_block; ///< Last written block number (write path).
size_t sector_prefix_length; ///< Length of sector_prefix
size_t sector_suffix_length; ///< Length of sector_suffix
size_t sector_prefix_offset; ///< Current position in sector_prefix
size_t sector_suffix_offset; ///< Current position in sector_suffix
int current_block_offset; ///< Logical offset inside block (units: bytes or sectors depending on path).
int writing_buffer_position; ///< Current size / position within writing_buffer.
uint8_t current_track_type; ///< Current track type (when writing optical images with tracks, needed for block
///< compression type).
bool is_writing; ///< True if context opened/created for writing.
bool rewinded; ///< True if stream has been rewound after open (write path).
bool writing_long; ///< True if writing long sectors
bool block_zero_written; ///< True if block zero has been written (writing path).
/* Options */
uint32_t lzma_dict_size; ///< LZMA dictionary size (writing path).
bool deduplicate; ///< Storage deduplication active (duplicates coalesce).
bool compression_enabled; ///< True if block compression enabled (writing path).
/* Tape-specific structures */
tapeFileHashEntry *tape_files; ///< Hash table root for tape files
TapePartitionHashEntry *tape_partitions; ///< Hash table root for tape partitions
bool is_tape; ///< True if the image is a tape image
/* Dirty flags (controls write behavior in close.c) */
bool dirty_secondary_ddt; ///< True if secondary DDT tables should be written during close
bool dirty_primary_ddt; ///< True if primary DDT table should be written during close
bool dirty_single_level_ddt; ///< True if single-level DDT should be written during close
bool dirty_checksum_block; ///< True if checksum block should be written during close
bool dirty_tracks_block; ///< True if tracks block should be written during close
bool dirty_mode2_subheaders_block; ///< True if MODE2 subheader block should be written during close
bool dirty_sector_prefix_block; ///< True if sector prefix block should be written during close
bool dirty_sector_prefix_ddt; ///< True if sector prefix DDT should be written during close
bool dirty_sector_suffix_block; ///< True if sector suffix block should be written during close
bool dirty_sector_suffix_ddt; ///< True if sector suffix DDT should be written during close
bool dirty_sector_subchannel_block; ///< True if subchannel block should be written during close
bool dirty_dvd_long_sector_blocks; ///< True if DVD long sector blocks should be written during close
bool dirty_dvd_title_key_decrypted_block; ///< True if decrypted title key block should be written during close
bool dirty_media_tags; ///< True if media tags should be written during close
bool dirty_tape_ddt; ///< True if tape DDT should be written during close
bool dirty_tape_file_block; ///< True if tape file block should be written during close
bool dirty_tape_partition_block; ///< True if tape partition block should be written during close
bool dirty_geometry_block; ///< True if geometry block should be written during close
bool dirty_metadata_block; ///< True if metadata block should be written during close
bool dirty_dumphw_block; ///< True if dump hardware block should be written during close
bool dirty_cicm_block; ///< True if CICM metadata block should be written during close
bool dirty_json_block; ///< True if JSON metadata block should be written during close
bool dirty_index_block; ///< True if index block should be written during close
} aaruformat_context;
/** \struct DumpHardwareEntriesWithData
 * \brief One dump hardware entry held in memory together with its decoded variable-length fields and extents.
 *
 * Every string pointer is either NULL (field absent) or a NUL-terminated UTF-8 copy of the on-disk data. extents may
 * be NULL when no ranges were recorded. Released as part of context teardown.
 */
struct DumpHardwareEntriesWithData
{
    DumpHardwareEntry  entry;                   ///< Fixed-size header carrying lengths & counts.
    struct DumpExtent *extents;                 ///< Extent array (entry.extents elements), or NULL.
    uint8_t           *manufacturer;            ///< Manufacturer string (UTF-8), or NULL.
    uint8_t           *model;                   ///< Model string, or NULL.
    uint8_t           *revision;                ///< Hardware revision string, or NULL.
    uint8_t           *firmware;                ///< Firmware version string, or NULL.
    uint8_t           *serial;                  ///< Serial number string, or NULL.
    uint8_t           *softwareName;            ///< Dump software name, or NULL.
    uint8_t           *softwareVersion;         ///< Dump software version, or NULL.
    uint8_t           *softwareOperatingSystem; ///< Host operating system string, or NULL.
};
typedef struct DumpHardwareEntriesWithData DumpHardwareEntriesWithData;
#pragma pack(push, 1)
/** \struct DumpExtent
 * \brief Logical sector range, inclusive at both ends, contributed by a single hardware environment.
 *
 * Declared under a packing of 1, so the two fields are laid out back to back.
 */
struct DumpExtent
{
    uint64_t start; ///< First LBA of the range (inclusive).
    uint64_t end;   ///< Last LBA of the range (inclusive); >= start.
};
#pragma pack(pop)
typedef struct DumpExtent DumpExtent;
#endif // LIBAARUFORMAT_CONTEXT_H