/*
 * This file is part of the Aaru Data Preservation Suite.
 * Copyright (c) 2019-2025 Natalia Portillo.
 *
 * This library is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation; either version 2.1 of the
 * License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#ifndef LIBAARUFORMAT_CONTEXT_H
#define LIBAARUFORMAT_CONTEXT_H
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

#include "blake3.h"
#include "crc64.h"
#include "hash_map.h"
#include "lru.h"
#include "md5.h"
#include "sha1.h"
#include "sha256.h"
#include "spamsum.h"
#include "structs.h"
#include "utarray.h"

/** \file aaruformat/context.h
* \brief Central runtime context structures for libaaruformat (image state, caches, checksum buffers).
*
* The principal structure, \ref aaruformatContext, aggregates: header metadata, open stream handle, deduplication
* tables (DDT) currently in memory, optical disc auxiliary data (sector prefix/suffix/subchannel), track listings,
* geometry & metadata blocks, checksum accumulators, CRC & ECC helper contexts, hash map for deduplication, and
* transient write buffers.
*
* Memory ownership model (unless otherwise stated): if a pointer field is non-NULL it is owned by the context and
* will be freed (or otherwise released) during context close / destruction. Callers must not free or reallocate
* these pointers directly. External callers should treat all internal buffers as read-only unless explicitly writing.
*
* Threading: a single context instance is NOT thread-safe; serialize access if used across threads.
* Lifetime: allocate, initialize/open, perform read/write/verify operations, then close/free.
*
* Deduplication tables (DDT): only a subset (primary table + an active secondary + optional cache) is retained in RAM;
* large images may rely on lazy loading of secondary tables. Flags (inMemoryDdt, userDataDdt*, cachedSecondary*)
* indicate what is currently resident.
*
* Optical auxiliary buffers (sector_prefix / sector_suffix / sector_subchannel and the sectorPrefixCorrected /
* sectorSuffixCorrected variants) are populated only for images where those components exist (e.g., raw CD dumps).
* They may be NULL for block devices / non-optical media.
*
* Index handling: indexEntries (UT_array) holds a flattened list of \ref IndexEntry structures (regardless of
* v1/v2/v3). hash_map_t *sectorHashMap provides fast duplicate detection keyed by content fingerprint / sparse sector
* key.
*
* Invariants / sanity expectations (not strictly enforced everywhere):
* - magic == AARU_MAGIC after successful open/create.
* - header.imageMajorVersion <= AARUF_VERSION.
* - imageStream != NULL when any I/O method is in progress.
* - If deduplicate == false, sectorHashMap may still be populated for bookkeeping but duplicates are stored
* independently.
* - Only the big DDT variants (userDataDdtBig, cachedSecondaryDdtBig) are declared in aaruformatContext; there is
*   no userDataDdtMini field for them to be mutually exclusive with.
*/
/* Fallback digest sizes, in bytes. Each one is defined only when no earlier
 * header has already provided a macro of the same name. */
#if !defined(MD5_DIGEST_LENGTH)
#define MD5_DIGEST_LENGTH 16 /* 128-bit digest */
#endif /* MD5_DIGEST_LENGTH */

#if !defined(SHA1_DIGEST_LENGTH)
#define SHA1_DIGEST_LENGTH 20 /* 160-bit digest */
#endif /* SHA1_DIGEST_LENGTH */

#if !defined(SHA256_DIGEST_LENGTH)
#define SHA256_DIGEST_LENGTH 32 /* 256-bit digest */
#endif /* SHA256_DIGEST_LENGTH */
/** \struct CdEccContext
 *  \brief Lookup tables and state for Compact Disc EDC/ECC (P/Q) regeneration / verification.
 *
 *  Fields may be lazily allocated; inited_edc indicates tables are ready.
 */
typedef struct CdEccContext
{
    bool      inited_edc;  ///< True once EDC/ECC tables have been initialized.
    uint8_t  *ecc_b_table; ///< Backward (B) ECC table (allocated, size implementation-defined).
    uint8_t  *ecc_f_table; ///< Forward (F) ECC table.
    uint32_t *edc_table;   ///< EDC (CRC) lookup table.
} CdEccContext;
/** \struct Checksums
* \brief Collected wholeimage checksums / hashes present in a checksum block.
*
* Only hash arrays with corresponding has* flags set contain valid data. spamsum is a dynamically allocated
* NULterminated buffer (original SpamSum signature bytes followed by appended '\0').
*/
2022-10-04 19:44:34 +01:00
typedef struct Checksums
{
bool hasMd5; ///< True if md5[] buffer populated.
bool hasSha1; ///< True if sha1[] buffer populated.
bool hasSha256; ///< True if sha256[] buffer populated.
bool hasBlake3; ///< True if blake3[] buffer populated.
bool hasSpamSum; ///< True if spamsum pointer allocated and signature read.
uint8_t md5[MD5_DIGEST_LENGTH]; ///< MD5 digest (16 bytes).
uint8_t sha1[SHA1_DIGEST_LENGTH]; ///< SHA-1 digest (20 bytes).
uint8_t sha256[SHA256_DIGEST_LENGTH]; ///< SHA-256 digest (32 bytes).
uint8_t blake3[BLAKE3_OUT_LEN]; ///< BLAKE3 digest (32 bytes).
uint8_t *spamsum; ///< SpamSum fuzzy hash (ASCII), allocated length+1 with trailing 0.
2022-10-04 19:44:34 +01:00
} Checksums;
/** \struct mediaTagEntry
* \brief Hash table entry for an arbitrary media tag (e.g., proprietary drive/medium descriptor).
*
* Stored via uthash (hh handle). Type is a formatspecific integer identifier mapping to external interpretation.
*/
2022-10-04 20:32:26 +01:00
typedef struct mediaTagEntry
{
uint8_t *data; ///< Tag data blob (opaque to library core); length bytes long.
int32_t type; ///< Numeric type identifier.
uint32_t length; ///< Length in bytes of data.
UT_hash_handle hh; ///< uthash linkage.
2022-10-04 20:32:26 +01:00
} mediaTagEntry;
/** \struct TapeFileHashEntry
 * \brief uthash entry wrapping one tape file record, keyed by a combined partition/file number.
 *
 * Note that the typedef alias is spelled tapeFileHashEntry (lower-case initial), unlike the struct tag.
 */
typedef struct TapeFileHashEntry
{
uint64_t key; ///< Composite key: (partition << 32) | file
TapeFileEntry fileEntry; ///< The actual tape file data
UT_hash_handle hh; ///< uthash linkage
} tapeFileHashEntry;
/** \struct TapePartitionHashEntry
 * \brief uthash entry wrapping one tape partition record, keyed by partition number.
 */
typedef struct TapePartitionHashEntry
{
uint8_t key; ///< Key: partition
TapePartitionEntry partitionEntry; ///< The actual tape partition data
UT_hash_handle hh; ///< uthash linkage
} TapePartitionHashEntry;
/** \struct TapeDdtHashEntry
 * \brief uthash entry mapping a sector address to its DDT entry (tape DDT).
 */
typedef struct TapeDdtHashEntry
{
uint64_t key; ///< Key: sector address
uint32_t value; ///< Value: DDT entry
UT_hash_handle hh; ///< uthash linkage
} TapeDdtHashEntry;
/** \struct aaruformatContext
* \brief Master context representing an open or increation Aaru image.
*
* Contains stream handle, parsed headers, deduplication structures, optical extras, metadata blocks, checksum
* information, caches, and write-state. Allocate with library factory (or zeroinit + explicit open) and destroy
* with corresponding close/free routine.
*
* Field grouping:
* - Core & header: magic, library*Version, imageStream, header.
* - Optical sector adjuncts: sectorPrefix/sectorSuffix/subchannel plus corrected variants & mode2_subheaders.
* - Deduplication: inMemoryDdt, userDataDdt*, userDataDdtHeader, mini/big/cached secondary arrays, version tags.
* - Metadata & geometry: geometryBlock, metadataBlockHeader+metadataBlock, cicmBlockHeader+cicmBlock, tracksHeader.
* - Tracks & hardware: trackEntries, dataTracks, dumpHardwareHeader, dumpHardwareEntriesWithData.
* - Integrity & ECC: checksums, eccCdContext, crc64Context.
* - Index & dedup lookup: indexEntries (UT_array of IndexEntry), sectorHashMap (duplicate detection), deduplicate
* flag.
* - Write path: isWriting, currentBlockHeader, writingBuffer(+position/offset), nextBlockPosition.
*
* Notes:
* - userDataDdt points to memory-mapped or fully loaded DDT (legacy path); userDataDdtMini / userDataDdtBig
* supersede.
* - shift retained for backward compatibility with earlier singlelevel address shift semantics.
* - mappedMemoryDdtSize is meaningful only if userDataDdt references an mmapped region.
*/
2020-03-01 19:51:13 +00:00
typedef struct aaruformatContext
{
/* Core & header */
uint64_t magic; ///< File magic (AARU_MAGIC) post-open.
AaruHeaderV2 header; ///< Parsed container header (v2).
uint8_t libraryMajorVersion; ///< Linked library major version.
uint8_t libraryMinorVersion; ///< Linked library minor version.
FILE *imageStream; ///< Underlying FILE* stream (binary mode).
/* Deduplication tables (DDT) */
uint8_t shift; ///< Legacy overall shift (deprecated by data_shift/table_shift).
bool inMemoryDdt; ///< True if primary (and possibly secondary) DDT loaded.
uint64_t *userDataDdt; ///< Legacy flat DDT pointer (NULL when using v2 mini/big arrays).
size_t mappedMemoryDdtSize; ///< Length of mmapped DDT if userDataDdt is mmapped.
uint32_t *sectorPrefixDdt; ///< Legacy CD sector prefix DDT (deprecated by *2).
uint32_t *sectorSuffixDdt; ///< Legacy CD sector suffix DDT.
uint32_t *sectorPrefixDdt2; ///< CD sector prefix DDT V2.
uint32_t *sectorSuffixDdt2; ///< CD sector suffix DDT V2.
TapeDdtHashEntry *tapeDdt; ///< Hash table root for tape DDT entries
DdtHeader2 userDataDdtHeader; ///< Active user data DDT v2 header (primary table meta).
int ddtVersion; ///< DDT version in use (1=legacy, 2=v2 hierarchical).
uint32_t *userDataDdtBig; ///< DDT entries (big variant) primary/secondary current.
uint64_t cachedDdtOffset; ///< File offset of currently cached secondary DDT (0=none).
uint64_t cachedDdtPosition; ///< Position index of cached secondary DDT.
uint64_t primaryDdtOffset; ///< File offset of the primary DDT v2 table.
uint32_t *cachedSecondaryDdtBig; ///< Cached secondary table (big entries) or NULL.
/* Optical auxiliary buffers (NULL if not present) */
uint8_t *sector_prefix; ///< Raw per-sector prefix (e.g., sync+header) uncorrected.
uint8_t *sectorPrefixCorrected; ///< Corrected variant (post error correction) if stored.
uint8_t *sector_suffix; ///< Raw per-sector suffix (EDC/ECC) uncorrected.
uint8_t *sectorSuffixCorrected; ///< Corrected suffix if stored separately.
uint8_t *sector_subchannel; ///< Raw 96-byte subchannel (if captured).
uint8_t *mode2_subheaders; ///< MODE2 Form1/Form2 8-byte subheaders (concatenated).
uint8_t *sector_id; ///< DVD sector ID (4 bytes) if present.
uint8_t *sector_ied; ///< DVD sector IED (2 bytes) if present.
uint8_t *sector_cpr_mai; ///< DVD sector CPR_MAI (6 bytes) if present.
uint8_t *sector_edc; ///< DVD sector EDC (4 bytes) if present.
uint8_t *sector_decrypted_title_key; ///< DVD decrypted title key (5 bytes) if present.
/* Metadata & geometry */
GeometryBlockHeader geometryBlock; ///< Logical geometry block (if present).
MetadataBlockHeader metadataBlockHeader; ///< Metadata block header.
uint8_t *metadataBlock; ///< Raw metadata UTF-16LE concatenated strings.
CicmMetadataBlock cicmBlockHeader; ///< CICM metadata header (if present).
uint8_t *cicmBlock; ///< CICM XML payload.
DumpHardwareHeader dumpHardwareHeader; ///< Dump hardware header.
struct DumpHardwareEntriesWithData *dumpHardwareEntriesWithData; ///< Array of dump hardware entries + strings.
AaruMetadataJsonBlockHeader jsonBlockHeader; ///< JSON metadata block header (if present).
uint8_t *jsonBlock; ///< JSON metadata block payload (UTF-8).
uint8_t *Creator; ///< Who (person) created the image?
uint8_t *MediaTitle; ///< Title of the media represented by the image
uint8_t *Comments; ///< Image comments
uint8_t *MediaManufacturer; ///< Manufacturer of the media represented by the image
uint8_t *MediaModel; ///< Model of the media represented by the image
uint8_t *MediaSerialNumber; ///< Serial number of the media represented by the image
uint8_t *MediaBarcode; ///< Barcode of the media represented by the image
uint8_t *MediaPartNumber; ///< Part number of the media represented by the image
uint8_t *DriveManufacturer; ///< Manufacturer of the drive used to read the media represented by the image
uint8_t *DriveModel; ///< Model of the drive used to read the media represented by the image
uint8_t *DriveSerialNumber; ///< Serial number of the drive used to read the media represented by the image
uint8_t *DriveFirmwareRevision; ///< Firmware revision of the drive used to read the media represented by the image
int32_t MediaSequence; ///< Number in sequence for the media represented by the image
int32_t LastMediaSequence; ///< Last media of the sequence the media represented by the image corresponds to
uint32_t Cylinders; ///< Cylinders of the media represented by the image
uint32_t Heads; ///< Heads of the media represented by the image
uint32_t SectorsPerTrack; ///< Sectors per track of the media represented by the image (for variable image, the
///< smallest)
/* Optical information */
TracksHeader tracksHeader; ///< Tracks header (optical) if present.
TrackEntry *trackEntries; ///< Full track list (tracksHeader.entries elements).
uint8_t numberOfDataTracks; ///< Count of tracks considered "data" (sequence 1..99 heuristics).
TrackEntry *dataTracks; ///< Filtered list of data tracks (subset of trackEntries).
/* Integrity & ECC */
CdEccContext *eccCdContext; ///< CD ECC/EDC helper tables (allocated on demand).
crc64_ctx *crc64Context; ///< Opaque CRC64 context for streaming updates.
/* Index & deduplication lookup */
UT_array *indexEntries; ///< Flattened index entries (UT_array of IndexEntry).
hash_map_t *sectorHashMap; ///< Deduplication hash map (fingerprint->entry mapping).
2025-10-09 01:47:39 +01:00
/* Caches */
struct CacheHeader blockHeaderCache; ///< LRU/Cache header for block headers.
struct CacheHeader blockCache; ///< LRU/Cache header for block payloads.
/* High-level summary */
ImageInfo imageInfo; ///< Exposed high-level image info summary.
/* Tags */
bool *readableSectorTags; ///< Per-sector boolean array (optical tags read successfully?).
mediaTagEntry *mediaTags; ///< Hash table of extra media tags (uthash root).
/* Checksums */
Checksums checksums; ///< Whole-image checksums discovered.
bool calculating_md5; ///< True if whole-image MD5 being calculated on-the-fly.
md5_ctx md5_context; ///< Opaque MD5 context for streaming updates
bool calculating_sha1; ///< True if whole-image SHA-1 being calculated on-the-fly.
sha1_ctx sha1_context; ///< Opaque SHA-1 context for streaming updates
bool calculating_sha256; ///< True if whole-image SHA-256 being calculated on-the-fly.
sha256_ctx sha256_context; ///< Opaque SHA-256 context for streaming updates
bool calculating_spamsum; ///< True if whole-image SpamSum being calculated on-the-fly.
spamsum_ctx *spamsum_context; ///< Opaque SpamSum context for streaming updates
bool calculating_blake3; ///< True if whole-image BLAKE3 being calculated on-the-fly.
blake3_hasher *blake3_context; ///< Opaque BLAKE3 context for streaming updates
/* Write path */
bool isWriting; ///< True if context opened/created for writing.
BlockHeader currentBlockHeader; ///< Header for block currently being assembled (write path).
uint8_t *writingBuffer; ///< Accumulation buffer for current block data.
int currentBlockOffset; ///< Logical offset inside block (units: bytes or sectors depending on path).
int writingBufferPosition; ///< Current size / position within writingBuffer.
uint64_t nextBlockPosition; ///< Absolute file offset where next block will be written.
bool rewinded; ///< True if stream has been rewound after open (write path).
uint64_t last_written_block; ///< Last written block number (write path).
uint8_t currentTrackType; ///< Current track type (when writing optical images with tracks, needed for block
///< compression type).
bool writingLong; ///< True if writing long sectors
size_t sector_prefix_length; ///< Length of sector_prefix
size_t sector_suffix_length; ///< Length of sector_suffix
size_t sector_prefix_offset; ///< Current position in sector_prefix
size_t sector_suffix_offset; ///< Current position in sector_suffix
/* Options */
bool deduplicate; ///< Storage deduplication active (duplicates coalesce).
bool compression_enabled; ///< True if block compression enabled (writing path).
uint32_t lzma_dict_size; ///< LZMA dictionary size (writing path).
/* Tape-specific structures */
tapeFileHashEntry *tapeFiles; ///< Hash table root for tape files
TapePartitionHashEntry *tapePartitions; ///< Hash table root for tape partitions
bool is_tape; ///< True if the image is a tape image
2020-03-01 19:51:13 +00:00
} aaruformatContext;
/** \struct DumpHardwareEntriesWithData
* \brief In-memory representation of a dump hardware entry plus decoded variable-length fields & extents.
*
* All string pointers are NUL-terminated UTF-8 copies of on-disk data (or NULL if absent). extents array may be NULL
* when no ranges were recorded. Freed during context teardown.
*/
2019-03-20 00:23:30 +00:00
typedef struct DumpHardwareEntriesWithData
{
DumpHardwareEntry entry; ///< Fixed-size header with lengths & counts.
struct DumpExtent *extents; ///< Array of extents (entry.extents elements) or NULL.
uint8_t *manufacturer; ///< Manufacturer string (UTF-8) or NULL.
uint8_t *model; ///< Model string or NULL.
uint8_t *revision; ///< Hardware revision string or NULL.
uint8_t *firmware; ///< Firmware version string or NULL.
uint8_t *serial; ///< Serial number string or NULL.
uint8_t *softwareName; ///< Dump software name or NULL.
uint8_t *softwareVersion; ///< Dump software version or NULL.
uint8_t *softwareOperatingSystem; ///< Host operating system string or NULL.
2019-03-20 00:23:30 +00:00
} DumpHardwareEntriesWithData;
#pragma pack(push, 1)
/** \struct DumpExtent
 *  \brief Inclusive [start,end] logical sector range contributed by a single hardware environment.
 *
 *  Declared under 1-byte packing, so the two 64-bit fields are laid out back to back.
 */
typedef struct DumpExtent
{
    uint64_t start; ///< Starting LBA (inclusive).
    uint64_t end;   ///< Ending LBA (inclusive); >= start.
} DumpExtent;
#pragma pack(pop)

#endif // LIBAARUFORMAT_CONTEXT_H