Add support for tape DDT management with hash table implementation

This commit is contained in:
2025-10-07 18:03:19 +01:00
parent c9231a291e
commit 2ec5aabd00
6 changed files with 560 additions and 76 deletions

View File

@@ -137,6 +137,13 @@ typedef struct TapePartitionHashEntry
UT_hash_handle hh; ///< UTHASH handle UT_hash_handle hh; ///< UTHASH handle
} TapePartitionHashEntry; } TapePartitionHashEntry;
/** \struct TapeDdtHashEntry
 *  \brief Hash table node mapping one tape sector address to its DDT entry.
 *
 *  Nodes of this type form the UTHASH table rooted at aaruformatContext::tapeDdt.
 *  When the table is flattened by write_tape_ddt(), `key` is used as the index into the
 *  32-bit DDT array and `value` is the element stored at that index.
 */
typedef struct TapeDdtHashEntry
{
uint64_t key; ///< Key: sector address (index of this entry in the flattened DDT array)
uint32_t value; ///< Value: DDT entry stored for that sector
UT_hash_handle hh; ///< UTHASH handle; makes this structure hashable
} TapeDdtHashEntry;
/** \struct aaruformatContext /** \struct aaruformatContext
* \brief Master context representing an open or in-creation Aaru image. * \brief Master context representing an open or in-creation Aaru image.
* *
@@ -265,6 +272,8 @@ typedef struct aaruformatContext
tapeFileHashEntry *tapeFiles; ///< Hash table root for tape files tapeFileHashEntry *tapeFiles; ///< Hash table root for tape files
TapePartitionHashEntry *tapePartitions; ///< Hash table root for tape partitions TapePartitionHashEntry *tapePartitions; ///< Hash table root for tape partitions
bool is_tape; ///< True if the image is a tape image
TapeDdtHashEntry *tapeDdt; ///< Hash table root for tape DDT entries
} aaruformatContext; } aaruformatContext;
/** \struct DumpHardwareEntriesWithData /** \struct DumpHardwareEntriesWithData

View File

@@ -71,7 +71,7 @@ AARU_EXPORT void *AARU_CALL aaruf_create(const char *filepath, uint32_t media_ty
uint64_t user_sectors, uint64_t negative_sectors, uint64_t overflow_sectors, uint64_t user_sectors, uint64_t negative_sectors, uint64_t overflow_sectors,
const char *options, const uint8_t *application_name, const char *options, const uint8_t *application_name,
uint8_t application_name_length, uint8_t application_major_version, uint8_t application_name_length, uint8_t application_major_version,
uint8_t application_minor_version); uint8_t application_minor_version, bool is_tape);
AARU_EXPORT int AARU_CALL aaruf_close(void *context); AARU_EXPORT int AARU_CALL aaruf_close(void *context);

View File

@@ -54,6 +54,8 @@ bool set_ddt_single_level_v2(aaruformatContext *ctx, uint64_t sector_addres
uint64_t block_offset, uint8_t sector_status, uint64_t *ddt_entry); uint64_t block_offset, uint8_t sector_status, uint64_t *ddt_entry);
bool set_ddt_multi_level_v2(aaruformatContext *ctx, uint64_t sector_address, bool negative, uint64_t offset, bool set_ddt_multi_level_v2(aaruformatContext *ctx, uint64_t sector_address, bool negative, uint64_t offset,
uint64_t block_offset, uint8_t sector_status, uint64_t *ddt_entry); uint64_t block_offset, uint8_t sector_status, uint64_t *ddt_entry);
bool set_ddt_tape(aaruformatContext *ctx, uint64_t sector_address, uint64_t offset, uint64_t block_offset,
uint8_t sector_status, uint64_t *ddt_entry);
aaru_options parse_options(const char *options); aaru_options parse_options(const char *options);
uint64_t get_filetime_uint64(); uint64_t get_filetime_uint64();
int32_t aaruf_close_current_block(aaruformatContext *ctx); int32_t aaruf_close_current_block(aaruformatContext *ctx);

View File

@@ -536,6 +536,156 @@ static int32_t write_single_level_ddt(aaruformatContext *ctx)
return AARUF_STATUS_OK; return AARUF_STATUS_OK;
} }
/**
 * @brief Converts the tape DDT hash table to array format and writes it as a single-level DDT.
 *
 * Tape images track their sectors in a sparse hash table (UTHASH, rooted at ctx->tapeDdt)
 * while the image is being written. This function flattens that table into a dense array of
 * 32-bit entries, fills in ctx->userDataDdtHeader for a single-level DDT v2 and then delegates
 * the actual serialization to write_single_level_ddt().
 *
 * The conversion process involves:
 * 1. Validating that the context is non-NULL and flagged as tape media
 * 2. Scanning the hash table to determine the maximum sector address (key)
 * 3. Rejecting tables whose dense form cannot be sized without overflowing
 * 4. Allocating a zero-initialized array of (max_key + 1) entries
 * 5. Initializing the DDT v2 header with the matching metadata
 * 6. Copying every hash entry to array[entry->key] = entry->value
 * 7. Calling write_single_level_ddt() to serialize the DDT
 *
 * **Hash Table to Array Conversion:**
 * Sector addresses are the hash keys and DDT entries are the values. Sectors that are not
 * present in the hash table keep the zero value written by calloc().
 * NOTE(review): a zero entry presumably means "not dumped" in the DDT format - confirm against
 * the format specification.
 *
 * **Memory Allocation:**
 * Tape DDTs always use BigDdtSizeType (32-bit entries), so the array occupies
 * (max_key + 1) * sizeof(uint32_t) bytes. The size is validated before anything is allocated
 * so that neither (max_key + 1) nor the byte length can wrap around.
 *
 * **DDT Header Configuration:**
 * - identifier: DeDuplicationTable2
 * - type: UserData
 * - compression: Lzma if ctx->compression_enabled, None otherwise
 * - levels: 1, tableLevel: 0, tableShift: 0 (single-level structure)
 * - previousLevelOffset: 0, start: 0
 * - sizeType: BigDdtSizeType (32-bit entries)
 * - entries/blocks: max_key + 1
 * - negative/overflow: 0
 * - length/cmpLength: entries * sizeof(uint32_t)
 * blockAlignmentShift and dataShift are left untouched by this function.
 *
 * @param ctx Pointer to the aaruformat context. Its is_tape flag must be set. On success
 *            ctx->userDataDdtBig points to the newly allocated array and
 *            ctx->userDataDdtHeader describes it. On any failure detected by this function
 *            itself, neither field has been modified.
 *
 * @return Returns one of the following status codes:
 * @retval AARUF_STATUS_INVALID_CONTEXT ctx is NULL or ctx->is_tape is false.
 * @retval AARUF_ERROR_NOT_ENOUGH_MEMORY The dense table is too large to be sized in a size_t,
 *         or calloc() failed to allocate it.
 * @retval other Whatever write_single_level_ddt() returns, propagated unchanged
 *         (AARUF_STATUS_OK on success).
 *
 * @note Intended to be used while finalizing tape images; disk and optical images are rejected
 *       by the is_tape check.
 *
 * @note Memory Ownership: the allocated array is stored in ctx->userDataDdtBig and is not freed
 *       here. Neither the array nor the ctx->tapeDdt hash table is released by this function;
 *       presumably context cleanup does so - verify against aaruf_close().
 *
 * @warning An empty hash table produces a DDT with a single zero entry
 *          (max_key = 0, entries = 1).
 *
 * @warning The array size depends on the highest sector address, not on the number of entries,
 *          so a sparse table with high addresses can require a very large allocation.
 *
 * @see set_ddt_tape() for how entries are added to the hash table during write operations
 * @see write_single_level_ddt() for the actual DDT serialization logic
 * @see TapeDdtHashEntry for the hash table entry structure
 * @internal
 */
static int32_t write_tape_ddt(aaruformatContext *ctx)
{
    if(ctx == NULL || !ctx->is_tape) return AARUF_STATUS_INVALID_CONTEXT;

    // First pass: find the highest sector address stored in the tape DDT hash table
    uint64_t          max_key = 0;
    TapeDdtHashEntry *entry;
    TapeDdtHashEntry *tmp;

    HASH_ITER(hh, ctx->tapeDdt, entry, tmp)
    {
        if(entry->key > max_key) max_key = entry->key;
    }

    // entries = max_key + 1 must not wrap around, and entries * sizeof(uint32_t) must be
    // representable in a size_t; otherwise the header would describe a table that the
    // allocation below cannot actually hold.
    if(max_key >= SIZE_MAX / sizeof(uint32_t))
    {
        TRACE("Tape DDT is too large to be converted to an in-memory table");
        return AARUF_ERROR_NOT_ENOUGH_MEMORY;
    }

    const uint64_t entries = max_key + 1;

    // Allocate before touching the context so a failure leaves it unchanged
    uint32_t *table = calloc((size_t)entries, sizeof *table);

    if(table == NULL)
    {
        TRACE("Failed to allocate memory for tape DDT table");
        return AARUF_ERROR_NOT_ENOUGH_MEMORY;
    }

    // Initialize context user data DDT header
    ctx->userDataDdtHeader.identifier          = DeDuplicationTable2;
    ctx->userDataDdtHeader.type                = UserData;
    ctx->userDataDdtHeader.compression         = ctx->compression_enabled ? Lzma : None;
    ctx->userDataDdtHeader.levels              = 1; // Single level
    ctx->userDataDdtHeader.tableLevel          = 0; // Top level
    ctx->userDataDdtHeader.previousLevelOffset = 0; // No previous level for single-level DDT
    ctx->userDataDdtHeader.negative            = 0;
    ctx->userDataDdtHeader.overflow            = 0;
    ctx->userDataDdtHeader.tableShift          = 0; // Single level
    ctx->userDataDdtHeader.sizeType            = BigDdtSizeType;
    ctx->userDataDdtHeader.entries             = entries;
    ctx->userDataDdtHeader.blocks              = entries;
    ctx->userDataDdtHeader.start               = 0;
    ctx->userDataDdtHeader.length              = entries * sizeof(uint32_t);
    ctx->userDataDdtHeader.cmpLength           = ctx->userDataDdtHeader.length;

    // Second pass: every key is <= max_key, so it always indexes inside the table
    HASH_ITER(hh, ctx->tapeDdt, entry, tmp)
    {
        table[entry->key] = entry->value;
    }

    ctx->userDataDdtBig = table;

    // Do not repeat code
    return write_single_level_ddt(ctx);
}
/** /**
* @brief Finalize any active checksum calculations and append a checksum block. * @brief Finalize any active checksum calculations and append a checksum block.
* *

View File

@@ -32,28 +32,119 @@
* Allocates and initializes a new aaruformat context and image file with the specified parameters. * Allocates and initializes a new aaruformat context and image file with the specified parameters.
* This function sets up all necessary data structures including headers, DDT (deduplication table), * This function sets up all necessary data structures including headers, DDT (deduplication table),
* caches, and index entries for writing a new AaruFormat image. It also handles file creation, * caches, and index entries for writing a new AaruFormat image. It also handles file creation,
* memory allocation, and proper initialization of the writing context. * memory allocation, and proper initialization of the writing context. The function supports both
* block-based media (disks, optical media) and sequential tape media with different initialization
* strategies optimized for each media type.
* *
* @param filepath Path to the image file to create. * **Media Type Handling:**
* @param media_type Media type identifier. * The function creates different internal structures based on the `is_tape` parameter:
* @param sector_size Size of each sector in bytes. *
* @param user_sectors Number of user data sectors. * **Block Media (is_tape = false):**
* @param negative_sectors Number of negative sectors. * - Initializes full DDT (Deduplication Table) version 2 for sector-level deduplication
* @param overflow_sectors Number of overflow sectors. * - Allocates primary DDT table (userDataDdtMini or userDataDdtBig) as a preallocated array
* @param options String with creation options (parsed for alignment and shift parameters). * - Configures multi-level DDT support for large images (> 138,412,552 sectors)
* @param application_name Pointer to the application name string. * - Enables optional deduplication hash map for detecting duplicate sectors
* @param application_name_length Length of the application name string (must be ≤ AARU_HEADER_APP_NAME_LEN). * - Reserves space for DDT at the beginning of the file (after header, block-aligned)
* @param application_major_version Major version of the application. * - Data blocks start after DDT table to maintain sequential layout
* @param application_minor_version Minor version of the application. * - DDT size is fixed and known upfront based on sector count
*
* **Tape Media (is_tape = true):**
* - Initializes DDT for sector-level deduplication using a different strategy
* - Uses a growing hash table (tapeDdt) instead of a preallocated array
* - Sets ctx->is_tape flag and initializes ctx->tapeDdt to NULL (populated on first write)
* - Data blocks start immediately after the header (block-aligned)
* - Hash table grows dynamically as blocks are written
* - Optimized for sequential write patterns typical of tape media
* - Tape file/partition metadata is managed separately via additional hash tables
* - More memory-efficient for tapes with unknown final size
*
* **Initialization Flow:**
* 1. Parse creation options (compression, alignment, deduplication, checksums)
* 2. Allocate and zero-initialize context structure
* 3. Create/open image file in binary write mode
* 4. Initialize AaruFormat header with application and version information
* 5. Set up image metadata and sector size information
* 6. Initialize block and header caches for performance
* 7. Initialize ECC context for Compact Disc support
* 8. Branch based on media type:
* - Block media: Configure DDT structures and calculate offsets with preallocated array
* - Tape media: Set tape flags and initialize for dynamic hash table DDT
* 9. Initialize index entries array for tracking all blocks
* 10. Configure compression, checksums, and deduplication based on options
* 11. Position file pointer at calculated data start position
*
* **DDT Configuration (Block Media Only):**
* The function automatically selects optimal DDT parameters:
* - Single-level DDT (tableShift=0): For images < 138,412,552 sectors
* - Multi-level DDT (tableShift=22): For images ≥ 138,412,552 sectors
* - Small entries (16-bit): Default, supports most image sizes efficiently
* - Big entries (32-bit): Reserved for future use with very large images
*
* The DDT offset calculation ensures proper alignment:
* - Primary DDT placed immediately after header (block-aligned)
* - Data blocks positioned after DDT table (block-aligned)
* - Alignment controlled by blockAlignmentShift from options
*
* @param filepath Path to the image file to create. The file will be created if it doesn't exist,
* or overwritten if it does. Must be a valid writable path.
*
* @param media_type Media type identifier (e.g., CompactDisc, DVD, HardDisk, Tape formats).
* This affects how the image is structured and which features are enabled.
*
* @param sector_size Size of each sector/block in bytes. Common values:
* - 512 bytes: Hard disks, floppy disks
* - 2048 bytes: CD-ROM, DVD
* - Variable: Tape media (block size varies by format)
*
* @param user_sectors Number of user data sectors/blocks in the image. This is the main
* data area excluding negative (lead-in) and overflow (lead-out) regions.
* For tape media, this may be an estimate as the final size is often unknown.
*
* @param negative_sectors Number of negative sectors (typically lead-in area for optical media).
* Set to 0 for media types without lead-in areas. Not used for tape media.
*
* @param overflow_sectors Number of overflow sectors (typically lead-out area for optical media).
* Set to 0 for media types without lead-out areas. Not used for tape media.
*
* @param options String with creation options in key=value format, semicolon-separated.
* Supported options:
* - "compress=true|false": Enable/disable LZMA compression
* - "deduplicate=true|false": Enable/disable sector deduplication (all media types)
* - "md5=true|false": Calculate MD5 checksum during write
* - "sha1=true|false": Calculate SHA-1 checksum during write
* - "sha256=true|false": Calculate SHA-256 checksum during write
* - "spamsum=true|false": Calculate SpamSum fuzzy hash during write
* - "blake3=true|false": Calculate BLAKE3 checksum during write
* - "block_alignment=N": Block alignment shift value (default varies)
* - "data_shift=N": Data shift value for DDT granularity
* - "table_shift=N": Table shift for multi-level DDT (-1 for auto, block media only)
* - "dictionary=N": LZMA dictionary size in bytes
* Example: "compress=true;deduplicate=true;md5=true;sha1=true"
*
* @param application_name Pointer to the application name string (UTF-16LE raw bytes).
* This identifies the software that created the image.
*
* @param application_name_length Length of the application name string in bytes.
* Must be ≤ AARU_HEADER_APP_NAME_LEN (64 bytes).
*
* @param application_major_version Major version of the creating application (0-255).
*
* @param application_minor_version Minor version of the creating application (0-255).
*
* @param is_tape Boolean flag indicating tape media type:
* - true: Initialize for tape media (sequential, dynamic hash table DDT, file/partition metadata)
* - false: Initialize for block media (random access, preallocated array DDT)
* *
* @return Returns one of the following: * @return Returns one of the following:
* @retval aaruformatContext* Successfully created and initialized context. The returned pointer contains: * @retval aaruformatContext* Successfully created and initialized context. The returned pointer contains:
* - Properly initialized AaruFormat headers and metadata * - Properly initialized AaruFormat headers and metadata
* - Allocated and configured DDT structures for deduplication * - For block media: Allocated and configured DDT structures with preallocated arrays
* - For tape media: Tape flags set, DDT initialized as NULL (grows on demand)
* - Initialized block and header caches for performance * - Initialized block and header caches for performance
* - Open file stream ready for writing operations * - Open file stream ready for writing operations
* - Index entries array ready for block tracking * - Index entries array ready for block tracking
* - ECC context initialized for Compact Disc support * - ECC context initialized for Compact Disc support
* - Checksum contexts initialized based on options
* *
* @retval NULL Creation failed. The specific error can be determined by checking errno, which will be set to: * @retval NULL Creation failed. The specific error can be determined by checking errno, which will be set to:
* - AARUF_ERROR_NOT_ENOUGH_MEMORY (-9) when memory allocation fails for: * - AARUF_ERROR_NOT_ENOUGH_MEMORY (-9) when memory allocation fails for:
@@ -61,19 +152,19 @@
* * Readable sector tags array allocation * * Readable sector tags array allocation
* * Application version string allocation * * Application version string allocation
* * Image version string allocation * * Image version string allocation
* * DDT table allocation (userDataDdtMini or userDataDdtBig) * * DDT table allocation (userDataDdtMini or userDataDdtBig, block media only)
* * Index entries array allocation * * Index entries array allocation
* - AARUF_ERROR_CANNOT_CREATE_FILE (-19) when file operations fail: * - AARUF_ERROR_CANNOT_CREATE_FILE (-19) when file operations fail:
* * Unable to open the specified filepath for writing * * Unable to open the specified filepath for writing
* * File seek operations fail during initialization * * File seek operations fail during initialization
* * File system errors or permission issues * * File system errors or permission issues
* - AARUF_ERROR_INVALID_APP_NAME_LENGTH (-20) when: * - AARUF_ERROR_INVALID_APP_NAME_LENGTH (-20) when:
* * application_name_length exceeds AARU_HEADER_APP_NAME_LEN * * application_name_length exceeds AARU_HEADER_APP_NAME_LEN (64 bytes)
* *
* @note Memory Management: * @note Memory Management:
* - The function performs extensive memory allocation for various context structures * - The function performs extensive memory allocation for various context structures
* - On failure, all previously allocated memory is properly cleaned up * - On failure, all previously allocated memory is properly cleaned up
* - The returned context must be freed using appropriate cleanup functions * - The returned context must be freed using aaruf_close() when finished
* *
* @note File Operations: * @note File Operations:
* - Creates a new file at the specified path (overwrites existing files) * - Creates a new file at the specified path (overwrites existing files)
@@ -81,32 +172,63 @@
* - Positions the file pointer at the calculated data start position * - Positions the file pointer at the calculated data start position
* - File alignment is handled based on parsed options * - File alignment is handled based on parsed options
* *
* @note DDT Initialization: * @note DDT Initialization (Block Media Only):
* - Uses DDT version 2 format with configurable compression and alignment * - Uses DDT version 2 format with configurable compression and alignment
* - Supports both small (16-bit) and big (32-bit) DDT entry sizes * - Supports both small (16-bit) and big (32-bit) DDT entry sizes
* - Calculates optimal table sizes based on sector counts and shift parameters * - Calculates optimal table sizes based on sector counts and shift parameters
* - All DDT entries are initialized to zero (indicating unallocated sectors) * - All DDT entries are initialized to zero (indicating unallocated sectors)
* - Multi-level DDT is used for images with ≥ 138,412,552 total sectors
* - Single-level DDT is used for smaller images for efficiency
* - DDT is a fixed-size preallocated array written to file at known offset
*
* @note Tape Media Initialization:
* - Tape images use a dynamic hash table DDT for sector-level deduplication
* - File and partition metadata is managed via separate hash tables
* - ctx->is_tape is set to 1 to indicate tape mode throughout the library
* - ctx->tapeDdt is initialized to NULL and grows dynamically as blocks are written
* - Data blocks can start immediately after header for optimal sequential access
* - The hash table DDT allows for efficient deduplication without knowing final size
* - More memory-efficient for tapes with unpredictable or very large sizes
* - Deduplication hash map may still be used alongside tapeDdt if enabled in options
* *
* @note Options Parsing: * @note Options Parsing:
* - The options string is parsed to extract block_alignment, data_shift, and table_shift * - The options string is parsed to extract block_alignment, data_shift, and table_shift
* - These parameters affect memory usage, performance, and file organization * - These parameters affect memory usage, performance, and file organization
* - Invalid options may result in suboptimal performance but won't cause failure * - Invalid options may result in suboptimal performance but won't cause failure
* - Compression and checksums can be enabled independently via options
*
* @note Checksum Initialization:
* - MD5, SHA-1, SHA-256, SpamSum, and BLAKE3 can be calculated during write
* - Checksum contexts are initialized only if requested in options
* - Checksums are computed incrementally as sectors/blocks are written
* - Final checksums are stored in checksum block during image finalization
* *
* @warning The created context is in writing mode and expects proper finalization * @warning The created context is in writing mode and expects proper finalization
* before closing to ensure index and metadata are written correctly. * before closing to ensure index and metadata are written correctly.
* *
* @warning Application name length validation is strict - exceeding the limit will * @warning Application name length validation is strict - exceeding the limit will
* cause creation failure with AARUF_ERROR_INVALID_APP_NAME_LENGTH. * cause creation failure with AARUF_ERROR_INVALID_APP_NAME_LENGTH.
*
* @warning For tape media, the DDT structure is fundamentally different (hash table vs array).
* The is_tape flag must accurately reflect the media type being created.
*
* @warning The negative_sectors and overflow_sectors parameters are used only for
* block media. For tape media, these parameters are ignored.
*
* @see aaruf_close() for proper context cleanup and image finalization
* @see aaruf_write_sector() for writing sectors to block media images
* @see aaruf_set_tape_file() for defining tape file metadata
* @see aaruf_set_tape_partition() for defining tape partition metadata
*/ */
void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32_t sector_size, void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32_t sector_size,
const uint64_t user_sectors, const uint64_t negative_sectors, const uint64_t overflow_sectors, const uint64_t user_sectors, const uint64_t negative_sectors, const uint64_t overflow_sectors,
const char *options, const uint8_t *application_name, const uint8_t application_name_length, const char *options, const uint8_t *application_name, const uint8_t application_name_length,
const uint8_t application_major_version, const uint8_t application_minor_version) const uint8_t application_major_version, const uint8_t application_minor_version, const bool is_tape)
{ {
TRACE("Entering aaruf_create(%s, %u, %u, %llu, %llu, %llu, %s, %s, %u, %u, %u)", filepath, media_type, sector_size, TRACE("Entering aaruf_create(%s, %u, %u, %llu, %llu, %llu, %s, %s, %u, %u, %u, %d)", filepath, media_type,
user_sectors, negative_sectors, overflow_sectors, options, sector_size, user_sectors, negative_sectors, overflow_sectors, options,
application_name ? (const char *)application_name : "NULL", application_name_length, application_name ? (const char *)application_name : "NULL", application_name_length,
application_major_version, application_minor_version); application_major_version, application_minor_version, is_tape);
// Parse the options // Parse the options
TRACE("Parsing options"); TRACE("Parsing options");
@@ -216,71 +338,95 @@ void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32
ctx->libraryMajorVersion = LIBAARUFORMAT_MAJOR_VERSION; ctx->libraryMajorVersion = LIBAARUFORMAT_MAJOR_VERSION;
ctx->libraryMinorVersion = LIBAARUFORMAT_MINOR_VERSION; ctx->libraryMinorVersion = LIBAARUFORMAT_MINOR_VERSION;
// Initialize DDT2 if(!is_tape)
TRACE("Initializing DDT2"); { // Initialize DDT2
ctx->inMemoryDdt = true; TRACE("Initializing DDT2");
ctx->userDataDdtHeader.identifier = DeDuplicationTable2; ctx->inMemoryDdt = true;
ctx->userDataDdtHeader.type = UserData; ctx->userDataDdtHeader.identifier = DeDuplicationTable2;
ctx->userDataDdtHeader.compression = None; ctx->userDataDdtHeader.type = UserData;
ctx->userDataDdtHeader.levels = 2; ctx->userDataDdtHeader.compression = None;
ctx->userDataDdtHeader.tableLevel = 0; ctx->userDataDdtHeader.tableLevel = 0;
ctx->userDataDdtHeader.previousLevelOffset = 0; ctx->userDataDdtHeader.previousLevelOffset = 0;
ctx->userDataDdtHeader.negative = negative_sectors; ctx->userDataDdtHeader.negative = negative_sectors;
ctx->userDataDdtHeader.blocks = user_sectors + overflow_sectors + negative_sectors; ctx->userDataDdtHeader.blocks = user_sectors + overflow_sectors + negative_sectors;
ctx->userDataDdtHeader.overflow = overflow_sectors; ctx->userDataDdtHeader.overflow = overflow_sectors;
ctx->userDataDdtHeader.start = 0; ctx->userDataDdtHeader.start = 0;
ctx->userDataDdtHeader.blockAlignmentShift = parsed_options.block_alignment; ctx->userDataDdtHeader.blockAlignmentShift = parsed_options.block_alignment;
ctx->userDataDdtHeader.dataShift = parsed_options.data_shift; ctx->userDataDdtHeader.dataShift = parsed_options.data_shift;
ctx->userDataDdtHeader.sizeType = 1; ctx->userDataDdtHeader.sizeType = 1;
ctx->userDataDdtHeader.entries = ctx->userDataDdtHeader.blocks / (1 << ctx->userDataDdtHeader.tableShift); ctx->userDataDdtHeader.entries = ctx->userDataDdtHeader.blocks / (1 << ctx->userDataDdtHeader.tableShift);
if(parsed_options.table_shift == -1) if(parsed_options.table_shift == -1)
{ {
uint64_t total_sectors = user_sectors + overflow_sectors + negative_sectors; const uint64_t total_sectors = user_sectors + overflow_sectors + negative_sectors;
if(total_sectors < 0x8388608ULL) if(total_sectors < 0x8388608ULL)
ctx->userDataDdtHeader.tableShift = 0; {
ctx->userDataDdtHeader.levels = 1;
ctx->userDataDdtHeader.tableShift = 0;
}
else
{
ctx->userDataDdtHeader.levels = 2;
ctx->userDataDdtHeader.tableShift = 22;
}
}
else else
ctx->userDataDdtHeader.tableShift = 22; {
ctx->userDataDdtHeader.levels = parsed_options.table_shift > 0 ? 2 : 1;
ctx->userDataDdtHeader.tableShift = parsed_options.table_shift;
}
if(ctx->userDataDdtHeader.blocks % (1 << ctx->userDataDdtHeader.tableShift) != 0)
ctx->userDataDdtHeader.entries++;
TRACE("Initializing primary/single DDT");
if(ctx->userDataDdtHeader.sizeType == SmallDdtSizeType)
ctx->userDataDdtMini =
(uint16_t *)calloc(ctx->userDataDdtHeader.entries, sizeof(uint16_t)); // All entries to zero
else if(ctx->userDataDdtHeader.sizeType == BigDdtSizeType)
ctx->userDataDdtBig =
(uint32_t *)calloc(ctx->userDataDdtHeader.entries, sizeof(uint32_t)); // All entries to zero
// Set the primary DDT offset (just after the header, block aligned)
ctx->primaryDdtOffset = sizeof(AaruHeaderV2); // Start just after the header
const uint64_t alignmentMask = (1ULL << ctx->userDataDdtHeader.blockAlignmentShift) - 1;
ctx->primaryDdtOffset = ctx->primaryDdtOffset + alignmentMask & ~alignmentMask;
TRACE("Primary DDT will be placed at offset %" PRIu64, ctx->primaryDdtOffset);
// Calculate size of primary DDT table
const uint64_t primaryTableSize = ctx->userDataDdtHeader.sizeType == SmallDdtSizeType
? ctx->userDataDdtHeader.entries * sizeof(uint16_t)
: ctx->userDataDdtHeader.entries * sizeof(uint32_t);
// Calculate where data blocks can start (after primary DDT + header)
if(ctx->userDataDdtHeader.tableShift > 0)
{
const uint64_t dataStartPosition = ctx->primaryDdtOffset + sizeof(DdtHeader2) + primaryTableSize;
ctx->nextBlockPosition = dataStartPosition + alignmentMask & ~alignmentMask;
}
else
ctx->nextBlockPosition = ctx->primaryDdtOffset; // Single-level DDT can start anywhere
} }
else else
ctx->userDataDdtHeader.tableShift = parsed_options.table_shift;
if(ctx->userDataDdtHeader.blocks % (1 << ctx->userDataDdtHeader.tableShift) != 0) ctx->userDataDdtHeader.entries++;
TRACE("Initializing primary/single DDT");
if(ctx->userDataDdtHeader.sizeType == SmallDdtSizeType)
ctx->userDataDdtMini =
(uint16_t *)calloc(ctx->userDataDdtHeader.entries, sizeof(uint16_t)); // All entries to zero
else if(ctx->userDataDdtHeader.sizeType == BigDdtSizeType)
ctx->userDataDdtBig =
(uint32_t *)calloc(ctx->userDataDdtHeader.entries, sizeof(uint32_t)); // All entries to zero
// Set the primary DDT offset (just after the header, block aligned)
ctx->primaryDdtOffset = sizeof(AaruHeaderV2); // Start just after the header
const uint64_t alignmentMask = (1ULL << ctx->userDataDdtHeader.blockAlignmentShift) - 1;
ctx->primaryDdtOffset = ctx->primaryDdtOffset + alignmentMask & ~alignmentMask;
TRACE("Primary DDT will be placed at offset %" PRIu64, ctx->primaryDdtOffset);
// Calculate size of primary DDT table
const uint64_t primaryTableSize = ctx->userDataDdtHeader.sizeType == SmallDdtSizeType
? ctx->userDataDdtHeader.entries * sizeof(uint16_t)
: ctx->userDataDdtHeader.entries * sizeof(uint32_t);
// Calculate where data blocks can start (after primary DDT + header)
if(ctx->userDataDdtHeader.tableShift > 0)
{ {
const uint64_t dataStartPosition = ctx->primaryDdtOffset + sizeof(DdtHeader2) + primaryTableSize; // Fill needed values
ctx->nextBlockPosition = dataStartPosition + alignmentMask & ~alignmentMask; ctx->userDataDdtHeader.blockAlignmentShift = parsed_options.block_alignment;
ctx->userDataDdtHeader.dataShift = parsed_options.data_shift;
// Calculate aligned next block position
const uint64_t alignmentMask = (1ULL << parsed_options.block_alignment) - 1;
ctx->nextBlockPosition = sizeof(AaruHeaderV2); // Start just after the header
ctx->nextBlockPosition = ctx->nextBlockPosition + alignmentMask & ~alignmentMask;
ctx->is_tape = 1;
ctx->tapeDdt = NULL;
} }
else
ctx->nextBlockPosition = ctx->primaryDdtOffset; // Single-level DDT can start anywhere
TRACE("Data blocks will start at position %" PRIu64, ctx->nextBlockPosition); TRACE("Data blocks will start at position %" PRIu64, ctx->nextBlockPosition);
// Position file pointer at the data start position // Position file pointer at the data start position
if(fseek(ctx->imageStream, (long)ctx->nextBlockPosition, SEEK_SET) != 0) if(fseek(ctx->imageStream, ctx->nextBlockPosition, SEEK_SET) != 0)
{ {
FATAL("Could not seek to data start position"); FATAL("Could not seek to data start position");
free(ctx->readableSectorTags); free(ctx->readableSectorTags);

View File

@@ -1796,3 +1796,180 @@ bool set_ddt_multi_level_v2(aaruformatContext *ctx, uint64_t sector_address, boo
TRACE("Exiting set_ddt_multi_level_v2() = true"); TRACE("Exiting set_ddt_multi_level_v2() = true");
return true; return true;
} }
/**
 * @brief Sets a DDT entry for tape media using a hash-based lookup table.
 *
 * This function is specifically designed for tape media images, whose sector address space
 * can be sparse (file marks, partition boundaries), which makes the traditional contiguous
 * DDT array inefficient. Instead of a large array it uses a hash table (UTHASH) that stores
 * only the sectors that have been written.
 *
 * The function performs the following operations:
 * 1. Validates the context, the output pointer, and verifies it's a tape image
 * 2. Constructs a DDT entry encoding offset, block index, and sector status
 *    (or takes the caller-provided one when *ddt_entry is non-zero)
 * 3. Creates a hash table entry with the sector address as the key
 * 4. Inserts or replaces the entry in the tape DDT hash table
 *
 * **DDT Entry Format:**
 * The DDT entry is handled as a 64-bit value but stored in the hash table as 32 bits
 * (TapeDdtHashEntry.value), with the following bit layout:
 * ```
 * Bits 0-(dataShift-1):  Sector offset within block (offset masked by (1 << dataShift) - 1)
 * Bits dataShift-27:     Block index (block_offset >> blockAlignmentShift)
 * Bits 28-31:            Sector status (4 bits for status flags)
 * Bits 32-63:            Must be zero; an entry using them is rejected
 * ```
 *
 * **Hash Table Management:**
 * Uses the HASH_REPLACE macro from the UTHASH library which:
 * - Adds the new entry if the key (sector_address) doesn't exist
 * - Swaps in the new entry if the key is found and hands back the previous entry,
 *   which this function then frees (UTHASH itself never frees items)
 *
 * **Overflow Detection:**
 * When the entry is constructed here, the combined offset and block index must not exceed
 * 28 bits (0xFFFFFFF) so that the sector status fits in bits 28-31. In every case the final
 * entry must fit in 32 bits, because that is the width of the stored hash table value.
 *
 * @param ctx Pointer to the aaruformat context. Must not be NULL.
 *            The context must have a valid imageStream and is_tape must be true.
 *            The ctx->tapeDdt hash table will be updated with the new entry.
 *            The ctx->userDataDdtHeader contains alignment and shift parameters.
 *
 * @param sector_address Logical sector address on the tape to set. This serves as
 *                       the unique key in the hash table. Multiple calls with the
 *                       same sector_address will replace the previous entry.
 *
 * @param offset Offset of the sector within its block.
 *               This value is masked by (1 << dataShift) - 1 to extract only the
 *               lower bits representing the offset within the block.
 *
 * @param block_offset Absolute byte offset in the image file where the data block starts.
 *                     This is right-shifted by blockAlignmentShift to get the block index,
 *                     which is stored in the DDT entry's middle bits.
 *
 * @param sector_status Status flags for the sector (4 bits). Common values include:
 *                      - 0x0 (SectorStatusNotDumped): Sector not yet acquired during image dumping
 *                      - 0x1 (SectorStatusDumped): Sector successfully dumped without error
 *                      - 0x2 (SectorStatusErrored): Error during dumping; data may be incomplete or corrupt
 *                      - 0x3 (SectorStatusMode1Correct): Valid MODE 1 data with regenerable suffix/prefix
 *                      - 0x4 (SectorStatusMode2Form1Ok): Suffix verified/regenerable for MODE 2 Form 1
 *                      - 0x5 (SectorStatusMode2Form2Ok): Suffix matches MODE 2 Form 2 with valid CRC
 *                      - 0x6 (SectorStatusMode2Form2NoCrc): Suffix matches MODE 2 Form 2 but CRC empty/missing
 *                      - 0x7 (SectorStatusTwin): Pointer references a twin sector table
 *                      - 0x8 (SectorStatusUnrecorded): Sector physically unrecorded; repeated reads non-deterministic
 *                      - 0x9 (SectorStatusEncrypted): Content encrypted and stored encrypted in image
 *                      - 0xA (SectorStatusUnencrypted): Content originally encrypted but stored decrypted in image
 *                      See SectorStatus enum for complete list of defined values.
 *                      Values above 0xF do not fit in the status field and cause the call to fail.
 *
 * @param ddt_entry Pointer to a 64-bit value that supplies and/or receives the DDT entry.
 *                  Must not be NULL.
 *                  - If *ddt_entry is 0: A new entry is constructed from the provided parameters
 *                    and written back through the pointer on success
 *                  - If *ddt_entry is non-zero: The existing value is used directly
 *                  The constructed or provided value is stored in the hash table.
 *
 * @return Returns one of the following status codes:
 * @retval true Successfully created and inserted the DDT entry. This occurs when:
 *         - The context, image stream and ddt_entry pointer are valid
 *         - The image is confirmed to be a tape image (is_tape == true)
 *         - A constructed entry fits within the 28-bit limit (<= 0xFFFFFFF)
 *         - The final entry fits in 32 bits
 *         - Memory allocation for the hash entry succeeds
 *
 * @retval false Failed to set the DDT entry. This can happen when:
 *         - ctx is NULL or ctx->imageStream is NULL (invalid context)
 *         - ddt_entry is NULL
 *         - ctx->is_tape is false (wrong function called for non-tape media)
 *         - The constructed entry exceeds 0xFFFFFFF (media too large for big DDT)
 *         - The final entry does not fit in 32 bits
 *         - Memory allocation fails for the new hash table entry (out of memory)
 *
 * @note This function is only for tape images. For disk images, use set_ddt_single_level_v2()
 *       or set_ddt_multi_level_v2() instead, which use array-based DDT structures.
 *
 * @note Memory Management:
 *       - Allocates a new TapeDdtHashEntry for each call
 *       - An entry displaced by HASH_REPLACE is freed here
 *       - All hash entries remain in context until cleanup
 *       - The tapeDdt hash table must be freed during context destruction
 *
 * @note Error Handling:
 *       - All errors are logged with FATAL level messages
 *       - Function returns false immediately on any error condition
 *       - TRACE logging marks entry/exit points for debugging
 *       - On failure neither *ddt_entry nor the hash table is modified
 *
 * @warning The DDT entry overflow check at 0xFFFFFFF (28 bits) is critical. Exceeding
 *          this limit indicates the media is too large to fit in the current DDT format,
 *          and continuing would cause data corruption.
 *
 * @warning This function modifies the shared tapeDdt hash table. In multi-threaded
 *          environments, external synchronization is required to prevent race conditions.
 *
 * @see TapeDdtHashEntry for the hash table entry structure
 * @see set_ddt_entry_v2() for the main DDT entry point that dispatches to this function
 * @see get_ddt_tape() for retrieving tape DDT entries from the hash table
 */
bool set_ddt_tape(aaruformatContext *ctx, uint64_t sector_address, const uint64_t offset, const uint64_t block_offset,
                  const uint8_t sector_status, uint64_t *ddt_entry)
{
    TRACE("Entering set_ddt_tape(%p, %" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %d)", (void *)ctx, sector_address, offset,
          block_offset, sector_status);

    // Check if the context and image stream are valid
    if(ctx == NULL || ctx->imageStream == NULL)
    {
        FATAL("Invalid context or image stream.");
        TRACE("Exiting set_ddt_tape() = false");
        return false;
    }

    // The entry is both read and written through this pointer
    if(ddt_entry == NULL)
    {
        FATAL("Invalid DDT entry pointer.");
        TRACE("Exiting set_ddt_tape() = false");
        return false;
    }

    // Should not really be here
    if(!ctx->is_tape)
    {
        FATAL("Image is not tape, wrong function called.");
        TRACE("Exiting set_ddt_tape() = false");
        return false;
    }

    // Work on a local copy so the caller's value is untouched if anything below fails
    uint64_t entry = *ddt_entry;

    if(entry == 0)
    {
        const uint64_t block_index = block_offset >> ctx->userDataDdtHeader.blockAlignmentShift;
        const uint64_t offset_mask = (1ULL << ctx->userDataDdtHeader.dataShift) - 1;

        entry = (offset & offset_mask) | (block_index << ctx->userDataDdtHeader.dataShift);

        // Overflow detection for DDT entry: bits 28-31 are reserved for the sector status
        if(entry > 0xFFFFFFF)
        {
            FATAL("DDT overflow: media does not fit in big DDT");
            TRACE("Exiting set_ddt_tape() = false");
            return false;
        }

        entry |= (uint64_t)sector_status << 28;
    }

    // The hash table stores 32-bit values; refuse to truncate silently
    if(entry > UINT32_MAX)
    {
        FATAL("DDT entry does not fit in 32 bits");
        TRACE("Exiting set_ddt_tape() = false");
        return false;
    }

    // Create DDT hash entry
    TapeDdtHashEntry *new_entry = calloc(1, sizeof *new_entry);
    TapeDdtHashEntry *old_entry = NULL;

    if(new_entry == NULL)
    {
        FATAL("Cannot allocate memory for new tape DDT hash entry.");
        TRACE("Exiting set_ddt_tape() = false");
        return false;
    }

    TRACE("Setting tape DDT entry %" PRIu64 " to %" PRIu32, sector_address, (uint32_t)entry);

    // Nothing can fail from here on, so publish the entry to the caller
    *ddt_entry       = entry;
    new_entry->key   = sector_address;
    new_entry->value = (uint32_t)entry;

    // Insert entry into tape DDT; a displaced entry with the same key comes back in old_entry
    HASH_REPLACE(hh, ctx->tapeDdt, key, sizeof(uint64_t), new_entry, old_entry);
    free(old_entry); // free(NULL) is a no-op when no entry was displaced

    TRACE("Exiting set_ddt_tape() = true");
    return true;
}