diff --git a/include/aaruformat/context.h b/include/aaruformat/context.h index b1856de..fe65814 100644 --- a/include/aaruformat/context.h +++ b/include/aaruformat/context.h @@ -137,6 +137,13 @@ typedef struct TapePartitionHashEntry UT_hash_handle hh; ///< UTHASH handle } TapePartitionHashEntry; +typedef struct TapeDdtHashEntry +{ + uint64_t key; ///< Key: sector address + uint32_t value; ///< Value: DDT entry + UT_hash_handle hh; ///< UTHASH handle +} TapeDdtHashEntry; + /** \struct aaruformatContext * \brief Master context representing an open or in‑creation Aaru image. * @@ -265,6 +272,8 @@ typedef struct aaruformatContext tapeFileHashEntry *tapeFiles; ///< Hash table root for tape files TapePartitionHashEntry *tapePartitions; ///< Hash table root for tape partitions + bool is_tape; ///< True if the image is a tape image + TapeDdtHashEntry *tapeDdt; ///< Hash table root for tape DDT entries } aaruformatContext; /** \struct DumpHardwareEntriesWithData diff --git a/include/aaruformat/decls.h b/include/aaruformat/decls.h index 9dc6242..019485c 100644 --- a/include/aaruformat/decls.h +++ b/include/aaruformat/decls.h @@ -71,7 +71,7 @@ AARU_EXPORT void *AARU_CALL aaruf_create(const char *filepath, uint32_t media_ty uint64_t user_sectors, uint64_t negative_sectors, uint64_t overflow_sectors, const char *options, const uint8_t *application_name, uint8_t application_name_length, uint8_t application_major_version, - uint8_t application_minor_version); + uint8_t application_minor_version, bool is_tape); AARU_EXPORT int AARU_CALL aaruf_close(void *context); diff --git a/include/internal.h b/include/internal.h index 8b6745e..20acea3 100644 --- a/include/internal.h +++ b/include/internal.h @@ -54,6 +54,8 @@ bool set_ddt_single_level_v2(aaruformatContext *ctx, uint64_t sector_addres uint64_t block_offset, uint8_t sector_status, uint64_t *ddt_entry); bool set_ddt_multi_level_v2(aaruformatContext *ctx, uint64_t sector_address, bool negative, uint64_t offset, uint64_t 
block_offset, uint8_t sector_status, uint64_t *ddt_entry); +bool set_ddt_tape(aaruformatContext *ctx, uint64_t sector_address, uint64_t offset, uint64_t block_offset, + uint8_t sector_status, uint64_t *ddt_entry); aaru_options parse_options(const char *options); uint64_t get_filetime_uint64(); int32_t aaruf_close_current_block(aaruformatContext *ctx); diff --git a/src/close.c b/src/close.c index 6e80d7d..824e609 100644 --- a/src/close.c +++ b/src/close.c @@ -536,6 +536,156 @@ static int32_t write_single_level_ddt(aaruformatContext *ctx) return AARUF_STATUS_OK; } +/** + * @brief Converts tape DDT hash table to array format and writes it as a single-level DDT. + * + * This function is specifically designed for tape media images where sectors have been tracked + * using a sparse hash table (UTHASH) during write operations. It converts the hash-based tape + * DDT into a traditional array-based DDT structure suitable for serialization to disk. The + * function performs a complete transformation from the sparse hash representation to a dense + * array representation, then delegates the actual write operation to write_single_level_ddt(). + * + * The conversion process involves: + * 1. Validating the context is for tape media + * 2. Scanning the hash table to determine the maximum sector address (key) + * 3. Allocating a contiguous array large enough to hold all entries up to max_key + * 4. Populating the array by copying hash table entries to their corresponding indices + * 5. Initializing a DDT v2 header with appropriate metadata + * 6. Calling write_single_level_ddt() to serialize the DDT to disk + * + * **Hash Table to Array Conversion:** + * The tape DDT hash table uses sector addresses as keys and DDT entries as values. This function + * creates a zero-initialized array of size (max_key + 1) and copies each hash entry to + * array[entry->key] = entry->value. 
Sectors not present in the hash table remain as zero entries + * in the array, which indicates SectorStatusNotDumped in the DDT format. + * + * **Memory Allocation:** + * The function always uses BigDdtSizeType (32-bit entries) for tape DDTs, allocating + * (max_key + 1) * sizeof(uint32_t) bytes. This provides sufficient capacity for the 28-bit + * data + 4-bit status encoding used in tape DDT entries. + * + * **DDT Header Configuration:** + * The userDataDdtHeader is configured for a single-level DDT v2 structure: + * - identifier: DeDuplicationTable2 + * - type: UserData + * - compression: Determined by ctx->compression_enabled (Lzma or None) + * - levels: 1 (single-level structure) + * - tableLevel: 0 (top-level table) + * - tableShift: 0 (no multi-level indirection) + * - sizeType: BigDdtSizeType (32-bit entries) + * - entries/blocks: max_key + 1 + * - negative/overflow: 0 (not used for tape) + * + * @param ctx Pointer to the aaruformat context. Must not be NULL and must be in write mode. + * The context must have is_tape set to true and tapeDdt hash table populated. + * The ctx->userDataDdtBig array will be allocated and populated by this function. + * The ctx->userDataDdtHeader will be initialized with DDT metadata. + * + * @return Returns one of the following status codes: + * @retval AARUF_STATUS_OK (0) Successfully converted and wrote the tape DDT. This occurs when: + * - The context is valid and is_tape is true + * - Memory allocation for the DDT array succeeds + * - The hash table entries are successfully copied to the array + * - write_single_level_ddt() completes successfully + * - The DDT is written to disk and indexed + * + * @retval AARUF_STATUS_INVALID_CONTEXT (-2) The context is not for tape media. This occurs when: + * - ctx->is_tape is false + * - This function was called for a disk/optical image instead of tape + * + * @retval AARUF_ERROR_NOT_ENOUGH_MEMORY (-9) Memory allocation failed. 
This occurs when: + * - calloc() fails to allocate the userDataDdtBig array + * - Insufficient system memory for (max_key + 1) * 4 bytes + * + * @retval AARUF_ERROR_CANNOT_WRITE_HEADER (-8) Writing the DDT failed. This can occur when: + * - write_single_level_ddt() fails to write the DDT header + * - File I/O errors prevent writing the DDT data + * - Disk full or other storage errors + * - This error is propagated from write_single_level_ddt() + * + * @note This function is only called during image finalization (aaruf_close) for tape images. + * It should not be called for disk or optical media images. + * + * @note Hash Table Iteration: + * - Uses HASH_ITER macro from UTHASH to safely traverse all entries + * - Finds maximum key in first pass to determine array size + * - Copies entries in second pass to populate the array + * - Empty (zero) array slots represent sectors not written to tape + * + * @note Memory Ownership: + * - Allocates ctx->userDataDdtBig which becomes owned by the context + * - The allocated array is freed during context cleanup (not in this function) + * - The original hash table (ctx->tapeDdt) is freed separately during cleanup + * + * @note Single-Level DDT Choice: + * - Tape DDTs always use single-level structure (tableShift = 0) + * - Multi-level DDTs are not used because tape access patterns are typically sparse + * - The hash table already provides efficient sparse storage during write + * - Conversion to dense array only happens once at close time + * + * @note Compression: + * - The actual compression is handled by write_single_level_ddt() + * - Compression type is determined by ctx->compression_enabled flag + * - If enabled, LZMA compression is applied to the DDT array + * - Compression may be disabled if it doesn't reduce size + * + * @warning The function assumes tapeDdt hash table is properly populated. An empty hash table + * will result in a DDT with a single zero entry (max_key = 0, entries = 1). 
+ * + * @warning This function modifies ctx->userDataDdtHeader and ctx->userDataDdtBig. These must + * not be modified by other code during the close operation. + * + * @warning The allocated array size is (max_key + 1), which could be very large if tape sectors + * have high addresses with sparse distribution. Memory usage should be considered. + * + * @see set_ddt_tape() for how entries are added to the hash table during write operations + * @see write_single_level_ddt() for the actual DDT serialization logic + * @see TapeDdtHashEntry for the hash table entry structure + * @internal + */ +static int32_t write_tape_ddt(aaruformatContext *ctx) +{ + if(!ctx->is_tape) return AARUF_STATUS_INVALID_CONTEXT; + + // Traverse the tape DDT uthash and find the biggest key + uint64_t max_key = 0; + TapeDdtHashEntry *entry, *tmp; + HASH_ITER(hh, ctx->tapeDdt, entry, tmp) + if(entry->key > max_key) max_key = entry->key; + + // Initialize context user data DDT header + ctx->userDataDdtHeader.identifier = DeDuplicationTable2; + ctx->userDataDdtHeader.type = UserData; + ctx->userDataDdtHeader.compression = ctx->compression_enabled ? 
Lzma : None; + ctx->userDataDdtHeader.levels = 1; // Single level + ctx->userDataDdtHeader.tableLevel = 0; // Top level + ctx->userDataDdtHeader.previousLevelOffset = 0; // No previous level for single-level DDT + ctx->userDataDdtHeader.negative = 0; + ctx->userDataDdtHeader.overflow = 0; + ctx->userDataDdtHeader.tableShift = 0; // Single level + ctx->userDataDdtHeader.sizeType = BigDdtSizeType; + ctx->userDataDdtHeader.entries = max_key + 1; + ctx->userDataDdtHeader.blocks = max_key + 1; + ctx->userDataDdtHeader.start = 0; + ctx->userDataDdtHeader.length = ctx->userDataDdtHeader.entries * sizeof(uint32_t); + ctx->userDataDdtHeader.cmpLength = ctx->userDataDdtHeader.length; + + // Initialize memory for user data DDT + ctx->userDataDdtBig = calloc(ctx->userDataDdtHeader.entries, sizeof(uint32_t)); + if(ctx->userDataDdtBig == NULL) + { + TRACE("Failed to allocate memory for tape DDT table"); + return AARUF_ERROR_NOT_ENOUGH_MEMORY; + } + + // Populate user data DDT from tape DDT uthash + HASH_ITER(hh, ctx->tapeDdt, entry, tmp) + if(entry->key < ctx->userDataDdtHeader.blocks) ctx->userDataDdtBig[entry->key] = entry->value; + + // Do not repeat code + return write_single_level_ddt(ctx); +} + /** * @brief Finalize any active checksum calculations and append a checksum block. * diff --git a/src/create.c b/src/create.c index 5a31dbe..a3a70b2 100644 --- a/src/create.c +++ b/src/create.c @@ -32,28 +32,119 @@ * Allocates and initializes a new aaruformat context and image file with the specified parameters. * This function sets up all necessary data structures including headers, DDT (deduplication table), * caches, and index entries for writing a new AaruFormat image. It also handles file creation, - * memory allocation, and proper initialization of the writing context. + * memory allocation, and proper initialization of the writing context. 
The function supports both + * block-based media (disks, optical media) and sequential tape media with different initialization + * strategies optimized for each media type. * - * @param filepath Path to the image file to create. - * @param media_type Media type identifier. - * @param sector_size Size of each sector in bytes. - * @param user_sectors Number of user data sectors. - * @param negative_sectors Number of negative sectors. - * @param overflow_sectors Number of overflow sectors. - * @param options String with creation options (parsed for alignment and shift parameters). - * @param application_name Pointer to the application name string. - * @param application_name_length Length of the application name string (must be ≤ AARU_HEADER_APP_NAME_LEN). - * @param application_major_version Major version of the application. - * @param application_minor_version Minor version of the application. + * **Media Type Handling:** + * The function creates different internal structures based on the `is_tape` parameter: + * + * **Block Media (is_tape = false):** + * - Initializes full DDT (Deduplication Table) version 2 for sector-level deduplication + * - Allocates primary DDT table (userDataDdtMini or userDataDdtBig) as a preallocated array + * - Configures multi-level DDT support for large images (≥ 137,922,056 sectors, i.e. 0x8388608) + * - Enables optional deduplication hash map for detecting duplicate sectors + * - Reserves space for DDT at the beginning of the file (after header, block-aligned) + * - Data blocks start after DDT table to maintain sequential layout + * - DDT size is fixed and known upfront based on sector count + * + * **Tape Media (is_tape = true):** + * - Initializes DDT for sector-level deduplication using a different strategy + * - Uses a growing hash table (tapeDdt) instead of a preallocated array + * - Sets ctx->is_tape flag and initializes ctx->tapeDdt to NULL (populated on first write) + * - Data blocks start immediately after the header (block-aligned) + * - 
Hash table grows dynamically as blocks are written + * - Optimized for sequential write patterns typical of tape media + * - Tape file/partition metadata is managed separately via additional hash tables + * - More memory-efficient for tapes with unknown final size + * + * **Initialization Flow:** + * 1. Parse creation options (compression, alignment, deduplication, checksums) + * 2. Allocate and zero-initialize context structure + * 3. Create/open image file in binary write mode + * 4. Initialize AaruFormat header with application and version information + * 5. Set up image metadata and sector size information + * 6. Initialize block and header caches for performance + * 7. Initialize ECC context for Compact Disc support + * 8. Branch based on media type: + * - Block media: Configure DDT structures and calculate offsets with preallocated array + * - Tape media: Set tape flags and initialize for dynamic hash table DDT + * 9. Initialize index entries array for tracking all blocks + * 10. Configure compression, checksums, and deduplication based on options + * 11. Position file pointer at calculated data start position + * + * **DDT Configuration (Block Media Only):** + * The function automatically selects optimal DDT parameters: + * - Single-level DDT (tableShift=0): For images < 137,922,056 sectors (0x8388608) + * - Multi-level DDT (tableShift=22): For images ≥ 137,922,056 sectors (0x8388608) + * - Small entries (16-bit): Default, supports most image sizes efficiently + * - Big entries (32-bit): Reserved for future use with very large images + * + * The DDT offset calculation ensures proper alignment: + * - Primary DDT placed immediately after header (block-aligned) + * - Data blocks positioned after DDT table (block-aligned) + * - Alignment controlled by blockAlignmentShift from options + * + * @param filepath Path to the image file to create. The file will be created if it doesn't exist, + * or overwritten if it does. Must be a valid writable path. 
+ * + * @param media_type Media type identifier (e.g., CompactDisc, DVD, HardDisk, Tape formats). + * This affects how the image is structured and which features are enabled. + * + * @param sector_size Size of each sector/block in bytes. Common values: + * - 512 bytes: Hard disks, floppy disks + * - 2048 bytes: CD-ROM, DVD + * - Variable: Tape media (block size varies by format) + * + * @param user_sectors Number of user data sectors/blocks in the image. This is the main + * data area excluding negative (lead-in) and overflow (lead-out) regions. + * For tape media, this may be an estimate as the final size is often unknown. + * + * @param negative_sectors Number of negative sectors (typically lead-in area for optical media). + * Set to 0 for media types without lead-in areas. Not used for tape media. + * + * @param overflow_sectors Number of overflow sectors (typically lead-out area for optical media). + * Set to 0 for media types without lead-out areas. Not used for tape media. + * + * @param options String with creation options in key=value format, semicolon-separated. 
+ * Supported options: + * - "compress=true|false": Enable/disable LZMA compression + * - "deduplicate=true|false": Enable/disable sector deduplication (all media types) + * - "md5=true|false": Calculate MD5 checksum during write + * - "sha1=true|false": Calculate SHA-1 checksum during write + * - "sha256=true|false": Calculate SHA-256 checksum during write + * - "spamsum=true|false": Calculate SpamSum fuzzy hash during write + * - "blake3=true|false": Calculate BLAKE3 checksum during write + * - "block_alignment=N": Block alignment shift value (default varies) + * - "data_shift=N": Data shift value for DDT granularity + * - "table_shift=N": Table shift for multi-level DDT (-1 for auto, block media only) + * - "dictionary=N": LZMA dictionary size in bytes + * Example: "compress=true;deduplicate=true;md5=true;sha1=true" + * + * @param application_name Pointer to the application name string (UTF-16LE raw bytes). + * This identifies the software that created the image. + * + * @param application_name_length Length of the application name string in bytes. + * Must be ≤ AARU_HEADER_APP_NAME_LEN (64 bytes). + * + * @param application_major_version Major version of the creating application (0-255). + * + * @param application_minor_version Minor version of the creating application (0-255). + * + * @param is_tape Boolean flag indicating tape media type: + * - true: Initialize for tape media (sequential, dynamic hash table DDT, file/partition metadata) + * - false: Initialize for block media (random access, preallocated array DDT) * * @return Returns one of the following: * @retval aaruformatContext* Successfully created and initialized context. 
The returned pointer contains: * - Properly initialized AaruFormat headers and metadata - * - Allocated and configured DDT structures for deduplication + * - For block media: Allocated and configured DDT structures with preallocated arrays + * - For tape media: Tape flags set, DDT initialized as NULL (grows on demand) * - Initialized block and header caches for performance * - Open file stream ready for writing operations * - Index entries array ready for block tracking * - ECC context initialized for Compact Disc support + * - Checksum contexts initialized based on options * * @retval NULL Creation failed. The specific error can be determined by checking errno, which will be set to: * - AARUF_ERROR_NOT_ENOUGH_MEMORY (-9) when memory allocation fails for: @@ -61,19 +152,19 @@ * * Readable sector tags array allocation * * Application version string allocation * * Image version string allocation - * * DDT table allocation (userDataDdtMini or userDataDdtBig) + * * DDT table allocation (userDataDdtMini or userDataDdtBig, block media only) * * Index entries array allocation * - AARUF_ERROR_CANNOT_CREATE_FILE (-19) when file operations fail: * * Unable to open the specified filepath for writing * * File seek operations fail during initialization * * File system errors or permission issues * - AARUF_ERROR_INVALID_APP_NAME_LENGTH (-20) when: - * * application_name_length exceeds AARU_HEADER_APP_NAME_LEN + * * application_name_length exceeds AARU_HEADER_APP_NAME_LEN (64 bytes) * * @note Memory Management: * - The function performs extensive memory allocation for various context structures * - On failure, all previously allocated memory is properly cleaned up - * - The returned context must be freed using appropriate cleanup functions + * - The returned context must be freed using aaruf_close() when finished * * @note File Operations: * - Creates a new file at the specified path (overwrites existing files) @@ -81,32 +172,63 @@ * - Positions the file pointer at the calculated 
data start position * - File alignment is handled based on parsed options * - * @note DDT Initialization: + * @note DDT Initialization (Block Media Only): * - Uses DDT version 2 format with configurable compression and alignment * - Supports both small (16-bit) and big (32-bit) DDT entry sizes * - Calculates optimal table sizes based on sector counts and shift parameters * - All DDT entries are initialized to zero (indicating unallocated sectors) + * - Multi-level DDT is used for images with ≥ 137,922,056 (0x8388608) total sectors + * - Single-level DDT is used for smaller images for efficiency + * - DDT is a fixed-size preallocated array written to file at known offset + * + * @note Tape Media Initialization: + * - Tape images use a dynamic hash table DDT for sector-level deduplication + * - File and partition metadata is managed via separate hash tables + * - ctx->is_tape is set to 1 to indicate tape mode throughout the library + * - ctx->tapeDdt is initialized to NULL and grows dynamically as blocks are written + * - Data blocks can start immediately after header for optimal sequential access + * - The hash table DDT allows for efficient deduplication without knowing final size + * - More memory-efficient for tapes with unpredictable or very large sizes + * - Deduplication hash map may still be used alongside tapeDdt if enabled in options * * @note Options Parsing: * - The options string is parsed to extract block_alignment, data_shift, and table_shift * - These parameters affect memory usage, performance, and file organization * - Invalid options may result in suboptimal performance but won't cause failure + * - Compression and checksums can be enabled independently via options + * + * @note Checksum Initialization: + * - MD5, SHA-1, SHA-256, SpamSum, and BLAKE3 can be calculated during write + * - Checksum contexts are initialized only if requested in options + * - Checksums are computed incrementally as sectors/blocks are written + * - Final checksums are stored in 
checksum block during image finalization * * @warning The created context is in writing mode and expects proper finalization * before closing to ensure index and metadata are written correctly. * * @warning Application name length validation is strict - exceeding the limit will * cause creation failure with AARUF_ERROR_INVALID_APP_NAME_LENGTH. + * + * @warning For tape media, the DDT structure is fundamentally different (hash table vs array). + * The is_tape flag must accurately reflect the media type being created. + * + * @warning The negative_sectors and overflow_sectors parameters are used only for + * block media. For tape media, these parameters are ignored. + * + * @see aaruf_close() for proper context cleanup and image finalization + * @see aaruf_write_sector() for writing sectors to block media images + * @see aaruf_set_tape_file() for defining tape file metadata + * @see aaruf_set_tape_partition() for defining tape partition metadata */ void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32_t sector_size, const uint64_t user_sectors, const uint64_t negative_sectors, const uint64_t overflow_sectors, const char *options, const uint8_t *application_name, const uint8_t application_name_length, - const uint8_t application_major_version, const uint8_t application_minor_version) + const uint8_t application_major_version, const uint8_t application_minor_version, const bool is_tape) { - TRACE("Entering aaruf_create(%s, %u, %u, %llu, %llu, %llu, %s, %s, %u, %u, %u)", filepath, media_type, sector_size, - user_sectors, negative_sectors, overflow_sectors, options, + TRACE("Entering aaruf_create(%s, %u, %u, %llu, %llu, %llu, %s, %s, %u, %u, %u, %d)", filepath, media_type, + sector_size, user_sectors, negative_sectors, overflow_sectors, options, application_name ? 
(const char *)application_name : "NULL", application_name_length, - application_major_version, application_minor_version); + application_major_version, application_minor_version, is_tape); // Parse the options TRACE("Parsing options"); @@ -216,71 +338,95 @@ void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32 ctx->libraryMajorVersion = LIBAARUFORMAT_MAJOR_VERSION; ctx->libraryMinorVersion = LIBAARUFORMAT_MINOR_VERSION; - // Initialize DDT2 - TRACE("Initializing DDT2"); - ctx->inMemoryDdt = true; - ctx->userDataDdtHeader.identifier = DeDuplicationTable2; - ctx->userDataDdtHeader.type = UserData; - ctx->userDataDdtHeader.compression = None; - ctx->userDataDdtHeader.levels = 2; - ctx->userDataDdtHeader.tableLevel = 0; - ctx->userDataDdtHeader.previousLevelOffset = 0; - ctx->userDataDdtHeader.negative = negative_sectors; - ctx->userDataDdtHeader.blocks = user_sectors + overflow_sectors + negative_sectors; - ctx->userDataDdtHeader.overflow = overflow_sectors; - ctx->userDataDdtHeader.start = 0; - ctx->userDataDdtHeader.blockAlignmentShift = parsed_options.block_alignment; - ctx->userDataDdtHeader.dataShift = parsed_options.data_shift; - ctx->userDataDdtHeader.sizeType = 1; - ctx->userDataDdtHeader.entries = ctx->userDataDdtHeader.blocks / (1 << ctx->userDataDdtHeader.tableShift); + if(!is_tape) + { // Initialize DDT2 + TRACE("Initializing DDT2"); + ctx->inMemoryDdt = true; + ctx->userDataDdtHeader.identifier = DeDuplicationTable2; + ctx->userDataDdtHeader.type = UserData; + ctx->userDataDdtHeader.compression = None; + ctx->userDataDdtHeader.tableLevel = 0; + ctx->userDataDdtHeader.previousLevelOffset = 0; + ctx->userDataDdtHeader.negative = negative_sectors; + ctx->userDataDdtHeader.blocks = user_sectors + overflow_sectors + negative_sectors; + ctx->userDataDdtHeader.overflow = overflow_sectors; + ctx->userDataDdtHeader.start = 0; + ctx->userDataDdtHeader.blockAlignmentShift = parsed_options.block_alignment; + ctx->userDataDdtHeader.dataShift 
= parsed_options.data_shift; + ctx->userDataDdtHeader.sizeType = 1; + ctx->userDataDdtHeader.entries = ctx->userDataDdtHeader.blocks / (1 << ctx->userDataDdtHeader.tableShift); - if(parsed_options.table_shift == -1) - { - uint64_t total_sectors = user_sectors + overflow_sectors + negative_sectors; + if(parsed_options.table_shift == -1) + { + const uint64_t total_sectors = user_sectors + overflow_sectors + negative_sectors; - if(total_sectors < 0x8388608ULL) - ctx->userDataDdtHeader.tableShift = 0; + if(total_sectors < 0x8388608ULL) + { + ctx->userDataDdtHeader.levels = 1; + ctx->userDataDdtHeader.tableShift = 0; + } + else + { + ctx->userDataDdtHeader.levels = 2; + ctx->userDataDdtHeader.tableShift = 22; + } + } else - ctx->userDataDdtHeader.tableShift = 22; + { + ctx->userDataDdtHeader.levels = parsed_options.table_shift > 0 ? 2 : 1; + ctx->userDataDdtHeader.tableShift = parsed_options.table_shift; + } + + if(ctx->userDataDdtHeader.blocks % (1 << ctx->userDataDdtHeader.tableShift) != 0) + ctx->userDataDdtHeader.entries++; + + TRACE("Initializing primary/single DDT"); + if(ctx->userDataDdtHeader.sizeType == SmallDdtSizeType) + ctx->userDataDdtMini = + (uint16_t *)calloc(ctx->userDataDdtHeader.entries, sizeof(uint16_t)); // All entries to zero + else if(ctx->userDataDdtHeader.sizeType == BigDdtSizeType) + ctx->userDataDdtBig = + (uint32_t *)calloc(ctx->userDataDdtHeader.entries, sizeof(uint32_t)); // All entries to zero + + // Set the primary DDT offset (just after the header, block aligned) + ctx->primaryDdtOffset = sizeof(AaruHeaderV2); // Start just after the header + const uint64_t alignmentMask = (1ULL << ctx->userDataDdtHeader.blockAlignmentShift) - 1; + ctx->primaryDdtOffset = ctx->primaryDdtOffset + alignmentMask & ~alignmentMask; + + TRACE("Primary DDT will be placed at offset %" PRIu64, ctx->primaryDdtOffset); + + // Calculate size of primary DDT table + const uint64_t primaryTableSize = ctx->userDataDdtHeader.sizeType == SmallDdtSizeType + ? 
ctx->userDataDdtHeader.entries * sizeof(uint16_t) + : ctx->userDataDdtHeader.entries * sizeof(uint32_t); + + // Calculate where data blocks can start (after primary DDT + header) + if(ctx->userDataDdtHeader.tableShift > 0) + { + const uint64_t dataStartPosition = ctx->primaryDdtOffset + sizeof(DdtHeader2) + primaryTableSize; + ctx->nextBlockPosition = dataStartPosition + alignmentMask & ~alignmentMask; + } + else + ctx->nextBlockPosition = ctx->primaryDdtOffset; // Single-level DDT can start anywhere } else - ctx->userDataDdtHeader.tableShift = parsed_options.table_shift; - - if(ctx->userDataDdtHeader.blocks % (1 << ctx->userDataDdtHeader.tableShift) != 0) ctx->userDataDdtHeader.entries++; - - TRACE("Initializing primary/single DDT"); - if(ctx->userDataDdtHeader.sizeType == SmallDdtSizeType) - ctx->userDataDdtMini = - (uint16_t *)calloc(ctx->userDataDdtHeader.entries, sizeof(uint16_t)); // All entries to zero - else if(ctx->userDataDdtHeader.sizeType == BigDdtSizeType) - ctx->userDataDdtBig = - (uint32_t *)calloc(ctx->userDataDdtHeader.entries, sizeof(uint32_t)); // All entries to zero - - // Set the primary DDT offset (just after the header, block aligned) - ctx->primaryDdtOffset = sizeof(AaruHeaderV2); // Start just after the header - const uint64_t alignmentMask = (1ULL << ctx->userDataDdtHeader.blockAlignmentShift) - 1; - ctx->primaryDdtOffset = ctx->primaryDdtOffset + alignmentMask & ~alignmentMask; - - TRACE("Primary DDT will be placed at offset %" PRIu64, ctx->primaryDdtOffset); - - // Calculate size of primary DDT table - const uint64_t primaryTableSize = ctx->userDataDdtHeader.sizeType == SmallDdtSizeType - ? 
ctx->userDataDdtHeader.entries * sizeof(uint16_t) - : ctx->userDataDdtHeader.entries * sizeof(uint32_t); - - // Calculate where data blocks can start (after primary DDT + header) - if(ctx->userDataDdtHeader.tableShift > 0) { - const uint64_t dataStartPosition = ctx->primaryDdtOffset + sizeof(DdtHeader2) + primaryTableSize; - ctx->nextBlockPosition = dataStartPosition + alignmentMask & ~alignmentMask; + // Fill needed values + ctx->userDataDdtHeader.blockAlignmentShift = parsed_options.block_alignment; + ctx->userDataDdtHeader.dataShift = parsed_options.data_shift; + + // Calculate aligned next block position + const uint64_t alignmentMask = (1ULL << parsed_options.block_alignment) - 1; + ctx->nextBlockPosition = sizeof(AaruHeaderV2); // Start just after the header + ctx->nextBlockPosition = ctx->nextBlockPosition + alignmentMask & ~alignmentMask; + ctx->is_tape = 1; + ctx->tapeDdt = NULL; } - else - ctx->nextBlockPosition = ctx->primaryDdtOffset; // Single-level DDT can start anywhere TRACE("Data blocks will start at position %" PRIu64, ctx->nextBlockPosition); // Position file pointer at the data start position - if(fseek(ctx->imageStream, (long)ctx->nextBlockPosition, SEEK_SET) != 0) + if(fseek(ctx->imageStream, ctx->nextBlockPosition, SEEK_SET) != 0) { FATAL("Could not seek to data start position"); free(ctx->readableSectorTags); diff --git a/src/ddt/ddt_v2.c b/src/ddt/ddt_v2.c index 02eaedd..d9c3969 100644 --- a/src/ddt/ddt_v2.c +++ b/src/ddt/ddt_v2.c @@ -1796,3 +1796,180 @@ bool set_ddt_multi_level_v2(aaruformatContext *ctx, uint64_t sector_address, boo TRACE("Exiting set_ddt_multi_level_v2() = true"); return true; } + +/** + * @brief Sets a DDT entry for tape media using a hash-based lookup table. + * + * This function is specifically designed for tape media images where sectors are accessed + * non-sequentially and the traditional DDT array structure is inefficient. 
Instead of using + * a large contiguous array, it uses a hash table (UTHASH) to store only the sectors that + * have been written, providing sparse storage for tape media. + * + * The function performs the following operations: + * 1. Validates the context and verifies it's a tape image + * 2. Constructs a DDT entry encoding offset, block alignment, and sector status + * 3. Creates a hash table entry with the sector address as the key + * 4. Inserts or replaces the entry in the tape DDT hash table + * + * **DDT Entry Format:** + * The DDT entry is a 64-bit value with the following bit layout: + * ``` + * Bits 0-(dataShift-1): Sector offset within block (masked by dataShift) + * Bits dataShift-27: Block index (block_offset >> blockAlignmentShift) + * Bits 28-31: Sector status (4 bits for status flags) + * Bits 32-63: Unused (reserved for future use) + * ``` + * + * **Hash Table Management:** + * Uses HASH_REPLACE macro from UTHASH library which: + * - Adds new entries if the key (sector_address) doesn't exist + * - Replaces existing entries if the key is found (the old entry is removed from the table and returned to the caller, which must free it) + * - Maintains O(1) average lookup time for sector address resolution + * + * **Overflow Detection:** + * The function checks if the constructed DDT entry exceeds 28 bits (0xFFFFFFF). + * This limit ensures the sector status can fit in the upper 4 bits while leaving + * room for future extensions in the upper 32 bits. + * + * @param ctx Pointer to the aaruformat context. Must not be NULL. + * The context must have a valid imageStream and is_tape must be true. + * The ctx->tapeDdt hash table will be updated with the new entry. + * The ctx->userDataDdtHeader contains alignment and shift parameters. + * + * @param sector_address Logical sector address on the tape to set. This serves as + * the unique key in the hash table. Multiple calls with the + * same sector_address will replace the previous entry. 
+ * + * @param offset Byte offset within the aligned block where the sector data begins. + * This value is masked by (1 << dataShift) - 1 to extract only the + * lower bits representing the offset within the block. + * + * @param block_offset Absolute byte offset in the image file where the data block starts. + * This is right-shifted by blockAlignmentShift to get the block index, + * which is stored in the DDT entry's middle bits. + * + * @param sector_status Status flags for the sector (4 bits). Common values include: + * - 0x0 (SectorStatusNotDumped): Sector not yet acquired during image dumping + * - 0x1 (SectorStatusDumped): Sector successfully dumped without error + * - 0x2 (SectorStatusErrored): Error during dumping; data may be incomplete or corrupt + * - 0x3 (SectorStatusMode1Correct): Valid MODE 1 data with regenerable suffix/prefix + * - 0x4 (SectorStatusMode2Form1Ok): Suffix verified/regenerable for MODE 2 Form 1 + * - 0x5 (SectorStatusMode2Form2Ok): Suffix matches MODE 2 Form 2 with valid CRC + * - 0x6 (SectorStatusMode2Form2NoCrc): Suffix matches MODE 2 Form 2 but CRC empty/missing + * - 0x7 (SectorStatusTwin): Pointer references a twin sector table + * - 0x8 (SectorStatusUnrecorded): Sector physically unrecorded; repeated reads non-deterministic + * - 0x9 (SectorStatusEncrypted): Content encrypted and stored encrypted in image + * - 0xA (SectorStatusUnencrypted): Content originally encrypted but stored decrypted in image + * See SectorStatus enum for complete list of defined values + * + * @param ddt_entry Pointer to a 64-bit value that will receive the constructed DDT entry. + * - If *ddt_entry is 0: A new entry is constructed from the provided parameters + * - If *ddt_entry is non-zero: The existing value is used directly + * The constructed or provided value is stored in the hash table. + * + * @return Returns one of the following status codes: + * @retval true Successfully created and inserted the DDT entry. 
This occurs when: + * - The context and image stream are valid + * - The image is confirmed to be a tape image (is_tape == true) + * - The DDT entry fits within the 28-bit limit (<= 0xFFFFFFF) + * - Memory allocation for the hash entry succeeds + * - The entry is successfully inserted or replaced in the hash table + * + * @retval false Failed to set the DDT entry. This can happen when: + * - ctx is NULL or ctx->imageStream is NULL (invalid context) + * - ctx->is_tape is false (wrong function called for non-tape media) + * - The DDT entry exceeds 0xFFFFFFF (media too large for big DDT) + * - Memory allocation fails for the new hash table entry (out of memory) + * + * @note This function is only for tape images. For disk images, use set_ddt_single_level_v2() + * or set_ddt_multi_level_v2() instead, which use array-based DDT structures. + * + * @note Memory Management: + * - Allocates a new TapeDdtHashEntry for each sector + * - Entries displaced by HASH_REPLACE are freed by this function (the macro only hands them back) + * - All hash entries remain in context until cleanup + * - The tapeDdt hash table must be freed during context destruction + * + * @note Tape Media Characteristics: + * - Tape sectors are typically accessed sequentially during streaming + * - File marks and partition boundaries create sparse address spaces + * - Hash table provides efficient storage for sparse sector maps + * - Supports variable block sizes common in tape formats + * + * @note Error Handling: + * - All errors are logged with FATAL level messages + * - Function returns false immediately on any error condition + * - TRACE logging marks entry/exit points for debugging + * - The tapeDdt hash table is left unmodified on failure + * + * @warning The DDT entry overflow check at 0xFFFFFFF (28 bits) is critical. Exceeding + * this limit indicates the media is too large to fit in the current DDT format, + * and continuing would cause data corruption. + * + * @warning This function modifies the shared tapeDdt hash table.
In multi-threaded + * environments, external synchronization is required to prevent race conditions. + * + * @see TapeDdtHashEntry for the hash table entry structure + * @see set_ddt_entry_v2() for the main DDT entry point that dispatches to this function + * @see get_ddt_tape() for retrieving tape DDT entries from the hash table + */ +bool set_ddt_tape(aaruformatContext *ctx, uint64_t sector_address, const uint64_t offset, const uint64_t block_offset, + const uint8_t sector_status, uint64_t *ddt_entry) +{ + TRACE("Entering set_ddt_tape(%p, %" PRIu64 ", %llu, %llu, %d)", ctx, sector_address, offset, block_offset, + sector_status); + + // Check if the context and image stream are valid + if(ctx == NULL || ctx->imageStream == NULL) + { + FATAL("Invalid context or image stream."); + TRACE("Exiting set_ddt_tape() = false"); + return false; + } + + // Should not really be here + if(!ctx->is_tape) + { + FATAL("Image is not tape, wrong function called."); + TRACE("Exiting set_ddt_tape() = false"); + return false; + } + + if(*ddt_entry == 0) + { + const uint64_t block_index = block_offset >> ctx->userDataDdtHeader.blockAlignmentShift; + *ddt_entry = offset & (1ULL << ctx->userDataDdtHeader.dataShift) - 1 | block_index + << ctx->userDataDdtHeader.dataShift; + // Overflow detection for DDT entry + if(*ddt_entry > 0xFFFFFFF) + { + FATAL("DDT overflow: media does not fit in big DDT"); + TRACE("Exiting set_ddt_tape() = false"); + return false; + } + + *ddt_entry |= (uint64_t)sector_status << 28; + } + + // Create DDT hash entry + TapeDdtHashEntry *new_entry = calloc(1, sizeof(TapeDdtHashEntry)); + TapeDdtHashEntry *old_entry = NULL; + if(new_entry == NULL) + { + FATAL("Cannot allocate memory for new tape DDT hash entry."); + TRACE("Exiting set_ddt_tape() = false"); + return false; + } + + TRACE("Setting tape DDT entry %d to %u", sector_address, (uint32_t)*ddt_entry); + + new_entry->key = sector_address; + new_entry->value = *ddt_entry; + + // Insert entry into tape DDT + 
HASH_REPLACE(hh, ctx->tapeDdt, key, sizeof(uint64_t), new_entry, old_entry); + if(old_entry) free(old_entry); + + TRACE("Exiting set_ddt_tape() = true"); + return true; +}