mirror of
https://github.com/aaru-dps/libaaruformat.git
synced 2025-12-16 19:24:40 +00:00
Add support for tape DDT management with hash table implementation
This commit is contained in:
@@ -137,6 +137,13 @@ typedef struct TapePartitionHashEntry
|
||||
UT_hash_handle hh; ///< UTHASH handle
|
||||
} TapePartitionHashEntry;
|
||||
|
||||
/** \struct TapeDdtHashEntry
 * \brief One node of the sparse tape DDT kept as a uthash table rooted at aaruformatContext::tapeDdt.
 *
 * Each node maps a tape sector address to its 32-bit DDT entry. write_tape_ddt() later walks the table and
 * copies every \c value into slot \c key of a dense uint32_t array before the DDT is serialized.
 */
typedef struct TapeDdtHashEntry
|
||||
{
|
||||
uint64_t key; ///< Hash key: sector address this entry describes
|
||||
uint32_t value; ///< 32-bit DDT entry stored for that sector address
|
||||
UT_hash_handle hh; ///< UTHASH bookkeeping handle (field name used by the HASH_* macros)
|
||||
} TapeDdtHashEntry;
|
||||
|
||||
/** \struct aaruformatContext
|
||||
* \brief Master context representing an open or in‑creation Aaru image.
|
||||
*
|
||||
@@ -265,6 +272,8 @@ typedef struct aaruformatContext
|
||||
|
||||
tapeFileHashEntry *tapeFiles; ///< Hash table root for tape files
|
||||
TapePartitionHashEntry *tapePartitions; ///< Hash table root for tape partitions
|
||||
bool is_tape; ///< True if the image is a tape image
|
||||
TapeDdtHashEntry *tapeDdt; ///< Hash table root for tape DDT entries
|
||||
} aaruformatContext;
|
||||
|
||||
/** \struct DumpHardwareEntriesWithData
|
||||
|
||||
@@ -71,7 +71,7 @@ AARU_EXPORT void *AARU_CALL aaruf_create(const char *filepath, uint32_t media_ty
|
||||
uint64_t user_sectors, uint64_t negative_sectors, uint64_t overflow_sectors,
|
||||
const char *options, const uint8_t *application_name,
|
||||
uint8_t application_name_length, uint8_t application_major_version,
|
||||
uint8_t application_minor_version);
|
||||
uint8_t application_minor_version, bool is_tape);
|
||||
|
||||
AARU_EXPORT int AARU_CALL aaruf_close(void *context);
|
||||
|
||||
|
||||
@@ -54,6 +54,8 @@ bool set_ddt_single_level_v2(aaruformatContext *ctx, uint64_t sector_addres
|
||||
uint64_t block_offset, uint8_t sector_status, uint64_t *ddt_entry);
|
||||
bool set_ddt_multi_level_v2(aaruformatContext *ctx, uint64_t sector_address, bool negative, uint64_t offset,
|
||||
uint64_t block_offset, uint8_t sector_status, uint64_t *ddt_entry);
|
||||
bool set_ddt_tape(aaruformatContext *ctx, uint64_t sector_address, uint64_t offset, uint64_t block_offset,
|
||||
uint8_t sector_status, uint64_t *ddt_entry);
|
||||
aaru_options parse_options(const char *options);
|
||||
uint64_t get_filetime_uint64();
|
||||
int32_t aaruf_close_current_block(aaruformatContext *ctx);
|
||||
|
||||
150
src/close.c
150
src/close.c
@@ -536,6 +536,156 @@ static int32_t write_single_level_ddt(aaruformatContext *ctx)
|
||||
return AARUF_STATUS_OK;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Converts tape DDT hash table to array format and writes it as a single-level DDT.
|
||||
*
|
||||
* This function is specifically designed for tape media images where sectors have been tracked
|
||||
* using a sparse hash table (UTHASH) during write operations. It converts the hash-based tape
|
||||
* DDT into a traditional array-based DDT structure suitable for serialization to disk. The
|
||||
* function performs a complete transformation from the sparse hash representation to a dense
|
||||
* array representation, then delegates the actual write operation to write_single_level_ddt().
|
||||
*
|
||||
* The conversion process involves:
|
||||
* 1. Validating the context is for tape media
|
||||
* 2. Scanning the hash table to determine the maximum sector address (key)
|
||||
* 3. Allocating a contiguous array large enough to hold all entries up to max_key
|
||||
* 4. Populating the array by copying hash table entries to their corresponding indices
|
||||
* 5. Initializing a DDT v2 header with appropriate metadata
|
||||
* 6. Calling write_single_level_ddt() to serialize the DDT to disk
|
||||
*
|
||||
* **Hash Table to Array Conversion:**
|
||||
* The tape DDT hash table uses sector addresses as keys and DDT entries as values. This function
|
||||
* creates a zero-initialized array of size (max_key + 1) and copies each hash entry to
|
||||
* array[entry->key] = entry->value. Sectors not present in the hash table remain as zero entries
|
||||
* in the array, which indicates SectorStatusNotDumped in the DDT format.
|
||||
*
|
||||
* **Memory Allocation:**
|
||||
* The function always uses BigDdtSizeType (32-bit entries) for tape DDTs, allocating
|
||||
* (max_key + 1) * sizeof(uint32_t) bytes. This provides sufficient capacity for the 28-bit
|
||||
* data + 4-bit status encoding used in tape DDT entries.
|
||||
*
|
||||
* **DDT Header Configuration:**
|
||||
* The userDataDdtHeader is configured for a single-level DDT v2 structure:
|
||||
* - identifier: DeDuplicationTable2
|
||||
* - type: UserData
|
||||
* - compression: Determined by ctx->compression_enabled (Lzma or None)
|
||||
* - levels: 1 (single-level structure)
|
||||
* - tableLevel: 0 (top-level table)
|
||||
* - tableShift: 0 (no multi-level indirection)
|
||||
* - sizeType: BigDdtSizeType (32-bit entries)
|
||||
* - entries/blocks: max_key + 1
|
||||
* - negative/overflow: 0 (not used for tape)
|
||||
*
|
||||
* @param ctx Pointer to the aaruformat context. Must not be NULL and must be in write mode.
|
||||
* The context must have is_tape set to true and tapeDdt hash table populated.
|
||||
* The ctx->userDataDdtBig array will be allocated and populated by this function.
|
||||
* The ctx->userDataDdtHeader will be initialized with DDT metadata.
|
||||
*
|
||||
* @return Returns one of the following status codes:
|
||||
* @retval AARUF_STATUS_OK (0) Successfully converted and wrote the tape DDT. This occurs when:
|
||||
* - The context is valid and is_tape is true
|
||||
* - Memory allocation for the DDT array succeeds
|
||||
* - The hash table entries are successfully copied to the array
|
||||
* - write_single_level_ddt() completes successfully
|
||||
* - The DDT is written to disk and indexed
|
||||
*
|
||||
* @retval AARUF_STATUS_INVALID_CONTEXT (-2) The context is not for tape media. This occurs when:
|
||||
* - ctx->is_tape is false
|
||||
* - This function was called for a disk/optical image instead of tape
|
||||
*
|
||||
* @retval AARUF_ERROR_NOT_ENOUGH_MEMORY (-9) Memory allocation failed. This occurs when:
|
||||
* - calloc() fails to allocate the userDataDdtBig array
|
||||
* - Insufficient system memory for (max_key + 1) * 4 bytes
|
||||
*
|
||||
* @retval AARUF_ERROR_CANNOT_WRITE_HEADER (-8) Writing the DDT failed. This can occur when:
|
||||
* - write_single_level_ddt() fails to write the DDT header
|
||||
* - File I/O errors prevent writing the DDT data
|
||||
* - Disk full or other storage errors
|
||||
* - This error is propagated from write_single_level_ddt()
|
||||
*
|
||||
* @note This function is only called during image finalization (aaruf_close) for tape images.
|
||||
* It should not be called for disk or optical media images.
|
||||
*
|
||||
* @note Hash Table Iteration:
|
||||
* - Uses HASH_ITER macro from UTHASH to safely traverse all entries
|
||||
* - Finds maximum key in first pass to determine array size
|
||||
* - Copies entries in second pass to populate the array
|
||||
* - Empty (zero) array slots represent sectors not written to tape
|
||||
*
|
||||
* @note Memory Ownership:
|
||||
* - Allocates ctx->userDataDdtBig which becomes owned by the context
|
||||
* - The allocated array is freed during context cleanup (not in this function)
|
||||
* - The original hash table (ctx->tapeDdt) is freed separately during cleanup
|
||||
*
|
||||
* @note Single-Level DDT Choice:
|
||||
* - Tape DDTs always use single-level structure (tableShift = 0)
|
||||
* - Multi-level DDTs are not used because tape access patterns are typically sparse
|
||||
* - The hash table already provides efficient sparse storage during write
|
||||
* - Conversion to dense array only happens once at close time
|
||||
*
|
||||
* @note Compression:
|
||||
* - The actual compression is handled by write_single_level_ddt()
|
||||
* - Compression type is determined by ctx->compression_enabled flag
|
||||
* - If enabled, LZMA compression is applied to the DDT array
|
||||
* - Compression may be disabled if it doesn't reduce size
|
||||
*
|
||||
* @warning The function assumes tapeDdt hash table is properly populated. An empty hash table
|
||||
* will result in a DDT with a single zero entry (max_key = 0, entries = 1).
|
||||
*
|
||||
* @warning This function modifies ctx->userDataDdtHeader and ctx->userDataDdtBig. These must
|
||||
* not be modified by other code during the close operation.
|
||||
*
|
||||
* @warning The allocated array size is (max_key + 1), which could be very large if tape sectors
|
||||
* have high addresses with sparse distribution. Memory usage should be considered.
|
||||
*
|
||||
* @see set_ddt_tape() for how entries are added to the hash table during write operations
|
||||
* @see write_single_level_ddt() for the actual DDT serialization logic
|
||||
* @see TapeDdtHashEntry for the hash table entry structure
|
||||
* @internal
|
||||
*/
|
||||
static int32_t write_tape_ddt(aaruformatContext *ctx)
{
    // Only tape images keep their DDT in the ctx->tapeDdt hash table.
    if(!ctx->is_tape) return AARUF_STATUS_INVALID_CONTEXT;

    // First pass: find the highest sector address present in the hash table.
    // An empty table leaves max_key at 0, which produces a one-entry (all zero) DDT.
    uint64_t          max_key = 0;
    TapeDdtHashEntry *entry, *tmp;
    HASH_ITER(hh, ctx->tapeDdt, entry, tmp)
    {
        if(entry->key > max_key) max_key = entry->key;
    }

    // The dense table needs (max_key + 1) slots of 4 bytes each. Refuse keys for which that element count or its
    // byte size does not fit in size_t: otherwise max_key + 1 could wrap around to 0, or calloc() could receive a
    // truncated count and hand back a buffer smaller than the indices written in the second pass.
    if(max_key >= SIZE_MAX / sizeof(uint32_t))
    {
        TRACE("Tape DDT is too large to be allocated in memory");
        return AARUF_ERROR_NOT_ENOUGH_MEMORY;
    }

    // Describe the table as a single-level, 32-bit-entry DDT v2 covering addresses 0..max_key.
    ctx->userDataDdtHeader.identifier          = DeDuplicationTable2;
    ctx->userDataDdtHeader.type                = UserData;
    ctx->userDataDdtHeader.compression         = ctx->compression_enabled ? Lzma : None;
    ctx->userDataDdtHeader.levels              = 1; // Single level
    ctx->userDataDdtHeader.tableLevel          = 0; // Top level
    ctx->userDataDdtHeader.previousLevelOffset = 0; // No previous level for single-level DDT
    ctx->userDataDdtHeader.negative            = 0;
    ctx->userDataDdtHeader.overflow            = 0;
    ctx->userDataDdtHeader.tableShift          = 0; // Single level
    ctx->userDataDdtHeader.sizeType            = BigDdtSizeType;
    ctx->userDataDdtHeader.entries             = max_key + 1;
    ctx->userDataDdtHeader.blocks              = max_key + 1;
    ctx->userDataDdtHeader.start               = 0;
    ctx->userDataDdtHeader.length              = ctx->userDataDdtHeader.entries * sizeof(uint32_t);
    ctx->userDataDdtHeader.cmpLength           = ctx->userDataDdtHeader.length;

    // Zero-filled allocation: addresses that never appear in the hash table keep a 0 entry.
    ctx->userDataDdtBig = calloc(ctx->userDataDdtHeader.entries, sizeof(uint32_t));
    if(ctx->userDataDdtBig == NULL)
    {
        TRACE("Failed to allocate memory for tape DDT table");
        return AARUF_ERROR_NOT_ENOUGH_MEMORY;
    }

    // Second pass: scatter every hash entry into its slot. The bound check is kept as a safety net in case the
    // header field is narrower than the 64-bit key.
    HASH_ITER(hh, ctx->tapeDdt, entry, tmp)
    {
        if(entry->key < ctx->userDataDdtHeader.blocks) ctx->userDataDdtBig[entry->key] = entry->value;
    }

    // Serialization (compression, writing, indexing) is shared with the regular single-level path.
    return write_single_level_ddt(ctx);
}
|
||||
|
||||
/**
|
||||
* @brief Finalize any active checksum calculations and append a checksum block.
|
||||
*
|
||||
|
||||
202
src/create.c
202
src/create.c
@@ -32,28 +32,119 @@
|
||||
* Allocates and initializes a new aaruformat context and image file with the specified parameters.
|
||||
* This function sets up all necessary data structures including headers, DDT (deduplication table),
|
||||
* caches, and index entries for writing a new AaruFormat image. It also handles file creation,
|
||||
* memory allocation, and proper initialization of the writing context.
|
||||
* memory allocation, and proper initialization of the writing context. The function supports both
|
||||
* block-based media (disks, optical media) and sequential tape media with different initialization
|
||||
* strategies optimized for each media type.
|
||||
*
|
||||
* @param filepath Path to the image file to create.
|
||||
* @param media_type Media type identifier.
|
||||
* @param sector_size Size of each sector in bytes.
|
||||
* @param user_sectors Number of user data sectors.
|
||||
* @param negative_sectors Number of negative sectors.
|
||||
* @param overflow_sectors Number of overflow sectors.
|
||||
* @param options String with creation options (parsed for alignment and shift parameters).
|
||||
* @param application_name Pointer to the application name string.
|
||||
* @param application_name_length Length of the application name string (must be ≤ AARU_HEADER_APP_NAME_LEN).
|
||||
* @param application_major_version Major version of the application.
|
||||
* @param application_minor_version Minor version of the application.
|
||||
* **Media Type Handling:**
|
||||
* The function creates different internal structures based on the `is_tape` parameter:
|
||||
*
|
||||
* **Block Media (is_tape = false):**
|
||||
* - Initializes full DDT (Deduplication Table) version 2 for sector-level deduplication
|
||||
* - Allocates primary DDT table (userDataDdtMini or userDataDdtBig) as a preallocated array
|
||||
* - Configures multi-level DDT support for large images (≥ 137,922,056 sectors, the 0x8388608 threshold used by the code)
|
||||
* - Enables optional deduplication hash map for detecting duplicate sectors
|
||||
* - Reserves space for DDT at the beginning of the file (after header, block-aligned)
|
||||
* - Data blocks start after DDT table to maintain sequential layout
|
||||
* - DDT size is fixed and known upfront based on sector count
|
||||
*
|
||||
* **Tape Media (is_tape = true):**
|
||||
* - Initializes DDT for sector-level deduplication using a different strategy
|
||||
* - Uses a growing hash table (tapeDdt) instead of a preallocated array
|
||||
* - Sets ctx->is_tape flag and initializes ctx->tapeDdt to NULL (populated on first write)
|
||||
* - Data blocks start immediately after the header (block-aligned)
|
||||
* - Hash table grows dynamically as blocks are written
|
||||
* - Optimized for sequential write patterns typical of tape media
|
||||
* - Tape file/partition metadata is managed separately via additional hash tables
|
||||
* - More memory-efficient for tapes with unknown final size
|
||||
*
|
||||
* **Initialization Flow:**
|
||||
* 1. Parse creation options (compression, alignment, deduplication, checksums)
|
||||
* 2. Allocate and zero-initialize context structure
|
||||
* 3. Create/open image file in binary write mode
|
||||
* 4. Initialize AaruFormat header with application and version information
|
||||
* 5. Set up image metadata and sector size information
|
||||
* 6. Initialize block and header caches for performance
|
||||
* 7. Initialize ECC context for Compact Disc support
|
||||
* 8. Branch based on media type:
|
||||
* - Block media: Configure DDT structures and calculate offsets with preallocated array
|
||||
* - Tape media: Set tape flags and initialize for dynamic hash table DDT
|
||||
* 9. Initialize index entries array for tracking all blocks
|
||||
* 10. Configure compression, checksums, and deduplication based on options
|
||||
* 11. Position file pointer at calculated data start position
|
||||
*
|
||||
* **DDT Configuration (Block Media Only):**
|
||||
* The function automatically selects optimal DDT parameters:
|
||||
* - Single-level DDT (tableShift=0): For images < 137,922,056 sectors (0x8388608)
|
||||
* - Multi-level DDT (tableShift=22): For images ≥ 137,922,056 sectors (0x8388608)
|
||||
* - Small entries (16-bit): Default, supports most image sizes efficiently
|
||||
* - Big entries (32-bit): Reserved for future use with very large images
|
||||
*
|
||||
* The DDT offset calculation ensures proper alignment:
|
||||
* - Primary DDT placed immediately after header (block-aligned)
|
||||
* - Data blocks positioned after DDT table (block-aligned)
|
||||
* - Alignment controlled by blockAlignmentShift from options
|
||||
*
|
||||
* @param filepath Path to the image file to create. The file will be created if it doesn't exist,
|
||||
* or overwritten if it does. Must be a valid writable path.
|
||||
*
|
||||
* @param media_type Media type identifier (e.g., CompactDisc, DVD, HardDisk, Tape formats).
|
||||
* This affects how the image is structured and which features are enabled.
|
||||
*
|
||||
* @param sector_size Size of each sector/block in bytes. Common values:
|
||||
* - 512 bytes: Hard disks, floppy disks
|
||||
* - 2048 bytes: CD-ROM, DVD
|
||||
* - Variable: Tape media (block size varies by format)
|
||||
*
|
||||
* @param user_sectors Number of user data sectors/blocks in the image. This is the main
|
||||
* data area excluding negative (lead-in) and overflow (lead-out) regions.
|
||||
* For tape media, this may be an estimate as the final size is often unknown.
|
||||
*
|
||||
* @param negative_sectors Number of negative sectors (typically lead-in area for optical media).
|
||||
* Set to 0 for media types without lead-in areas. Not used for tape media.
|
||||
*
|
||||
* @param overflow_sectors Number of overflow sectors (typically lead-out area for optical media).
|
||||
* Set to 0 for media types without lead-out areas. Not used for tape media.
|
||||
*
|
||||
* @param options String with creation options in key=value format, semicolon-separated.
|
||||
* Supported options:
|
||||
* - "compress=true|false": Enable/disable LZMA compression
|
||||
* - "deduplicate=true|false": Enable/disable sector deduplication (all media types)
|
||||
* - "md5=true|false": Calculate MD5 checksum during write
|
||||
* - "sha1=true|false": Calculate SHA-1 checksum during write
|
||||
* - "sha256=true|false": Calculate SHA-256 checksum during write
|
||||
* - "spamsum=true|false": Calculate SpamSum fuzzy hash during write
|
||||
* - "blake3=true|false": Calculate BLAKE3 checksum during write
|
||||
* - "block_alignment=N": Block alignment shift value (default varies)
|
||||
* - "data_shift=N": Data shift value for DDT granularity
|
||||
* - "table_shift=N": Table shift for multi-level DDT (-1 for auto, block media only)
|
||||
* - "dictionary=N": LZMA dictionary size in bytes
|
||||
* Example: "compress=true;deduplicate=true;md5=true;sha1=true"
|
||||
*
|
||||
* @param application_name Pointer to the application name string (UTF-16LE raw bytes).
|
||||
* This identifies the software that created the image.
|
||||
*
|
||||
* @param application_name_length Length of the application name string in bytes.
|
||||
* Must be ≤ AARU_HEADER_APP_NAME_LEN (64 bytes).
|
||||
*
|
||||
* @param application_major_version Major version of the creating application (0-255).
|
||||
*
|
||||
* @param application_minor_version Minor version of the creating application (0-255).
|
||||
*
|
||||
* @param is_tape Boolean flag indicating tape media type:
|
||||
* - true: Initialize for tape media (sequential, dynamic hash table DDT, file/partition metadata)
|
||||
* - false: Initialize for block media (random access, preallocated array DDT)
|
||||
*
|
||||
* @return Returns one of the following:
|
||||
* @retval aaruformatContext* Successfully created and initialized context. The returned pointer contains:
|
||||
* - Properly initialized AaruFormat headers and metadata
|
||||
* - Allocated and configured DDT structures for deduplication
|
||||
* - For block media: Allocated and configured DDT structures with preallocated arrays
|
||||
* - For tape media: Tape flags set, DDT initialized as NULL (grows on demand)
|
||||
* - Initialized block and header caches for performance
|
||||
* - Open file stream ready for writing operations
|
||||
* - Index entries array ready for block tracking
|
||||
* - ECC context initialized for Compact Disc support
|
||||
* - Checksum contexts initialized based on options
|
||||
*
|
||||
* @retval NULL Creation failed. The specific error can be determined by checking errno, which will be set to:
|
||||
* - AARUF_ERROR_NOT_ENOUGH_MEMORY (-9) when memory allocation fails for:
|
||||
@@ -61,19 +152,19 @@
|
||||
* * Readable sector tags array allocation
|
||||
* * Application version string allocation
|
||||
* * Image version string allocation
|
||||
* * DDT table allocation (userDataDdtMini or userDataDdtBig)
|
||||
* * DDT table allocation (userDataDdtMini or userDataDdtBig, block media only)
|
||||
* * Index entries array allocation
|
||||
* - AARUF_ERROR_CANNOT_CREATE_FILE (-19) when file operations fail:
|
||||
* * Unable to open the specified filepath for writing
|
||||
* * File seek operations fail during initialization
|
||||
* * File system errors or permission issues
|
||||
* - AARUF_ERROR_INVALID_APP_NAME_LENGTH (-20) when:
|
||||
* * application_name_length exceeds AARU_HEADER_APP_NAME_LEN
|
||||
* * application_name_length exceeds AARU_HEADER_APP_NAME_LEN (64 bytes)
|
||||
*
|
||||
* @note Memory Management:
|
||||
* - The function performs extensive memory allocation for various context structures
|
||||
* - On failure, all previously allocated memory is properly cleaned up
|
||||
* - The returned context must be freed using appropriate cleanup functions
|
||||
* - The returned context must be freed using aaruf_close() when finished
|
||||
*
|
||||
* @note File Operations:
|
||||
* - Creates a new file at the specified path (overwrites existing files)
|
||||
@@ -81,32 +172,63 @@
|
||||
* - Positions the file pointer at the calculated data start position
|
||||
* - File alignment is handled based on parsed options
|
||||
*
|
||||
* @note DDT Initialization:
|
||||
* @note DDT Initialization (Block Media Only):
|
||||
* - Uses DDT version 2 format with configurable compression and alignment
|
||||
* - Supports both small (16-bit) and big (32-bit) DDT entry sizes
|
||||
* - Calculates optimal table sizes based on sector counts and shift parameters
|
||||
* - All DDT entries are initialized to zero (indicating unallocated sectors)
|
||||
* - Multi-level DDT is used for images with ≥ 137,922,056 (0x8388608) total sectors
|
||||
* - Single-level DDT is used for smaller images for efficiency
|
||||
* - DDT is a fixed-size preallocated array written to file at known offset
|
||||
*
|
||||
* @note Tape Media Initialization:
|
||||
* - Tape images use a dynamic hash table DDT for sector-level deduplication
|
||||
* - File and partition metadata is managed via separate hash tables
|
||||
* - ctx->is_tape is set to 1 to indicate tape mode throughout the library
|
||||
* - ctx->tapeDdt is initialized to NULL and grows dynamically as blocks are written
|
||||
* - Data blocks can start immediately after header for optimal sequential access
|
||||
* - The hash table DDT allows for efficient deduplication without knowing final size
|
||||
* - More memory-efficient for tapes with unpredictable or very large sizes
|
||||
* - Deduplication hash map may still be used alongside tapeDdt if enabled in options
|
||||
*
|
||||
* @note Options Parsing:
|
||||
* - The options string is parsed to extract block_alignment, data_shift, and table_shift
|
||||
* - These parameters affect memory usage, performance, and file organization
|
||||
* - Invalid options may result in suboptimal performance but won't cause failure
|
||||
* - Compression and checksums can be enabled independently via options
|
||||
*
|
||||
* @note Checksum Initialization:
|
||||
* - MD5, SHA-1, SHA-256, SpamSum, and BLAKE3 can be calculated during write
|
||||
* - Checksum contexts are initialized only if requested in options
|
||||
* - Checksums are computed incrementally as sectors/blocks are written
|
||||
* - Final checksums are stored in checksum block during image finalization
|
||||
*
|
||||
* @warning The created context is in writing mode and expects proper finalization
|
||||
* before closing to ensure index and metadata are written correctly.
|
||||
*
|
||||
* @warning Application name length validation is strict - exceeding the limit will
|
||||
* cause creation failure with AARUF_ERROR_INVALID_APP_NAME_LENGTH.
|
||||
*
|
||||
* @warning For tape media, the DDT structure is fundamentally different (hash table vs array).
|
||||
* The is_tape flag must accurately reflect the media type being created.
|
||||
*
|
||||
* @warning The negative_sectors and overflow_sectors parameters are used only for
|
||||
* block media. For tape media, these parameters are ignored.
|
||||
*
|
||||
* @see aaruf_close() for proper context cleanup and image finalization
|
||||
* @see aaruf_write_sector() for writing sectors to block media images
|
||||
* @see aaruf_set_tape_file() for defining tape file metadata
|
||||
* @see aaruf_set_tape_partition() for defining tape partition metadata
|
||||
*/
|
||||
void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32_t sector_size,
|
||||
const uint64_t user_sectors, const uint64_t negative_sectors, const uint64_t overflow_sectors,
|
||||
const char *options, const uint8_t *application_name, const uint8_t application_name_length,
|
||||
const uint8_t application_major_version, const uint8_t application_minor_version)
|
||||
const uint8_t application_major_version, const uint8_t application_minor_version, const bool is_tape)
|
||||
{
|
||||
TRACE("Entering aaruf_create(%s, %u, %u, %llu, %llu, %llu, %s, %s, %u, %u, %u)", filepath, media_type, sector_size,
|
||||
user_sectors, negative_sectors, overflow_sectors, options,
|
||||
TRACE("Entering aaruf_create(%s, %u, %u, %llu, %llu, %llu, %s, %s, %u, %u, %u, %d)", filepath, media_type,
|
||||
sector_size, user_sectors, negative_sectors, overflow_sectors, options,
|
||||
application_name ? (const char *)application_name : "NULL", application_name_length,
|
||||
application_major_version, application_minor_version);
|
||||
application_major_version, application_minor_version, is_tape);
|
||||
|
||||
// Parse the options
|
||||
TRACE("Parsing options");
|
||||
@@ -216,13 +338,13 @@ void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32
|
||||
ctx->libraryMajorVersion = LIBAARUFORMAT_MAJOR_VERSION;
|
||||
ctx->libraryMinorVersion = LIBAARUFORMAT_MINOR_VERSION;
|
||||
|
||||
// Initialize DDT2
|
||||
if(!is_tape)
|
||||
{ // Initialize DDT2
|
||||
TRACE("Initializing DDT2");
|
||||
ctx->inMemoryDdt = true;
|
||||
ctx->userDataDdtHeader.identifier = DeDuplicationTable2;
|
||||
ctx->userDataDdtHeader.type = UserData;
|
||||
ctx->userDataDdtHeader.compression = None;
|
||||
ctx->userDataDdtHeader.levels = 2;
|
||||
ctx->userDataDdtHeader.tableLevel = 0;
|
||||
ctx->userDataDdtHeader.previousLevelOffset = 0;
|
||||
ctx->userDataDdtHeader.negative = negative_sectors;
|
||||
@@ -236,17 +358,27 @@ void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32
|
||||
|
||||
if(parsed_options.table_shift == -1)
|
||||
{
|
||||
uint64_t total_sectors = user_sectors + overflow_sectors + negative_sectors;
|
||||
const uint64_t total_sectors = user_sectors + overflow_sectors + negative_sectors;
|
||||
|
||||
if(total_sectors < 0x8388608ULL)
|
||||
{
|
||||
ctx->userDataDdtHeader.levels = 1;
|
||||
ctx->userDataDdtHeader.tableShift = 0;
|
||||
else
|
||||
ctx->userDataDdtHeader.tableShift = 22;
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx->userDataDdtHeader.levels = 2;
|
||||
ctx->userDataDdtHeader.tableShift = 22;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
ctx->userDataDdtHeader.levels = parsed_options.table_shift > 0 ? 2 : 1;
|
||||
ctx->userDataDdtHeader.tableShift = parsed_options.table_shift;
|
||||
}
|
||||
|
||||
if(ctx->userDataDdtHeader.blocks % (1 << ctx->userDataDdtHeader.tableShift) != 0) ctx->userDataDdtHeader.entries++;
|
||||
if(ctx->userDataDdtHeader.blocks % (1 << ctx->userDataDdtHeader.tableShift) != 0)
|
||||
ctx->userDataDdtHeader.entries++;
|
||||
|
||||
TRACE("Initializing primary/single DDT");
|
||||
if(ctx->userDataDdtHeader.sizeType == SmallDdtSizeType)
|
||||
@@ -276,11 +408,25 @@ void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32
|
||||
}
|
||||
else
|
||||
ctx->nextBlockPosition = ctx->primaryDdtOffset; // Single-level DDT can start anywhere
|
||||
}
|
||||
else
|
||||
{
|
||||
// Fill needed values
|
||||
ctx->userDataDdtHeader.blockAlignmentShift = parsed_options.block_alignment;
|
||||
ctx->userDataDdtHeader.dataShift = parsed_options.data_shift;
|
||||
|
||||
// Calculate aligned next block position
|
||||
const uint64_t alignmentMask = (1ULL << parsed_options.block_alignment) - 1;
|
||||
ctx->nextBlockPosition = sizeof(AaruHeaderV2); // Start just after the header
|
||||
ctx->nextBlockPosition = ctx->nextBlockPosition + alignmentMask & ~alignmentMask;
|
||||
ctx->is_tape = 1;
|
||||
ctx->tapeDdt = NULL;
|
||||
}
|
||||
|
||||
TRACE("Data blocks will start at position %" PRIu64, ctx->nextBlockPosition);
|
||||
|
||||
// Position file pointer at the data start position
|
||||
if(fseek(ctx->imageStream, (long)ctx->nextBlockPosition, SEEK_SET) != 0)
|
||||
if(fseek(ctx->imageStream, ctx->nextBlockPosition, SEEK_SET) != 0)
|
||||
{
|
||||
FATAL("Could not seek to data start position");
|
||||
free(ctx->readableSectorTags);
|
||||
|
||||
177
src/ddt/ddt_v2.c
177
src/ddt/ddt_v2.c
@@ -1796,3 +1796,180 @@ bool set_ddt_multi_level_v2(aaruformatContext *ctx, uint64_t sector_address, boo
|
||||
TRACE("Exiting set_ddt_multi_level_v2() = true");
|
||||
return true;
|
||||
}
|
||||
|
||||
/**
|
||||
* @brief Sets a DDT entry for tape media using a hash-based lookup table.
|
||||
*
|
||||
* This function is specifically designed for tape media images where sectors are accessed
|
||||
* non-sequentially and the traditional DDT array structure is inefficient. Instead of using
|
||||
* a large contiguous array, it uses a hash table (UTHASH) to store only the sectors that
|
||||
* have been written, providing sparse storage for tape media.
|
||||
*
|
||||
* The function performs the following operations:
|
||||
* 1. Validates the context and verifies it's a tape image
|
||||
* 2. Constructs a DDT entry encoding offset, block alignment, and sector status
|
||||
* 3. Creates a hash table entry with the sector address as the key
|
||||
* 4. Inserts or replaces the entry in the tape DDT hash table
|
||||
*
|
||||
* **DDT Entry Format:**
|
||||
* The DDT entry is a 64-bit value with the following bit layout:
|
||||
* ```
|
||||
* Bits 0-(dataShift-1): Sector offset within block (masked by dataShift)
|
||||
* Bits dataShift-27: Block index (block_offset >> blockAlignmentShift)
|
||||
* Bits 28-31: Sector status (4 bits for status flags)
|
||||
* Bits 32-63: Unused (reserved for future use)
|
||||
* ```
|
||||
*
|
||||
* **Hash Table Management:**
|
||||
* Uses HASH_REPLACE macro from UTHASH library which:
|
||||
* - Adds new entries if the key (sector_address) doesn't exist
|
||||
* - Replaces existing entries if the key is found (the displaced entry is handed back to the caller of the macro, and this function frees it)
|
||||
* - Maintains O(1) average lookup time for sector address resolution
|
||||
*
|
||||
* **Overflow Detection:**
|
||||
* The function checks if the constructed DDT entry exceeds 28 bits (0xFFFFFFF).
|
||||
* This limit ensures the sector status can fit in the upper 4 bits while leaving
|
||||
* room for future extensions in the upper 32 bits.
|
||||
*
|
||||
* @param ctx Pointer to the aaruformat context. Must not be NULL.
|
||||
* The context must have a valid imageStream and is_tape must be true.
|
||||
* The ctx->tapeDdt hash table will be updated with the new entry.
|
||||
* The ctx->userDataDdtHeader contains alignment and shift parameters.
|
||||
*
|
||||
* @param sector_address Logical sector address on the tape to set. This serves as
|
||||
* the unique key in the hash table. Multiple calls with the
|
||||
* same sector_address will replace the previous entry.
|
||||
*
|
||||
* @param offset Byte offset within the aligned block where the sector data begins.
|
||||
* This value is masked by (1 << dataShift) - 1 to extract only the
|
||||
* lower bits representing the offset within the block.
|
||||
*
|
||||
* @param block_offset Absolute byte offset in the image file where the data block starts.
|
||||
* This is right-shifted by blockAlignmentShift to get the block index,
|
||||
* which is stored in the DDT entry's middle bits.
|
||||
*
|
||||
* @param sector_status Status flags for the sector (4 bits). Common values include:
|
||||
* - 0x0 (SectorStatusNotDumped): Sector not yet acquired during image dumping
|
||||
* - 0x1 (SectorStatusDumped): Sector successfully dumped without error
|
||||
* - 0x2 (SectorStatusErrored): Error during dumping; data may be incomplete or corrupt
|
||||
* - 0x3 (SectorStatusMode1Correct): Valid MODE 1 data with regenerable suffix/prefix
|
||||
* - 0x4 (SectorStatusMode2Form1Ok): Suffix verified/regenerable for MODE 2 Form 1
|
||||
* - 0x5 (SectorStatusMode2Form2Ok): Suffix matches MODE 2 Form 2 with valid CRC
|
||||
* - 0x6 (SectorStatusMode2Form2NoCrc): Suffix matches MODE 2 Form 2 but CRC empty/missing
|
||||
* - 0x7 (SectorStatusTwin): Pointer references a twin sector table
|
||||
* - 0x8 (SectorStatusUnrecorded): Sector physically unrecorded; repeated reads non-deterministic
|
||||
* - 0x9 (SectorStatusEncrypted): Content encrypted and stored encrypted in image
|
||||
* - 0xA (SectorStatusUnencrypted): Content originally encrypted but stored decrypted in image
|
||||
* See SectorStatus enum for complete list of defined values
|
||||
*
|
||||
* @param ddt_entry Pointer to a 64-bit value that will receive the constructed DDT entry.
|
||||
* - If *ddt_entry is 0: A new entry is constructed from the provided parameters
|
||||
* - If *ddt_entry is non-zero: The existing value is used directly
|
||||
* The constructed or provided value is stored in the hash table, narrowed to the 32-bit value field of TapeDdtHashEntry.
|
||||
*
|
||||
* @return Returns one of the following status codes:
|
||||
* @retval true Successfully created and inserted the DDT entry. This occurs when:
|
||||
* - The context and image stream are valid
|
||||
* - The image is confirmed to be a tape image (is_tape == true)
|
||||
* - A newly constructed DDT entry fits within the 28-bit limit (<= 0xFFFFFFF)
|
||||
* - Memory allocation for the hash entry succeeds
|
||||
* - The entry is successfully inserted or replaced in the hash table
|
||||
*
|
||||
* @retval false Failed to set the DDT entry. This can happen when:
|
||||
* - ctx is NULL or ctx->imageStream is NULL (invalid context)
|
||||
* - ctx->is_tape is false (wrong function called for non-tape media)
|
||||
* - The DDT entry exceeds 0xFFFFFFF (media too large for big DDT)
|
||||
* - Memory allocation fails for the new hash table entry (out of memory)
|
||||
*
|
||||
* @note This function is only for tape images. For disk images, use set_ddt_single_level_v2()
|
||||
* or set_ddt_multi_level_v2() instead, which use array-based DDT structures.
|
||||
*
|
||||
* @note Memory Management:
|
||||
* - Allocates a new TapeDdtHashEntry for each sector
|
||||
* - HASH_REPLACE returns the replaced entry (if any), which this function then frees
|
||||
* - All hash entries remain in context until cleanup
|
||||
* - The tapeDdt hash table must be freed during context destruction
|
||||
*
|
||||
* @note Tape Media Characteristics:
|
||||
* - Tape sectors are typically accessed sequentially during streaming
|
||||
* - File marks and partition boundaries create sparse address spaces
|
||||
* - Hash table provides efficient storage for sparse sector maps
|
||||
* - Supports variable block sizes common in tape formats
|
||||
*
|
||||
* @note Error Handling:
|
||||
* - All errors are logged with FATAL level messages
|
||||
* - Function returns false immediately on any error condition
|
||||
* - TRACE logging marks entry/exit points for debugging
|
||||
* - No partial state changes occur on failure
|
||||
*
|
||||
* @warning The DDT entry overflow check at 0xFFFFFFF (28 bits) is critical. Exceeding
|
||||
* this limit indicates the media is too large to fit in the current DDT format,
|
||||
* and continuing would cause data corruption.
|
||||
*
|
||||
* @warning This function modifies the shared tapeDdt hash table. In multi-threaded
|
||||
* environments, external synchronization is required to prevent race conditions.
|
||||
*
|
||||
* @see TapeDdtHashEntry for the hash table entry structure
|
||||
* @see set_ddt_entry_v2() for the main DDT entry point that dispatches to this function
|
||||
* @see get_ddt_tape() for retrieving tape DDT entries from the hash table
|
||||
*/
|
||||
bool set_ddt_tape(aaruformatContext *ctx, uint64_t sector_address, const uint64_t offset, const uint64_t block_offset,
                  const uint8_t sector_status, uint64_t *ddt_entry)
{
    // uint64_t arguments are printed with PRIu64; %llu only matches where uint64_t is unsigned long long
    TRACE("Entering set_ddt_tape(%p, %" PRIu64 ", %" PRIu64 ", %" PRIu64 ", %d)", (void *)ctx, sector_address, offset,
          block_offset, sector_status);

    // Check if the context and image stream are valid
    if(ctx == NULL || ctx->imageStream == NULL)
    {
        FATAL("Invalid context or image stream.");
        TRACE("Exiting set_ddt_tape() = false");
        return false;
    }

    // ddt_entry is both an input (pre-built entry) and an output (constructed entry), so it must be valid
    if(ddt_entry == NULL)
    {
        FATAL("Invalid DDT entry pointer.");
        TRACE("Exiting set_ddt_tape() = false");
        return false;
    }

    // Should not really be here
    if(!ctx->is_tape)
    {
        FATAL("Image is not tape, wrong function called.");
        TRACE("Exiting set_ddt_tape() = false");
        return false;
    }

    // Work on a local copy so the caller's value is only touched once every failure point has been passed
    uint64_t entry = *ddt_entry;

    if(entry == 0)
    {
        // Low dataShift bits carry the offset inside the block, the bits above carry the block index
        const uint64_t offset_mask = (1ULL << ctx->userDataDdtHeader.dataShift) - 1;
        const uint64_t block_index = block_offset >> ctx->userDataDdtHeader.blockAlignmentShift;

        entry = (offset & offset_mask) | (block_index << ctx->userDataDdtHeader.dataShift);

        // Overflow detection for DDT entry: bits 28 and up are reserved for the sector status
        if(entry > 0xFFFFFFF)
        {
            FATAL("DDT overflow: media does not fit in big DDT");
            TRACE("Exiting set_ddt_tape() = false");
            return false;
        }

        entry |= (uint64_t)sector_status << 28;
    }

    // Create DDT hash entry
    TapeDdtHashEntry *new_entry = calloc(1, sizeof *new_entry);

    if(new_entry == NULL)
    {
        FATAL("Cannot allocate memory for new tape DDT hash entry.");
        TRACE("Exiting set_ddt_tape() = false");
        return false;
    }

    TRACE("Setting tape DDT entry %" PRIu64 " to %" PRIu32, sector_address, (uint32_t)entry);

    new_entry->key = sector_address;
    // The hash entry stores 32 bits; the narrowing is made explicit here (same result as the previous implicit one)
    new_entry->value = (uint32_t)entry;

    // Insert entry into tape DDT; HASH_REPLACE hands back any entry it displaced, which is ours to release
    TapeDdtHashEntry *old_entry = NULL;
    HASH_REPLACE(hh, ctx->tapeDdt, key, sizeof(uint64_t), new_entry, old_entry);
    free(old_entry);

    // Publish the entry to the caller only now that the operation has fully succeeded
    *ddt_entry = entry;

    TRACE("Exiting set_ddt_tape() = true");
    return true;
}
|
||||
|
||||
Reference in New Issue
Block a user