libaaruformat 1.0
Aaru Data Preservation Suite - Format Library
Loading...
Searching...
No Matches
create.c
Go to the documentation of this file.
1/*
2 * This file is part of the Aaru Data Preservation Suite.
3 * Copyright (c) 2019-2025 Natalia Portillo.
4 *
5 * This library is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as
7 * published by the Free Software Foundation; either version 2.1 of the
8 * License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18#include <errno.h>
19#include <stdbool.h>
20#include <stdint.h>
21#include <stdio.h>
22#include <stdlib.h>
23#include <string.h>
24
25#include "aaruformat.h"
26#include "enums.h"
27#include "internal.h"
28#include "log.h"
29
31{
32 if(ctx == NULL) return;
33
34 if(ctx->sector_hash_map != NULL)
35 {
37 ctx->sector_hash_map = NULL;
38 }
39
40 if(ctx->index_entries != NULL)
41 {
42 utarray_free(ctx->index_entries);
43 ctx->index_entries = NULL;
44 }
45
46 if(ctx->user_data_ddt2 != NULL)
47 {
48 free(ctx->user_data_ddt2);
49 ctx->user_data_ddt2 = NULL;
50 }
51
52 if(ctx->spamsum_context != NULL)
53 {
55 ctx->spamsum_context = NULL;
56 }
57
58 if(ctx->blake3_context != NULL)
59 {
60 free(ctx->blake3_context);
61 ctx->blake3_context = NULL;
62 }
63
64 if(ctx->ecc_cd_context != NULL)
65 {
66 free(ctx->ecc_cd_context);
67 ctx->ecc_cd_context = NULL;
68 }
69
70 if(ctx->readableSectorTags != NULL)
71 {
72 free(ctx->readableSectorTags);
73 ctx->readableSectorTags = NULL;
74 }
75
76 // ApplicationVersion and Version are fixed-size arrays, not pointers - no need to free
77
78 if(ctx->imageStream != NULL)
79 {
80 fclose(ctx->imageStream);
81 ctx->imageStream = NULL;
82 }
83
84 free(ctx);
85}
86
/**
 * @brief Creates a new AaruFormat image file and returns a context prepared for writing.
 *
 * NOTE(review): this text is an extracted documentation listing. Each line still
 * carries its original source line number and the numbering skips in many places
 * (for example 298 -> 300, 404 -> 410, 421 -> 423), so a number of statements are
 * not shown here. Consult the real create.c before relying on this listing.
 *
 * Steps visible in this listing:
 *  - parses the options string with parse_options();
 *  - allocates a context and zero-fills it;
 *  - opens filepath with mode "wb+";
 *  - rejects application names longer than AARU_HEADER_APP_NAME_LEN;
 *  - fills header and image-info fields and allocates readableSectorTags;
 *  - for non-tape images, sizes and allocates the in-memory primary DDT and
 *    computes the aligned primary DDT offset; for tape images, only computes the
 *    aligned first block position and marks the context as tape;
 *  - seeks the stream to next_block_position;
 *  - creates the index entries array;
 *  - copies compression / deduplication settings and enables the requested
 *    checksum calculations;
 *  - marks the context as writing and returns it.
 *
 * @param filepath                  Path handed to fopen() for the new image.
 * @param media_type                Stored in header.mediaType.
 * @param sector_size               Stored in image_info.SectorSize.
 * @param user_sectors              Stored in image_info.Sectors; part of the DDT block total.
 * @param negative_sectors          Stored in the DDT header; part of the DDT block total.
 * @param overflow_sectors          Stored in the DDT header; part of the DDT block total.
 * @param options                   Options string handed to parse_options().
 * @param application_name          Bytes copied into header.application and image_info.Application.
 * @param application_name_length   Number of bytes to copy from application_name.
 * @param application_major_version Stored in header.applicationMajorVersion.
 * @param application_minor_version Stored in header.applicationMinorVersion.
 * @param is_tape                   Selects the tape setup path instead of the DDT setup path.
 *
 * @return The new context on success, or NULL on any of the failure paths below.
 *         NOTE(review): the statements inside the failure paths other than the
 *         logging and the return are not visible in this listing (presumably
 *         errno assignment and cleanup - confirm against the real source).
 */
279void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32_t sector_size,
280 const uint64_t user_sectors, const uint64_t negative_sectors, const uint64_t overflow_sectors,
281 const char *options, const uint8_t *application_name, const uint8_t application_name_length,
282 const uint8_t application_major_version, const uint8_t application_minor_version, const bool is_tape)
283{
// NOTE(review): filepath and options are passed to %s without a NULL check (only
// application_name is guarded), and the uint64_t arguments are formatted with %llu
// rather than PRIu64 - confirm how TRACE consumes its arguments.
284 TRACE("Entering aaruf_create(%s, %u, %u, %llu, %llu, %llu, %s, %s, %u, %u, %u, %d)", filepath, media_type,
285 sector_size, user_sectors, negative_sectors, overflow_sectors, options,
286 application_name ? (const char *)application_name : "NULL", application_name_length,
287 application_major_version, application_minor_version, is_tape);
288
289 // Parse the options
290 TRACE("Parsing options");
291 const aaru_options parsed_options = parse_options(options);
292
293 // Allocate context
294 TRACE("Allocating memory for context");
295 aaruformat_context *ctx = malloc(sizeof(aaruformat_context));
296 if(ctx == NULL)
297 {
298 FATAL("Not enough memory to create context");
300
301 TRACE("Exiting aaruf_create() = NULL");
302 return NULL;
303 }
304
// Zero-fill so every pointer and flag in the context starts out NULL / false.
305 memset(ctx, 0, sizeof(aaruformat_context));
306
307 // Create the image file
308 TRACE("Creating image file %s", filepath);
309 ctx->imageStream = fopen(filepath, "wb+");
310 if(ctx->imageStream == NULL)
311 {
312 FATAL("Error %d opening file %s for writing", errno, filepath);
314
315 TRACE("Exiting aaruf_create() = NULL");
317 return NULL;
318 }
319
320 if(application_name_length > AARU_HEADER_APP_NAME_LEN)
321 {
322 FATAL("Application name too long (%u bytes, maximum %u bytes)", application_name_length,
325
326 TRACE("Exiting aaruf_create() = NULL");
328 return NULL;
329 }
330
331 // Initialize header
332 TRACE("Initializing header");
// NOTE(review): application_name is not checked for NULL before this copy, although
// the entry TRACE above anticipates a NULL name - confirm callers never pass NULL.
334 memcpy(ctx->header.application, application_name, application_name_length);
336 ctx->header.imageMinorVersion = 0;
337 ctx->header.applicationMajorVersion = application_major_version;
338 ctx->header.applicationMinorVersion = application_minor_version;
339 ctx->header.mediaType = media_type;
340 ctx->header.indexOffset = 0;
343
344 ctx->readableSectorTags = (bool *)malloc(sizeof(bool) * MaxSectorTag);
345
346 if(ctx->readableSectorTags == NULL)
347 {
349
350 TRACE("Exiting aaruf_create() = NULL");
352 return NULL;
353 }
354
355 memset(ctx->readableSectorTags, 0, sizeof(bool) * MaxSectorTag);
356
357 // Initialize image info
358 TRACE("Initializing image info");
359
// At most 63 bytes are copied so the 64-byte field always ends in a NUL.
360 // Copy application name (UTF-8) to image_info
361 memset(ctx->image_info.Application, 0, 64);
362 size_t copy_len = application_name_length < 63 ? application_name_length : 63;
363 memcpy(ctx->image_info.Application, application_name, copy_len);
364 ctx->image_info.Application[63] = '\0';
365
366 // Set application version string directly in the fixed-size array
367 memset(ctx->image_info.ApplicationVersion, 0, 32);
370
371 // Set image version string directly in the fixed-size array
372 memset(ctx->image_info.Version, 0, 32);
373 sprintf(ctx->image_info.Version, "%d.%d", ctx->header.imageMajorVersion, ctx->header.imageMinorVersion);
374
376 ctx->image_info.ImageSize = 0;
380 ctx->image_info.SectorSize = sector_size;
381 ctx->image_info.Sectors = user_sectors;
382
// The cache item limit is MAX_CACHE_SIZE divided by (sector size * 2^shift); a zero
// divisor (e.g. sector_size == 0) yields a limit of 0 instead of dividing by zero.
383 // Initialize caches
384 TRACE("Initializing caches");
385 ctx->block_header_cache.cache = NULL;
386 const uint64_t cache_divisor = (uint64_t)ctx->image_info.SectorSize * (1ULL << ctx->shift);
387 ctx->block_header_cache.max_items = cache_divisor == 0 ? 0 : MAX_CACHE_SIZE / cache_divisor;
388 ctx->block_cache.cache = NULL;
390
391 // TODO: Cache tracks and sessions?
392
393 // Initialize ECC for Compact Disc
394 TRACE("Initializing Compact Disc ECC");
396
397 ctx->magic = AARU_MAGIC;
400
401 if(!is_tape)
402 { // Initialize DDT2
403 TRACE("Initializing DDT2");
404 ctx->in_memory_ddt = true;
// NOTE(review): the cross-reference text on this page lists DdtHeader2.negative and
// DdtHeader2.overflow as uint16_t, while the parameters are uint64_t; values above
// 65535 would be truncated here even though "blocks" uses the full sum - confirm.
410 ctx->user_data_ddt_header.negative = negative_sectors;
411 ctx->user_data_ddt_header.blocks = user_sectors + overflow_sectors + negative_sectors;
412 ctx->user_data_ddt_header.overflow = overflow_sectors;
415 ctx->user_data_ddt_header.dataShift = parsed_options.data_shift;
416
// A table_shift of -1 selects a value based on the total sector count; the two
// resulting assignments (source lines 422 and 424) are not visible in this listing.
417 if(parsed_options.table_shift == -1)
418 {
419 const uint64_t total_sectors = user_sectors + overflow_sectors + negative_sectors;
420
// NOTE(review): 0x8388608 is a hexadecimal literal whose digits read like the decimal
// value 8388608 (2^23) - confirm which threshold is intended.
421 if(total_sectors < 0x8388608ULL)
423 else
425 }
426 else
428 parsed_options.table_shift > 0 ? (uint8_t)parsed_options.table_shift : 0;
429
431
// Keep the shift below 63 so the 1ULL << shift computation further down stays in range.
432 uint8_t effective_table_shift = ctx->user_data_ddt_header.tableShift;
433 if(effective_table_shift >= 63)
434 {
435 TRACE("Clamping table shift from %u to 62 to avoid overflow", effective_table_shift);
436 effective_table_shift = 62;
437 ctx->user_data_ddt_header.tableShift = effective_table_shift;
438 }
439
// One primary entry covers 2^tableShift sectors; the statement executed when the
// division leaves a remainder or yields zero entries (source line 443) is not shown.
440 const uint64_t sectors_per_entry = 1ULL << effective_table_shift;
441 ctx->user_data_ddt_header.entries = ctx->user_data_ddt_header.blocks / sectors_per_entry;
442 if(ctx->user_data_ddt_header.blocks % sectors_per_entry != 0 || ctx->user_data_ddt_header.entries == 0)
444
445 TRACE("Initializing primary/single DDT");
446 ctx->user_data_ddt2 =
447 (uint64_t *)calloc(ctx->user_data_ddt_header.entries, sizeof(uint64_t)); // All entries to zero
448 if(ctx->user_data_ddt2 == NULL)
449 {
450 FATAL("Not enough memory to allocate primary DDT (big)");
452 TRACE("Exiting aaruf_create() = NULL");
454 return NULL;
455 }
456
// "+" binds tighter than "&", so the expression below is (offset + mask) & ~mask,
// i.e. the offset rounded up to the next multiple of 2^blockAlignmentShift.
// NOTE(review): the assignment of blockAlignmentShift is not visible in this listing.
457 // Set the primary DDT offset (just after the header, block aligned)
458 ctx->primary_ddt_offset = sizeof(AaruHeaderV2); // Start just after the header
459 const uint64_t alignment_mask = (1ULL << ctx->user_data_ddt_header.blockAlignmentShift) - 1;
460 ctx->primary_ddt_offset = ctx->primary_ddt_offset + alignment_mask & ~alignment_mask;
461
462 TRACE("Primary DDT will be placed at offset %" PRIu64, ctx->primary_ddt_offset);
463
464 // Calculate size of primary DDT table
465 const uint64_t primary_table_size = ctx->user_data_ddt_header.entries * sizeof(uint64_t);
466
// NOTE(review): the condition guarding the block below (source line 468) is not
// visible in this listing.
467 // Calculate where data blocks can start (after primary DDT + header)
469 {
470 const uint64_t data_start_position = ctx->primary_ddt_offset + sizeof(DdtHeader2) + primary_table_size;
471 ctx->next_block_position = data_start_position + alignment_mask & ~alignment_mask;
472 }
473 else
474 ctx->next_block_position = ctx->primary_ddt_offset; // Single-level DDT can start anywhere
475 }
476 else
477 {
// Tape path: no primary DDT is allocated; only the first block position is computed.
478 // Fill needed values
480 ctx->user_data_ddt_header.dataShift = parsed_options.data_shift;
481
482 // Calculate aligned next block position
483 const uint64_t alignment_mask = (1ULL << parsed_options.block_alignment) - 1;
484 ctx->next_block_position = sizeof(AaruHeaderV2); // Start just after the header
485 ctx->next_block_position = ctx->next_block_position + alignment_mask & ~alignment_mask;
486 ctx->is_tape = 1;
487 ctx->tape_ddt = NULL;
488 }
489
490 TRACE("Data blocks will start at position %" PRIu64, ctx->next_block_position);
491
// NOTE(review): fseek() takes a long offset while next_block_position is uint64_t -
// confirm this cannot exceed LONG_MAX on the supported platforms.
492 // Position file pointer at the data start position
493 if(fseek(ctx->imageStream, ctx->next_block_position, SEEK_SET) != 0)
494 {
495 FATAL("Could not seek to data start position");
497 TRACE("Exiting aaruf_create() = NULL");
499 return NULL;
500 }
501
502 // Initialize index entries array
503 TRACE("Initializing index entries array");
504 const UT_icd index_entry_icd = {sizeof(IndexEntry), NULL, NULL, NULL};
505 utarray_new(ctx->index_entries, &index_entry_icd);
506
507 if(ctx->index_entries == NULL)
508 {
509 FATAL("Not enough memory to create index entries array");
511
512 TRACE("Exiting aaruf_create() = NULL");
514 return NULL;
515 }
516
// NOTE(review): the result of create_map() is not checked here.
517 ctx->compression_enabled = parsed_options.compress;
518 ctx->lzma_dict_size = parsed_options.dictionary;
519 ctx->deduplicate = parsed_options.deduplicate;
520 if(ctx->deduplicate)
521 ctx->sector_hash_map = create_map(ctx->user_data_ddt_header.blocks * 25 / 100); // 25% of total sectors
522
523 ctx->rewinded = false;
524 ctx->last_written_block = 0;
525
// Enable each requested checksum. NOTE(review): the second statement of each of the
// four blocks below (source lines 529, 534, 539, 544) is not visible in this listing.
526 if(parsed_options.md5)
527 {
528 ctx->calculating_md5 = true;
530 }
531 if(parsed_options.sha1)
532 {
533 ctx->calculating_sha1 = true;
535 }
536 if(parsed_options.sha256)
537 {
538 ctx->calculating_sha256 = true;
540 }
541 if(parsed_options.spamsum)
542 {
543 ctx->calculating_spamsum = true;
545 }
// If the BLAKE3 context cannot be allocated, creation still succeeds and BLAKE3 is
// simply left disabled.
546 if(parsed_options.blake3)
547 {
548 ctx->blake3_context = calloc(1, sizeof(blake3_hasher));
549 if(ctx->blake3_context != NULL)
550 {
551 ctx->calculating_blake3 = true;
552 blake3_hasher_init(ctx->blake3_context);
553 }
554 }
555
556 // Is writing
557 ctx->is_writing = true;
558
559 TRACE("Exiting aaruf_create() = %p", ctx);
560 // Return context
561 return ctx;
562}
#define LIBAARUFORMAT_MAJOR_VERSION
Definition aaruformat.h:22
#define LIBAARUFORMAT_MINOR_VERSION
Definition aaruformat.h:23
#define AARU_MAGIC
Magic identifier for AaruFormat container (ASCII "AARUFRMT").
Definition consts.h:64
#define MAX_CACHE_SIZE
Maximum read cache size (bytes).
Definition consts.h:79
#define AARUF_VERSION_V2
Second on‑disk version (C implementation).
Definition consts.h:75
void * aaruf_create(const char *filepath, const uint32_t media_type, const uint32_t sector_size, const uint64_t user_sectors, const uint64_t negative_sectors, const uint64_t overflow_sectors, const char *options, const uint8_t *application_name, const uint8_t application_name_length, const uint8_t application_major_version, const uint8_t application_minor_version, const bool is_tape)
Creates a new AaruFormat image file.
Definition create.c:279
static void cleanup_failed_create(aaruformat_context *ctx)
Definition create.c:30
void aaruf_sha256_init(sha256_ctx *ctx)
Definition sha256.c:76
void aaruf_md5_init(md5_ctx *ctx)
Definition md5.c:425
void aaruf_spamsum_free(spamsum_ctx *ctx)
Frees a spamsum (fuzzy hash) context.
Definition spamsum.c:75
spamsum_ctx * aaruf_spamsum_init(void)
Definition spamsum.c:37
void aaruf_sha1_init(sha1_ctx *ctx)
Definition sha1.c:34
void * aaruf_ecc_cd_init()
Initializes a Compact Disc ECC context.
Definition ecc_cd.c:35
int32_t aaruf_get_xml_mediatype(int32_t type)
Definition helpers.c:339
@ DeDuplicationTable2
Block containing a deduplication table v2.
Definition enums.h:143
@ UserData
User (main) data.
Definition enums.h:46
@ None
Not compressed.
Definition enums.h:33
#define AARUF_ERROR_CANNOT_CREATE_FILE
Output file could not be created / opened for write.
Definition errors.h:58
#define AARUF_ERROR_INVALID_APP_NAME_LENGTH
Application name field length invalid (sanity limit).
Definition errors.h:59
#define AARUF_ERROR_NOT_ENOUGH_MEMORY
Memory allocation failure (critical).
Definition errors.h:48
@ MaxSectorTag
Definition aaru.h:916
hash_map_t * create_map(size_t size)
Creates a new hash map with the specified initial size.
Definition hash_map.c:49
void free_map(hash_map_t *map)
Frees all memory associated with a hash map.
Definition hash_map.c:73
#define AARU_HEADER_APP_NAME_LEN
Size in bytes (UTF-16LE) of application name field (32 UTF-16 code units).
Definition header.h:59
uint64_t get_filetime_uint64()
Gets the current time as a 64-bit FILETIME value.
Definition time.c:45
aaru_options parse_options(const char *options)
Parses the options string for AaruFormat image creation/opening.
Definition options.c:38
#define FATAL(fmt,...)
Definition log.h:40
#define TRACE(fmt,...)
Definition log.h:25
Version 2 container header with GUID, alignment shifts, and feature negotiation bitmaps.
Definition header.h:107
uint8_t application[64]
UTF-16LE creator application name (fixed 64 bytes).
Definition header.h:109
uint8_t applicationMajorVersion
Creator application major version.
Definition header.h:112
uint64_t identifier
File magic (AARU_MAGIC).
Definition header.h:108
int64_t lastWrittenTime
Last modification FILETIME (100 ns since 1601-01-01 UTC).
Definition header.h:117
uint64_t indexOffset
Absolute byte offset to primary index block (MUST be > 0; 0 => corrupt/unreadable).
Definition header.h:115
uint8_t applicationMinorVersion
Creator application minor / patch version.
Definition header.h:113
uint32_t mediaType
Media type enumeration (value from MediaType).
Definition header.h:114
uint8_t imageMinorVersion
Container format minor version.
Definition header.h:111
int64_t creationTime
Creation FILETIME (100 ns since 1601-01-01 UTC).
Definition header.h:116
uint8_t imageMajorVersion
Container format major version.
Definition header.h:110
struct CacheEntry * cache
Hash root (uthash). NULL when empty.
Definition lru.h:48
uint64_t max_items
Hard limit for number of entries (policy: enforce/ignore depends on implementation).
Definition lru.h:47
Lookup tables and state for Compact Disc EDC/ECC (P/Q) regeneration / verification.
Definition context.h:86
Header preceding a version 2 hierarchical deduplication table.
Definition ddt.h:142
uint16_t type
Data classification (DataType) for sectors referenced by this table.
Definition ddt.h:144
uint64_t start
Base internal index covered by this table (used for secondary tables; currently informational).
Definition ddt.h:153
uint16_t overflow
Trailing dumped sectors beyond user area (overflow range), still mapped with entries.
Definition ddt.h:151
uint64_t entries
Number of entries contained in (uncompressed) table payload.
Definition ddt.h:158
uint8_t levels
Total number of hierarchy levels (root depth); > 0.
Definition ddt.h:146
uint32_t identifier
Block identifier, must be BlockType::DeDuplicationTable2.
Definition ddt.h:143
uint8_t tableShift
2^tableShift = number of logical sectors per primary entry (multi-level only; 0 for single-level or s...
Definition ddt.h:156
uint64_t blocks
Total internal span (negative + usable + overflow) in logical sectors.
Definition ddt.h:150
uint16_t negative
Leading negative LBA count; added to external L to build internal index.
Definition ddt.h:149
uint8_t blockAlignmentShift
2^blockAlignmentShift = block alignment boundary in bytes.
Definition ddt.h:154
uint8_t tableLevel
Zero-based level index of this table (0 = root, increases downward).
Definition ddt.h:147
uint16_t compression
Compression algorithm for this table body (CompressionType).
Definition ddt.h:145
uint8_t dataShift
2^dataShift = sectors represented per increment in blockIndex field.
Definition ddt.h:155
uint64_t previousLevelOffset
Absolute byte offset of the parent (previous) level table; 0 if root.
Definition ddt.h:148
uint32_t MediaType
Media type identifier (see MediaType enum; 0=Unknown)
Definition aaru.h:881
uint8_t MetadataMediaType
Media type for sidecar generation (internal archival use)
Definition aaru.h:882
uint32_t SectorSize
Size of each logical sector in bytes (512, 2048, 2352, 4096, etc.)
Definition aaru.h:875
char Application[64]
Name of application that created the image (NUL-terminated)
Definition aaru.h:877
uint64_t ImageSize
Size of the image payload in bytes (excludes headers/metadata)
Definition aaru.h:873
int64_t CreationTime
Image creation timestamp (Windows FILETIME: 100ns since 1601-01-01 UTC)
Definition aaru.h:879
int64_t LastModificationTime
Last modification timestamp (Windows FILETIME format)
Definition aaru.h:880
char Version[32]
Image format version string (NUL-terminated, e.g., "6.0")
Definition aaru.h:876
uint64_t Sectors
Total count of addressable logical sectors/blocks.
Definition aaru.h:874
char ApplicationVersion[32]
Version of the creating application (NUL-terminated)
Definition aaru.h:878
Single index entry describing a block's type, (optional) data classification, and file offset.
Definition index.h:109
Parsed user-specified tunables controlling compression, deduplication, hashing and DDT geometry.
Definition options.h:217
bool deduplicate
Storage dedup flag (DDT always exists).
Definition options.h:219
uint8_t data_shift
Global data shift: low bits encode sector offset inside a block (2^data_shift span).
Definition options.h:223
uint32_t dictionary
LZMA dictionary size in bytes (>= 4096 recommended). Default: 33554432 (32 MiB).
Definition options.h:221
bool compress
Enable adaptive compression (LZMA for data blocks, FLAC for audio). Default: true.
Definition options.h:218
bool sha256
Generate SHA-256 checksum (ChecksumAlgorithm::Sha256) when finalizing image.
Definition options.h:227
bool spamsum
Generate SpamSum fuzzy hash (ChecksumAlgorithm::SpamSum) if enabled.
Definition options.h:229
int8_t table_shift
DDT table shift (multi-level fan-out exponent). Default: heuristically calculated.
Definition options.h:222
bool md5
Generate MD5 checksum (ChecksumAlgorithm::Md5) when finalizing image.
Definition options.h:225
bool blake3
Generate BLAKE3 checksum if supported (not stored if algorithm unavailable).
Definition options.h:228
bool sha1
Generate SHA-1 checksum (ChecksumAlgorithm::Sha1) when finalizing image.
Definition options.h:226
uint8_t block_alignment
log2 underlying block alignment (2^n bytes). Default: 9 (512 bytes).
Definition options.h:224
Master context representing an open or in‑creation Aaru image.
Definition context.h:172
DdtHeader2 user_data_ddt_header
Active user data DDT v2 header (primary table meta).
Definition context.h:189
uint8_t library_major_version
Linked library major version.
Definition context.h:177
bool deduplicate
Storage deduplication active (duplicates coalesce).
Definition context.h:298
bool compression_enabled
True if block compression enabled (writing path).
Definition context.h:299
uint64_t last_written_block
Last written block number (write path).
Definition context.h:283
hash_map_t * sector_hash_map
Deduplication hash map (fingerprint->entry mapping).
Definition context.h:253
sha256_ctx sha256_context
Opaque SHA-256 context for streaming updates.
Definition context.h:272
bool calculating_sha256
True if whole-image SHA-256 being calculated on-the-fly.
Definition context.h:275
struct CacheHeader block_header_cache
LRU/Cache header for block headers.
Definition context.h:256
md5_ctx md5_context
Opaque MD5 context for streaming updates.
Definition context.h:270
uint64_t * user_data_ddt2
DDT entries (big variant) primary/secondary current.
Definition context.h:187
uint8_t shift
Legacy overall shift (deprecated by data_shift/table_shift).
Definition context.h:195
bool is_tape
True if the image is a tape image.
Definition context.h:304
bool calculating_sha1
True if whole-image SHA-1 being calculated on-the-fly.
Definition context.h:274
CdEccContext * ecc_cd_context
CD ECC/EDC helper tables (allocated on demand).
Definition context.h:248
bool rewinded
True if stream has been rewound after open (write path).
Definition context.h:293
struct CacheHeader block_cache
LRU/Cache header for block payloads.
Definition context.h:257
bool in_memory_ddt
True if primary (and possibly secondary) DDT loaded.
Definition context.h:196
AaruHeaderV2 header
Parsed container header (v2).
Definition context.h:175
bool is_writing
True if context opened/created for writing.
Definition context.h:292
TapeDdtHashEntry * tape_ddt
Hash table root for tape DDT entries.
Definition context.h:182
spamsum_ctx * spamsum_context
Opaque SpamSum context for streaming updates.
Definition context.h:267
uint64_t magic
File magic (AARU_MAGIC) post-open.
Definition context.h:174
bool calculating_spamsum
True if whole-image SpamSum being calculated on-the-fly.
Definition context.h:276
uint64_t primary_ddt_offset
File offset of the primary DDT v2 table.
Definition context.h:192
blake3_hasher * blake3_context
Opaque BLAKE3 context for streaming updates.
Definition context.h:268
bool calculating_blake3
True if whole-image BLAKE3 being calculated on-the-fly.
Definition context.h:277
uint8_t library_minor_version
Linked library minor version;.
Definition context.h:178
uint64_t next_block_position
Absolute file offset where next block will be written.
Definition context.h:282
bool calculating_md5
True if whole-image MD5 being calculated on-the-fly.
Definition context.h:273
FILE * imageStream
Underlying FILE* stream (binary mode).
Definition context.h:176
UT_array * index_entries
Flattened index entries (UT_array of IndexEntry).
Definition context.h:252
ImageInfo image_info
Exposed high-level image info summary.
Definition context.h:260
sha1_ctx sha1_context
Opaque SHA-1 context for streaming updates.
Definition context.h:271
bool * readableSectorTags
Per-sector boolean array (optical tags read successfully?).
Definition context.h:263
uint32_t lzma_dict_size
LZMA dictionary size (writing path).
Definition context.h:297