Add support for on-the-fly BLAKE3 checksum calculation

This commit is contained in:
2025-10-03 04:01:30 +01:00
parent 5a8a8c630f
commit 506b153930
5 changed files with 80 additions and 16 deletions

View File

@@ -19,6 +19,7 @@
#ifndef LIBAARUFORMAT_CONTEXT_H
#define LIBAARUFORMAT_CONTEXT_H
#include "blake3.h"
#include "crc64.h"
#include "hash_map.h"
#include "lru.h"
@@ -100,10 +101,12 @@ typedef struct Checksums
bool hasMd5; ///< True if md5[] buffer populated.
bool hasSha1; ///< True if sha1[] buffer populated.
bool hasSha256; ///< True if sha256[] buffer populated.
bool hasBlake3; ///< True if blake3[] buffer populated.
bool hasSpamSum; ///< True if spamsum pointer allocated and signature read.
uint8_t md5[MD5_DIGEST_LENGTH]; ///< MD5 digest (16 bytes).
uint8_t sha1[SHA1_DIGEST_LENGTH]; ///< SHA-1 digest (20 bytes).
uint8_t sha256[SHA256_DIGEST_LENGTH]; ///< SHA-256 digest (32 bytes).
uint8_t blake3[BLAKE3_OUT_LEN]; ///< BLAKE3 digest (32 bytes).
uint8_t *spamsum; ///< SpamSum fuzzy hash (ASCII), allocated length+1 with trailing 0.
} Checksums;
@@ -214,16 +217,18 @@ typedef struct aaruformatContext
hash_map_t *sectorHashMap; ///< Deduplication hash map (fingerprint->entry mapping).
bool deduplicate; ///< Storage deduplication active (duplicates coalesce).
bool rewinded; ///< True if stream has been rewound after open (write path).
uint64_t last_written_block; ///< Last written block number (write path).
bool calculating_md5; ///< True if whole-image MD5 being calculated on-the-fly.
md5_ctx md5_context; ///< Opaque MD5 context for streaming updates
bool calculating_sha1; ///< True if whole-image SHA-1 being calculated on-the-fly.
sha1_ctx sha1_context; ///< Opaque SHA-1 context for streaming updates
bool calculating_sha256; ///< True if whole-image SHA-256 being calculated on-the-fly.
sha256_ctx sha256_context; ///< Opaque SHA-256 context for streaming updates
bool calculating_spamsum; ///< True if whole-image SpamSum being calculated on-the-fly.
spamsum_ctx *spamsum_context; ///< Opaque SpamSum context for streaming updates
bool rewinded; ///< True if stream has been rewound after open (write path).
uint64_t last_written_block; ///< Last written block number (write path).
bool calculating_md5; ///< True if whole-image MD5 being calculated on-the-fly.
md5_ctx md5_context; ///< Opaque MD5 context for streaming updates
bool calculating_sha1; ///< True if whole-image SHA-1 being calculated on-the-fly.
sha1_ctx sha1_context; ///< Opaque SHA-1 context for streaming updates
bool calculating_sha256; ///< True if whole-image SHA-256 being calculated on-the-fly.
sha256_ctx sha256_context; ///< Opaque SHA-256 context for streaming updates
bool calculating_spamsum; ///< True if whole-image SpamSum being calculated on-the-fly.
spamsum_ctx *spamsum_context; ///< Opaque SpamSum context for streaming updates
bool calculating_blake3; ///< True if whole-image BLAKE3 being calculated on-the-fly.
blake3_hasher *blake3_context; ///< Opaque BLAKE3 context for streaming updates
} aaruformatContext;
/** \struct DumpHardwareEntriesWithData

View File

@@ -158,7 +158,8 @@ typedef enum
Md5 = 1, ///< MD5 hash.
Sha1 = 2, ///< SHA-1 hash.
Sha256 = 3, ///< SHA-256 hash.
SpamSum = 4 ///< SpamSum (context-triggered piecewise hash).
SpamSum = 4, ///< SpamSum (context-triggered piecewise hash).
Blake3 = 5, ///< BLAKE3 hash.
} ChecksumAlgorithm;
/**
@@ -238,6 +239,31 @@ typedef enum
SectorStatusUnencrypted = 0xA ///< Content originally encrypted but stored decrypted in image.
} SectorStatus;
/**
 * \enum FeaturesCompatible
 * \brief Flags describing optional image features that remain backward compatible.
 *
 * Each flag announces an additional data structure or capability embedded in the
 * image. A reader that does not recognise a bit MUST still open the image and simply
 * treat that feature as unsupported. Writers raise the bit for every feature they
 * included; readers probe the bits to switch on the matching extended behaviour.
 *
 * Example:
 * \code{.c}
 * uint64_t features = header->featureCompatible; // value read from on-disk header
 * if(features & AARU_FEATURE_RW_BLAKE3)
 * {
 *     // Image contains BLAKE3 checksums; enable BLAKE3 verification path.
 * }
 * \endcode
 *
 * New compatible features SHALL take the next free bit (1ULL << n).
 *
 * NOTE(review): before C23 an enumerator must be representable as int, so bits 31
 * and above cannot be declared as members of this enum; confirm how they will be defined.
 */
typedef enum
{
    AARU_FEATURE_RW_BLAKE3 = 1 << 0, ///< Bit 0: BLAKE3 checksum is present (read/write support for BLAKE3 hashes).
} FeaturesCompatible;
#ifndef _MSC_VER
#pragma clang diagnostic pop
#endif

View File

@@ -523,10 +523,16 @@ int aaruf_close(void *context)
aaruf_spamsum_final(ctx->spamsum_context, ctx->checksums.spamsum);
aaruf_spamsum_free(ctx->spamsum_context);
}
if(ctx->calculating_blake3)
{
ctx->checksums.hasBlake3 = true;
blake3_hasher_finalize(ctx->blake3_context, ctx->checksums.blake3, BLAKE3_OUT_LEN);
free(ctx->blake3_context);
}
// Write the checksums block
bool has_checksums =
ctx->checksums.hasMd5 || ctx->checksums.hasSha1 || ctx->checksums.hasSha256 || ctx->checksums.hasSpamSum;
bool has_checksums = ctx->checksums.hasMd5 || ctx->checksums.hasSha1 || ctx->checksums.hasSha256 ||
ctx->checksums.hasSpamSum || ctx->checksums.hasBlake3;
if(has_checksums)
{
@@ -590,11 +596,24 @@ int aaruf_close(void *context)
spamsum_entry.length = strlen((const char *)ctx->checksums.spamsum);
spamsum_entry.type = SpamSum;
fwrite(&spamsum_entry, sizeof(ChecksumEntry), 1, ctx->imageStream);
fwrite(&ctx->checksums.spamsum, spamsum_entry.length, 1, ctx->imageStream);
fwrite(ctx->checksums.spamsum, spamsum_entry.length, 1, ctx->imageStream);
checksum_header.length += sizeof(ChecksumEntry) + spamsum_entry.length;
checksum_header.entries++;
}
if(ctx->checksums.hasBlake3)
{
TRACE("Writing BLAKE3 checksum entry");
ChecksumEntry blake3_entry = {0};
blake3_entry.length = BLAKE3_OUT_LEN;
blake3_entry.type = Blake3;
fwrite(&blake3_entry, sizeof(ChecksumEntry), 1, ctx->imageStream);
fwrite(&ctx->checksums.blake3, BLAKE3_OUT_LEN, 1, ctx->imageStream);
checksum_header.length += sizeof(ChecksumEntry) + BLAKE3_OUT_LEN;
checksum_header.entries++;
ctx->header.featureCompatible |= AARU_FEATURE_RW_BLAKE3;
}
fseek(ctx->imageStream, checksum_position, SEEK_SET);
TRACE("Writing checksum header");
fwrite(&checksum_header, sizeof(ChecksumHeader), 1, ctx->imageStream);

View File

@@ -328,7 +328,16 @@ void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32
if(parsed_options.spamsum)
{
ctx->calculating_spamsum = true;
ctx->spamsum_context = aaruf_spamsum_init();
ctx->spamsum_context = aaruf_spamsum_init();
}
if(parsed_options.blake3)
{
ctx->blake3_context = calloc(1, sizeof(blake3_hasher));
if(ctx->blake3_context != NULL)
{
ctx->calculating_blake3 = true;
blake3_hasher_init(ctx->blake3_context);
}
}
// Is writing

View File

@@ -160,6 +160,8 @@ int32_t aaruf_write_sector(void *context, uint64_t sector_address, bool negative
if(ctx->calculating_sha256) ctx->calculating_sha256 = false;
// Disable SpamSum calculation
if(ctx->calculating_spamsum) ctx->calculating_spamsum = false;
// Disable BLAKE3 calculation
if(ctx->calculating_blake3) ctx->calculating_blake3 = false;
}
else
ctx->last_written_block = sector_address;
@@ -176,7 +178,10 @@ int32_t aaruf_write_sector(void *context, uint64_t sector_address, bool negative
aaruf_sha256_update(&ctx->sha256_context, data, length);
// Calculate SpamSum on-the-fly if requested and sector is within user sectors (not negative or overflow)
if(ctx->calculating_sha256 && !negative && sector_address <= ctx->imageInfo.Sectors)
aaruf_spamsum_update(&ctx->spamsum_context, data, length);
aaruf_spamsum_update(ctx->spamsum_context, data, length);
// Calculate BLAKE3 on-the-fly if requested and sector is within user sectors (not negative or overflow)
if(ctx->calculating_blake3 && !negative && sector_address <= ctx->imageInfo.Sectors)
blake3_hasher_update(ctx->blake3_context, data, length);
// TODO: If optical disc check track