mirror of
https://github.com/aaru-dps/libaaruformat.git
synced 2026-02-14 21:23:36 +00:00
Compare commits
6 Commits
v1.0.0-alp
...
benchmark
| Author | SHA1 | Date | |
|---|---|---|---|
|
8b43155399
|
|||
|
8e9c3e46a1
|
|||
|
1d72f44783
|
|||
|
97a9e8753e
|
|||
|
1a16c7b6e2
|
|||
|
e598dd97c7
|
@@ -346,5 +346,6 @@ endif()
|
||||
if(NOT AARU_BUILD_PACKAGE)
|
||||
add_subdirectory(tests)
|
||||
add_subdirectory(tool)
|
||||
add_subdirectory(benchmark)
|
||||
add_subdirectory(docs/spec)
|
||||
endif()
|
||||
|
||||
175
benchmark/CMakeLists.txt
Normal file
175
benchmark/CMakeLists.txt
Normal file
@@ -0,0 +1,175 @@
|
||||
# Benchmark tool project
|
||||
project(aarubenchmark C)
|
||||
|
||||
# Find required compression libraries
|
||||
find_package(PkgConfig)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(ZSTD libzstd)
|
||||
endif()
|
||||
|
||||
# Fallback to find_package if pkg-config fails
|
||||
if(NOT ZSTD_FOUND)
|
||||
find_package(ZSTD QUIET)
|
||||
if(ZSTD_FOUND)
|
||||
set(ZSTD_LIBRARIES zstd::libzstd_static)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Download and build bzip3 from git using FetchContent
|
||||
include(FetchContent)
|
||||
|
||||
message(STATUS "Fetching bzip3 from GitHub...")
|
||||
|
||||
FetchContent_Declare(
|
||||
bzip3
|
||||
GIT_REPOSITORY https://github.com/kspalaiologos/bzip3.git
|
||||
GIT_TAG master
|
||||
GIT_SHALLOW TRUE
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -DSOURCE_DIR=<SOURCE_DIR> -P ${CMAKE_CURRENT_SOURCE_DIR}/patch_bzip3.cmake
|
||||
)
|
||||
|
||||
# Configure bzip3 options
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries" FORCE)
|
||||
set(BZIP3_ENABLE_ARCH_NATIVE ON CACHE BOOL "Enable CPU-specific optimizations" FORCE)
|
||||
set(BZIP3_BUILD_APPS OFF CACHE BOOL "Build bzip3 command-line applications" FORCE)
|
||||
|
||||
# Make bzip3 available - this will run their CMakeLists.txt (now patched)
|
||||
FetchContent_MakeAvailable(bzip3)
|
||||
|
||||
# Check if bzip3 target was created
|
||||
if(TARGET bz3)
|
||||
set(BZ3_AVAILABLE TRUE)
|
||||
set(BZ3_LIBRARY bz3)
|
||||
message(STATUS "Bzip3: Built from source (FetchContent)")
|
||||
else()
|
||||
set(BZ3_AVAILABLE FALSE)
|
||||
message(STATUS "Bzip3: Failed to build from source")
|
||||
endif()
|
||||
|
||||
# Download and build Brotli from git using FetchContent
|
||||
message(STATUS "Fetching Brotli from GitHub...")
|
||||
|
||||
FetchContent_Declare(
|
||||
brotli
|
||||
GIT_REPOSITORY https://github.com/google/brotli.git
|
||||
GIT_TAG v1.1.0
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
# Configure Brotli options - build static libraries only
|
||||
set(BROTLI_DISABLE_TESTS ON CACHE BOOL "Disable Brotli tests" FORCE)
|
||||
set(BROTLI_BUNDLED_MODE ON CACHE BOOL "Enable bundled mode" FORCE)
|
||||
|
||||
# Make Brotli available
|
||||
FetchContent_MakeAvailable(brotli)
|
||||
|
||||
# Check if Brotli targets were created
|
||||
if(TARGET brotlienc AND TARGET brotlidec AND TARGET brotlicommon)
|
||||
set(BROTLI_AVAILABLE TRUE)
|
||||
message(STATUS "Brotli: Built from source (FetchContent)")
|
||||
else()
|
||||
set(BROTLI_AVAILABLE FALSE)
|
||||
message(STATUS "Brotli: Failed to build from source")
|
||||
endif()
|
||||
|
||||
|
||||
# Benchmark executable
|
||||
add_executable(aarubenchmark
|
||||
benchmark.c
|
||||
benchmark.h
|
||||
image_ops.c
|
||||
compression_benchmark.c
|
||||
compression.c
|
||||
compression.h
|
||||
)
|
||||
|
||||
# Set C as the linker language
|
||||
set_target_properties(aarubenchmark PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
# Set up include directories for accessing library headers
|
||||
target_include_directories(aarubenchmark PRIVATE
|
||||
${CMAKE_SOURCE_DIR}/include
|
||||
${CMAKE_SOURCE_DIR}/3rdparty/BLAKE3
|
||||
${CMAKE_SOURCE_DIR}/3rdparty/lzma-21.03beta/C
|
||||
${CMAKE_SOURCE_DIR}/3rdparty/xxHash
|
||||
)
|
||||
|
||||
# Add ZSTD include if found
|
||||
if(ZSTD_FOUND)
|
||||
target_include_directories(aarubenchmark PRIVATE ${ZSTD_INCLUDE_DIRS})
|
||||
target_compile_definitions(aarubenchmark PRIVATE HAVE_ZSTD=1)
|
||||
endif()
|
||||
|
||||
# Add bzip3 compile definition if available (includes come from bz3 target)
|
||||
if(BZ3_AVAILABLE)
|
||||
target_compile_definitions(aarubenchmark PRIVATE HAVE_BZ3=1)
|
||||
endif()
|
||||
|
||||
# Add Brotli compile definition and includes if available
|
||||
if(BROTLI_AVAILABLE)
|
||||
target_compile_definitions(aarubenchmark PRIVATE HAVE_BROTLI=1)
|
||||
# Get include directories from FetchContent
|
||||
FetchContent_GetProperties(brotli SOURCE_DIR BROTLI_SOURCE_DIR)
|
||||
target_include_directories(aarubenchmark PRIVATE ${BROTLI_SOURCE_DIR}/c/include)
|
||||
endif()
|
||||
|
||||
# Link to the main library and compression libraries
|
||||
target_link_libraries(aarubenchmark
|
||||
PRIVATE
|
||||
# Link to the aaruformat library
|
||||
aaruformat
|
||||
)
|
||||
|
||||
# Link ZSTD if available
|
||||
if(ZSTD_FOUND)
|
||||
if(ZSTD_LINK_LIBRARIES)
|
||||
target_link_libraries(aarubenchmark PRIVATE ${ZSTD_LINK_LIBRARIES})
|
||||
elseif(ZSTD_LIBRARIES)
|
||||
target_link_libraries(aarubenchmark PRIVATE ${ZSTD_LIBRARIES})
|
||||
else()
|
||||
# Fallback: try to find the library directly
|
||||
find_library(ZSTD_LIB NAMES zstd libzstd PATHS /usr/local/lib /opt/homebrew/lib)
|
||||
if(ZSTD_LIB)
|
||||
target_link_libraries(aarubenchmark PRIVATE ${ZSTD_LIB})
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Link bzip3 if available
|
||||
if(BZ3_AVAILABLE)
|
||||
target_link_libraries(aarubenchmark PRIVATE ${BZ3_LIBRARY})
|
||||
endif()
|
||||
|
||||
# Link Brotli if available
|
||||
if(BROTLI_AVAILABLE)
|
||||
target_link_libraries(aarubenchmark PRIVATE brotlienc brotlidec brotlicommon)
|
||||
endif()
|
||||
|
||||
|
||||
# On Linux, enable GNU/POSIX feature test macros
|
||||
if(UNIX AND NOT APPLE)
|
||||
target_compile_definitions(aarubenchmark PRIVATE _GNU_SOURCE=1 _POSIX_C_SOURCE=200809L)
|
||||
endif()
|
||||
|
||||
# Set output directory
|
||||
set_target_properties(aarubenchmark PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
|
||||
)
|
||||
|
||||
message(STATUS "Configured aarubenchmark tool")
|
||||
if(ZSTD_FOUND)
|
||||
message(STATUS " - Zstd support: YES")
|
||||
else()
|
||||
message(STATUS " - Zstd support: NO (will be disabled)")
|
||||
endif()
|
||||
if(BZ3_AVAILABLE)
|
||||
message(STATUS " - Bzip3 support: YES (downloaded from git)")
|
||||
else()
|
||||
message(STATUS " - Bzip3 support: NO (will be disabled)")
|
||||
endif()
|
||||
if(BROTLI_AVAILABLE)
|
||||
message(STATUS " - Brotli support: YES (downloaded from git)")
|
||||
else()
|
||||
message(STATUS " - Brotli support: NO (will be disabled)")
|
||||
endif()
|
||||
|
||||
134
benchmark/README.md
Normal file
134
benchmark/README.md
Normal file
@@ -0,0 +1,134 @@
|
||||
# Aaru Format Compression Benchmark Tool
|
||||
|
||||
This tool benchmarks different compression algorithms on Aaru format images without modifying the library itself.
|
||||
|
||||
## Purpose
|
||||
|
||||
The benchmark tool helps determine the most effective compression algorithm for specific image types by:
|
||||
- Testing multiple compression algorithms (LZMA, Bzip3, Zstd)
|
||||
- Measuring compression ratios and processing times
|
||||
- Providing detailed performance metrics
|
||||
|
||||
## Features
|
||||
|
||||
- **Non-invasive**: Does not modify library code - operates on internal structures directly
|
||||
- **Comprehensive**: Tests all major compression algorithms
|
||||
- **Progressive**: Shows real-time progress bars for each operation
|
||||
- **Isolated**: Compression algorithms are confined to the benchmark tool only
|
||||
- **Detailed results**: Provides comparison tables with sizes, ratios, and timing
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
aarubenchmark <input.aaruformat>
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
```bash
|
||||
aarubenchmark myimage.aaruformat
|
||||
```
|
||||
|
||||
This will:
|
||||
1. Open and analyze the input image
|
||||
2. For each compression algorithm:
|
||||
- Decompress all data blocks
|
||||
- Recompress with the test algorithm
|
||||
- Write a new image file (e.g., `myimage.aaruformat.LZMA.aaruformat`)
|
||||
- Display progress and timing
|
||||
3. Show a summary table comparing all algorithms
|
||||
|
||||
## Output
|
||||
|
||||
The tool creates test images for each algorithm:
|
||||
- `input.aaruformat.LZMA.aaruformat`
|
||||
- `input.aaruformat.Bzip3.aaruformat` (if bzip3 is available)
|
||||
- `input.aaruformat.Zstd.aaruformat` (if zstd is available)
|
||||
|
||||
And displays a comparison table:
|
||||
```
|
||||
Algorithm Uncompressed Compressed Ratio Time (s)
|
||||
---------- --------------- --------------- ------------ ----------
|
||||
LZMA 1.50 GB 450.23 MB 30.01% 45.23
|
||||
Bzip3 1.50 GB 425.67 MB 28.38% 52.17
|
||||
Zstd 1.50 GB 475.89 MB 31.73% 12.45
|
||||
|
||||
Best compression: Bzip3
|
||||
Fastest: Zstd
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
### Mandatory
|
||||
- libaaruformat (automatically linked)
|
||||
- LZMA support (built into library)
|
||||
|
||||
### Optional (Automatic)
|
||||
- **bzip3**: Automatically downloaded and built from GitHub
|
||||
- No manual installation needed
|
||||
- CMake fetches and builds it automatically
|
||||
|
||||
### Optional (Manual)
|
||||
- **zstd**: For Zstd compression testing
|
||||
- Install: `brew install zstd` (macOS) or `apt install libzstd-dev` (Linux)
|
||||
|
||||
Algorithms without available libraries will be skipped automatically.
|
||||
|
||||
## Building
|
||||
|
||||
The benchmark tool is built automatically when building the main project:
|
||||
|
||||
```bash
|
||||
cd libaaruformat
|
||||
mkdir build && cd build
|
||||
cmake ..
|
||||
make
|
||||
```
|
||||
|
||||
The compiled binary will be in `build/bin/aarubenchmark`.
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Opens image manually**: Reads header and index using structs directly
|
||||
2. **Iterates blocks**: Uses index entries to locate all data blocks
|
||||
3. **Decompresses**: Uses library's LZMA decoder to decompress existing data
|
||||
4. **Recompresses**: Applies test algorithm with optimal settings
|
||||
5. **Writes output**: Creates proper header and index for the new image
|
||||
6. **Measures**: Records timing and file sizes for comparison
|
||||
|
||||
### Isolation
|
||||
|
||||
The benchmark tool is completely isolated:
|
||||
- No library code is modified
|
||||
- Compression algorithms are in `benchmark/` directory only
|
||||
- Only the main CMakeLists.txt is updated to include the benchmark subdirectory
|
||||
- The library continues to work exactly as before
|
||||
|
||||
### Memory Safety
|
||||
|
||||
- All allocations are checked
|
||||
- Proper cleanup on errors
|
||||
- No memory leaks (validated with valgrind)
|
||||
|
||||
## Limitations
|
||||
|
||||
- Only tests data block compression (not metadata or other blocks)
|
||||
- Requires sufficient disk space for test output files
|
||||
- Only works with Aaru format version 2 images
|
||||
|
||||
## Contributing
|
||||
|
||||
To add a new compression algorithm:
|
||||
|
||||
1. Add the algorithm enum to `benchmark.h`
|
||||
2. Implement compression function in `compression.c`
|
||||
3. Update `get_compression_type()` to return a unique identifier
|
||||
4. Add library dependency to `CMakeLists.txt`
|
||||
5. Update this README
|
||||
|
||||
## License
|
||||
|
||||
Same as libaaruformat - LGPL 2.1 or later.
|
||||
|
||||
506
benchmark/benchmark.c
Normal file
506
benchmark/benchmark.c
Normal file
@@ -0,0 +1,506 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2026 Natalia Portillo.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "benchmark.h"
|
||||
#include <inttypes.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <aaruformat/structs/data.h>
|
||||
#include <aaruformat/structs/ddt.h>
|
||||
#include <aaruformat/structs/flux.h>
|
||||
#include <aaruformat/structs/index.h>
|
||||
#include "compression.h"
|
||||
|
||||
#define PROGRESS_BAR_WIDTH 40
|
||||
|
||||
// ANSI color codes
|
||||
#define ANSI_RESET "\033[0m"
|
||||
#define ANSI_BOLD "\033[1m"
|
||||
#define ANSI_DIM "\033[2m"
|
||||
#define ANSI_RED "\033[31m"
|
||||
#define ANSI_GREEN "\033[32m"
|
||||
#define ANSI_YELLOW "\033[33m"
|
||||
#define ANSI_BLUE "\033[34m"
|
||||
#define ANSI_MAGENTA "\033[35m"
|
||||
#define ANSI_CYAN "\033[36m"
|
||||
#define ANSI_WHITE "\033[37m"
|
||||
#define ANSI_BG_BLUE "\033[44m"
|
||||
#define ANSI_BG_GREEN "\033[42m"
|
||||
#define ANSI_CLEAR_LINE "\033[2K"
|
||||
|
||||
// Check if terminal supports colors
|
||||
static int use_colors = 0;
|
||||
|
||||
static void init_colors(void)
|
||||
{
|
||||
// Check if stdout is a terminal and TERM is set
|
||||
use_colors = isatty(STDOUT_FILENO) && getenv("TERM") != NULL;
|
||||
}
|
||||
|
||||
// Color helper function
|
||||
static const char* clr(const char* code)
|
||||
{
|
||||
return use_colors ? code : "";
|
||||
}
|
||||
|
||||
// External library functions
|
||||
extern uint64_t aaruf_crc64_data(const uint8_t *data, size_t length);
|
||||
extern int32_t aaruf_lzma_decode_buffer(uint8_t *dst_buffer, size_t *dst_size, const uint8_t *src_buffer,
|
||||
size_t *src_len, const uint8_t *props, const size_t props_size);
|
||||
|
||||
#define LZMA_PROPERTIES_LENGTH 5
|
||||
|
||||
// Print progress bar with colors
|
||||
static void print_progress(const progress_state *state)
|
||||
{
|
||||
if(state->total == 0) return;
|
||||
|
||||
const double percentage = (double)state->current / (double)state->total * 100.0;
|
||||
const int filled = (int)(percentage / 100.0 * PROGRESS_BAR_WIDTH);
|
||||
|
||||
printf("\r%s%s%s ", clr(ANSI_CLEAR_LINE), clr(ANSI_CYAN), state->label);
|
||||
printf("%s[", clr(ANSI_RESET));
|
||||
|
||||
// Draw progress bar with gradient effect
|
||||
for(int i = 0; i < PROGRESS_BAR_WIDTH; i++)
|
||||
{
|
||||
if(i < filled)
|
||||
printf("%s█", clr(ANSI_GREEN));
|
||||
else if(i == filled && state->current < state->total)
|
||||
printf("%s▓", clr(ANSI_YELLOW));
|
||||
else
|
||||
printf("%s░", clr(ANSI_DIM));
|
||||
}
|
||||
|
||||
printf("%s] %s%6.1f%%%s", clr(ANSI_RESET), clr(ANSI_BOLD), percentage, clr(ANSI_RESET));
|
||||
fflush(stdout);
|
||||
|
||||
if(state->current >= state->total) printf("\n");
|
||||
}
|
||||
|
||||
// Get current time in nanoseconds
|
||||
static uint64_t get_time_ns(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
|
||||
}
|
||||
|
||||
// Convert nanoseconds to seconds
|
||||
static double ns_to_seconds(const uint64_t ns) { return (double)ns / 1000000000.0; }
|
||||
|
||||
// Format bytes as human-readable string with optional color
|
||||
static void format_bytes(const uint64_t bytes, char *buffer, const size_t buffer_size)
|
||||
{
|
||||
if(bytes < 1024)
|
||||
snprintf(buffer, buffer_size, "%" PRIu64 " B", bytes);
|
||||
else if(bytes < 1024 * 1024)
|
||||
snprintf(buffer, buffer_size, "%.2f KiB", (double)bytes / 1024.0);
|
||||
else if(bytes < 1024 * 1024 * 1024)
|
||||
snprintf(buffer, buffer_size, "%.2f MiB", (double)bytes / 1024.0 / 1024.0);
|
||||
else
|
||||
snprintf(buffer, buffer_size, "%.2f GiB", (double)bytes / 1024.0 / 1024.0 / 1024.0);
|
||||
}
|
||||
|
||||
// Print a section header
|
||||
static void print_section_header(const char *title)
|
||||
{
|
||||
printf("\n%s%s══════════════════════════════════════════════════════════════%s\n",
|
||||
clr(ANSI_BOLD), clr(ANSI_CYAN), clr(ANSI_RESET));
|
||||
printf("%s%s %s%s\n", clr(ANSI_BOLD), clr(ANSI_WHITE), title, clr(ANSI_RESET));
|
||||
printf("%s%s══════════════════════════════════════════════════════════════%s\n\n",
|
||||
clr(ANSI_BOLD), clr(ANSI_CYAN), clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Print a subsection header
|
||||
static void print_subsection_header(const char *title)
|
||||
{
|
||||
printf("%s%s▶ %s%s\n", clr(ANSI_BOLD), clr(ANSI_YELLOW), title, clr(ANSI_RESET));
|
||||
printf("%s──────────────────────────────────────────────────────────────%s\n",
|
||||
clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Print a key-value pair
|
||||
static void print_info(const char *key, const char *value)
|
||||
{
|
||||
printf(" %s%-24s%s %s%s%s\n", clr(ANSI_DIM), key, clr(ANSI_RESET), clr(ANSI_WHITE), value, clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Print a key-value pair with numeric value
|
||||
static void print_info_num(const char *key, uint64_t value)
|
||||
{
|
||||
printf(" %s%-24s%s %s%'" PRIu64 "%s\n", clr(ANSI_DIM), key, clr(ANSI_RESET), clr(ANSI_WHITE), value, clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Get color for compression ratio
|
||||
static const char* get_ratio_color(double ratio)
|
||||
{
|
||||
if(ratio < 50.0) return ANSI_GREEN;
|
||||
if(ratio < 70.0) return ANSI_YELLOW;
|
||||
return ANSI_RED;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// Initialize color support
|
||||
init_colors();
|
||||
|
||||
// Set locale for thousands separator in printf
|
||||
setlocale(LC_NUMERIC, "");
|
||||
|
||||
// Print banner
|
||||
printf("\n%s%s", clr(ANSI_BOLD), clr(ANSI_CYAN));
|
||||
printf(" ╔═══════════════════════════════════════════════════════╗\n");
|
||||
printf(" ║ %sAaru Format Compression Benchmark Tool%s ║\n", clr(ANSI_WHITE), clr(ANSI_CYAN));
|
||||
printf(" ╚═══════════════════════════════════════════════════════╝%s\n\n", clr(ANSI_RESET));
|
||||
|
||||
if(argc < 2)
|
||||
{
|
||||
printf("%sUsage:%s %s <input.aaruformat>\n\n", clr(ANSI_BOLD), clr(ANSI_RESET), argv[0]);
|
||||
printf("%sDescription:%s\n", clr(ANSI_BOLD), clr(ANSI_RESET));
|
||||
printf(" Benchmark compression algorithms on Aaru format images.\n\n");
|
||||
printf("%sAlgorithms tested:%s\n", clr(ANSI_BOLD), clr(ANSI_RESET));
|
||||
printf(" %s•%s LZMA %s(high compression, slow)%s\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s•%s Bzip3 %s(high compression, medium speed)%s\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s•%s Brotli %s(good compression, medium speed)%s\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s•%s Zstd %s(good compression, fast)%s\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s•%s Zstd+Dict %s(better compression with trained dictionary)%s\n\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char *input_path = argv[1];
|
||||
|
||||
// Open and analyze input image
|
||||
print_section_header("Image Analysis");
|
||||
|
||||
printf(" %sOpening:%s %s%s%s\n", clr(ANSI_DIM), clr(ANSI_RESET), clr(ANSI_WHITE), input_path, clr(ANSI_RESET));
|
||||
|
||||
image_info info;
|
||||
if(open_image(input_path, &info) != 0)
|
||||
{
|
||||
printf("\n %s✗ Failed to open image%s\n", clr(ANSI_RED), clr(ANSI_RESET));
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf(" %s✓ Image opened successfully%s\n\n", clr(ANSI_GREEN), clr(ANSI_RESET));
|
||||
|
||||
char size_str[64];
|
||||
format_bytes(info.total_uncompressed_size, size_str, sizeof(size_str));
|
||||
|
||||
char version_str[32];
|
||||
snprintf(version_str, sizeof(version_str), "%u.%u", info.major_version, info.minor_version);
|
||||
print_info("Format Version:", version_str);
|
||||
print_info_num("Block Count:", info.block_count);
|
||||
print_info("Total Size:", size_str);
|
||||
|
||||
// ===== ZSTD DICTIONARY TRAINING PHASE (runs once for all algorithms) =====
|
||||
zstd_dict_context *dict_ctx = NULL;
|
||||
|
||||
print_section_header("Zstd Dictionary Training");
|
||||
|
||||
uint64_t sample_target_size = info.total_uncompressed_size / 10; // 10%
|
||||
if(sample_target_size > 100 * 1024 * 1024) sample_target_size = 100 * 1024 * 1024; // 100MB max
|
||||
|
||||
uint8_t *sample_buffer = malloc(sample_target_size);
|
||||
if(sample_buffer == NULL)
|
||||
{
|
||||
printf(" %s⚠ Cannot allocate sample buffer for dictionary training%s\n", clr(ANSI_YELLOW), clr(ANSI_RESET));
|
||||
}
|
||||
else
|
||||
{
|
||||
uint64_t sample_collected = 0;
|
||||
char target_str[64];
|
||||
format_bytes(sample_target_size, target_str, sizeof(target_str));
|
||||
print_info("Target Sample Size:", target_str);
|
||||
|
||||
// Collect samples from first blocks
|
||||
for(uint64_t i = 0; i < info.block_count && sample_collected < sample_target_size; i++)
|
||||
{
|
||||
IndexEntry *entry = (IndexEntry *)info.index_entries + i;
|
||||
long block_start = entry->offset;
|
||||
|
||||
if(fseek(info.file, block_start, SEEK_SET) != 0) continue;
|
||||
|
||||
uint32_t identifier;
|
||||
if(fread(&identifier, 1, sizeof(uint32_t), info.file) != sizeof(uint32_t)) continue;
|
||||
|
||||
// Collect from benchmarkable blocks
|
||||
if(identifier == 0x4B4C4244 || // DataBlock
|
||||
identifier == 0x2A544444 || // DeDuplicationTable (v1)
|
||||
identifier == 0x32544444 || // DeDuplicationTable2 (v2)
|
||||
identifier == 0x4C505344) // DataStreamPayloadBlock
|
||||
{
|
||||
// Read the full header based on type to get cmpLength and length
|
||||
uint16_t compression;
|
||||
uint64_t cmpLength, length;
|
||||
|
||||
fseek(info.file, block_start, SEEK_SET);
|
||||
|
||||
if(identifier == 0x4B4C4244) // DataBlock
|
||||
{
|
||||
BlockHeader block_header;
|
||||
if(fread(&block_header, 1, sizeof(BlockHeader), info.file) != sizeof(BlockHeader)) continue;
|
||||
compression = block_header.compression;
|
||||
cmpLength = block_header.cmpLength;
|
||||
length = block_header.length;
|
||||
}
|
||||
else if(identifier == 0x2A544444) // DDT v1
|
||||
{
|
||||
DdtHeader ddt_header;
|
||||
if(fread(&ddt_header, 1, sizeof(DdtHeader), info.file) != sizeof(DdtHeader)) continue;
|
||||
compression = ddt_header.compression;
|
||||
cmpLength = ddt_header.cmpLength;
|
||||
length = ddt_header.length;
|
||||
}
|
||||
else if(identifier == 0x32544444) // DDT v2
|
||||
{
|
||||
DdtHeader2 ddt_header2;
|
||||
if(fread(&ddt_header2, 1, sizeof(DdtHeader2), info.file) != sizeof(DdtHeader2)) continue;
|
||||
compression = ddt_header2.compression;
|
||||
cmpLength = ddt_header2.cmpLength;
|
||||
length = ddt_header2.length;
|
||||
}
|
||||
else // DataStreamPayloadBlock (0x4C505344)
|
||||
{
|
||||
DataStreamPayloadHeader payload_header;
|
||||
if(fread(&payload_header, 1, sizeof(DataStreamPayloadHeader), info.file) !=
|
||||
sizeof(DataStreamPayloadHeader))
|
||||
continue;
|
||||
compression = payload_header.compression;
|
||||
cmpLength = payload_header.cmpLength;
|
||||
length = payload_header.length;
|
||||
}
|
||||
|
||||
// Skip empty blocks
|
||||
if(length == 0) continue;
|
||||
|
||||
// Allocate buffer for uncompressed data
|
||||
uint8_t *uncompressed = malloc(length);
|
||||
if(uncompressed == NULL) continue;
|
||||
|
||||
// Decompress data if needed
|
||||
if(compression == 1) // LZMA
|
||||
{
|
||||
// Read LZMA properties
|
||||
uint8_t lzma_props[LZMA_PROPERTIES_LENGTH];
|
||||
if(fread(lzma_props, 1, LZMA_PROPERTIES_LENGTH, info.file) != LZMA_PROPERTIES_LENGTH)
|
||||
{
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Read compressed data
|
||||
const size_t compressed_size = cmpLength - LZMA_PROPERTIES_LENGTH;
|
||||
uint8_t *compressed = malloc(compressed_size);
|
||||
if(compressed == NULL)
|
||||
{
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
if(fread(compressed, 1, compressed_size, info.file) != compressed_size)
|
||||
{
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Decompress
|
||||
size_t decompressed_size = length;
|
||||
size_t cmp_size = compressed_size;
|
||||
if(aaruf_lzma_decode_buffer(uncompressed, &decompressed_size, compressed, &cmp_size, lzma_props,
|
||||
LZMA_PROPERTIES_LENGTH) != 0)
|
||||
{
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
free(compressed);
|
||||
}
|
||||
else if(compression == 0) // Uncompressed
|
||||
{
|
||||
if(fread(uncompressed, 1, length, info.file) != length)
|
||||
{
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else // Skip other compression types
|
||||
{
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Add to sample buffer
|
||||
uint64_t to_copy =
|
||||
(sample_collected + length > sample_target_size) ? (sample_target_size - sample_collected) : length;
|
||||
if(to_copy > 0)
|
||||
{
|
||||
memcpy(sample_buffer + sample_collected, uncompressed, to_copy);
|
||||
sample_collected += to_copy;
|
||||
}
|
||||
|
||||
free(uncompressed);
|
||||
}
|
||||
}
|
||||
|
||||
char collected_str[64], dict_size_str[64];
|
||||
format_bytes(sample_collected, collected_str, sizeof(collected_str));
|
||||
print_info("Collected:", collected_str);
|
||||
|
||||
if(sample_collected > 0)
|
||||
{
|
||||
printf(" %sTraining...%s", clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
fflush(stdout);
|
||||
|
||||
dict_ctx = train_zstd_dictionary(sample_buffer, sample_collected, 512 * 1024); // 512KB dict
|
||||
|
||||
printf("\r%s", clr(ANSI_CLEAR_LINE)); // Clear the "Training..." line
|
||||
|
||||
if(dict_ctx != NULL)
|
||||
{
|
||||
format_bytes(dict_ctx->dict_size, dict_size_str, sizeof(dict_size_str));
|
||||
print_info("Dictionary Size:", dict_size_str);
|
||||
printf(" %s✓ Dictionary trained successfully%s\n", clr(ANSI_GREEN), clr(ANSI_RESET));
|
||||
}
|
||||
else
|
||||
{
|
||||
printf(" %s⚠ Dictionary training failed%s\n", clr(ANSI_YELLOW), clr(ANSI_RESET));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
printf(" %s⚠ No samples collected for dictionary training%s\n", clr(ANSI_YELLOW), clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
free(sample_buffer);
|
||||
}
|
||||
// ===== END DICTIONARY TRAINING =====
|
||||
|
||||
// Test each compression algorithm
|
||||
const compression_algorithm algorithms[] = {COMP_LZMA, COMP_BZIP3, COMP_BROTLI, COMP_ZSTD, COMP_ZSTD};
|
||||
const char *algorithm_names[] = {"LZMA", "Bzip3", "Brotli", "Zstd (no dict)", "Zstd (with dict)"};
|
||||
const size_t algorithm_count = sizeof(algorithms) / sizeof(algorithms[0]);
|
||||
|
||||
benchmark_result results[5];
|
||||
|
||||
print_section_header("Compression Benchmarks");
|
||||
|
||||
for(size_t i = 0; i < algorithm_count; i++)
|
||||
{
|
||||
print_subsection_header(algorithm_names[i]);
|
||||
|
||||
char output_path[512];
|
||||
snprintf(output_path, sizeof(output_path), "%s.%s.aaruformat", input_path, algorithm_names[i]);
|
||||
|
||||
const uint64_t start_time = get_time_ns();
|
||||
|
||||
progress_state progress = {0, info.block_count, {0}};
|
||||
snprintf(progress.label, sizeof(progress.label), "Compressing");
|
||||
|
||||
// Pass dictionary only for fifth run (Zstd with dict)
|
||||
const zstd_dict_context *use_dict = (i == 4) ? dict_ctx : NULL;
|
||||
|
||||
if(benchmark_compression(input_path, output_path, algorithms[i], &info, &results[i], &progress, print_progress,
|
||||
use_dict) != 0)
|
||||
{
|
||||
printf(" %s✗ Failed to benchmark%s\n", clr(ANSI_RED), clr(ANSI_RESET));
|
||||
if(dict_ctx) free_zstd_dictionary(dict_ctx);
|
||||
close_image(&info);
|
||||
return 1;
|
||||
}
|
||||
|
||||
results[i].elapsed_ns = get_time_ns() - start_time;
|
||||
|
||||
// Print results for this algorithm
|
||||
char compressed_str[64];
|
||||
format_bytes(results[i].compressed_size, compressed_str, sizeof(compressed_str));
|
||||
double ratio = (double)results[i].compressed_size / (double)info.total_uncompressed_size * 100.0;
|
||||
|
||||
printf(" %sTime:%s %s%.2f%s seconds\n", clr(ANSI_DIM), clr(ANSI_RESET),
|
||||
clr(ANSI_WHITE), ns_to_seconds(results[i].elapsed_ns), clr(ANSI_RESET));
|
||||
printf(" %sCompressed:%s %s%s%s\n", clr(ANSI_DIM), clr(ANSI_RESET),
|
||||
clr(ANSI_WHITE), compressed_str, clr(ANSI_RESET));
|
||||
printf(" %sRatio:%s %s%s%.2f%%%s\n\n", clr(ANSI_DIM), clr(ANSI_RESET),
|
||||
clr(ANSI_BOLD), clr(get_ratio_color(ratio)), ratio, clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Cleanup dictionary
|
||||
if(dict_ctx != NULL) free_zstd_dictionary(dict_ctx);
|
||||
|
||||
// Print summary table
|
||||
print_section_header("Results Summary");
|
||||
|
||||
// Table header
|
||||
printf(" %s%-16s %20s %20s %12s %10s%s\n", clr(ANSI_BOLD),
|
||||
"Algorithm", "Uncompressed (B)", "Compressed (B)", "Ratio", "Time (s)", clr(ANSI_RESET));
|
||||
printf(" %s────────────────────────────────────────────────────────────────────────────────%s\n",
|
||||
clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
|
||||
for(size_t i = 0; i < algorithm_count; i++)
|
||||
{
|
||||
const double ratio = (double)results[i].compressed_size / (double)info.total_uncompressed_size * 100.0;
|
||||
const double time_s = ns_to_seconds(results[i].elapsed_ns);
|
||||
|
||||
printf(" %s%-16s%s %'20" PRIu64 " %'20" PRIu64 " %s%s%11.2f%%%s %10.2f\n",
|
||||
clr(ANSI_WHITE), algorithm_names[i], clr(ANSI_RESET),
|
||||
info.total_uncompressed_size, results[i].compressed_size,
|
||||
clr(ANSI_BOLD), clr(get_ratio_color(ratio)), ratio, clr(ANSI_RESET), time_s);
|
||||
}
|
||||
|
||||
// Find best compression
|
||||
size_t best_compression_idx = 0;
|
||||
uint64_t best_compressed_size = results[0].compressed_size;
|
||||
for(size_t i = 1; i < algorithm_count; i++)
|
||||
{
|
||||
if(results[i].compressed_size < best_compressed_size)
|
||||
{
|
||||
best_compressed_size = results[i].compressed_size;
|
||||
best_compression_idx = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Find fastest
|
||||
size_t fastest_idx = 0;
|
||||
uint64_t fastest_time = results[0].elapsed_ns;
|
||||
for(size_t i = 1; i < algorithm_count; i++)
|
||||
{
|
||||
if(results[i].elapsed_ns < fastest_time)
|
||||
{
|
||||
fastest_time = results[i].elapsed_ns;
|
||||
fastest_idx = i;
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n %s────────────────────────────────────────────────────────────────────────────────%s\n",
|
||||
clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s🏆 Best Compression:%s %s%s%s\n", clr(ANSI_BOLD), clr(ANSI_RESET),
|
||||
clr(ANSI_GREEN), algorithm_names[best_compression_idx], clr(ANSI_RESET));
|
||||
printf(" %s⚡ Fastest:%s %s%s%s\n\n", clr(ANSI_BOLD), clr(ANSI_RESET),
|
||||
clr(ANSI_CYAN), algorithm_names[fastest_idx], clr(ANSI_RESET));
|
||||
|
||||
close_image(&info);
|
||||
return 0;
|
||||
}
|
||||
80
benchmark/benchmark.h
Normal file
80
benchmark/benchmark.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
 * This file is part of the Aaru Data Preservation Suite.
 * Copyright (c) 2019-2026 Natalia Portillo.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef BENCHMARK_H
#define BENCHMARK_H

#include <stdint.h>
#include <stdio.h>

// Holds a trained Zstd dictionary shared across block compressions.
typedef struct
{
    uint8_t *dict_data; // raw dictionary bytes (heap-allocated)
    size_t   dict_size; // number of valid bytes in dict_data
    uint32_t dict_id;   // dictionary ID reported by Zstd (or a fallback value)
} zstd_dict_context;

// Identifies which compressor a benchmark run exercises.
typedef enum
{
    COMP_LZMA   = 0,
    COMP_BZIP3  = 1,
    COMP_ZSTD   = 2,
    COMP_BROTLI = 3
} compression_algorithm;

// Describes an open Aaru image under benchmark.
typedef struct
{
    FILE    *file;                    // open handle to the image file
    uint8_t  major_version;           // image format major version
    uint8_t  minor_version;           // image format minor version
    uint64_t index_offset;            // file offset of the block index
    uint64_t block_count;             // number of index entries
    uint64_t total_uncompressed_size; // sum of uncompressed block lengths
    void    *index_entries;           // array of IndexEntry (kept opaque here)
} image_info;

// Totals gathered for one algorithm over a whole image.
typedef struct
{
    uint64_t compressed_size; // bytes produced after recompression
    uint64_t elapsed_ns;      // elapsed time in nanoseconds
} benchmark_result;

// State handed to the progress callback while a benchmark runs.
typedef struct
{
    uint64_t current;    // units processed so far
    uint64_t total;      // total units to process
    char     label[256]; // human-readable description of the current phase
} progress_state;

// Invoked periodically with the latest progress_state.
typedef void (*progress_callback)(const progress_state *state);

// Open an Aaru image at `path` and populate `info`. Returns 0 on success.
int open_image(const char *path, image_info *info);

// Release resources held by `info`.
void close_image(image_info *info);

// Recompress the image's blocks with `algorithm`, writing the output image to
// `output_path` and storing measurements in `result`. `dict_ctx` may supply a
// trained Zstd dictionary (used only for COMP_ZSTD). Returns 0 on success.
int benchmark_compression(const char *input_path, const char *output_path, compression_algorithm algorithm,
                          image_info *info, benchmark_result *result, progress_state *progress,
                          progress_callback progress_cb, const zstd_dict_context *dict_ctx);

#endif // BENCHMARK_H
|
||||
380
benchmark/compression.c
Normal file
380
benchmark/compression.c
Normal file
@@ -0,0 +1,380 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2026 Natalia Portillo.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "compression.h"
|
||||
#include <aaruformat/consts.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef HAVE_ZSTD
|
||||
#include <zdict.h>
|
||||
#include <zstd.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_BZ3
|
||||
#include <libbz3.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_BROTLI
|
||||
#include <brotli/encode.h>
|
||||
#include <brotli/decode.h>
|
||||
#endif
|
||||
|
||||
|
||||
// LZMA compression from library
|
||||
extern int32_t aaruf_lzma_encode_buffer(uint8_t *dst_buffer, size_t *dst_size, const uint8_t *src_buffer,
|
||||
size_t src_len, uint8_t *out_props, size_t *out_props_size, int32_t level,
|
||||
uint32_t dict_size, int32_t lc, int32_t lp, int32_t pb, int32_t fb,
|
||||
int32_t num_threads);
|
||||
|
||||
// Compress data using LZMA
|
||||
static int compress_lzma(const uint8_t *input, const size_t input_size, uint8_t **output, size_t *output_size)
|
||||
{
|
||||
// Allocate output buffer (input size * 2 + margin)
|
||||
const size_t max_output_size = input_size * 2 + 65536;
|
||||
uint8_t *buffer = malloc(max_output_size);
|
||||
if(buffer == NULL) return -1;
|
||||
|
||||
uint8_t props[LZMA_PROPERTIES_LENGTH];
|
||||
size_t props_size = LZMA_PROPERTIES_LENGTH;
|
||||
size_t cmp_size = max_output_size;
|
||||
|
||||
// Compress (level 9, 1MB dictionary, standard parameters)
|
||||
if(aaruf_lzma_encode_buffer(buffer, &cmp_size, input, input_size, props, &props_size, 9, 33554432, 4, 0, 2, 273,
|
||||
8) != 0)
|
||||
{
|
||||
free(buffer);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Allocate final buffer with properties prepended
|
||||
*output_size = cmp_size + LZMA_PROPERTIES_LENGTH;
|
||||
*output = malloc(*output_size);
|
||||
if(*output == NULL)
|
||||
{
|
||||
free(buffer);
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(*output, props, LZMA_PROPERTIES_LENGTH);
|
||||
memcpy(*output + LZMA_PROPERTIES_LENGTH, buffer, cmp_size);
|
||||
|
||||
free(buffer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Compress data using Bzip3.
// Returns 0 on success, -1 on failure. Caller must free *output.
static int compress_bzip3(const uint8_t *input, const size_t input_size, uint8_t **output, size_t *output_size)
{
#ifdef HAVE_BZ3
    // 16 MiB blocks, chosen for best compression.
    const int32_t bz3_block_size = 16 * 1024 * 1024;

    struct bz3_state *bz_state = bz3_new(bz3_block_size);
    if(bz_state == NULL) return -1;

    // bz3_encode_block compresses in place, so the buffer must be able to hold
    // the uncompressed input as well as the worst-case encoded size.
    const size_t bound = bz3_bound(input_size);
    *output            = malloc(bound);
    if(*output == NULL)
    {
        bz3_free(bz_state);
        return -1;
    }

    // Seed the buffer with the input, then encode it in place.
    memcpy(*output, input, input_size);

    const int32_t encoded = bz3_encode_block(bz_state, *output, input_size);
    bz3_free(bz_state);

    // Negative return means the encoder failed.
    if(encoded < 0)
    {
        free(*output);
        *output = NULL;
        return -1;
    }

    *output_size = encoded;
    return 0;
#else
    // Built without bzip3 support.
    (void)input;
    (void)input_size;
    *output      = NULL;
    *output_size = 0;
    return -1;
#endif
}
|
||||
|
||||
// Compress data using Zstd at level 19.
// Returns 0 on success, -1 on failure. Caller must free *output.
static int compress_zstd(const uint8_t *input, const size_t input_size, uint8_t **output, size_t *output_size)
{
#ifdef HAVE_ZSTD
    // Upper bound on the compressed size for this input.
    const size_t bound = ZSTD_compressBound(input_size);
    *output            = malloc(bound);
    if(*output == NULL) return -1;

    // Level 19 = maximum compression.
    const size_t written = ZSTD_compress(*output, bound, input, input_size, 19);
    if(ZSTD_isError(written))
    {
        free(*output);
        *output = NULL;
        return -1;
    }

    *output_size = written;
    return 0;
#else
    // Built without Zstd support.
    (void)input;
    (void)input_size;
    *output      = NULL;
    *output_size = 0;
    return -1;
#endif
}
|
||||
|
||||
// Compress data using Brotli.
// Uses quality 8 with a 22-bit (4 MiB) window: quality 11 is extremely slow,
// while quality 8 reaches a similar ratio far faster.
// Returns 0 on success, -1 on failure. Caller must free *output.
static int compress_brotli(const uint8_t *input, const size_t input_size, uint8_t **output, size_t *output_size)
{
#ifdef HAVE_BROTLI
    // BrotliEncoderMaxCompressedSize returns 0 when the input is too large for
    // it to bound; fall back to a conservative estimate in that case.
    const size_t bound       = BrotliEncoderMaxCompressedSize(input_size);
    const size_t buffer_size = (bound != 0) ? bound : input_size + (input_size >> 2) + 10240;

    *output = malloc(buffer_size);
    if(*output == NULL) return -1;

    size_t encoded_size = buffer_size;

    const BROTLI_BOOL ok = BrotliEncoderCompress(8,                   // quality 8 (high, much faster than 11)
                                                 22,                  // lgwin = 22 (4 MiB window)
                                                 BROTLI_DEFAULT_MODE, // generic input
                                                 input_size, input, &encoded_size, *output);

    if(ok != BROTLI_TRUE)
    {
        free(*output);
        *output = NULL;
        return -1;
    }

    *output_size = encoded_size;
    return 0;
#else
    // Built without Brotli support.
    (void)input;
    (void)input_size;
    *output      = NULL;
    *output_size = 0;
    return -1;
#endif
}
|
||||
|
||||
|
||||
// Main compression function
|
||||
int compress_data(const compression_algorithm algorithm, const uint8_t *input, const size_t input_size,
|
||||
uint8_t **output, size_t *output_size)
|
||||
{
|
||||
switch(algorithm)
|
||||
{
|
||||
case COMP_LZMA:
|
||||
return compress_lzma(input, input_size, output, output_size);
|
||||
case COMP_BZIP3:
|
||||
return compress_bzip3(input, input_size, output, output_size);
|
||||
case COMP_ZSTD:
|
||||
return compress_zstd(input, input_size, output, output_size);
|
||||
case COMP_BROTLI:
|
||||
return compress_brotli(input, input_size, output, output_size);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Get compression type for header
|
||||
int get_compression_type(const compression_algorithm algorithm)
|
||||
{
|
||||
switch(algorithm)
|
||||
{
|
||||
case COMP_LZMA:
|
||||
return 1; // LZMA
|
||||
case COMP_BZIP3:
|
||||
return 100; // Custom identifier for bzip3
|
||||
case COMP_ZSTD:
|
||||
return 101; // Custom identifier for zstd
|
||||
case COMP_BROTLI:
|
||||
return 102; // Custom identifier for brotli
|
||||
default:
|
||||
return 0; // None
|
||||
}
|
||||
}
|
||||
|
||||
// Train a Zstd dictionary from samples
|
||||
zstd_dict_context *train_zstd_dictionary(const uint8_t *sample_data, size_t sample_size, size_t dict_size)
|
||||
{
|
||||
#ifdef HAVE_ZSTD
|
||||
if(sample_data == NULL || sample_size == 0) return NULL;
|
||||
|
||||
zstd_dict_context *ctx = malloc(sizeof(zstd_dict_context));
|
||||
if(ctx == NULL) return NULL;
|
||||
|
||||
// Allocate dictionary buffer
|
||||
ctx->dict_data = malloc(dict_size);
|
||||
if(ctx->dict_data == NULL)
|
||||
{
|
||||
free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Train dictionary using Zstd's ZDICT_trainFromBuffer
|
||||
// This analyzes the sample data and creates an optimized dictionary
|
||||
// We need to split the sample into multiple samples for proper training
|
||||
|
||||
// ZDICT has internal constraints on maximum sample size
|
||||
// Split large samples into chunks to work around this
|
||||
// Use reasonable chunk size (e.g., 2MB per sample)
|
||||
const size_t max_sample_size = 2 * 1024 * 1024; // 2MB chunks
|
||||
const size_t num_samples = (sample_size + max_sample_size - 1) / max_sample_size;
|
||||
|
||||
// Allocate array for sample sizes
|
||||
size_t *sample_sizes = malloc(num_samples * sizeof(size_t));
|
||||
if(sample_sizes == NULL)
|
||||
{
|
||||
free(ctx->dict_data);
|
||||
free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Calculate size for each sample
|
||||
size_t remaining = sample_size;
|
||||
for(size_t i = 0; i < num_samples; i++)
|
||||
{
|
||||
sample_sizes[i] = (remaining > max_sample_size) ? max_sample_size : remaining;
|
||||
remaining -= sample_sizes[i];
|
||||
}
|
||||
|
||||
// Use standard ZDICT_trainFromBuffer
|
||||
size_t trained_size = ZDICT_trainFromBuffer(ctx->dict_data, dict_size, sample_data, sample_sizes, num_samples);
|
||||
|
||||
free(sample_sizes);
|
||||
|
||||
if(ZDICT_isError(trained_size))
|
||||
{
|
||||
free(ctx->dict_data);
|
||||
free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ctx->dict_size = trained_size;
|
||||
|
||||
// Get dictionary ID
|
||||
ctx->dict_id = ZSTD_getDictID_fromDict(ctx->dict_data, ctx->dict_size);
|
||||
if(ctx->dict_id == 0)
|
||||
{
|
||||
ctx->dict_id = 0x12345678; // Fallback ID
|
||||
}
|
||||
|
||||
return ctx;
|
||||
#else
|
||||
(void)sample_data;
|
||||
(void)sample_size;
|
||||
(void)dict_size;
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Free dictionary context
|
||||
void free_zstd_dictionary(zstd_dict_context *dict_ctx)
|
||||
{
|
||||
if(dict_ctx == NULL) return;
|
||||
if(dict_ctx->dict_data) free(dict_ctx->dict_data);
|
||||
free(dict_ctx);
|
||||
}
|
||||
|
||||
// Compress data using Zstd with custom dictionary
|
||||
int compress_data_zstd_dict(const uint8_t *input, size_t input_size, uint8_t **output, size_t *output_size,
|
||||
const zstd_dict_context *dict_ctx)
|
||||
{
|
||||
#ifdef HAVE_ZSTD
|
||||
if(dict_ctx == NULL || dict_ctx->dict_data == NULL) return -1;
|
||||
|
||||
// Calculate max output size
|
||||
const size_t max_output_size = ZSTD_compressBound(input_size);
|
||||
*output = malloc(max_output_size);
|
||||
if(*output == NULL) return -1;
|
||||
|
||||
// Create compression context with dictionary
|
||||
ZSTD_CCtx *cctx = ZSTD_createCCtx();
|
||||
if(cctx == NULL)
|
||||
{
|
||||
free(*output);
|
||||
*output = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Use the simpler ZSTD_compress_usingDict which is optimized for dictionary compression
|
||||
// This is more efficient than ZSTD_compress2 with loadDictionary
|
||||
size_t result = ZSTD_compress_usingDict(cctx, *output, max_output_size, input, input_size, dict_ctx->dict_data,
|
||||
dict_ctx->dict_size,
|
||||
19); // Compression level 19
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
|
||||
if(ZSTD_isError(result))
|
||||
{
|
||||
free(*output);
|
||||
*output = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
*output_size = result;
|
||||
return 0;
|
||||
#else
|
||||
(void)input;
|
||||
(void)input_size;
|
||||
(void)output;
|
||||
(void)output_size;
|
||||
(void)dict_ctx;
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
51
benchmark/compression.h
Normal file
51
benchmark/compression.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
 * This file is part of the Aaru Data Preservation Suite.
 * Copyright (c) 2019-2026 Natalia Portillo.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef COMPRESSION_H
#define COMPRESSION_H

#include <stddef.h>
#include <stdint.h>
#include "benchmark.h"

// Compress `input` with the chosen algorithm into a newly allocated buffer.
// Returns 0 on success, -1 on failure; on success the caller frees *output.
int compress_data(compression_algorithm algorithm, const uint8_t *input, size_t input_size, uint8_t **output,
                  size_t *output_size);

// Compress `input` with Zstd using a previously trained dictionary.
// Returns 0 on success, -1 on failure; on success the caller frees *output.
int compress_data_zstd_dict(const uint8_t *input, size_t input_size, uint8_t **output, size_t *output_size,
                            const zstd_dict_context *dict_ctx);

// Train a Zstd dictionary from sample data:
//   sample_data - concatenated uncompressed data from multiple blocks
//   sample_size - total size of the sample data
//   dict_size   - desired dictionary size (typically 16KB)
// Returns a dictionary context on success, NULL on failure.
zstd_dict_context *train_zstd_dictionary(const uint8_t *sample_data, size_t sample_size, size_t dict_size);

// Release a context returned by train_zstd_dictionary (NULL-safe).
void free_zstd_dictionary(zstd_dict_context *dict_ctx);

// Map an algorithm to the compression identifier written in block headers.
int get_compression_type(compression_algorithm algorithm);

#endif // COMPRESSION_H
|
||||
783
benchmark/compression_benchmark.c
Normal file
783
benchmark/compression_benchmark.c
Normal file
@@ -0,0 +1,783 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2026 Natalia Portillo.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <aaruformat/consts.h>
|
||||
#include <aaruformat/structs/data.h>
|
||||
#include <aaruformat/structs/ddt.h>
|
||||
#include <aaruformat/structs/flux.h>
|
||||
#include <aaruformat/structs/header.h>
|
||||
#include <aaruformat/structs/index.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include "benchmark.h"
|
||||
#include "compression.h"
|
||||
|
||||
// CRC64 implementation (from library)
|
||||
extern uint64_t aaruf_crc64_data(const uint8_t *data, size_t length);
|
||||
|
||||
// LZMA decompression (from library)
|
||||
extern int32_t aaruf_lzma_decode_buffer(uint8_t *dst_buffer, size_t *dst_size, const uint8_t *src_buffer,
|
||||
size_t *src_len, const uint8_t *props, const size_t props_size);
|
||||
|
||||
// FLAC decompression (from library)
|
||||
extern size_t aaruf_flac_decode_redbook_buffer(uint8_t *dst_buffer, size_t dst_size, const uint8_t *src_buffer,
|
||||
size_t src_size);
|
||||
|
||||
// CST (Claunia Subchannel Transform) functions (from library)
|
||||
extern int32_t aaruf_cst_transform(const uint8_t *interleaved, uint8_t *sequential, size_t length);
|
||||
extern int32_t aaruf_cst_untransform(const uint8_t *sequential, uint8_t *interleaved, size_t length);
|
||||
|
||||
// Benchmark compression algorithm on an image
|
||||
int benchmark_compression(const char *input_path, const char *output_path, const compression_algorithm algorithm,
|
||||
image_info *info, benchmark_result *result, progress_state *progress,
|
||||
progress_callback progress_cb, const zstd_dict_context *dict_ctx)
|
||||
{
|
||||
memset(result, 0, sizeof(benchmark_result));
|
||||
|
||||
// Open output file
|
||||
FILE *output = fopen(output_path, "wb");
|
||||
if(output == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot create output file %s\n", output_path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read original header from input
|
||||
if(fseek(info->file, 0, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot seek to header\n");
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
AaruHeaderV2 header;
|
||||
if(fread(&header, 1, sizeof(AaruHeaderV2), info->file) != sizeof(AaruHeaderV2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read header\n");
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Write placeholder header (will update later)
|
||||
if(fwrite(&header, 1, sizeof(AaruHeaderV2), output) != sizeof(AaruHeaderV2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write header\n");
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Prepare new index entries
|
||||
IndexEntry *old_entries = (IndexEntry *)info->index_entries;
|
||||
IndexEntry *new_entries = malloc(info->block_count * sizeof(IndexEntry));
|
||||
if(new_entries == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate memory for new index entries\n");
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(new_entries, old_entries, info->block_count * sizeof(IndexEntry));
|
||||
|
||||
// Process each block
|
||||
uint64_t current_position = ftell(output);
|
||||
progress->current = 0;
|
||||
|
||||
for(uint64_t i = 0; i < info->block_count; i++)
|
||||
{
|
||||
IndexEntry *entry = &old_entries[i];
|
||||
|
||||
// Seek to block
|
||||
if(fseek(info->file, entry->offset, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot seek to block %" PRIu64 "\n", i);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read block identifier to determine type
|
||||
uint32_t identifier;
|
||||
long block_start = ftell(info->file);
|
||||
if(fread(&identifier, 1, sizeof(uint32_t), info->file) != sizeof(uint32_t))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read block identifier %" PRIu64 "\n", i);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
fseek(info->file, block_start, SEEK_SET); // Rewind
|
||||
|
||||
// Calculate block size by looking at next entry or EOF
|
||||
size_t block_size;
|
||||
if(i + 1 < info->block_count) { block_size = old_entries[i + 1].offset - entry->offset; }
|
||||
else
|
||||
{
|
||||
// Last block - read to EOF (excluding index)
|
||||
block_size = info->index_offset - entry->offset;
|
||||
}
|
||||
|
||||
// Process blocks with compression: DataBlock, DDT (v1/v2), DataStreamPayload
|
||||
if(identifier == 0x4B4C4244 || // DataBlock
|
||||
identifier == 0x2A544444 || // DeDuplicationTable (v1)
|
||||
identifier == 0x32544444 || // DeDuplicationTable2 (v2)
|
||||
identifier == 0x4C505344) // DataStreamPayloadBlock
|
||||
{
|
||||
// These blocks all share: identifier(4), type(2), compression(2), then data
|
||||
// Read the common header fields
|
||||
uint16_t type, compression;
|
||||
fseek(info->file, block_start + 4, SEEK_SET);
|
||||
if(fread(&type, 1, sizeof(uint16_t), info->file) != sizeof(uint16_t) ||
|
||||
fread(&compression, 1, sizeof(uint16_t), info->file) != sizeof(uint16_t))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read block header fields\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read the full header based on type to get cmpLength and length
|
||||
uint64_t cmpLength, length, cmpCrc64, crc64;
|
||||
size_t header_size;
|
||||
|
||||
fseek(info->file, block_start, SEEK_SET);
|
||||
|
||||
if(identifier == 0x4B4C4244) // DataBlock
|
||||
{
|
||||
BlockHeader block_header;
|
||||
if(fread(&block_header, 1, sizeof(BlockHeader), info->file) != sizeof(BlockHeader))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read BlockHeader\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
header_size = sizeof(BlockHeader);
|
||||
cmpLength = block_header.cmpLength;
|
||||
length = block_header.length;
|
||||
cmpCrc64 = block_header.cmpCrc64;
|
||||
crc64 = block_header.crc64;
|
||||
}
|
||||
else if(identifier == 0x2A544444) // DDT v1
|
||||
{
|
||||
DdtHeader ddt_header;
|
||||
if(fread(&ddt_header, 1, sizeof(DdtHeader), info->file) != sizeof(DdtHeader))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read DdtHeader\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
header_size = sizeof(DdtHeader);
|
||||
compression = ddt_header.compression;
|
||||
cmpLength = ddt_header.cmpLength;
|
||||
length = ddt_header.length;
|
||||
cmpCrc64 = ddt_header.cmpCrc64;
|
||||
crc64 = ddt_header.crc64;
|
||||
}
|
||||
else // DDT v2 (0x32544444) or DataStreamPayload (0x4C505344)
|
||||
{
|
||||
if(identifier == 0x32544444) // DDT v2
|
||||
{
|
||||
DdtHeader2 ddt_header2;
|
||||
if(fread(&ddt_header2, 1, sizeof(DdtHeader2), info->file) != sizeof(DdtHeader2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read DdtHeader2\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
header_size = sizeof(DdtHeader2);
|
||||
compression = ddt_header2.compression;
|
||||
cmpLength = ddt_header2.cmpLength;
|
||||
length = ddt_header2.length;
|
||||
cmpCrc64 = ddt_header2.cmpCrc64;
|
||||
crc64 = ddt_header2.crc64;
|
||||
}
|
||||
else // DataStreamPayloadBlock (0x4C505344)
|
||||
{
|
||||
DataStreamPayloadHeader payload_header;
|
||||
if(fread(&payload_header, 1, sizeof(DataStreamPayloadHeader), info->file) !=
|
||||
sizeof(DataStreamPayloadHeader))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read DataStreamPayloadHeader\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
header_size = sizeof(DataStreamPayloadHeader);
|
||||
compression = payload_header.compression;
|
||||
cmpLength = payload_header.cmpLength;
|
||||
length = payload_header.length;
|
||||
cmpCrc64 = payload_header.cmpCrc64;
|
||||
crc64 = payload_header.crc64;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate buffer for uncompressed data
|
||||
uint8_t *uncompressed = malloc(length);
|
||||
if(uncompressed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate uncompressed buffer\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Decompress data if needed
|
||||
if(compression == 1) // LZMA
|
||||
{
|
||||
// Read LZMA properties
|
||||
uint8_t lzma_props[LZMA_PROPERTIES_LENGTH];
|
||||
if(fread(lzma_props, 1, LZMA_PROPERTIES_LENGTH, info->file) != LZMA_PROPERTIES_LENGTH)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read LZMA properties\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read compressed data
|
||||
const size_t compressed_size = cmpLength - LZMA_PROPERTIES_LENGTH;
|
||||
uint8_t *compressed = malloc(compressed_size);
|
||||
if(compressed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate compressed buffer\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(compressed, 1, compressed_size, info->file) != compressed_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read compressed data\n");
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Decompress
|
||||
size_t decompressed_size = length;
|
||||
size_t cmp_size = compressed_size;
|
||||
if(aaruf_lzma_decode_buffer(uncompressed, &decompressed_size, compressed, &cmp_size, lzma_props,
|
||||
LZMA_PROPERTIES_LENGTH) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: LZMA decompression failed for block %" PRIu64 "\n", i);
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(compressed);
|
||||
}
|
||||
else if(compression == 2) // FLAC
|
||||
{
|
||||
// Read FLAC compressed data
|
||||
uint8_t *compressed = malloc(cmpLength);
|
||||
if(compressed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate compressed buffer for FLAC\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(compressed, 1, cmpLength, info->file) != cmpLength)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read FLAC compressed data\n");
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Decompress FLAC (returns bytes written)
|
||||
const size_t decompressed_size =
|
||||
aaruf_flac_decode_redbook_buffer(uncompressed, length, compressed, cmpLength);
|
||||
if(decompressed_size != length)
|
||||
{
|
||||
fprintf(stderr, "Error: FLAC decompression failed for block %" PRIu64 " (expected %zu, got %zu)\n",
|
||||
i, (size_t)length, decompressed_size);
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(compressed);
|
||||
}
|
||||
else if(compression == 3) // LZMA with Claunia Subchannel Transform
|
||||
{
|
||||
// Read LZMA properties
|
||||
uint8_t lzma_props[LZMA_PROPERTIES_LENGTH];
|
||||
if(fread(lzma_props, 1, LZMA_PROPERTIES_LENGTH, info->file) != LZMA_PROPERTIES_LENGTH)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read LZMA properties for LZMA+CST\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read compressed data
|
||||
const size_t compressed_size = cmpLength - LZMA_PROPERTIES_LENGTH;
|
||||
uint8_t *compressed = malloc(compressed_size);
|
||||
if(compressed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate compressed buffer for LZMA+CST\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(compressed, 1, compressed_size, info->file) != compressed_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read LZMA+CST compressed data\n");
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Decompress LZMA first (into a temporary buffer for CST sequential data)
|
||||
uint8_t *cst_sequential = malloc(length);
|
||||
if(cst_sequential == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate CST sequential buffer\n");
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t decompressed_size = length;
|
||||
size_t cmp_size = compressed_size;
|
||||
if(aaruf_lzma_decode_buffer(cst_sequential, &decompressed_size, compressed, &cmp_size, lzma_props,
|
||||
LZMA_PROPERTIES_LENGTH) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: LZMA decompression failed for LZMA+CST block %" PRIu64 "\n", i);
|
||||
free(cst_sequential);
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(compressed);
|
||||
|
||||
// Untransform CST (sequential -> interleaved)
|
||||
if(aaruf_cst_untransform(cst_sequential, uncompressed, length) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: CST untransform failed for block %" PRIu64 "\n", i);
|
||||
free(cst_sequential);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(cst_sequential);
|
||||
}
|
||||
else if(compression == 0) // None
|
||||
{
|
||||
// Read uncompressed data directly
|
||||
if(fread(uncompressed, 1, length, info->file) != length)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read uncompressed data\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Warning: Unsupported compression type %u for block %" PRIu64
|
||||
" (supported: 0=None, 1=LZMA, 2=FLAC, 3=LZMA+CST), copying as-is\n",
|
||||
compression, i);
|
||||
free(uncompressed);
|
||||
// Fall through to copy block as-is
|
||||
goto copy_block_asis;
|
||||
}
|
||||
|
||||
// Now recompress with the test algorithm (only for DataBlock and DataStreamPayload)
|
||||
uint8_t *recompressed = NULL;
|
||||
size_t recompressed_size = 0;
|
||||
int new_compression = compression; // Default: keep original
|
||||
bool had_cst = (compression == 3); // Track if original had CST
|
||||
|
||||
// Benchmark DataBlocks, DataStreamPayload, and DDT blocks for compression
|
||||
if((identifier == 0x4B4C4244 || identifier == 0x4C505344 || identifier == 0x2A544444 ||
|
||||
identifier == 0x32544444)) // Include DDT v1 and v2
|
||||
{
|
||||
uint8_t *data_to_compress = uncompressed;
|
||||
size_t data_to_compress_size = length;
|
||||
uint8_t *cst_transformed = NULL;
|
||||
|
||||
// If original had CST, apply CST transform before compressing
|
||||
if(had_cst)
|
||||
{
|
||||
cst_transformed = malloc(length);
|
||||
if(cst_transformed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate CST transform buffer\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(aaruf_cst_transform(uncompressed, cst_transformed, length) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: CST transform failed for block %" PRIu64 "\n", i);
|
||||
free(cst_transformed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
data_to_compress = cst_transformed;
|
||||
}
|
||||
|
||||
// Try test compression algorithm on the (possibly CST-transformed) data
|
||||
// Use Zstd with dictionary if available, otherwise use standard compression
|
||||
if(algorithm == COMP_ZSTD && dict_ctx != NULL)
|
||||
{
|
||||
if(compress_data_zstd_dict(data_to_compress, data_to_compress_size, &recompressed,
|
||||
&recompressed_size, dict_ctx) == 0)
|
||||
{
|
||||
// Check if compression is beneficial
|
||||
if(recompressed_size < data_to_compress_size)
|
||||
{
|
||||
new_compression = had_cst ? 3 : 101; // 101 is zstd identifier
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compression not beneficial, use uncompressed
|
||||
free(recompressed);
|
||||
recompressed = uncompressed;
|
||||
recompressed_size = length;
|
||||
new_compression = 0;
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compression failed, use original compression type
|
||||
recompressed = data_to_compress;
|
||||
recompressed_size = data_to_compress_size;
|
||||
new_compression = compression;
|
||||
|
||||
if(had_cst) { cst_transformed = NULL; }
|
||||
else
|
||||
{
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use standard compression algorithm (LZMA, Bzip3, or Zstd without dictionary)
|
||||
if(compress_data(algorithm, data_to_compress, data_to_compress_size, &recompressed,
|
||||
&recompressed_size) == 0)
|
||||
{
|
||||
// Check if compression is beneficial
|
||||
if(recompressed_size < data_to_compress_size)
|
||||
{
|
||||
int base_compression = get_compression_type(algorithm);
|
||||
new_compression = had_cst ? 3 : base_compression;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compression not beneficial, use uncompressed
|
||||
free(recompressed);
|
||||
recompressed = uncompressed;
|
||||
recompressed_size = length;
|
||||
new_compression = 0;
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compression failed, use original compression type
|
||||
recompressed = data_to_compress;
|
||||
recompressed_size = data_to_compress_size;
|
||||
new_compression = compression;
|
||||
|
||||
if(had_cst) { cst_transformed = NULL; }
|
||||
else
|
||||
{
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up CST buffer if not used
|
||||
if(cst_transformed && cst_transformed != recompressed) free(cst_transformed);
|
||||
// Clean up uncompressed if CST was used and not recompressed
|
||||
if(had_cst && uncompressed && uncompressed != recompressed)
|
||||
{
|
||||
free(uncompressed);
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Keep DDT blocks unchanged
|
||||
recompressed = uncompressed;
|
||||
recompressed_size = length;
|
||||
new_compression = compression;
|
||||
uncompressed = NULL;
|
||||
}
|
||||
|
||||
// Write the block back with original header structure
|
||||
new_entries[i].offset = current_position;
|
||||
|
||||
fseek(info->file, block_start, SEEK_SET);
|
||||
uint8_t *header_buffer = malloc(header_size);
|
||||
if(fread(header_buffer, 1, header_size, info->file) != header_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot re-read header\n");
|
||||
free(header_buffer);
|
||||
free(recompressed);
|
||||
if(uncompressed) free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Update compression fields in the header buffer
|
||||
if(identifier == 0x4B4C4244) // DataBlock
|
||||
{
|
||||
BlockHeader *bhdr = (BlockHeader *)header_buffer;
|
||||
bhdr->compression = new_compression;
|
||||
bhdr->cmpLength = recompressed_size;
|
||||
bhdr->cmpCrc64 = aaruf_crc64_data(recompressed, recompressed_size);
|
||||
}
|
||||
else if(identifier == 0x2A544444) // DDT v1
|
||||
{
|
||||
DdtHeader *dhdr = (DdtHeader *)header_buffer;
|
||||
dhdr->compression = new_compression;
|
||||
dhdr->cmpLength = recompressed_size;
|
||||
dhdr->cmpCrc64 = aaruf_crc64_data(recompressed, recompressed_size);
|
||||
}
|
||||
else if(identifier == 0x32544444) // DDT v2
|
||||
{
|
||||
DdtHeader2 *dhdr2 = (DdtHeader2 *)header_buffer;
|
||||
dhdr2->compression = new_compression;
|
||||
dhdr2->cmpLength = recompressed_size;
|
||||
dhdr2->cmpCrc64 = aaruf_crc64_data(recompressed, recompressed_size);
|
||||
}
|
||||
else // DataStreamPayloadBlock (0x4C505344)
|
||||
{
|
||||
DataStreamPayloadHeader *phdr = (DataStreamPayloadHeader *)header_buffer;
|
||||
phdr->compression = new_compression;
|
||||
phdr->cmpLength = recompressed_size;
|
||||
phdr->cmpCrc64 = aaruf_crc64_data(recompressed, recompressed_size);
|
||||
}
|
||||
|
||||
// Write header
|
||||
if(fwrite(header_buffer, 1, header_size, output) != header_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write block header\n");
|
||||
free(header_buffer);
|
||||
free(recompressed);
|
||||
if(uncompressed) free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
free(header_buffer);
|
||||
|
||||
// Write data
|
||||
if(fwrite(recompressed, 1, recompressed_size, output) != recompressed_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write block data\n");
|
||||
free(recompressed);
|
||||
if(uncompressed) free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
current_position += header_size + recompressed_size;
|
||||
// Count all benchmarked blocks in compressed size (DataBlock, DataStreamPayload, DDT v1/v2)
|
||||
if(identifier == 0x4B4C4244 || identifier == 0x4C505344 || identifier == 0x2A544444 ||
|
||||
identifier == 0x32544444)
|
||||
result->compressed_size += recompressed_size;
|
||||
|
||||
free(recompressed);
|
||||
if(uncompressed) free(uncompressed);
|
||||
}
|
||||
else
|
||||
{
|
||||
copy_block_asis:
|
||||
// Copy all other blocks as-is
|
||||
fseek(info->file, block_start, SEEK_SET);
|
||||
|
||||
uint8_t *block_buffer = malloc(block_size);
|
||||
if(block_buffer == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate block buffer\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(block_buffer, 1, block_size, info->file) != block_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read block %" PRIu64 "\n", i);
|
||||
free(block_buffer);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
new_entries[i].offset = current_position;
|
||||
if(fwrite(block_buffer, 1, block_size, output) != block_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write block %" PRIu64 "\n", i);
|
||||
free(block_buffer);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(block_buffer);
|
||||
current_position += block_size;
|
||||
}
|
||||
|
||||
// Update progress
|
||||
progress->current++;
|
||||
if(progress_cb) progress_cb(progress);
|
||||
}
|
||||
|
||||
// ===== WRITE DICTIONARY BLOCK (Zstd only) =====
|
||||
if(algorithm == COMP_ZSTD && dict_ctx != NULL && dict_ctx->dict_data != NULL && dict_ctx->dict_size > 0)
|
||||
{
|
||||
printf("\nWriting Zstd dictionary block: %zu bytes\n", dict_ctx->dict_size);
|
||||
|
||||
// Create a DataBlock with new datatype for dictionary
|
||||
BlockHeader dict_block_header;
|
||||
dict_block_header.identifier = 0x4B4C4244; // DataBlock
|
||||
dict_block_header.type = 99; // Custom datatype for dictionary
|
||||
dict_block_header.compression = 101; // Zstd
|
||||
dict_block_header.sectorSize = 512;
|
||||
dict_block_header.length = dict_ctx->dict_size;
|
||||
dict_block_header.cmpLength = dict_ctx->dict_size; // Not further compressed
|
||||
dict_block_header.crc64 = aaruf_crc64_data(dict_ctx->dict_data, dict_ctx->dict_size);
|
||||
dict_block_header.cmpCrc64 = dict_block_header.crc64;
|
||||
|
||||
// Write dictionary block
|
||||
fseek(output, current_position, SEEK_SET);
|
||||
if(fwrite(&dict_block_header, sizeof(BlockHeader), 1, output) != 1)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write dictionary block header\n");
|
||||
free_zstd_dictionary(dict_ctx);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fwrite(dict_ctx->dict_data, dict_ctx->dict_size, 1, output) != 1)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write dictionary block data\n");
|
||||
free_zstd_dictionary(dict_ctx);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Include dictionary in result size
|
||||
result->compressed_size += dict_ctx->dict_size;
|
||||
current_position += sizeof(BlockHeader) + dict_ctx->dict_size;
|
||||
printf("Dictionary block written at offset %" PRIu64 "\n",
|
||||
current_position - sizeof(BlockHeader) - dict_ctx->dict_size);
|
||||
}
|
||||
// ===== END DICTIONARY BLOCK =====
|
||||
|
||||
// Write new index (use IndexBlock3 to match modern images)
|
||||
const uint64_t new_index_offset = current_position;
|
||||
|
||||
IndexHeader3 index_header;
|
||||
index_header.identifier = 0x33584449; // IndexBlock3
|
||||
index_header.entries = info->block_count;
|
||||
index_header.crc64 = aaruf_crc64_data((uint8_t *)new_entries, info->block_count * sizeof(IndexEntry));
|
||||
index_header.previous = 0; // No chaining for now
|
||||
|
||||
if(fwrite(&index_header, 1, sizeof(IndexHeader3), output) != sizeof(IndexHeader3))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write index header\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fwrite(new_entries, sizeof(IndexEntry), info->block_count, output) != info->block_count)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write index entries\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Update header with new index offset
|
||||
header.indexOffset = new_index_offset;
|
||||
if(fseek(output, 0, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot seek to header\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fwrite(&header, 1, sizeof(AaruHeaderV2), output) != sizeof(AaruHeaderV2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot update header\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
|
||||
// Get final file size
|
||||
struct stat st;
|
||||
if(stat(output_path, &st) == 0) { result->compressed_size = st.st_size; }
|
||||
|
||||
return 0;
|
||||
}
|
||||
183
benchmark/image_ops.c
Normal file
183
benchmark/image_ops.c
Normal file
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2026 Natalia Portillo.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <aaruformat/consts.h>
|
||||
#include <aaruformat/structs/data.h>
|
||||
#include <aaruformat/structs/header.h>
|
||||
#include <aaruformat/structs/index.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "benchmark.h"
|
||||
|
||||
// Open an Aaru format image and read its index
|
||||
int open_image(const char *path, image_info *info)
|
||||
{
|
||||
memset(info, 0, sizeof(image_info));
|
||||
|
||||
info->file = fopen(path, "rb");
|
||||
if(info->file == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot open file %s\n", path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read header
|
||||
AaruHeaderV2 header;
|
||||
if(fread(&header, 1, sizeof(AaruHeaderV2), info->file) != sizeof(AaruHeaderV2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read header\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check magic
|
||||
if(header.identifier != AARU_MAGIC)
|
||||
{
|
||||
fprintf(stderr, "Error: Invalid Aaru format magic\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
info->major_version = header.imageMajorVersion;
|
||||
info->minor_version = header.imageMinorVersion;
|
||||
info->index_offset = header.indexOffset;
|
||||
|
||||
if(info->index_offset == 0)
|
||||
{
|
||||
fprintf(stderr, "Error: No index in image\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Seek to index
|
||||
if(fseek(info->file, info->index_offset, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot seek to index\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read index header - check which version
|
||||
uint32_t index_identifier;
|
||||
if(fread(&index_identifier, 1, sizeof(uint32_t), info->file) != sizeof(uint32_t))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read index identifier\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Rewind to read full header
|
||||
if(fseek(info->file, info->index_offset, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot rewind to index\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint64_t entries_count = 0;
|
||||
|
||||
if(index_identifier == 0x32584449) // IndexBlock2
|
||||
{
|
||||
IndexHeader2 index_header;
|
||||
if(fread(&index_header, 1, sizeof(IndexHeader2), info->file) != sizeof(IndexHeader2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read IndexHeader2\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
entries_count = index_header.entries;
|
||||
}
|
||||
else if(index_identifier == 0x33584449) // IndexBlock3
|
||||
{
|
||||
IndexHeader3 index_header;
|
||||
if(fread(&index_header, 1, sizeof(IndexHeader3), info->file) != sizeof(IndexHeader3))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read IndexHeader3\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
entries_count = index_header.entries;
|
||||
|
||||
// TODO: If we need to handle chained indexes (previous field), we would do it here
|
||||
// For now, we just read the main index
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "Error: Unsupported index version (identifier: 0x%08X)\n", index_identifier);
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
info->block_count = entries_count;
|
||||
|
||||
// Allocate and read index entries
|
||||
const size_t entries_size = info->block_count * sizeof(IndexEntry);
|
||||
info->index_entries = malloc(entries_size);
|
||||
if(info->index_entries == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate memory for index entries\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(info->index_entries, 1, entries_size, info->file) != entries_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read index entries\n");
|
||||
free(info->index_entries);
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Calculate total uncompressed size by scanning data blocks
|
||||
info->total_uncompressed_size = 0;
|
||||
IndexEntry *entries = (IndexEntry *)info->index_entries;
|
||||
|
||||
for(uint64_t i = 0; i < info->block_count; i++)
|
||||
{
|
||||
if(entries[i].blockType == 0x4B4C4244) // DataBlock
|
||||
{
|
||||
// Seek to block and read header
|
||||
if(fseek(info->file, entries[i].offset, SEEK_SET) != 0) continue;
|
||||
|
||||
BlockHeader block_header;
|
||||
if(fread(&block_header, 1, sizeof(BlockHeader), info->file) != sizeof(BlockHeader)) continue;
|
||||
|
||||
info->total_uncompressed_size += block_header.length;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Close image
|
||||
void close_image(image_info *info)
|
||||
{
|
||||
if(info->file != NULL)
|
||||
{
|
||||
fclose(info->file);
|
||||
info->file = NULL;
|
||||
}
|
||||
|
||||
if(info->index_entries != NULL)
|
||||
{
|
||||
free(info->index_entries);
|
||||
info->index_entries = NULL;
|
||||
}
|
||||
}
|
||||
14
benchmark/patch_bzip3.cmake
Normal file
14
benchmark/patch_bzip3.cmake
Normal file
@@ -0,0 +1,14 @@
|
||||
# Patch bzip3's CMakeLists.txt to fix CMAKE_PROJECT_NAME issue.
#
# Invoked in script mode by FetchContent's PATCH_COMMAND as:
#   cmake -DSOURCE_DIR=<bzip3 source dir> -P patch_bzip3.cmake
#
# bzip3's install/export rules reference ${CMAKE_PROJECT_NAME}, which resolves
# to the TOP-LEVEL project when bzip3 is built as a subproject; rewrite those
# references to the literal "bzip3".

if(NOT DEFINED SOURCE_DIR)
  message(FATAL_ERROR "patch_bzip3.cmake: pass the bzip3 source dir with -DSOURCE_DIR=<dir>")
endif()

set(bzip3_lists "${SOURCE_DIR}/CMakeLists.txt")
if(NOT EXISTS "${bzip3_lists}")
  message(FATAL_ERROR "patch_bzip3.cmake: ${bzip3_lists} does not exist")
endif()

file(READ "${bzip3_lists}" original_content)

# Replace ${CMAKE_PROJECT_NAME} with bzip3 in config file lines
string(REPLACE "\${CMAKE_PROJECT_NAME}-config.cmake" "bzip3-config.cmake" patched_content "${original_content}")
string(REPLACE "\${CMAKE_PROJECT_NAME}-targets" "bzip3-targets" patched_content "${patched_content}")
string(REPLACE "NAMESPACE \${CMAKE_PROJECT_NAME}::" "NAMESPACE bzip3::" patched_content "${patched_content}")
string(REPLACE "cmake/\${CMAKE_PROJECT_NAME}" "cmake/bzip3" patched_content "${patched_content}")

# Write back only when something actually changed: FetchContent may re-apply
# the patch on reconfigure, and rewriting identical content would touch the
# file's timestamp and trigger needless re-builds.
if("${patched_content}" STREQUAL "${original_content}")
  message(STATUS "bzip3 CMakeLists.txt already patched; nothing to do")
else()
  file(WRITE "${bzip3_lists}" "${patched_content}")
  message(STATUS "Patched bzip3 CMakeLists.txt to fix CMAKE_PROJECT_NAME references")
endif()
|
||||
|
||||
@@ -419,6 +419,7 @@ AARU_EXPORT void AARU_CALL *aaruf_create(const char *filepath, const uint32_t me
|
||||
ctx->user_data_ddt_header.overflow = overflow_sectors;
|
||||
ctx->user_data_ddt_header.start = 0;
|
||||
ctx->user_data_ddt_header.blockAlignmentShift = parsed_options.block_alignment;
|
||||
ctx->header.blockAlignmentShift = parsed_options.block_alignment;
|
||||
ctx->user_data_ddt_header.dataShift = parsed_options.data_shift;
|
||||
|
||||
if(parsed_options.table_shift == -1 || !table_shift_found)
|
||||
|
||||
@@ -17,8 +17,8 @@
|
||||
*/
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#include <wincrypt.h>
|
||||
#include <windows.h>
|
||||
#include <wincrypt.h>
|
||||
#endif
|
||||
|
||||
#include <aaru.h>
|
||||
|
||||
Reference in New Issue
Block a user