mirror of
https://github.com/aaru-dps/libaaruformat.git
synced 2026-02-14 21:23:36 +00:00
Compare commits
6 Commits
v1.0.0-alp
...
benchmark
| Author | SHA1 | Date | |
|---|---|---|---|
|
8b43155399
|
|||
|
8e9c3e46a1
|
|||
|
1d72f44783
|
|||
|
97a9e8753e
|
|||
|
1a16c7b6e2
|
|||
|
e598dd97c7
|
@@ -346,5 +346,6 @@ endif()
|
||||
if(NOT AARU_BUILD_PACKAGE)
|
||||
add_subdirectory(tests)
|
||||
add_subdirectory(tool)
|
||||
add_subdirectory(benchmark)
|
||||
add_subdirectory(docs/spec)
|
||||
endif()
|
||||
|
||||
175
benchmark/CMakeLists.txt
Normal file
175
benchmark/CMakeLists.txt
Normal file
@@ -0,0 +1,175 @@
|
||||
# Benchmark tool project
|
||||
project(aarubenchmark C)
|
||||
|
||||
# Find required compression libraries
|
||||
find_package(PkgConfig)
|
||||
if(PKG_CONFIG_FOUND)
|
||||
pkg_check_modules(ZSTD libzstd)
|
||||
endif()
|
||||
|
||||
# Fallback to find_package if pkg-config fails
|
||||
if(NOT ZSTD_FOUND)
|
||||
find_package(ZSTD QUIET)
|
||||
if(ZSTD_FOUND)
|
||||
set(ZSTD_LIBRARIES zstd::libzstd_static)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Download and build bzip3 from git using FetchContent
|
||||
include(FetchContent)
|
||||
|
||||
message(STATUS "Fetching bzip3 from GitHub...")
|
||||
|
||||
FetchContent_Declare(
|
||||
bzip3
|
||||
GIT_REPOSITORY https://github.com/kspalaiologos/bzip3.git
|
||||
GIT_TAG master
|
||||
GIT_SHALLOW TRUE
|
||||
PATCH_COMMAND ${CMAKE_COMMAND} -DSOURCE_DIR=<SOURCE_DIR> -P ${CMAKE_CURRENT_SOURCE_DIR}/patch_bzip3.cmake
|
||||
)
|
||||
|
||||
# Configure bzip3 options
|
||||
set(BUILD_SHARED_LIBS OFF CACHE BOOL "Build shared libraries" FORCE)
|
||||
set(BZIP3_ENABLE_ARCH_NATIVE ON CACHE BOOL "Enable CPU-specific optimizations" FORCE)
|
||||
set(BZIP3_BUILD_APPS OFF CACHE BOOL "Build bzip3 command-line applications" FORCE)
|
||||
|
||||
# Make bzip3 available - this will run their CMakeLists.txt (now patched)
|
||||
FetchContent_MakeAvailable(bzip3)
|
||||
|
||||
# Check if bzip3 target was created
|
||||
if(TARGET bz3)
|
||||
set(BZ3_AVAILABLE TRUE)
|
||||
set(BZ3_LIBRARY bz3)
|
||||
message(STATUS "Bzip3: Built from source (FetchContent)")
|
||||
else()
|
||||
set(BZ3_AVAILABLE FALSE)
|
||||
message(STATUS "Bzip3: Failed to build from source")
|
||||
endif()
|
||||
|
||||
# Download and build Brotli from git using FetchContent
|
||||
message(STATUS "Fetching Brotli from GitHub...")
|
||||
|
||||
FetchContent_Declare(
|
||||
brotli
|
||||
GIT_REPOSITORY https://github.com/google/brotli.git
|
||||
GIT_TAG v1.1.0
|
||||
GIT_SHALLOW TRUE
|
||||
)
|
||||
|
||||
# Configure Brotli options - build static libraries only
|
||||
set(BROTLI_DISABLE_TESTS ON CACHE BOOL "Disable Brotli tests" FORCE)
|
||||
set(BROTLI_BUNDLED_MODE ON CACHE BOOL "Enable bundled mode" FORCE)
|
||||
|
||||
# Make Brotli available
|
||||
FetchContent_MakeAvailable(brotli)
|
||||
|
||||
# Check if Brotli targets were created
|
||||
if(TARGET brotlienc AND TARGET brotlidec AND TARGET brotlicommon)
|
||||
set(BROTLI_AVAILABLE TRUE)
|
||||
message(STATUS "Brotli: Built from source (FetchContent)")
|
||||
else()
|
||||
set(BROTLI_AVAILABLE FALSE)
|
||||
message(STATUS "Brotli: Failed to build from source")
|
||||
endif()
|
||||
|
||||
|
||||
# Benchmark executable
|
||||
add_executable(aarubenchmark
|
||||
benchmark.c
|
||||
benchmark.h
|
||||
image_ops.c
|
||||
compression_benchmark.c
|
||||
compression.c
|
||||
compression.h
|
||||
)
|
||||
|
||||
# Set C as the linker language
|
||||
set_target_properties(aarubenchmark PROPERTIES LINKER_LANGUAGE C)
|
||||
|
||||
# Set up include directories for accessing library headers
|
||||
target_include_directories(aarubenchmark PRIVATE
|
||||
${CMAKE_SOURCE_DIR}/include
|
||||
${CMAKE_SOURCE_DIR}/3rdparty/BLAKE3
|
||||
${CMAKE_SOURCE_DIR}/3rdparty/lzma-21.03beta/C
|
||||
${CMAKE_SOURCE_DIR}/3rdparty/xxHash
|
||||
)
|
||||
|
||||
# Add ZSTD include if found
|
||||
if(ZSTD_FOUND)
|
||||
target_include_directories(aarubenchmark PRIVATE ${ZSTD_INCLUDE_DIRS})
|
||||
target_compile_definitions(aarubenchmark PRIVATE HAVE_ZSTD=1)
|
||||
endif()
|
||||
|
||||
# Add bzip3 compile definition if available (includes come from bz3 target)
|
||||
if(BZ3_AVAILABLE)
|
||||
target_compile_definitions(aarubenchmark PRIVATE HAVE_BZ3=1)
|
||||
endif()
|
||||
|
||||
# Add Brotli compile definition and includes if available
|
||||
if(BROTLI_AVAILABLE)
|
||||
target_compile_definitions(aarubenchmark PRIVATE HAVE_BROTLI=1)
|
||||
# Get include directories from FetchContent
|
||||
FetchContent_GetProperties(brotli SOURCE_DIR BROTLI_SOURCE_DIR)
|
||||
target_include_directories(aarubenchmark PRIVATE ${BROTLI_SOURCE_DIR}/c/include)
|
||||
endif()
|
||||
|
||||
# Link to the main library and compression libraries
|
||||
target_link_libraries(aarubenchmark
|
||||
PRIVATE
|
||||
# Link to the aaruformat library
|
||||
aaruformat
|
||||
)
|
||||
|
||||
# Link ZSTD if available
|
||||
if(ZSTD_FOUND)
|
||||
if(ZSTD_LINK_LIBRARIES)
|
||||
target_link_libraries(aarubenchmark PRIVATE ${ZSTD_LINK_LIBRARIES})
|
||||
elseif(ZSTD_LIBRARIES)
|
||||
target_link_libraries(aarubenchmark PRIVATE ${ZSTD_LIBRARIES})
|
||||
else()
|
||||
# Fallback: try to find the library directly
|
||||
find_library(ZSTD_LIB NAMES zstd libzstd PATHS /usr/local/lib /opt/homebrew/lib)
|
||||
if(ZSTD_LIB)
|
||||
target_link_libraries(aarubenchmark PRIVATE ${ZSTD_LIB})
|
||||
endif()
|
||||
endif()
|
||||
endif()
|
||||
|
||||
# Link bzip3 if available
|
||||
if(BZ3_AVAILABLE)
|
||||
target_link_libraries(aarubenchmark PRIVATE ${BZ3_LIBRARY})
|
||||
endif()
|
||||
|
||||
# Link Brotli if available
|
||||
if(BROTLI_AVAILABLE)
|
||||
target_link_libraries(aarubenchmark PRIVATE brotlienc brotlidec brotlicommon)
|
||||
endif()
|
||||
|
||||
|
||||
# On Linux, enable GNU/POSIX feature test macros
|
||||
if(UNIX AND NOT APPLE)
|
||||
target_compile_definitions(aarubenchmark PRIVATE _GNU_SOURCE=1 _POSIX_C_SOURCE=200809L)
|
||||
endif()
|
||||
|
||||
# Set output directory
|
||||
set_target_properties(aarubenchmark PROPERTIES
|
||||
RUNTIME_OUTPUT_DIRECTORY "${CMAKE_BINARY_DIR}/bin"
|
||||
)
|
||||
|
||||
message(STATUS "Configured aarubenchmark tool")
|
||||
if(ZSTD_FOUND)
|
||||
message(STATUS " - Zstd support: YES")
|
||||
else()
|
||||
message(STATUS " - Zstd support: NO (will be disabled)")
|
||||
endif()
|
||||
if(BZ3_AVAILABLE)
|
||||
message(STATUS " - Bzip3 support: YES (downloaded from git)")
|
||||
else()
|
||||
message(STATUS " - Bzip3 support: NO (will be disabled)")
|
||||
endif()
|
||||
if(BROTLI_AVAILABLE)
|
||||
message(STATUS " - Brotli support: YES (downloaded from git)")
|
||||
else()
|
||||
message(STATUS " - Brotli support: NO (will be disabled)")
|
||||
endif()
|
||||
|
||||
134
benchmark/README.md
Normal file
134
benchmark/README.md
Normal file
@@ -0,0 +1,134 @@
|
||||
# Aaru Format Compression Benchmark Tool
|
||||
|
||||
This tool benchmarks different compression algorithms on Aaru format images without modifying the library itself.
|
||||
|
||||
## Purpose
|
||||
|
||||
The benchmark tool helps determine the most effective compression algorithm for specific image types by:
|
||||
- Testing multiple compression algorithms (LZMA, Bzip3, Zstd)
|
||||
- Measuring compression ratios and processing times
|
||||
- Providing detailed performance metrics
|
||||
|
||||
## Features
|
||||
|
||||
- **Non-invasive**: Does not modify library code - operates on internal structures directly
|
||||
- **Comprehensive**: Tests all major compression algorithms
|
||||
- **Progressive**: Shows real-time progress bars for each operation
|
||||
- **Isolated**: Compression algorithms are confined to the benchmark tool only
|
||||
- **Detailed results**: Provides comparison tables with sizes, ratios, and timing
|
||||
|
||||
## Usage
|
||||
|
||||
```bash
|
||||
aarubenchmark <input.aaruformat>
|
||||
```
|
||||
|
||||
### Example
|
||||
|
||||
```bash
|
||||
aarubenchmark myimage.aaruformat
|
||||
```
|
||||
|
||||
This will:
|
||||
1. Open and analyze the input image
|
||||
2. For each compression algorithm:
|
||||
- Decompress all data blocks
|
||||
- Recompress with the test algorithm
|
||||
- Write a new image file (e.g., `myimage.aaruformat.LZMA.aaruformat`)
|
||||
- Display progress and timing
|
||||
3. Show a summary table comparing all algorithms
|
||||
|
||||
## Output
|
||||
|
||||
The tool creates test images for each algorithm:
|
||||
- `input.aaruformat.LZMA.aaruformat`
|
||||
- `input.aaruformat.Bzip3.aaruformat` (if bzip3 is available)
|
||||
- `input.aaruformat.Zstd.aaruformat` (if zstd is available)
|
||||
|
||||
And displays a comparison table:
|
||||
```
|
||||
Algorithm Uncompressed Compressed Ratio Time (s)
|
||||
---------- --------------- --------------- ------------ ----------
|
||||
LZMA 1.50 GB 450.23 MB 30.01% 45.23
|
||||
Bzip3 1.50 GB 425.67 MB 28.38% 52.17
|
||||
Zstd 1.50 GB 475.89 MB 31.73% 12.45
|
||||
|
||||
Best compression: Bzip3
|
||||
Fastest: Zstd
|
||||
```
|
||||
|
||||
## Requirements
|
||||
|
||||
### Mandatory
|
||||
- libaaruformat (automatically linked)
|
||||
- LZMA support (built into library)
|
||||
|
||||
### Optional (Automatic)
|
||||
- **bzip3**: Automatically downloaded and built from GitHub
|
||||
- No manual installation needed
|
||||
- CMake fetches and builds it automatically
|
||||
|
||||
### Optional (Manual)
|
||||
- **zstd**: For Zstd compression testing
|
||||
- Install: `brew install zstd` (macOS) or `apt install libzstd-dev` (Linux)
|
||||
|
||||
Algorithms without available libraries will be skipped automatically.
|
||||
|
||||
## Building
|
||||
|
||||
The benchmark tool is built automatically when building the main project:
|
||||
|
||||
```bash
|
||||
cd libaaruformat
|
||||
mkdir build && cd build
|
||||
cmake ..
|
||||
make
|
||||
```
|
||||
|
||||
The compiled binary will be in `build/bin/aarubenchmark`.
|
||||
|
||||
## Implementation Details
|
||||
|
||||
### How It Works
|
||||
|
||||
1. **Opens image manually**: Reads header and index using structs directly
|
||||
2. **Iterates blocks**: Uses index entries to locate all data blocks
|
||||
3. **Decompresses**: Uses library's LZMA decoder to decompress existing data
|
||||
4. **Recompresses**: Applies test algorithm with optimal settings
|
||||
5. **Writes output**: Creates proper header and index for the new image
|
||||
6. **Measures**: Records timing and file sizes for comparison
|
||||
|
||||
### Isolation
|
||||
|
||||
The benchmark tool is completely isolated:
|
||||
- No library code is modified
|
||||
- Compression algorithms are in `benchmark/` directory only
|
||||
- Only the main CMakeLists.txt is updated to include the benchmark subdirectory
|
||||
- The library continues to work exactly as before
|
||||
|
||||
### Memory Safety
|
||||
|
||||
- All allocations are checked
|
||||
- Proper cleanup on errors
|
||||
- No memory leaks (validated with valgrind)
|
||||
|
||||
## Limitations
|
||||
|
||||
- Only tests data block compression (not metadata or other blocks)
|
||||
- Requires sufficient disk space for test output files
|
||||
- Only works with Aaru format version 2 images
|
||||
|
||||
## Contributing
|
||||
|
||||
To add a new compression algorithm:
|
||||
|
||||
1. Add the algorithm enum to `benchmark.h`
|
||||
2. Implement compression function in `compression.c`
|
||||
3. Update `get_compression_type()` to return a unique identifier
|
||||
4. Add library dependency to `CMakeLists.txt`
|
||||
5. Update this README
|
||||
|
||||
## License
|
||||
|
||||
Same as libaaruformat - LGPL 2.1 or later.
|
||||
|
||||
506
benchmark/benchmark.c
Normal file
506
benchmark/benchmark.c
Normal file
@@ -0,0 +1,506 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2026 Natalia Portillo.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "benchmark.h"
|
||||
#include <inttypes.h>
|
||||
#include <locale.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include <aaruformat/structs/data.h>
|
||||
#include <aaruformat/structs/ddt.h>
|
||||
#include <aaruformat/structs/flux.h>
|
||||
#include <aaruformat/structs/index.h>
|
||||
#include "compression.h"
|
||||
|
||||
#define PROGRESS_BAR_WIDTH 40
|
||||
|
||||
// ANSI color codes
|
||||
#define ANSI_RESET "\033[0m"
|
||||
#define ANSI_BOLD "\033[1m"
|
||||
#define ANSI_DIM "\033[2m"
|
||||
#define ANSI_RED "\033[31m"
|
||||
#define ANSI_GREEN "\033[32m"
|
||||
#define ANSI_YELLOW "\033[33m"
|
||||
#define ANSI_BLUE "\033[34m"
|
||||
#define ANSI_MAGENTA "\033[35m"
|
||||
#define ANSI_CYAN "\033[36m"
|
||||
#define ANSI_WHITE "\033[37m"
|
||||
#define ANSI_BG_BLUE "\033[44m"
|
||||
#define ANSI_BG_GREEN "\033[42m"
|
||||
#define ANSI_CLEAR_LINE "\033[2K"
|
||||
|
||||
// Check if terminal supports colors
|
||||
static int use_colors = 0;
|
||||
|
||||
static void init_colors(void)
|
||||
{
|
||||
// Check if stdout is a terminal and TERM is set
|
||||
use_colors = isatty(STDOUT_FILENO) && getenv("TERM") != NULL;
|
||||
}
|
||||
|
||||
// Color helper function
|
||||
static const char* clr(const char* code)
|
||||
{
|
||||
return use_colors ? code : "";
|
||||
}
|
||||
|
||||
// External library functions
|
||||
extern uint64_t aaruf_crc64_data(const uint8_t *data, size_t length);
|
||||
extern int32_t aaruf_lzma_decode_buffer(uint8_t *dst_buffer, size_t *dst_size, const uint8_t *src_buffer,
|
||||
size_t *src_len, const uint8_t *props, const size_t props_size);
|
||||
|
||||
#define LZMA_PROPERTIES_LENGTH 5
|
||||
|
||||
// Print progress bar with colors
|
||||
static void print_progress(const progress_state *state)
|
||||
{
|
||||
if(state->total == 0) return;
|
||||
|
||||
const double percentage = (double)state->current / (double)state->total * 100.0;
|
||||
const int filled = (int)(percentage / 100.0 * PROGRESS_BAR_WIDTH);
|
||||
|
||||
printf("\r%s%s%s ", clr(ANSI_CLEAR_LINE), clr(ANSI_CYAN), state->label);
|
||||
printf("%s[", clr(ANSI_RESET));
|
||||
|
||||
// Draw progress bar with gradient effect
|
||||
for(int i = 0; i < PROGRESS_BAR_WIDTH; i++)
|
||||
{
|
||||
if(i < filled)
|
||||
printf("%s█", clr(ANSI_GREEN));
|
||||
else if(i == filled && state->current < state->total)
|
||||
printf("%s▓", clr(ANSI_YELLOW));
|
||||
else
|
||||
printf("%s░", clr(ANSI_DIM));
|
||||
}
|
||||
|
||||
printf("%s] %s%6.1f%%%s", clr(ANSI_RESET), clr(ANSI_BOLD), percentage, clr(ANSI_RESET));
|
||||
fflush(stdout);
|
||||
|
||||
if(state->current >= state->total) printf("\n");
|
||||
}
|
||||
|
||||
// Get current time in nanoseconds
|
||||
static uint64_t get_time_ns(void)
|
||||
{
|
||||
struct timespec ts;
|
||||
clock_gettime(CLOCK_MONOTONIC, &ts);
|
||||
return (uint64_t)ts.tv_sec * 1000000000ULL + (uint64_t)ts.tv_nsec;
|
||||
}
|
||||
|
||||
// Convert nanoseconds to seconds
|
||||
static double ns_to_seconds(const uint64_t ns) { return (double)ns / 1000000000.0; }
|
||||
|
||||
// Format bytes as human-readable string with optional color
|
||||
static void format_bytes(const uint64_t bytes, char *buffer, const size_t buffer_size)
|
||||
{
|
||||
if(bytes < 1024)
|
||||
snprintf(buffer, buffer_size, "%" PRIu64 " B", bytes);
|
||||
else if(bytes < 1024 * 1024)
|
||||
snprintf(buffer, buffer_size, "%.2f KiB", (double)bytes / 1024.0);
|
||||
else if(bytes < 1024 * 1024 * 1024)
|
||||
snprintf(buffer, buffer_size, "%.2f MiB", (double)bytes / 1024.0 / 1024.0);
|
||||
else
|
||||
snprintf(buffer, buffer_size, "%.2f GiB", (double)bytes / 1024.0 / 1024.0 / 1024.0);
|
||||
}
|
||||
|
||||
// Print a section header
|
||||
static void print_section_header(const char *title)
|
||||
{
|
||||
printf("\n%s%s══════════════════════════════════════════════════════════════%s\n",
|
||||
clr(ANSI_BOLD), clr(ANSI_CYAN), clr(ANSI_RESET));
|
||||
printf("%s%s %s%s\n", clr(ANSI_BOLD), clr(ANSI_WHITE), title, clr(ANSI_RESET));
|
||||
printf("%s%s══════════════════════════════════════════════════════════════%s\n\n",
|
||||
clr(ANSI_BOLD), clr(ANSI_CYAN), clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Print a subsection header
|
||||
static void print_subsection_header(const char *title)
|
||||
{
|
||||
printf("%s%s▶ %s%s\n", clr(ANSI_BOLD), clr(ANSI_YELLOW), title, clr(ANSI_RESET));
|
||||
printf("%s──────────────────────────────────────────────────────────────%s\n",
|
||||
clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Print a key-value pair
|
||||
static void print_info(const char *key, const char *value)
|
||||
{
|
||||
printf(" %s%-24s%s %s%s%s\n", clr(ANSI_DIM), key, clr(ANSI_RESET), clr(ANSI_WHITE), value, clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Print a key-value pair with numeric value
|
||||
static void print_info_num(const char *key, uint64_t value)
|
||||
{
|
||||
printf(" %s%-24s%s %s%'" PRIu64 "%s\n", clr(ANSI_DIM), key, clr(ANSI_RESET), clr(ANSI_WHITE), value, clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Get color for compression ratio
|
||||
static const char* get_ratio_color(double ratio)
|
||||
{
|
||||
if(ratio < 50.0) return ANSI_GREEN;
|
||||
if(ratio < 70.0) return ANSI_YELLOW;
|
||||
return ANSI_RED;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
// Initialize color support
|
||||
init_colors();
|
||||
|
||||
// Set locale for thousands separator in printf
|
||||
setlocale(LC_NUMERIC, "");
|
||||
|
||||
// Print banner
|
||||
printf("\n%s%s", clr(ANSI_BOLD), clr(ANSI_CYAN));
|
||||
printf(" ╔═══════════════════════════════════════════════════════╗\n");
|
||||
printf(" ║ %sAaru Format Compression Benchmark Tool%s ║\n", clr(ANSI_WHITE), clr(ANSI_CYAN));
|
||||
printf(" ╚═══════════════════════════════════════════════════════╝%s\n\n", clr(ANSI_RESET));
|
||||
|
||||
if(argc < 2)
|
||||
{
|
||||
printf("%sUsage:%s %s <input.aaruformat>\n\n", clr(ANSI_BOLD), clr(ANSI_RESET), argv[0]);
|
||||
printf("%sDescription:%s\n", clr(ANSI_BOLD), clr(ANSI_RESET));
|
||||
printf(" Benchmark compression algorithms on Aaru format images.\n\n");
|
||||
printf("%sAlgorithms tested:%s\n", clr(ANSI_BOLD), clr(ANSI_RESET));
|
||||
printf(" %s•%s LZMA %s(high compression, slow)%s\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s•%s Bzip3 %s(high compression, medium speed)%s\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s•%s Brotli %s(good compression, medium speed)%s\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s•%s Zstd %s(good compression, fast)%s\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s•%s Zstd+Dict %s(better compression with trained dictionary)%s\n\n", clr(ANSI_GREEN), clr(ANSI_RESET), clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
return 1;
|
||||
}
|
||||
|
||||
const char *input_path = argv[1];
|
||||
|
||||
// Open and analyze input image
|
||||
print_section_header("Image Analysis");
|
||||
|
||||
printf(" %sOpening:%s %s%s%s\n", clr(ANSI_DIM), clr(ANSI_RESET), clr(ANSI_WHITE), input_path, clr(ANSI_RESET));
|
||||
|
||||
image_info info;
|
||||
if(open_image(input_path, &info) != 0)
|
||||
{
|
||||
printf("\n %s✗ Failed to open image%s\n", clr(ANSI_RED), clr(ANSI_RESET));
|
||||
return 1;
|
||||
}
|
||||
|
||||
printf(" %s✓ Image opened successfully%s\n\n", clr(ANSI_GREEN), clr(ANSI_RESET));
|
||||
|
||||
char size_str[64];
|
||||
format_bytes(info.total_uncompressed_size, size_str, sizeof(size_str));
|
||||
|
||||
char version_str[32];
|
||||
snprintf(version_str, sizeof(version_str), "%u.%u", info.major_version, info.minor_version);
|
||||
print_info("Format Version:", version_str);
|
||||
print_info_num("Block Count:", info.block_count);
|
||||
print_info("Total Size:", size_str);
|
||||
|
||||
// ===== ZSTD DICTIONARY TRAINING PHASE (runs once for all algorithms) =====
|
||||
zstd_dict_context *dict_ctx = NULL;
|
||||
|
||||
print_section_header("Zstd Dictionary Training");
|
||||
|
||||
uint64_t sample_target_size = info.total_uncompressed_size / 10; // 10%
|
||||
if(sample_target_size > 100 * 1024 * 1024) sample_target_size = 100 * 1024 * 1024; // 100MB max
|
||||
|
||||
uint8_t *sample_buffer = malloc(sample_target_size);
|
||||
if(sample_buffer == NULL)
|
||||
{
|
||||
printf(" %s⚠ Cannot allocate sample buffer for dictionary training%s\n", clr(ANSI_YELLOW), clr(ANSI_RESET));
|
||||
}
|
||||
else
|
||||
{
|
||||
uint64_t sample_collected = 0;
|
||||
char target_str[64];
|
||||
format_bytes(sample_target_size, target_str, sizeof(target_str));
|
||||
print_info("Target Sample Size:", target_str);
|
||||
|
||||
// Collect samples from first blocks
|
||||
for(uint64_t i = 0; i < info.block_count && sample_collected < sample_target_size; i++)
|
||||
{
|
||||
IndexEntry *entry = (IndexEntry *)info.index_entries + i;
|
||||
long block_start = entry->offset;
|
||||
|
||||
if(fseek(info.file, block_start, SEEK_SET) != 0) continue;
|
||||
|
||||
uint32_t identifier;
|
||||
if(fread(&identifier, 1, sizeof(uint32_t), info.file) != sizeof(uint32_t)) continue;
|
||||
|
||||
// Collect from benchmarkable blocks
|
||||
if(identifier == 0x4B4C4244 || // DataBlock
|
||||
identifier == 0x2A544444 || // DeDuplicationTable (v1)
|
||||
identifier == 0x32544444 || // DeDuplicationTable2 (v2)
|
||||
identifier == 0x4C505344) // DataStreamPayloadBlock
|
||||
{
|
||||
// Read the full header based on type to get cmpLength and length
|
||||
uint16_t compression;
|
||||
uint64_t cmpLength, length;
|
||||
|
||||
fseek(info.file, block_start, SEEK_SET);
|
||||
|
||||
if(identifier == 0x4B4C4244) // DataBlock
|
||||
{
|
||||
BlockHeader block_header;
|
||||
if(fread(&block_header, 1, sizeof(BlockHeader), info.file) != sizeof(BlockHeader)) continue;
|
||||
compression = block_header.compression;
|
||||
cmpLength = block_header.cmpLength;
|
||||
length = block_header.length;
|
||||
}
|
||||
else if(identifier == 0x2A544444) // DDT v1
|
||||
{
|
||||
DdtHeader ddt_header;
|
||||
if(fread(&ddt_header, 1, sizeof(DdtHeader), info.file) != sizeof(DdtHeader)) continue;
|
||||
compression = ddt_header.compression;
|
||||
cmpLength = ddt_header.cmpLength;
|
||||
length = ddt_header.length;
|
||||
}
|
||||
else if(identifier == 0x32544444) // DDT v2
|
||||
{
|
||||
DdtHeader2 ddt_header2;
|
||||
if(fread(&ddt_header2, 1, sizeof(DdtHeader2), info.file) != sizeof(DdtHeader2)) continue;
|
||||
compression = ddt_header2.compression;
|
||||
cmpLength = ddt_header2.cmpLength;
|
||||
length = ddt_header2.length;
|
||||
}
|
||||
else // DataStreamPayloadBlock (0x4C505344)
|
||||
{
|
||||
DataStreamPayloadHeader payload_header;
|
||||
if(fread(&payload_header, 1, sizeof(DataStreamPayloadHeader), info.file) !=
|
||||
sizeof(DataStreamPayloadHeader))
|
||||
continue;
|
||||
compression = payload_header.compression;
|
||||
cmpLength = payload_header.cmpLength;
|
||||
length = payload_header.length;
|
||||
}
|
||||
|
||||
// Skip empty blocks
|
||||
if(length == 0) continue;
|
||||
|
||||
// Allocate buffer for uncompressed data
|
||||
uint8_t *uncompressed = malloc(length);
|
||||
if(uncompressed == NULL) continue;
|
||||
|
||||
// Decompress data if needed
|
||||
if(compression == 1) // LZMA
|
||||
{
|
||||
// Read LZMA properties
|
||||
uint8_t lzma_props[LZMA_PROPERTIES_LENGTH];
|
||||
if(fread(lzma_props, 1, LZMA_PROPERTIES_LENGTH, info.file) != LZMA_PROPERTIES_LENGTH)
|
||||
{
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Read compressed data
|
||||
const size_t compressed_size = cmpLength - LZMA_PROPERTIES_LENGTH;
|
||||
uint8_t *compressed = malloc(compressed_size);
|
||||
if(compressed == NULL)
|
||||
{
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
if(fread(compressed, 1, compressed_size, info.file) != compressed_size)
|
||||
{
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Decompress
|
||||
size_t decompressed_size = length;
|
||||
size_t cmp_size = compressed_size;
|
||||
if(aaruf_lzma_decode_buffer(uncompressed, &decompressed_size, compressed, &cmp_size, lzma_props,
|
||||
LZMA_PROPERTIES_LENGTH) != 0)
|
||||
{
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
free(compressed);
|
||||
}
|
||||
else if(compression == 0) // Uncompressed
|
||||
{
|
||||
if(fread(uncompressed, 1, length, info.file) != length)
|
||||
{
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
else // Skip other compression types
|
||||
{
|
||||
free(uncompressed);
|
||||
continue;
|
||||
}
|
||||
|
||||
// Add to sample buffer
|
||||
uint64_t to_copy =
|
||||
(sample_collected + length > sample_target_size) ? (sample_target_size - sample_collected) : length;
|
||||
if(to_copy > 0)
|
||||
{
|
||||
memcpy(sample_buffer + sample_collected, uncompressed, to_copy);
|
||||
sample_collected += to_copy;
|
||||
}
|
||||
|
||||
free(uncompressed);
|
||||
}
|
||||
}
|
||||
|
||||
char collected_str[64], dict_size_str[64];
|
||||
format_bytes(sample_collected, collected_str, sizeof(collected_str));
|
||||
print_info("Collected:", collected_str);
|
||||
|
||||
if(sample_collected > 0)
|
||||
{
|
||||
printf(" %sTraining...%s", clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
fflush(stdout);
|
||||
|
||||
dict_ctx = train_zstd_dictionary(sample_buffer, sample_collected, 512 * 1024); // 512KB dict
|
||||
|
||||
printf("\r%s", clr(ANSI_CLEAR_LINE)); // Clear the "Training..." line
|
||||
|
||||
if(dict_ctx != NULL)
|
||||
{
|
||||
format_bytes(dict_ctx->dict_size, dict_size_str, sizeof(dict_size_str));
|
||||
print_info("Dictionary Size:", dict_size_str);
|
||||
printf(" %s✓ Dictionary trained successfully%s\n", clr(ANSI_GREEN), clr(ANSI_RESET));
|
||||
}
|
||||
else
|
||||
{
|
||||
printf(" %s⚠ Dictionary training failed%s\n", clr(ANSI_YELLOW), clr(ANSI_RESET));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
printf(" %s⚠ No samples collected for dictionary training%s\n", clr(ANSI_YELLOW), clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
free(sample_buffer);
|
||||
}
|
||||
// ===== END DICTIONARY TRAINING =====
|
||||
|
||||
// Test each compression algorithm
|
||||
const compression_algorithm algorithms[] = {COMP_LZMA, COMP_BZIP3, COMP_BROTLI, COMP_ZSTD, COMP_ZSTD};
|
||||
const char *algorithm_names[] = {"LZMA", "Bzip3", "Brotli", "Zstd (no dict)", "Zstd (with dict)"};
|
||||
const size_t algorithm_count = sizeof(algorithms) / sizeof(algorithms[0]);
|
||||
|
||||
benchmark_result results[5];
|
||||
|
||||
print_section_header("Compression Benchmarks");
|
||||
|
||||
for(size_t i = 0; i < algorithm_count; i++)
|
||||
{
|
||||
print_subsection_header(algorithm_names[i]);
|
||||
|
||||
char output_path[512];
|
||||
snprintf(output_path, sizeof(output_path), "%s.%s.aaruformat", input_path, algorithm_names[i]);
|
||||
|
||||
const uint64_t start_time = get_time_ns();
|
||||
|
||||
progress_state progress = {0, info.block_count, {0}};
|
||||
snprintf(progress.label, sizeof(progress.label), "Compressing");
|
||||
|
||||
// Pass dictionary only for fifth run (Zstd with dict)
|
||||
const zstd_dict_context *use_dict = (i == 4) ? dict_ctx : NULL;
|
||||
|
||||
if(benchmark_compression(input_path, output_path, algorithms[i], &info, &results[i], &progress, print_progress,
|
||||
use_dict) != 0)
|
||||
{
|
||||
printf(" %s✗ Failed to benchmark%s\n", clr(ANSI_RED), clr(ANSI_RESET));
|
||||
if(dict_ctx) free_zstd_dictionary(dict_ctx);
|
||||
close_image(&info);
|
||||
return 1;
|
||||
}
|
||||
|
||||
results[i].elapsed_ns = get_time_ns() - start_time;
|
||||
|
||||
// Print results for this algorithm
|
||||
char compressed_str[64];
|
||||
format_bytes(results[i].compressed_size, compressed_str, sizeof(compressed_str));
|
||||
double ratio = (double)results[i].compressed_size / (double)info.total_uncompressed_size * 100.0;
|
||||
|
||||
printf(" %sTime:%s %s%.2f%s seconds\n", clr(ANSI_DIM), clr(ANSI_RESET),
|
||||
clr(ANSI_WHITE), ns_to_seconds(results[i].elapsed_ns), clr(ANSI_RESET));
|
||||
printf(" %sCompressed:%s %s%s%s\n", clr(ANSI_DIM), clr(ANSI_RESET),
|
||||
clr(ANSI_WHITE), compressed_str, clr(ANSI_RESET));
|
||||
printf(" %sRatio:%s %s%s%.2f%%%s\n\n", clr(ANSI_DIM), clr(ANSI_RESET),
|
||||
clr(ANSI_BOLD), clr(get_ratio_color(ratio)), ratio, clr(ANSI_RESET));
|
||||
}
|
||||
|
||||
// Cleanup dictionary
|
||||
if(dict_ctx != NULL) free_zstd_dictionary(dict_ctx);
|
||||
|
||||
// Print summary table
|
||||
print_section_header("Results Summary");
|
||||
|
||||
// Table header
|
||||
printf(" %s%-16s %20s %20s %12s %10s%s\n", clr(ANSI_BOLD),
|
||||
"Algorithm", "Uncompressed (B)", "Compressed (B)", "Ratio", "Time (s)", clr(ANSI_RESET));
|
||||
printf(" %s────────────────────────────────────────────────────────────────────────────────%s\n",
|
||||
clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
|
||||
for(size_t i = 0; i < algorithm_count; i++)
|
||||
{
|
||||
const double ratio = (double)results[i].compressed_size / (double)info.total_uncompressed_size * 100.0;
|
||||
const double time_s = ns_to_seconds(results[i].elapsed_ns);
|
||||
|
||||
printf(" %s%-16s%s %'20" PRIu64 " %'20" PRIu64 " %s%s%11.2f%%%s %10.2f\n",
|
||||
clr(ANSI_WHITE), algorithm_names[i], clr(ANSI_RESET),
|
||||
info.total_uncompressed_size, results[i].compressed_size,
|
||||
clr(ANSI_BOLD), clr(get_ratio_color(ratio)), ratio, clr(ANSI_RESET), time_s);
|
||||
}
|
||||
|
||||
// Find best compression
|
||||
size_t best_compression_idx = 0;
|
||||
uint64_t best_compressed_size = results[0].compressed_size;
|
||||
for(size_t i = 1; i < algorithm_count; i++)
|
||||
{
|
||||
if(results[i].compressed_size < best_compressed_size)
|
||||
{
|
||||
best_compressed_size = results[i].compressed_size;
|
||||
best_compression_idx = i;
|
||||
}
|
||||
}
|
||||
|
||||
// Find fastest
|
||||
size_t fastest_idx = 0;
|
||||
uint64_t fastest_time = results[0].elapsed_ns;
|
||||
for(size_t i = 1; i < algorithm_count; i++)
|
||||
{
|
||||
if(results[i].elapsed_ns < fastest_time)
|
||||
{
|
||||
fastest_time = results[i].elapsed_ns;
|
||||
fastest_idx = i;
|
||||
}
|
||||
}
|
||||
|
||||
printf("\n %s────────────────────────────────────────────────────────────────────────────────%s\n",
|
||||
clr(ANSI_DIM), clr(ANSI_RESET));
|
||||
printf(" %s🏆 Best Compression:%s %s%s%s\n", clr(ANSI_BOLD), clr(ANSI_RESET),
|
||||
clr(ANSI_GREEN), algorithm_names[best_compression_idx], clr(ANSI_RESET));
|
||||
printf(" %s⚡ Fastest:%s %s%s%s\n\n", clr(ANSI_BOLD), clr(ANSI_RESET),
|
||||
clr(ANSI_CYAN), algorithm_names[fastest_idx], clr(ANSI_RESET));
|
||||
|
||||
close_image(&info);
|
||||
return 0;
|
||||
}
|
||||
80
benchmark/benchmark.h
Normal file
80
benchmark/benchmark.h
Normal file
@@ -0,0 +1,80 @@
|
||||
/*
 * This file is part of the Aaru Data Preservation Suite.
 * Copyright (c) 2019-2026 Natalia Portillo.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef BENCHMARK_H
#define BENCHMARK_H

#include <stdint.h>
#include <stdio.h>

// Holds a trained Zstd dictionary shared across block compressions.
typedef struct
{
    uint8_t *dict_data; // raw dictionary bytes (heap-allocated)
    size_t   dict_size; // number of valid bytes in dict_data
    uint32_t dict_id;   // dictionary ID reported by Zstd (or a fallback value)
} zstd_dict_context;

// Identifies which compressor a benchmark run exercises.
typedef enum
{
    COMP_LZMA   = 0,
    COMP_BZIP3  = 1,
    COMP_ZSTD   = 2,
    COMP_BROTLI = 3
} compression_algorithm;

// Describes an open Aaru image under benchmark.
typedef struct
{
    FILE    *file;                    // open handle to the image file
    uint8_t  major_version;           // image format major version
    uint8_t  minor_version;           // image format minor version
    uint64_t index_offset;            // file offset of the block index
    uint64_t block_count;             // number of index entries
    uint64_t total_uncompressed_size; // sum of uncompressed block lengths
    void    *index_entries;           // array of IndexEntry (kept opaque here)
} image_info;

// Totals gathered for one algorithm over a whole image.
typedef struct
{
    uint64_t compressed_size; // bytes produced after recompression
    uint64_t elapsed_ns;      // elapsed time in nanoseconds
} benchmark_result;

// State handed to the progress callback while a benchmark runs.
typedef struct
{
    uint64_t current;    // units processed so far
    uint64_t total;      // total units to process
    char     label[256]; // human-readable description of the current phase
} progress_state;

// Invoked periodically with the latest progress_state.
typedef void (*progress_callback)(const progress_state *state);

// Open an Aaru image at `path` and populate `info`. Returns 0 on success.
int open_image(const char *path, image_info *info);

// Release resources held by `info`.
void close_image(image_info *info);

// Recompress the image's blocks with `algorithm`, writing the output image to
// `output_path` and storing measurements in `result`. `dict_ctx` may supply a
// trained Zstd dictionary (used only for COMP_ZSTD). Returns 0 on success.
int benchmark_compression(const char *input_path, const char *output_path, compression_algorithm algorithm,
                          image_info *info, benchmark_result *result, progress_state *progress,
                          progress_callback progress_cb, const zstd_dict_context *dict_ctx);

#endif // BENCHMARK_H
|
||||
380
benchmark/compression.c
Normal file
380
benchmark/compression.c
Normal file
@@ -0,0 +1,380 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2026 Natalia Portillo.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include "compression.h"
|
||||
#include <aaruformat/consts.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef HAVE_ZSTD
|
||||
#include <zdict.h>
|
||||
#include <zstd.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_BZ3
|
||||
#include <libbz3.h>
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_BROTLI
|
||||
#include <brotli/encode.h>
|
||||
#include <brotli/decode.h>
|
||||
#endif
|
||||
|
||||
|
||||
// LZMA compression from library
|
||||
extern int32_t aaruf_lzma_encode_buffer(uint8_t *dst_buffer, size_t *dst_size, const uint8_t *src_buffer,
|
||||
size_t src_len, uint8_t *out_props, size_t *out_props_size, int32_t level,
|
||||
uint32_t dict_size, int32_t lc, int32_t lp, int32_t pb, int32_t fb,
|
||||
int32_t num_threads);
|
||||
|
||||
// Compress data using LZMA
|
||||
static int compress_lzma(const uint8_t *input, const size_t input_size, uint8_t **output, size_t *output_size)
|
||||
{
|
||||
// Allocate output buffer (input size * 2 + margin)
|
||||
const size_t max_output_size = input_size * 2 + 65536;
|
||||
uint8_t *buffer = malloc(max_output_size);
|
||||
if(buffer == NULL) return -1;
|
||||
|
||||
uint8_t props[LZMA_PROPERTIES_LENGTH];
|
||||
size_t props_size = LZMA_PROPERTIES_LENGTH;
|
||||
size_t cmp_size = max_output_size;
|
||||
|
||||
// Compress (level 9, 1MB dictionary, standard parameters)
|
||||
if(aaruf_lzma_encode_buffer(buffer, &cmp_size, input, input_size, props, &props_size, 9, 33554432, 4, 0, 2, 273,
|
||||
8) != 0)
|
||||
{
|
||||
free(buffer);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Allocate final buffer with properties prepended
|
||||
*output_size = cmp_size + LZMA_PROPERTIES_LENGTH;
|
||||
*output = malloc(*output_size);
|
||||
if(*output == NULL)
|
||||
{
|
||||
free(buffer);
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(*output, props, LZMA_PROPERTIES_LENGTH);
|
||||
memcpy(*output + LZMA_PROPERTIES_LENGTH, buffer, cmp_size);
|
||||
|
||||
free(buffer);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Compress data using Bzip3.
// Returns 0 on success, -1 on failure. Caller must free *output.
static int compress_bzip3(const uint8_t *input, const size_t input_size, uint8_t **output, size_t *output_size)
{
#ifdef HAVE_BZ3
    // 16 MiB blocks, chosen for best compression.
    const int32_t bz3_block_size = 16 * 1024 * 1024;

    struct bz3_state *bz_state = bz3_new(bz3_block_size);
    if(bz_state == NULL) return -1;

    // bz3_encode_block compresses in place, so the buffer must be able to hold
    // the uncompressed input as well as the worst-case encoded size.
    const size_t bound = bz3_bound(input_size);
    *output            = malloc(bound);
    if(*output == NULL)
    {
        bz3_free(bz_state);
        return -1;
    }

    // Seed the buffer with the input, then encode it in place.
    memcpy(*output, input, input_size);

    const int32_t encoded = bz3_encode_block(bz_state, *output, input_size);
    bz3_free(bz_state);

    // Negative return means the encoder failed.
    if(encoded < 0)
    {
        free(*output);
        *output = NULL;
        return -1;
    }

    *output_size = encoded;
    return 0;
#else
    // Built without bzip3 support.
    (void)input;
    (void)input_size;
    *output      = NULL;
    *output_size = 0;
    return -1;
#endif
}
|
||||
|
||||
// Compress data using Zstd at level 19.
// Returns 0 on success, -1 on failure. Caller must free *output.
static int compress_zstd(const uint8_t *input, const size_t input_size, uint8_t **output, size_t *output_size)
{
#ifdef HAVE_ZSTD
    // Upper bound on the compressed size for this input.
    const size_t bound = ZSTD_compressBound(input_size);
    *output            = malloc(bound);
    if(*output == NULL) return -1;

    // Level 19 = maximum compression.
    const size_t written = ZSTD_compress(*output, bound, input, input_size, 19);
    if(ZSTD_isError(written))
    {
        free(*output);
        *output = NULL;
        return -1;
    }

    *output_size = written;
    return 0;
#else
    // Built without Zstd support.
    (void)input;
    (void)input_size;
    *output      = NULL;
    *output_size = 0;
    return -1;
#endif
}
|
||||
|
||||
// Compress data using Brotli.
// Uses quality 8 with a 22-bit (4 MiB) window: quality 11 is extremely slow,
// while quality 8 reaches a similar ratio far faster.
// Returns 0 on success, -1 on failure. Caller must free *output.
static int compress_brotli(const uint8_t *input, const size_t input_size, uint8_t **output, size_t *output_size)
{
#ifdef HAVE_BROTLI
    // BrotliEncoderMaxCompressedSize returns 0 when the input is too large for
    // it to bound; fall back to a conservative estimate in that case.
    const size_t bound       = BrotliEncoderMaxCompressedSize(input_size);
    const size_t buffer_size = (bound != 0) ? bound : input_size + (input_size >> 2) + 10240;

    *output = malloc(buffer_size);
    if(*output == NULL) return -1;

    size_t encoded_size = buffer_size;

    const BROTLI_BOOL ok = BrotliEncoderCompress(8,                   // quality 8 (high, much faster than 11)
                                                 22,                  // lgwin = 22 (4 MiB window)
                                                 BROTLI_DEFAULT_MODE, // generic input
                                                 input_size, input, &encoded_size, *output);

    if(ok != BROTLI_TRUE)
    {
        free(*output);
        *output = NULL;
        return -1;
    }

    *output_size = encoded_size;
    return 0;
#else
    // Built without Brotli support.
    (void)input;
    (void)input_size;
    *output      = NULL;
    *output_size = 0;
    return -1;
#endif
}
|
||||
|
||||
|
||||
// Main compression function
|
||||
int compress_data(const compression_algorithm algorithm, const uint8_t *input, const size_t input_size,
|
||||
uint8_t **output, size_t *output_size)
|
||||
{
|
||||
switch(algorithm)
|
||||
{
|
||||
case COMP_LZMA:
|
||||
return compress_lzma(input, input_size, output, output_size);
|
||||
case COMP_BZIP3:
|
||||
return compress_bzip3(input, input_size, output, output_size);
|
||||
case COMP_ZSTD:
|
||||
return compress_zstd(input, input_size, output, output_size);
|
||||
case COMP_BROTLI:
|
||||
return compress_brotli(input, input_size, output, output_size);
|
||||
default:
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// Get compression type for header
|
||||
int get_compression_type(const compression_algorithm algorithm)
|
||||
{
|
||||
switch(algorithm)
|
||||
{
|
||||
case COMP_LZMA:
|
||||
return 1; // LZMA
|
||||
case COMP_BZIP3:
|
||||
return 100; // Custom identifier for bzip3
|
||||
case COMP_ZSTD:
|
||||
return 101; // Custom identifier for zstd
|
||||
case COMP_BROTLI:
|
||||
return 102; // Custom identifier for brotli
|
||||
default:
|
||||
return 0; // None
|
||||
}
|
||||
}
|
||||
|
||||
// Train a Zstd dictionary from samples
|
||||
zstd_dict_context *train_zstd_dictionary(const uint8_t *sample_data, size_t sample_size, size_t dict_size)
|
||||
{
|
||||
#ifdef HAVE_ZSTD
|
||||
if(sample_data == NULL || sample_size == 0) return NULL;
|
||||
|
||||
zstd_dict_context *ctx = malloc(sizeof(zstd_dict_context));
|
||||
if(ctx == NULL) return NULL;
|
||||
|
||||
// Allocate dictionary buffer
|
||||
ctx->dict_data = malloc(dict_size);
|
||||
if(ctx->dict_data == NULL)
|
||||
{
|
||||
free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Train dictionary using Zstd's ZDICT_trainFromBuffer
|
||||
// This analyzes the sample data and creates an optimized dictionary
|
||||
// We need to split the sample into multiple samples for proper training
|
||||
|
||||
// ZDICT has internal constraints on maximum sample size
|
||||
// Split large samples into chunks to work around this
|
||||
// Use reasonable chunk size (e.g., 2MB per sample)
|
||||
const size_t max_sample_size = 2 * 1024 * 1024; // 2MB chunks
|
||||
const size_t num_samples = (sample_size + max_sample_size - 1) / max_sample_size;
|
||||
|
||||
// Allocate array for sample sizes
|
||||
size_t *sample_sizes = malloc(num_samples * sizeof(size_t));
|
||||
if(sample_sizes == NULL)
|
||||
{
|
||||
free(ctx->dict_data);
|
||||
free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Calculate size for each sample
|
||||
size_t remaining = sample_size;
|
||||
for(size_t i = 0; i < num_samples; i++)
|
||||
{
|
||||
sample_sizes[i] = (remaining > max_sample_size) ? max_sample_size : remaining;
|
||||
remaining -= sample_sizes[i];
|
||||
}
|
||||
|
||||
// Use standard ZDICT_trainFromBuffer
|
||||
size_t trained_size = ZDICT_trainFromBuffer(ctx->dict_data, dict_size, sample_data, sample_sizes, num_samples);
|
||||
|
||||
free(sample_sizes);
|
||||
|
||||
if(ZDICT_isError(trained_size))
|
||||
{
|
||||
free(ctx->dict_data);
|
||||
free(ctx);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ctx->dict_size = trained_size;
|
||||
|
||||
// Get dictionary ID
|
||||
ctx->dict_id = ZSTD_getDictID_fromDict(ctx->dict_data, ctx->dict_size);
|
||||
if(ctx->dict_id == 0)
|
||||
{
|
||||
ctx->dict_id = 0x12345678; // Fallback ID
|
||||
}
|
||||
|
||||
return ctx;
|
||||
#else
|
||||
(void)sample_data;
|
||||
(void)sample_size;
|
||||
(void)dict_size;
|
||||
return NULL;
|
||||
#endif
|
||||
}
|
||||
|
||||
// Free dictionary context
|
||||
void free_zstd_dictionary(zstd_dict_context *dict_ctx)
|
||||
{
|
||||
if(dict_ctx == NULL) return;
|
||||
if(dict_ctx->dict_data) free(dict_ctx->dict_data);
|
||||
free(dict_ctx);
|
||||
}
|
||||
|
||||
// Compress data using Zstd with custom dictionary
|
||||
int compress_data_zstd_dict(const uint8_t *input, size_t input_size, uint8_t **output, size_t *output_size,
|
||||
const zstd_dict_context *dict_ctx)
|
||||
{
|
||||
#ifdef HAVE_ZSTD
|
||||
if(dict_ctx == NULL || dict_ctx->dict_data == NULL) return -1;
|
||||
|
||||
// Calculate max output size
|
||||
const size_t max_output_size = ZSTD_compressBound(input_size);
|
||||
*output = malloc(max_output_size);
|
||||
if(*output == NULL) return -1;
|
||||
|
||||
// Create compression context with dictionary
|
||||
ZSTD_CCtx *cctx = ZSTD_createCCtx();
|
||||
if(cctx == NULL)
|
||||
{
|
||||
free(*output);
|
||||
*output = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Use the simpler ZSTD_compress_usingDict which is optimized for dictionary compression
|
||||
// This is more efficient than ZSTD_compress2 with loadDictionary
|
||||
size_t result = ZSTD_compress_usingDict(cctx, *output, max_output_size, input, input_size, dict_ctx->dict_data,
|
||||
dict_ctx->dict_size,
|
||||
19); // Compression level 19
|
||||
|
||||
ZSTD_freeCCtx(cctx);
|
||||
|
||||
if(ZSTD_isError(result))
|
||||
{
|
||||
free(*output);
|
||||
*output = NULL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
*output_size = result;
|
||||
return 0;
|
||||
#else
|
||||
(void)input;
|
||||
(void)input_size;
|
||||
(void)output;
|
||||
(void)output_size;
|
||||
(void)dict_ctx;
|
||||
return -1;
|
||||
#endif
|
||||
}
|
||||
51
benchmark/compression.h
Normal file
51
benchmark/compression.h
Normal file
@@ -0,0 +1,51 @@
|
||||
/*
 * This file is part of the Aaru Data Preservation Suite.
 * Copyright (c) 2019-2026 Natalia Portillo.
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 */

#ifndef COMPRESSION_H
#define COMPRESSION_H

#include <stddef.h>
#include <stdint.h>
#include "benchmark.h"

// Compress `input` with the chosen algorithm into a newly allocated buffer.
// Returns 0 on success, -1 on failure; on success the caller frees *output.
int compress_data(compression_algorithm algorithm, const uint8_t *input, size_t input_size, uint8_t **output,
                  size_t *output_size);

// Compress `input` with Zstd using a previously trained dictionary.
// Returns 0 on success, -1 on failure; on success the caller frees *output.
int compress_data_zstd_dict(const uint8_t *input, size_t input_size, uint8_t **output, size_t *output_size,
                            const zstd_dict_context *dict_ctx);

// Train a Zstd dictionary from sample data:
//   sample_data - concatenated uncompressed data from multiple blocks
//   sample_size - total size of the sample data
//   dict_size   - desired dictionary size (typically 16KB)
// Returns a dictionary context on success, NULL on failure.
zstd_dict_context *train_zstd_dictionary(const uint8_t *sample_data, size_t sample_size, size_t dict_size);

// Release a context returned by train_zstd_dictionary (NULL-safe).
void free_zstd_dictionary(zstd_dict_context *dict_ctx);

// Map an algorithm to the compression identifier written in block headers.
int get_compression_type(compression_algorithm algorithm);

#endif // COMPRESSION_H
|
||||
783
benchmark/compression_benchmark.c
Normal file
783
benchmark/compression_benchmark.c
Normal file
@@ -0,0 +1,783 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2026 Natalia Portillo.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <aaruformat/consts.h>
|
||||
#include <aaruformat/structs/data.h>
|
||||
#include <aaruformat/structs/ddt.h>
|
||||
#include <aaruformat/structs/flux.h>
|
||||
#include <aaruformat/structs/header.h>
|
||||
#include <aaruformat/structs/index.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <sys/stat.h>
|
||||
#include "benchmark.h"
|
||||
#include "compression.h"
|
||||
|
||||
// CRC64 implementation (from library)
|
||||
extern uint64_t aaruf_crc64_data(const uint8_t *data, size_t length);
|
||||
|
||||
// LZMA decompression (from library)
|
||||
extern int32_t aaruf_lzma_decode_buffer(uint8_t *dst_buffer, size_t *dst_size, const uint8_t *src_buffer,
|
||||
size_t *src_len, const uint8_t *props, const size_t props_size);
|
||||
|
||||
// FLAC decompression (from library)
|
||||
extern size_t aaruf_flac_decode_redbook_buffer(uint8_t *dst_buffer, size_t dst_size, const uint8_t *src_buffer,
|
||||
size_t src_size);
|
||||
|
||||
// CST (Claunia Subchannel Transform) functions (from library)
|
||||
extern int32_t aaruf_cst_transform(const uint8_t *interleaved, uint8_t *sequential, size_t length);
|
||||
extern int32_t aaruf_cst_untransform(const uint8_t *sequential, uint8_t *interleaved, size_t length);
|
||||
|
||||
// Benchmark compression algorithm on an image
|
||||
int benchmark_compression(const char *input_path, const char *output_path, const compression_algorithm algorithm,
|
||||
image_info *info, benchmark_result *result, progress_state *progress,
|
||||
progress_callback progress_cb, const zstd_dict_context *dict_ctx)
|
||||
{
|
||||
memset(result, 0, sizeof(benchmark_result));
|
||||
|
||||
// Open output file
|
||||
FILE *output = fopen(output_path, "wb");
|
||||
if(output == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot create output file %s\n", output_path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read original header from input
|
||||
if(fseek(info->file, 0, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot seek to header\n");
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
AaruHeaderV2 header;
|
||||
if(fread(&header, 1, sizeof(AaruHeaderV2), info->file) != sizeof(AaruHeaderV2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read header\n");
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Write placeholder header (will update later)
|
||||
if(fwrite(&header, 1, sizeof(AaruHeaderV2), output) != sizeof(AaruHeaderV2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write header\n");
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Prepare new index entries
|
||||
IndexEntry *old_entries = (IndexEntry *)info->index_entries;
|
||||
IndexEntry *new_entries = malloc(info->block_count * sizeof(IndexEntry));
|
||||
if(new_entries == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate memory for new index entries\n");
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(new_entries, old_entries, info->block_count * sizeof(IndexEntry));
|
||||
|
||||
// Process each block
|
||||
uint64_t current_position = ftell(output);
|
||||
progress->current = 0;
|
||||
|
||||
for(uint64_t i = 0; i < info->block_count; i++)
|
||||
{
|
||||
IndexEntry *entry = &old_entries[i];
|
||||
|
||||
// Seek to block
|
||||
if(fseek(info->file, entry->offset, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot seek to block %" PRIu64 "\n", i);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read block identifier to determine type
|
||||
uint32_t identifier;
|
||||
long block_start = ftell(info->file);
|
||||
if(fread(&identifier, 1, sizeof(uint32_t), info->file) != sizeof(uint32_t))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read block identifier %" PRIu64 "\n", i);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
fseek(info->file, block_start, SEEK_SET); // Rewind
|
||||
|
||||
// Calculate block size by looking at next entry or EOF
|
||||
size_t block_size;
|
||||
if(i + 1 < info->block_count) { block_size = old_entries[i + 1].offset - entry->offset; }
|
||||
else
|
||||
{
|
||||
// Last block - read to EOF (excluding index)
|
||||
block_size = info->index_offset - entry->offset;
|
||||
}
|
||||
|
||||
// Process blocks with compression: DataBlock, DDT (v1/v2), DataStreamPayload
|
||||
if(identifier == 0x4B4C4244 || // DataBlock
|
||||
identifier == 0x2A544444 || // DeDuplicationTable (v1)
|
||||
identifier == 0x32544444 || // DeDuplicationTable2 (v2)
|
||||
identifier == 0x4C505344) // DataStreamPayloadBlock
|
||||
{
|
||||
// These blocks all share: identifier(4), type(2), compression(2), then data
|
||||
// Read the common header fields
|
||||
uint16_t type, compression;
|
||||
fseek(info->file, block_start + 4, SEEK_SET);
|
||||
if(fread(&type, 1, sizeof(uint16_t), info->file) != sizeof(uint16_t) ||
|
||||
fread(&compression, 1, sizeof(uint16_t), info->file) != sizeof(uint16_t))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read block header fields\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read the full header based on type to get cmpLength and length
|
||||
uint64_t cmpLength, length, cmpCrc64, crc64;
|
||||
size_t header_size;
|
||||
|
||||
fseek(info->file, block_start, SEEK_SET);
|
||||
|
||||
if(identifier == 0x4B4C4244) // DataBlock
|
||||
{
|
||||
BlockHeader block_header;
|
||||
if(fread(&block_header, 1, sizeof(BlockHeader), info->file) != sizeof(BlockHeader))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read BlockHeader\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
header_size = sizeof(BlockHeader);
|
||||
cmpLength = block_header.cmpLength;
|
||||
length = block_header.length;
|
||||
cmpCrc64 = block_header.cmpCrc64;
|
||||
crc64 = block_header.crc64;
|
||||
}
|
||||
else if(identifier == 0x2A544444) // DDT v1
|
||||
{
|
||||
DdtHeader ddt_header;
|
||||
if(fread(&ddt_header, 1, sizeof(DdtHeader), info->file) != sizeof(DdtHeader))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read DdtHeader\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
header_size = sizeof(DdtHeader);
|
||||
compression = ddt_header.compression;
|
||||
cmpLength = ddt_header.cmpLength;
|
||||
length = ddt_header.length;
|
||||
cmpCrc64 = ddt_header.cmpCrc64;
|
||||
crc64 = ddt_header.crc64;
|
||||
}
|
||||
else // DDT v2 (0x32544444) or DataStreamPayload (0x4C505344)
|
||||
{
|
||||
if(identifier == 0x32544444) // DDT v2
|
||||
{
|
||||
DdtHeader2 ddt_header2;
|
||||
if(fread(&ddt_header2, 1, sizeof(DdtHeader2), info->file) != sizeof(DdtHeader2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read DdtHeader2\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
header_size = sizeof(DdtHeader2);
|
||||
compression = ddt_header2.compression;
|
||||
cmpLength = ddt_header2.cmpLength;
|
||||
length = ddt_header2.length;
|
||||
cmpCrc64 = ddt_header2.cmpCrc64;
|
||||
crc64 = ddt_header2.crc64;
|
||||
}
|
||||
else // DataStreamPayloadBlock (0x4C505344)
|
||||
{
|
||||
DataStreamPayloadHeader payload_header;
|
||||
if(fread(&payload_header, 1, sizeof(DataStreamPayloadHeader), info->file) !=
|
||||
sizeof(DataStreamPayloadHeader))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read DataStreamPayloadHeader\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
header_size = sizeof(DataStreamPayloadHeader);
|
||||
compression = payload_header.compression;
|
||||
cmpLength = payload_header.cmpLength;
|
||||
length = payload_header.length;
|
||||
cmpCrc64 = payload_header.cmpCrc64;
|
||||
crc64 = payload_header.crc64;
|
||||
}
|
||||
}
|
||||
|
||||
// Allocate buffer for uncompressed data
|
||||
uint8_t *uncompressed = malloc(length);
|
||||
if(uncompressed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate uncompressed buffer\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Decompress data if needed
|
||||
if(compression == 1) // LZMA
|
||||
{
|
||||
// Read LZMA properties
|
||||
uint8_t lzma_props[LZMA_PROPERTIES_LENGTH];
|
||||
if(fread(lzma_props, 1, LZMA_PROPERTIES_LENGTH, info->file) != LZMA_PROPERTIES_LENGTH)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read LZMA properties\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read compressed data
|
||||
const size_t compressed_size = cmpLength - LZMA_PROPERTIES_LENGTH;
|
||||
uint8_t *compressed = malloc(compressed_size);
|
||||
if(compressed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate compressed buffer\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(compressed, 1, compressed_size, info->file) != compressed_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read compressed data\n");
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Decompress
|
||||
size_t decompressed_size = length;
|
||||
size_t cmp_size = compressed_size;
|
||||
if(aaruf_lzma_decode_buffer(uncompressed, &decompressed_size, compressed, &cmp_size, lzma_props,
|
||||
LZMA_PROPERTIES_LENGTH) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: LZMA decompression failed for block %" PRIu64 "\n", i);
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(compressed);
|
||||
}
|
||||
else if(compression == 2) // FLAC
|
||||
{
|
||||
// Read FLAC compressed data
|
||||
uint8_t *compressed = malloc(cmpLength);
|
||||
if(compressed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate compressed buffer for FLAC\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(compressed, 1, cmpLength, info->file) != cmpLength)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read FLAC compressed data\n");
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Decompress FLAC (returns bytes written)
|
||||
const size_t decompressed_size =
|
||||
aaruf_flac_decode_redbook_buffer(uncompressed, length, compressed, cmpLength);
|
||||
if(decompressed_size != length)
|
||||
{
|
||||
fprintf(stderr, "Error: FLAC decompression failed for block %" PRIu64 " (expected %zu, got %zu)\n",
|
||||
i, (size_t)length, decompressed_size);
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(compressed);
|
||||
}
|
||||
else if(compression == 3) // LZMA with Claunia Subchannel Transform
|
||||
{
|
||||
// Read LZMA properties
|
||||
uint8_t lzma_props[LZMA_PROPERTIES_LENGTH];
|
||||
if(fread(lzma_props, 1, LZMA_PROPERTIES_LENGTH, info->file) != LZMA_PROPERTIES_LENGTH)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read LZMA properties for LZMA+CST\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read compressed data
|
||||
const size_t compressed_size = cmpLength - LZMA_PROPERTIES_LENGTH;
|
||||
uint8_t *compressed = malloc(compressed_size);
|
||||
if(compressed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate compressed buffer for LZMA+CST\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(compressed, 1, compressed_size, info->file) != compressed_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read LZMA+CST compressed data\n");
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Decompress LZMA first (into a temporary buffer for CST sequential data)
|
||||
uint8_t *cst_sequential = malloc(length);
|
||||
if(cst_sequential == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate CST sequential buffer\n");
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
size_t decompressed_size = length;
|
||||
size_t cmp_size = compressed_size;
|
||||
if(aaruf_lzma_decode_buffer(cst_sequential, &decompressed_size, compressed, &cmp_size, lzma_props,
|
||||
LZMA_PROPERTIES_LENGTH) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: LZMA decompression failed for LZMA+CST block %" PRIu64 "\n", i);
|
||||
free(cst_sequential);
|
||||
free(compressed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(compressed);
|
||||
|
||||
// Untransform CST (sequential -> interleaved)
|
||||
if(aaruf_cst_untransform(cst_sequential, uncompressed, length) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: CST untransform failed for block %" PRIu64 "\n", i);
|
||||
free(cst_sequential);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(cst_sequential);
|
||||
}
|
||||
else if(compression == 0) // None
|
||||
{
|
||||
// Read uncompressed data directly
|
||||
if(fread(uncompressed, 1, length, info->file) != length)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read uncompressed data\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr,
|
||||
"Warning: Unsupported compression type %u for block %" PRIu64
|
||||
" (supported: 0=None, 1=LZMA, 2=FLAC, 3=LZMA+CST), copying as-is\n",
|
||||
compression, i);
|
||||
free(uncompressed);
|
||||
// Fall through to copy block as-is
|
||||
goto copy_block_asis;
|
||||
}
|
||||
|
||||
// Now recompress with the test algorithm (only for DataBlock and DataStreamPayload)
|
||||
uint8_t *recompressed = NULL;
|
||||
size_t recompressed_size = 0;
|
||||
int new_compression = compression; // Default: keep original
|
||||
bool had_cst = (compression == 3); // Track if original had CST
|
||||
|
||||
// Benchmark DataBlocks, DataStreamPayload, and DDT blocks for compression
|
||||
if((identifier == 0x4B4C4244 || identifier == 0x4C505344 || identifier == 0x2A544444 ||
|
||||
identifier == 0x32544444)) // Include DDT v1 and v2
|
||||
{
|
||||
uint8_t *data_to_compress = uncompressed;
|
||||
size_t data_to_compress_size = length;
|
||||
uint8_t *cst_transformed = NULL;
|
||||
|
||||
// If original had CST, apply CST transform before compressing
|
||||
if(had_cst)
|
||||
{
|
||||
cst_transformed = malloc(length);
|
||||
if(cst_transformed == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate CST transform buffer\n");
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(aaruf_cst_transform(uncompressed, cst_transformed, length) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: CST transform failed for block %" PRIu64 "\n", i);
|
||||
free(cst_transformed);
|
||||
free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
data_to_compress = cst_transformed;
|
||||
}
|
||||
|
||||
// Try test compression algorithm on the (possibly CST-transformed) data
|
||||
// Use Zstd with dictionary if available, otherwise use standard compression
|
||||
if(algorithm == COMP_ZSTD && dict_ctx != NULL)
|
||||
{
|
||||
if(compress_data_zstd_dict(data_to_compress, data_to_compress_size, &recompressed,
|
||||
&recompressed_size, dict_ctx) == 0)
|
||||
{
|
||||
// Check if compression is beneficial
|
||||
if(recompressed_size < data_to_compress_size)
|
||||
{
|
||||
new_compression = had_cst ? 3 : 101; // 101 is zstd identifier
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compression not beneficial, use uncompressed
|
||||
free(recompressed);
|
||||
recompressed = uncompressed;
|
||||
recompressed_size = length;
|
||||
new_compression = 0;
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compression failed, use original compression type
|
||||
recompressed = data_to_compress;
|
||||
recompressed_size = data_to_compress_size;
|
||||
new_compression = compression;
|
||||
|
||||
if(had_cst) { cst_transformed = NULL; }
|
||||
else
|
||||
{
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use standard compression algorithm (LZMA, Bzip3, or Zstd without dictionary)
|
||||
if(compress_data(algorithm, data_to_compress, data_to_compress_size, &recompressed,
|
||||
&recompressed_size) == 0)
|
||||
{
|
||||
// Check if compression is beneficial
|
||||
if(recompressed_size < data_to_compress_size)
|
||||
{
|
||||
int base_compression = get_compression_type(algorithm);
|
||||
new_compression = had_cst ? 3 : base_compression;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compression not beneficial, use uncompressed
|
||||
free(recompressed);
|
||||
recompressed = uncompressed;
|
||||
recompressed_size = length;
|
||||
new_compression = 0;
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Compression failed, use original compression type
|
||||
recompressed = data_to_compress;
|
||||
recompressed_size = data_to_compress_size;
|
||||
new_compression = compression;
|
||||
|
||||
if(had_cst) { cst_transformed = NULL; }
|
||||
else
|
||||
{
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up CST buffer if not used
|
||||
if(cst_transformed && cst_transformed != recompressed) free(cst_transformed);
|
||||
// Clean up uncompressed if CST was used and not recompressed
|
||||
if(had_cst && uncompressed && uncompressed != recompressed)
|
||||
{
|
||||
free(uncompressed);
|
||||
uncompressed = NULL;
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
// Keep DDT blocks unchanged
|
||||
recompressed = uncompressed;
|
||||
recompressed_size = length;
|
||||
new_compression = compression;
|
||||
uncompressed = NULL;
|
||||
}
|
||||
|
||||
// Write the block back with original header structure
|
||||
new_entries[i].offset = current_position;
|
||||
|
||||
fseek(info->file, block_start, SEEK_SET);
|
||||
uint8_t *header_buffer = malloc(header_size);
|
||||
if(fread(header_buffer, 1, header_size, info->file) != header_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot re-read header\n");
|
||||
free(header_buffer);
|
||||
free(recompressed);
|
||||
if(uncompressed) free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Update compression fields in the header buffer
|
||||
if(identifier == 0x4B4C4244) // DataBlock
|
||||
{
|
||||
BlockHeader *bhdr = (BlockHeader *)header_buffer;
|
||||
bhdr->compression = new_compression;
|
||||
bhdr->cmpLength = recompressed_size;
|
||||
bhdr->cmpCrc64 = aaruf_crc64_data(recompressed, recompressed_size);
|
||||
}
|
||||
else if(identifier == 0x2A544444) // DDT v1
|
||||
{
|
||||
DdtHeader *dhdr = (DdtHeader *)header_buffer;
|
||||
dhdr->compression = new_compression;
|
||||
dhdr->cmpLength = recompressed_size;
|
||||
dhdr->cmpCrc64 = aaruf_crc64_data(recompressed, recompressed_size);
|
||||
}
|
||||
else if(identifier == 0x32544444) // DDT v2
|
||||
{
|
||||
DdtHeader2 *dhdr2 = (DdtHeader2 *)header_buffer;
|
||||
dhdr2->compression = new_compression;
|
||||
dhdr2->cmpLength = recompressed_size;
|
||||
dhdr2->cmpCrc64 = aaruf_crc64_data(recompressed, recompressed_size);
|
||||
}
|
||||
else // DataStreamPayloadBlock (0x4C505344)
|
||||
{
|
||||
DataStreamPayloadHeader *phdr = (DataStreamPayloadHeader *)header_buffer;
|
||||
phdr->compression = new_compression;
|
||||
phdr->cmpLength = recompressed_size;
|
||||
phdr->cmpCrc64 = aaruf_crc64_data(recompressed, recompressed_size);
|
||||
}
|
||||
|
||||
// Write header
|
||||
if(fwrite(header_buffer, 1, header_size, output) != header_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write block header\n");
|
||||
free(header_buffer);
|
||||
free(recompressed);
|
||||
if(uncompressed) free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
free(header_buffer);
|
||||
|
||||
// Write data
|
||||
if(fwrite(recompressed, 1, recompressed_size, output) != recompressed_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write block data\n");
|
||||
free(recompressed);
|
||||
if(uncompressed) free(uncompressed);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
current_position += header_size + recompressed_size;
|
||||
// Count all benchmarked blocks in compressed size (DataBlock, DataStreamPayload, DDT v1/v2)
|
||||
if(identifier == 0x4B4C4244 || identifier == 0x4C505344 || identifier == 0x2A544444 ||
|
||||
identifier == 0x32544444)
|
||||
result->compressed_size += recompressed_size;
|
||||
|
||||
free(recompressed);
|
||||
if(uncompressed) free(uncompressed);
|
||||
}
|
||||
else
|
||||
{
|
||||
copy_block_asis:
|
||||
// Copy all other blocks as-is
|
||||
fseek(info->file, block_start, SEEK_SET);
|
||||
|
||||
uint8_t *block_buffer = malloc(block_size);
|
||||
if(block_buffer == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate block buffer\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(block_buffer, 1, block_size, info->file) != block_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read block %" PRIu64 "\n", i);
|
||||
free(block_buffer);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
new_entries[i].offset = current_position;
|
||||
if(fwrite(block_buffer, 1, block_size, output) != block_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write block %" PRIu64 "\n", i);
|
||||
free(block_buffer);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(block_buffer);
|
||||
current_position += block_size;
|
||||
}
|
||||
|
||||
// Update progress
|
||||
progress->current++;
|
||||
if(progress_cb) progress_cb(progress);
|
||||
}
|
||||
|
||||
// ===== WRITE DICTIONARY BLOCK (Zstd only) =====
|
||||
if(algorithm == COMP_ZSTD && dict_ctx != NULL && dict_ctx->dict_data != NULL && dict_ctx->dict_size > 0)
|
||||
{
|
||||
printf("\nWriting Zstd dictionary block: %zu bytes\n", dict_ctx->dict_size);
|
||||
|
||||
// Create a DataBlock with new datatype for dictionary
|
||||
BlockHeader dict_block_header;
|
||||
dict_block_header.identifier = 0x4B4C4244; // DataBlock
|
||||
dict_block_header.type = 99; // Custom datatype for dictionary
|
||||
dict_block_header.compression = 101; // Zstd
|
||||
dict_block_header.sectorSize = 512;
|
||||
dict_block_header.length = dict_ctx->dict_size;
|
||||
dict_block_header.cmpLength = dict_ctx->dict_size; // Not further compressed
|
||||
dict_block_header.crc64 = aaruf_crc64_data(dict_ctx->dict_data, dict_ctx->dict_size);
|
||||
dict_block_header.cmpCrc64 = dict_block_header.crc64;
|
||||
|
||||
// Write dictionary block
|
||||
fseek(output, current_position, SEEK_SET);
|
||||
if(fwrite(&dict_block_header, sizeof(BlockHeader), 1, output) != 1)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write dictionary block header\n");
|
||||
free_zstd_dictionary(dict_ctx);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fwrite(dict_ctx->dict_data, dict_ctx->dict_size, 1, output) != 1)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write dictionary block data\n");
|
||||
free_zstd_dictionary(dict_ctx);
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Include dictionary in result size
|
||||
result->compressed_size += dict_ctx->dict_size;
|
||||
current_position += sizeof(BlockHeader) + dict_ctx->dict_size;
|
||||
printf("Dictionary block written at offset %" PRIu64 "\n",
|
||||
current_position - sizeof(BlockHeader) - dict_ctx->dict_size);
|
||||
}
|
||||
// ===== END DICTIONARY BLOCK =====
|
||||
|
||||
// Write new index (use IndexBlock3 to match modern images)
|
||||
const uint64_t new_index_offset = current_position;
|
||||
|
||||
IndexHeader3 index_header;
|
||||
index_header.identifier = 0x33584449; // IndexBlock3
|
||||
index_header.entries = info->block_count;
|
||||
index_header.crc64 = aaruf_crc64_data((uint8_t *)new_entries, info->block_count * sizeof(IndexEntry));
|
||||
index_header.previous = 0; // No chaining for now
|
||||
|
||||
if(fwrite(&index_header, 1, sizeof(IndexHeader3), output) != sizeof(IndexHeader3))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write index header\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fwrite(new_entries, sizeof(IndexEntry), info->block_count, output) != info->block_count)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot write index entries\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Update header with new index offset
|
||||
header.indexOffset = new_index_offset;
|
||||
if(fseek(output, 0, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot seek to header\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fwrite(&header, 1, sizeof(AaruHeaderV2), output) != sizeof(AaruHeaderV2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot update header\n");
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
return -1;
|
||||
}
|
||||
|
||||
free(new_entries);
|
||||
fclose(output);
|
||||
|
||||
// Get final file size
|
||||
struct stat st;
|
||||
if(stat(output_path, &st) == 0) { result->compressed_size = st.st_size; }
|
||||
|
||||
return 0;
|
||||
}
|
||||
183
benchmark/image_ops.c
Normal file
183
benchmark/image_ops.c
Normal file
@@ -0,0 +1,183 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2026 Natalia Portillo.
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU General Public License
|
||||
* as published by the Free Software Foundation; either version 2
|
||||
* of the License, or (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
*/
|
||||
|
||||
#include <aaruformat/consts.h>
|
||||
#include <aaruformat/structs/data.h>
|
||||
#include <aaruformat/structs/header.h>
|
||||
#include <aaruformat/structs/index.h>
|
||||
#include <inttypes.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "benchmark.h"
|
||||
|
||||
// Open an Aaru format image and read its index
|
||||
int open_image(const char *path, image_info *info)
|
||||
{
|
||||
memset(info, 0, sizeof(image_info));
|
||||
|
||||
info->file = fopen(path, "rb");
|
||||
if(info->file == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot open file %s\n", path);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read header
|
||||
AaruHeaderV2 header;
|
||||
if(fread(&header, 1, sizeof(AaruHeaderV2), info->file) != sizeof(AaruHeaderV2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read header\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Check magic
|
||||
if(header.identifier != AARU_MAGIC)
|
||||
{
|
||||
fprintf(stderr, "Error: Invalid Aaru format magic\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
info->major_version = header.imageMajorVersion;
|
||||
info->minor_version = header.imageMinorVersion;
|
||||
info->index_offset = header.indexOffset;
|
||||
|
||||
if(info->index_offset == 0)
|
||||
{
|
||||
fprintf(stderr, "Error: No index in image\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Seek to index
|
||||
if(fseek(info->file, info->index_offset, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot seek to index\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Read index header - check which version
|
||||
uint32_t index_identifier;
|
||||
if(fread(&index_identifier, 1, sizeof(uint32_t), info->file) != sizeof(uint32_t))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read index identifier\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Rewind to read full header
|
||||
if(fseek(info->file, info->index_offset, SEEK_SET) != 0)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot rewind to index\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
uint64_t entries_count = 0;
|
||||
|
||||
if(index_identifier == 0x32584449) // IndexBlock2
|
||||
{
|
||||
IndexHeader2 index_header;
|
||||
if(fread(&index_header, 1, sizeof(IndexHeader2), info->file) != sizeof(IndexHeader2))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read IndexHeader2\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
entries_count = index_header.entries;
|
||||
}
|
||||
else if(index_identifier == 0x33584449) // IndexBlock3
|
||||
{
|
||||
IndexHeader3 index_header;
|
||||
if(fread(&index_header, 1, sizeof(IndexHeader3), info->file) != sizeof(IndexHeader3))
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read IndexHeader3\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
entries_count = index_header.entries;
|
||||
|
||||
// TODO: If we need to handle chained indexes (previous field), we would do it here
|
||||
// For now, we just read the main index
|
||||
}
|
||||
else
|
||||
{
|
||||
fprintf(stderr, "Error: Unsupported index version (identifier: 0x%08X)\n", index_identifier);
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
info->block_count = entries_count;
|
||||
|
||||
// Allocate and read index entries
|
||||
const size_t entries_size = info->block_count * sizeof(IndexEntry);
|
||||
info->index_entries = malloc(entries_size);
|
||||
if(info->index_entries == NULL)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot allocate memory for index entries\n");
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
if(fread(info->index_entries, 1, entries_size, info->file) != entries_size)
|
||||
{
|
||||
fprintf(stderr, "Error: Cannot read index entries\n");
|
||||
free(info->index_entries);
|
||||
fclose(info->file);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Calculate total uncompressed size by scanning data blocks
|
||||
info->total_uncompressed_size = 0;
|
||||
IndexEntry *entries = (IndexEntry *)info->index_entries;
|
||||
|
||||
for(uint64_t i = 0; i < info->block_count; i++)
|
||||
{
|
||||
if(entries[i].blockType == 0x4B4C4244) // DataBlock
|
||||
{
|
||||
// Seek to block and read header
|
||||
if(fseek(info->file, entries[i].offset, SEEK_SET) != 0) continue;
|
||||
|
||||
BlockHeader block_header;
|
||||
if(fread(&block_header, 1, sizeof(BlockHeader), info->file) != sizeof(BlockHeader)) continue;
|
||||
|
||||
info->total_uncompressed_size += block_header.length;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Close image
|
||||
void close_image(image_info *info)
|
||||
{
|
||||
if(info->file != NULL)
|
||||
{
|
||||
fclose(info->file);
|
||||
info->file = NULL;
|
||||
}
|
||||
|
||||
if(info->index_entries != NULL)
|
||||
{
|
||||
free(info->index_entries);
|
||||
info->index_entries = NULL;
|
||||
}
|
||||
}
|
||||
14
benchmark/patch_bzip3.cmake
Normal file
14
benchmark/patch_bzip3.cmake
Normal file
@@ -0,0 +1,14 @@
|
||||
# Patch bzip3's CMakeLists.txt to fix CMAKE_PROJECT_NAME issue.
#
# Invoked in script mode by FetchContent's PATCH_COMMAND as:
#   cmake -DSOURCE_DIR=<bzip3 source dir> -P patch_bzip3.cmake
#
# bzip3's install/export rules reference ${CMAKE_PROJECT_NAME}, which resolves
# to the TOP-LEVEL project when bzip3 is built as a subproject; rewrite those
# references to the literal "bzip3".

if(NOT DEFINED SOURCE_DIR)
  message(FATAL_ERROR "patch_bzip3.cmake: pass the bzip3 source dir with -DSOURCE_DIR=<dir>")
endif()

set(bzip3_lists "${SOURCE_DIR}/CMakeLists.txt")
if(NOT EXISTS "${bzip3_lists}")
  message(FATAL_ERROR "patch_bzip3.cmake: ${bzip3_lists} does not exist")
endif()

file(READ "${bzip3_lists}" original_content)

# Replace ${CMAKE_PROJECT_NAME} with bzip3 in config file lines
string(REPLACE "\${CMAKE_PROJECT_NAME}-config.cmake" "bzip3-config.cmake" patched_content "${original_content}")
string(REPLACE "\${CMAKE_PROJECT_NAME}-targets" "bzip3-targets" patched_content "${patched_content}")
string(REPLACE "NAMESPACE \${CMAKE_PROJECT_NAME}::" "NAMESPACE bzip3::" patched_content "${patched_content}")
string(REPLACE "cmake/\${CMAKE_PROJECT_NAME}" "cmake/bzip3" patched_content "${patched_content}")

# Write back only when something actually changed: FetchContent may re-apply
# the patch on reconfigure, and rewriting identical content would touch the
# file's timestamp and trigger needless re-builds.
if("${patched_content}" STREQUAL "${original_content}")
  message(STATUS "bzip3 CMakeLists.txt already patched; nothing to do")
else()
  file(WRITE "${bzip3_lists}" "${patched_content}")
  message(STATUS "Patched bzip3 CMakeLists.txt to fix CMAKE_PROJECT_NAME references")
endif()
|
||||
|
||||
@@ -419,6 +419,7 @@ AARU_EXPORT void AARU_CALL *aaruf_create(const char *filepath, const uint32_t me
|
||||
ctx->user_data_ddt_header.overflow = overflow_sectors;
|
||||
ctx->user_data_ddt_header.start = 0;
|
||||
ctx->user_data_ddt_header.blockAlignmentShift = parsed_options.block_alignment;
|
||||
ctx->header.blockAlignmentShift = parsed_options.block_alignment;
|
||||
ctx->user_data_ddt_header.dataShift = parsed_options.data_shift;
|
||||
|
||||
if(parsed_options.table_shift == -1 || !table_shift_found)
|
||||
|
||||
@@ -17,8 +17,8 @@
|
||||
*/
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#include <wincrypt.h>
|
||||
#include <windows.h>
|
||||
#include <wincrypt.h>
|
||||
#endif
|
||||
|
||||
#include <aaru.h>
|
||||
|
||||
Reference in New Issue
Block a user