From 2e11b05547954f34132d910207e2c6371a8b1064 Mon Sep 17 00:00:00 2001 From: Natalia Portillo Date: Sat, 11 Oct 2025 01:26:17 +0100 Subject: [PATCH] Update application name handling to support UTF-8 in version 2 and remove ICU dependency --- CMakeLists.txt | 9 ----- README.md | 3 +- docs/spec/blocks/header.adoc | 2 +- src/create.c | 69 ++++----------------------------- src/open.c | 74 +++++++++--------------------------- 5 files changed, 28 insertions(+), 129 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index d48192c..438049c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -214,19 +214,10 @@ include(3rdparty/lzma.cmake) include(3rdparty/xxhash.cmake) include(3rdparty/blake3.cmake) -# Find and link ICU library for UTF-16LE to UTF-8 conversion -find_package(ICU COMPONENTS uc REQUIRED) - if(TARGET blake3) target_link_libraries(aaruformat blake3) endif() -# Add ICU include directories and link library to the target -if(ICU_FOUND) - target_include_directories(aaruformat PRIVATE ${ICU_INCLUDE_DIRS}) - target_link_libraries(aaruformat ${ICU_LIBRARIES}) -endif() - macro(TARGET_LINK_LIBRARIES_WHOLE_ARCHIVE target) if(MSVC) foreach(lib IN LISTS ARGN) diff --git a/README.md b/README.md index db5981f..f286239 100644 --- a/README.md +++ b/README.md @@ -4,8 +4,7 @@ C implementation of [Aaru](https://www.github.com/aaru-dps/Aaru) file format. The target is to be able to compile it with a normal C (C89 compliant) compiler. -Currently depends on libicu being available thru vcpkg due to UTF-16 shenanigans. -Currently under debate on a breaking ABI change to remove this dependency. +There are no external dependencies. cmake is not a hard dependency, it's merely for the ease of using IDEs (specifically CLion). 
diff --git a/docs/spec/blocks/header.adoc b/docs/spec/blocks/header.adoc index e40a281..7d7262d 100644 --- a/docs/spec/blocks/header.adoc +++ b/docs/spec/blocks/header.adoc @@ -54,7 +54,7 @@ typedef struct AaruHeaderV2 |=== | Name | Type | Description | identifier | uint64_t | Header identifier constant. Must match the predefined `AARU_MAGIC` value to validate the format. -| application | uint8_t[HEADER_APP_NAME_LEN] | UTF-16LE encoded name of the application responsible for creating the image. +| application | uint8_t[HEADER_APP_NAME_LEN] | Name of the application responsible for creating the image, encoded as UTF-16LE in version 1 images and as UTF-8 in version 2 images. Length is defined by `HEADER_APP_NAME_LEN`. | imageMajorVersion | uint8_t | Major version of the AaruFormat structure. diff --git a/src/create.c b/src/create.c index 77e5d37..ba626a7 100644 --- a/src/create.c +++ b/src/create.c @@ -22,9 +22,6 @@ #include #include -#include -#include - #include "aaruformat.h" #include "enums.h" #include "internal.h" @@ -182,8 +179,9 @@ static void cleanup_failed_create(aaruformat_context *ctx) * - "dictionary=N": LZMA dictionary size in bytes * Example: "compress=true;deduplicate=true;md5=true;sha1=true" * - * @param application_name Pointer to the application name string (UTF-16LE raw bytes). + * @param application_name Pointer to the application name string (UTF-8 encoded). * This identifies the software that created the image. + * The string will be copied directly to the image header. * * @param application_name_length Length of the application name string in bytes. * Must be ≤ AARU_HEADER_APP_NAME_LEN (64 bytes). 
@@ -362,64 +360,11 @@ void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32 // Initialize image info TRACE("Initializing image info"); - // Convert application name from UTF-16LE to UTF-8 using libicu - UErrorCode status = U_ZERO_ERROR; - int32_t app_name_utf16_len = AARU_HEADER_APP_NAME_LEN / 2; // UTF-16LE uses 2 bytes per character - UChar *app_name_utf16 = (UChar *)malloc(app_name_utf16_len * sizeof(UChar)); - - if(app_name_utf16 != NULL) - { - // Convert raw UTF-16LE bytes to UChar (UTF-16, host endian) - for(int32_t j = 0; j < app_name_utf16_len; j++) - { - app_name_utf16[j] = (UChar)(ctx->header.application[j * 2] | (ctx->header.application[j * 2 + 1] << 8)); - } - - // Get required length for UTF-8 - int32_t app_name_utf8_len = 0; - u_strToUTF8(NULL, 0, &app_name_utf8_len, app_name_utf16, app_name_utf16_len, &status); - - if(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR) - { - status = U_ZERO_ERROR; - - // Ensure it fits in the Application buffer (64 bytes including null terminator) - if(app_name_utf8_len < 64) - { - u_strToUTF8(ctx->image_info.Application, 64, NULL, app_name_utf16, app_name_utf16_len, &status); - - if(U_FAILURE(status)) - { - TRACE("Error converting application name to UTF-8: %d, using raw bytes", status); - // Fallback: just copy what we can - memset(ctx->image_info.Application, 0, 64); - memcpy(ctx->image_info.Application, ctx->header.application, AARU_HEADER_APP_NAME_LEN); - } - } - else - { - TRACE("Application name too long for buffer, truncating"); - u_strToUTF8(ctx->image_info.Application, 63, NULL, app_name_utf16, app_name_utf16_len, &status); - ctx->image_info.Application[63] = '\0'; - } - } - else - { - TRACE("Error getting UTF-8 length: %d, using raw bytes", status); - // Fallback: just copy what we can - memset(ctx->image_info.Application, 0, 64); - memcpy(ctx->image_info.Application, ctx->header.application, AARU_HEADER_APP_NAME_LEN); - } - - free(app_name_utf16); - } - else - { - 
TRACE("Could not allocate memory for UTF-16 conversion, using raw bytes"); - // Fallback: just copy what we can - memset(ctx->image_info.Application, 0, 64); - memcpy(ctx->image_info.Application, ctx->header.application, AARU_HEADER_APP_NAME_LEN); - } + // Copy application name (UTF-8) to image_info + memset(ctx->image_info.Application, 0, 64); + size_t copy_len = application_name_length < 63 ? application_name_length : 63; + memcpy(ctx->image_info.Application, application_name, copy_len); + ctx->image_info.Application[63] = '\0'; // Set application version string directly in the fixed-size array memset(ctx->image_info.ApplicationVersion, 0, 32); diff --git a/src/open.c b/src/open.c index 1b8a83b..5219201 100644 --- a/src/open.c +++ b/src/open.c @@ -22,9 +22,6 @@ #include #include -#include -#include - #include #include "internal.h" @@ -243,63 +240,30 @@ void *aaruf_open(const char *filepath) // NOLINT(readability-function-size) TRACE("Setting up image info"); - // Convert application name from UTF-16LE to UTF-8 using libicu - UErrorCode status = U_ZERO_ERROR; - int32_t app_name_utf16_len = AARU_HEADER_APP_NAME_LEN / 2; // UTF-16LE uses 2 bytes per character - UChar *app_name_utf16 = (UChar *)malloc(app_name_utf16_len * sizeof(UChar)); + // Handle application name based on image version + memset(ctx->image_info.Application, 0, 64); - if(app_name_utf16 != NULL) + if(ctx->header.imageMajorVersion >= AARUF_VERSION_V2) { - // Convert raw UTF-16LE bytes to UChar (UTF-16, host endian) - for(int32_t j = 0; j < app_name_utf16_len; j++) - { - app_name_utf16[j] = (UChar)(ctx->header.application[j * 2] | (ctx->header.application[j * 2 + 1] << 8)); - } - - // Get required length for UTF-8 - int32_t app_name_utf8_len = 0; - u_strToUTF8(NULL, 0, &app_name_utf8_len, app_name_utf16, app_name_utf16_len, &status); - - if(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR) - { - status = U_ZERO_ERROR; - - // Ensure it fits in the Application buffer (64 bytes including null 
terminator) - if(app_name_utf8_len < 64) - { - u_strToUTF8(ctx->image_info.Application, 64, NULL, app_name_utf16, app_name_utf16_len, &status); - - if(U_FAILURE(status)) - { - TRACE("Error converting application name to UTF-8: %d, using raw bytes", status); - // Fallback: just copy what we can - memset(ctx->image_info.Application, 0, 64); - strncpy(ctx->image_info.Application, (const char *)ctx->header.application, 63); - } - } - else - { - TRACE("Application name too long for buffer, truncating"); - u_strToUTF8(ctx->image_info.Application, 63, NULL, app_name_utf16, app_name_utf16_len, &status); - ctx->image_info.Application[63] = '\0'; - } - } - else - { - TRACE("Error getting UTF-8 length: %d, using raw bytes", status); - // Fallback: just copy what we can - memset(ctx->image_info.Application, 0, 64); - strncpy(ctx->image_info.Application, (const char *)ctx->header.application, 63); - } - - free(app_name_utf16); + // Version 2+: application name is UTF-8, direct copy + TRACE("Converting application name (v2+): UTF-8 direct copy"); + size_t copy_len = AARU_HEADER_APP_NAME_LEN < 63 ? 
AARU_HEADER_APP_NAME_LEN : 63; + memcpy(ctx->image_info.Application, ctx->header.application, copy_len); + ctx->image_info.Application[63] = '\0'; } else { - TRACE("Could not allocate memory for UTF-16 conversion, using raw bytes"); - // Fallback: just copy what we can - memset(ctx->image_info.Application, 0, 64); - strncpy(ctx->image_info.Application, (const char *)ctx->header.application, 63); + // Version 1: application name is UTF-16LE, convert by taking every other byte + TRACE("Converting application name (v1): UTF-16LE to ASCII"); + int dest_idx = 0; + for(int j = 0; j < AARU_HEADER_APP_NAME_LEN && dest_idx < 63; j += 2) + // Take the low byte, skip the high byte (assuming it's 0x00 for ASCII) + if(ctx->header.application[j] != 0) + ctx->image_info.Application[dest_idx++] = ctx->header.application[j]; + else + // Stop at null terminator + break; + ctx->image_info.Application[dest_idx] = '\0'; } // Set application version string directly in the fixed-size array