Update application name handling to support UTF-8 in version 2 and remove ICU dependency

This commit is contained in:
2025-10-11 01:26:17 +01:00
parent 756d965e2a
commit 2e11b05547
5 changed files with 28 additions and 129 deletions

View File

@@ -214,19 +214,10 @@ include(3rdparty/lzma.cmake)
include(3rdparty/xxhash.cmake) include(3rdparty/xxhash.cmake)
include(3rdparty/blake3.cmake) include(3rdparty/blake3.cmake)
# Find and link ICU library for UTF-16LE to UTF-8 conversion
find_package(ICU COMPONENTS uc REQUIRED)
if(TARGET blake3) if(TARGET blake3)
target_link_libraries(aaruformat blake3) target_link_libraries(aaruformat blake3)
endif() endif()
# Add ICU include directories and link library to the target
if(ICU_FOUND)
target_include_directories(aaruformat PRIVATE ${ICU_INCLUDE_DIRS})
target_link_libraries(aaruformat ${ICU_LIBRARIES})
endif()
macro(TARGET_LINK_LIBRARIES_WHOLE_ARCHIVE target) macro(TARGET_LINK_LIBRARIES_WHOLE_ARCHIVE target)
if(MSVC) if(MSVC)
foreach(lib IN LISTS ARGN) foreach(lib IN LISTS ARGN)

View File

@@ -4,8 +4,7 @@ C implementation of [Aaru](https://www.github.com/aaru-dps/Aaru) file format.
The target is to be able to compile it with a normal C (C89 compliant) compiler. The target is to be able to compile it with a normal C (C89 compliant) compiler.
Currently depends on libicu being available thru vcpkg due to UTF-16 shenanigans. There are no external dependencies.
Currently under debate on a breaking ABI change to remove this dependency.
cmake is not a hard dependency, it's merely for the ease of using IDEs (specifically CLion). cmake is not a hard dependency, it's merely for the ease of using IDEs (specifically CLion).

View File

@@ -54,7 +54,7 @@ typedef struct AaruHeaderV2
|=== |===
| Name | Type | Description | Name | Type | Description
| identifier | uint64_t | Header identifier constant. Must match the predefined `AARU_MAGIC` value to validate the format. | identifier | uint64_t | Header identifier constant. Must match the predefined `AARU_MAGIC` value to validate the format.
| application | uint8_t[HEADER_APP_NAME_LEN] | UTF-16LE encoded name of the application responsible for creating the image. | application | uint8_t[HEADER_APP_NAME_LEN] | UTF-16LE in version 1, UTF-8 in version 2, encoded name of the application responsible for creating the image.
Length is defined by `HEADER_APP_NAME_LEN`. Length is defined by `HEADER_APP_NAME_LEN`.
| imageMajorVersion | uint8_t | Major version of the AaruFormat structure. | imageMajorVersion | uint8_t | Major version of the AaruFormat structure.

View File

@@ -22,9 +22,6 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <unicode/ucnv.h>
#include <unicode/ustring.h>
#include "aaruformat.h" #include "aaruformat.h"
#include "enums.h" #include "enums.h"
#include "internal.h" #include "internal.h"
@@ -182,8 +179,9 @@ static void cleanup_failed_create(aaruformat_context *ctx)
* - "dictionary=N": LZMA dictionary size in bytes * - "dictionary=N": LZMA dictionary size in bytes
* Example: "compress=true;deduplicate=true;md5=true;sha1=true" * Example: "compress=true;deduplicate=true;md5=true;sha1=true"
* *
* @param application_name Pointer to the application name string (UTF-16LE raw bytes). * @param application_name Pointer to the application name string (UTF-8 encoded).
* This identifies the software that created the image. * This identifies the software that created the image.
* The string will be copied directly to the image header.
* *
* @param application_name_length Length of the application name string in bytes. * @param application_name_length Length of the application name string in bytes.
* Must be ≤ AARU_HEADER_APP_NAME_LEN (64 bytes). * Must be ≤ AARU_HEADER_APP_NAME_LEN (64 bytes).
@@ -362,64 +360,11 @@ void *aaruf_create(const char *filepath, const uint32_t media_type, const uint32
// Initialize image info // Initialize image info
TRACE("Initializing image info"); TRACE("Initializing image info");
// Convert application name from UTF-16LE to UTF-8 using libicu // Copy application name (UTF-8) to image_info
UErrorCode status = U_ZERO_ERROR; memset(ctx->image_info.Application, 0, 64);
int32_t app_name_utf16_len = AARU_HEADER_APP_NAME_LEN / 2; // UTF-16LE uses 2 bytes per character size_t copy_len = application_name_length < 63 ? application_name_length : 63;
UChar *app_name_utf16 = (UChar *)malloc(app_name_utf16_len * sizeof(UChar)); memcpy(ctx->image_info.Application, application_name, copy_len);
ctx->image_info.Application[63] = '\0';
if(app_name_utf16 != NULL)
{
// Convert raw UTF-16LE bytes to UChar (UTF-16, host endian)
for(int32_t j = 0; j < app_name_utf16_len; j++)
{
app_name_utf16[j] = (UChar)(ctx->header.application[j * 2] | (ctx->header.application[j * 2 + 1] << 8));
}
// Get required length for UTF-8
int32_t app_name_utf8_len = 0;
u_strToUTF8(NULL, 0, &app_name_utf8_len, app_name_utf16, app_name_utf16_len, &status);
if(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR)
{
status = U_ZERO_ERROR;
// Ensure it fits in the Application buffer (64 bytes including null terminator)
if(app_name_utf8_len < 64)
{
u_strToUTF8(ctx->image_info.Application, 64, NULL, app_name_utf16, app_name_utf16_len, &status);
if(U_FAILURE(status))
{
TRACE("Error converting application name to UTF-8: %d, using raw bytes", status);
// Fallback: just copy what we can
memset(ctx->image_info.Application, 0, 64);
memcpy(ctx->image_info.Application, ctx->header.application, AARU_HEADER_APP_NAME_LEN);
}
}
else
{
TRACE("Application name too long for buffer, truncating");
u_strToUTF8(ctx->image_info.Application, 63, NULL, app_name_utf16, app_name_utf16_len, &status);
ctx->image_info.Application[63] = '\0';
}
}
else
{
TRACE("Error getting UTF-8 length: %d, using raw bytes", status);
// Fallback: just copy what we can
memset(ctx->image_info.Application, 0, 64);
memcpy(ctx->image_info.Application, ctx->header.application, AARU_HEADER_APP_NAME_LEN);
}
free(app_name_utf16);
}
else
{
TRACE("Could not allocate memory for UTF-16 conversion, using raw bytes");
// Fallback: just copy what we can
memset(ctx->image_info.Application, 0, 64);
memcpy(ctx->image_info.Application, ctx->header.application, AARU_HEADER_APP_NAME_LEN);
}
// Set application version string directly in the fixed-size array // Set application version string directly in the fixed-size array
memset(ctx->image_info.ApplicationVersion, 0, 32); memset(ctx->image_info.ApplicationVersion, 0, 32);

View File

@@ -22,9 +22,6 @@
#include <stdlib.h> #include <stdlib.h>
#include <string.h> #include <string.h>
#include <unicode/ucnv.h>
#include <unicode/ustring.h>
#include <aaruformat.h> #include <aaruformat.h>
#include "internal.h" #include "internal.h"
@@ -243,63 +240,30 @@ void *aaruf_open(const char *filepath) // NOLINT(readability-function-size)
TRACE("Setting up image info"); TRACE("Setting up image info");
// Convert application name from UTF-16LE to UTF-8 using libicu // Handle application name based on image version
UErrorCode status = U_ZERO_ERROR; memset(ctx->image_info.Application, 0, 64);
int32_t app_name_utf16_len = AARU_HEADER_APP_NAME_LEN / 2; // UTF-16LE uses 2 bytes per character
UChar *app_name_utf16 = (UChar *)malloc(app_name_utf16_len * sizeof(UChar));
if(app_name_utf16 != NULL) if(ctx->header.imageMajorVersion >= AARUF_VERSION_V2)
{ {
// Convert raw UTF-16LE bytes to UChar (UTF-16, host endian) // Version 2+: application name is UTF-8, direct copy
for(int32_t j = 0; j < app_name_utf16_len; j++) TRACE("Converting application name (v2+): UTF-8 direct copy");
{ size_t copy_len = AARU_HEADER_APP_NAME_LEN < 63 ? AARU_HEADER_APP_NAME_LEN : 63;
app_name_utf16[j] = (UChar)(ctx->header.application[j * 2] | (ctx->header.application[j * 2 + 1] << 8)); memcpy(ctx->image_info.Application, ctx->header.application, copy_len);
} ctx->image_info.Application[63] = '\0';
// Get required length for UTF-8
int32_t app_name_utf8_len = 0;
u_strToUTF8(NULL, 0, &app_name_utf8_len, app_name_utf16, app_name_utf16_len, &status);
if(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR)
{
status = U_ZERO_ERROR;
// Ensure it fits in the Application buffer (64 bytes including null terminator)
if(app_name_utf8_len < 64)
{
u_strToUTF8(ctx->image_info.Application, 64, NULL, app_name_utf16, app_name_utf16_len, &status);
if(U_FAILURE(status))
{
TRACE("Error converting application name to UTF-8: %d, using raw bytes", status);
// Fallback: just copy what we can
memset(ctx->image_info.Application, 0, 64);
strncpy(ctx->image_info.Application, (const char *)ctx->header.application, 63);
}
}
else
{
TRACE("Application name too long for buffer, truncating");
u_strToUTF8(ctx->image_info.Application, 63, NULL, app_name_utf16, app_name_utf16_len, &status);
ctx->image_info.Application[63] = '\0';
}
}
else
{
TRACE("Error getting UTF-8 length: %d, using raw bytes", status);
// Fallback: just copy what we can
memset(ctx->image_info.Application, 0, 64);
strncpy(ctx->image_info.Application, (const char *)ctx->header.application, 63);
}
free(app_name_utf16);
} }
else else
{ {
TRACE("Could not allocate memory for UTF-16 conversion, using raw bytes"); // Version 1: application name is UTF-16LE, convert by taking every other byte
// Fallback: just copy what we can TRACE("Converting application name (v1): UTF-16LE to ASCII");
memset(ctx->image_info.Application, 0, 64); int dest_idx = 0;
strncpy(ctx->image_info.Application, (const char *)ctx->header.application, 63); for(int j = 0; j < AARU_HEADER_APP_NAME_LEN && dest_idx < 63; j += 2)
// Take the low byte, skip the high byte (assuming it's 0x00 for ASCII)
if(ctx->header.application[j] != 0)
ctx->image_info.Application[dest_idx++] = ctx->header.application[j];
else
// Stop at null terminator
break;
ctx->image_info.Application[dest_idx] = '\0';
} }
// Set application version string directly in the fixed-size array // Set application version string directly in the fixed-size array