diff --git a/CMakeLists.txt b/CMakeLists.txt index 892a6cb..d56c4e8 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -141,7 +141,8 @@ add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h a zoo/ar.h zoo/maketbl.c arc/pack.c - arc/squeeze.c) + arc/squeeze.c + arc/crunch.c) include(3rdparty/bzip2.cmake) include(3rdparty/flac.cmake)
diff --git a/arc/crunch.c b/arc/crunch.c
new file mode 100644
index 0000000..3f1128b
--- /dev/null
+++ b/arc/crunch.c
@@ -0,0 +1,302 @@
+/*
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2025 Natalia Portillo.
+ * Copyright © 2018-2019 David Ryskalczyk
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "../library.h"
+
+#define FALSE 0
+#define TRUE (!FALSE)
+#define TABSIZE 4096   // Size of the string table.
+#define NO_PRED 0xFFFF // Indicates no predecessor in the string table.
+
+typedef unsigned char  u_char;
+typedef unsigned short u_short;
+
+// Entry in the string table.
+struct entry
+{
+    char    used;        // Is this entry in use?
+    u_char  follower;    // The character that follows the string.
+    u_short next;        // Next entry in a collision chain, 0 ends the chain.
+    u_short predecessor; // Code for the preceding string, or NO_PRED.
+};
+
+// Static variables for decompression state. Because the state is file-static
+// the decoder is not reentrant: only one decompression may run at a time.
+static struct entry *string_tab; // String table, TABSIZE entries.
+static u_char       *stack;      // Decoded string, stored in reverse order.
+static int           sp;         // Number of bytes currently on the stack.
+
+// Buffer management variables.
+static const u_char *in_buf_ptr; // Next input byte to read.
+static size_t        in_len_rem; // Input bytes not yet fully consumed.
+static int           inflag;     // Toggled by every get_code() call.
+
+// Pointer to the hash function to use.
+static u_short (*h)(u_short, u_char);
+
+// Original hash function from ARC.
+static u_short oldh(u_short pred, u_char foll)
+{
+    // The square can reach 0xFFFE0001, which overflows a signed long where long
+    // is 32 bits wide, so the arithmetic is done on an unsigned long.
+    unsigned long local = (unsigned long)(((pred + foll) | 0x0800) & 0xFFFF);
+
+    local *= local;
+    return (u_short)((local >> 6) & 0x0FFF);
+}
+
+// Newer, faster hash function.
+static u_short newh(u_short pred, u_char foll) { return (((pred + foll) & 0xFFFF) * 15073) & 0xFFF; }
+
+// Finds the end of a collision list.
+static u_short eolist(u_short index)
+{
+    u_short next;
+
+    while((next = string_tab[index].next) != 0) index = next;
+    return index;
+}
+
+// Returns a free table slot for the string (pred, foll). When the hashed slot
+// is taken, the new slot is linked to the end of that slot's collision chain.
+// The caller must make sure the table still has a free slot.
+static u_short hash_it(u_short pred, u_char foll)
+{
+    u_short slot = (*h)(pred, foll);
+    u_short tail;
+    u_short probe;
+
+    if(!string_tab[slot].used) return slot;
+
+    // Collision: search linearly, wrapping at TABSIZE, starting 101 slots past
+    // the end of the chain.
+    tail  = eolist(slot);
+    probe = (tail + 101) & 0x0FFF;
+    while(string_tab[probe].used)
+        if(++probe == TABSIZE) probe = 0;
+
+    string_tab[tail].next = probe;
+    return probe;
+}
+
+// Adds a new string to the table.
+static void upd_tab(u_short pred, u_char foll)
+{
+    struct entry *ep = &string_tab[hash_it(pred, foll)];
+
+    ep->used        = TRUE;
+    ep->next        = 0;
+    ep->predecessor = pred;
+    ep->follower    = foll;
+}
+
+// Initializes the string table with the 256 single-byte strings.
+static void init_tab(void)
+{
+    unsigned int i;
+
+    memset(string_tab, 0, TABSIZE * sizeof(struct entry));
+    for(i = 0; i < 256; i++) upd_tab(NO_PRED, (u_char)i);
+}
+
+// Reads a 12-bit code from the input buffer; two codes share three bytes.
+// Returns -1 when fewer than two input bytes remain.
+static int get_code(void)
+{
+    int code;
+
+    if(in_len_rem < 2) return -1;
+
+    if((inflag ^= 1))
+    {
+        // First code of a pair: one byte plus the high nibble of the next.
+        code = (*in_buf_ptr++ << 4);
+        code |= (*in_buf_ptr >> 4);
+        in_len_rem--;
+    }
+    else
+    {
+        // Second code of a pair: the low nibble left over plus one byte.
+        code = (*in_buf_ptr++ & 0x0f) << 8;
+        code |= (*in_buf_ptr++);
+        in_len_rem -= 2;
+    }
+    return code;
+}
+
+// Pushes a character onto the stack. Returns 0 on success or -1 once the stack
+// is full.
+static int push(u_char c)
+{
+    stack[sp] = c;
+    return ++sp >= TABSIZE ? -1 : 0;
+}
+
+// Internal crunch decompression logic.
+//
+// in_buf, in_len: compressed input.
+// out_buf:        receives the decompressed data.
+// out_len:        on entry the capacity of out_buf, on success the number of
+//                 bytes written. Output beyond the capacity is dropped.
+// new_hash:       non-zero selects newh(), zero selects oldh().
+//
+// Returns 0 on success, -1 on a NULL argument, allocation failure or stack
+// overflow.
+static int arc_decompress_crunch_internal(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
+                                          size_t *out_len, int new_hash)
+{
+    int    result     = -1;
+    int    code_count = TABSIZE - 256; // Free slots left for new strings.
+    int    oldcode;
+    int    newcode;
+    int    finchar;
+    size_t out_pos = 0;
+
+    // Basic validation of pointers.
+    if(!in_buf || !out_buf || !out_len) return -1;
+
+    // Allocate memory for tables.
+    string_tab = (struct entry *)malloc(TABSIZE * sizeof(struct entry));
+    stack      = (u_char *)malloc(TABSIZE * sizeof(u_char));
+    if(!string_tab || !stack) goto cleanup;
+
+    // Select the hash function.
+    h = new_hash ? newh : oldh;
+
+    // Initialize state.
+    sp = 0;
+    init_tab();
+    in_buf_ptr = in_buf;
+    in_len_rem = in_len;
+    inflag     = 0;
+
+    // The first code is written out directly.
+    oldcode = get_code();
+    if(oldcode == -1)
+    {
+        // Not even one complete code: succeed with empty output.
+        *out_len = 0;
+        result   = 0;
+        goto cleanup;
+    }
+    finchar = string_tab[oldcode].follower;
+    if(out_pos < *out_len) out_buf[out_pos++] = (unsigned char)finchar;
+
+    // Main decompression loop.
+    while((newcode = get_code()) != -1)
+    {
+        struct entry *ep = &string_tab[newcode];
+
+        // Handle unknown codes and KwKwK case.
+        if(!ep->used)
+        {
+            ep = &string_tab[oldcode];
+            if(push((u_char)finchar) != 0) goto cleanup;
+        }
+
+        // Decode the string by traversing the table.
+        while(ep->predecessor != NO_PRED)
+        {
+            if(push(ep->follower) != 0) goto cleanup;
+            ep = &string_tab[ep->predecessor];
+        }
+        finchar = ep->follower;
+        if(push((u_char)finchar) != 0) goto cleanup;
+
+        // Add the new string to the table if there's room.
+        if(code_count)
+        {
+            upd_tab((u_short)oldcode, (u_char)finchar);
+            --code_count;
+        }
+        oldcode = newcode;
+
+        // Write the decoded string to the output buffer.
+        while(sp > 0)
+        {
+            u_char c = stack[--sp];
+            if(out_pos < *out_len) out_buf[out_pos++] = c;
+        }
+    }
+
+    *out_len = out_pos;
+    result   = 0;
+
+cleanup:
+    // Every exit path after the allocation ends here; free(NULL) is a no-op.
+    free(string_tab);
+    free(stack);
+    string_tab = NULL;
+    stack      = NULL;
+    return result;
+}
+
+// Decompresses crunched data and then undoes the non-repeat packing.
+// Shared by both exported non-repeat entry points.
+static int arc_decompress_crunch_nrpack_internal(const unsigned char *in_buf, size_t in_len,
+                                                 unsigned char *out_buf, size_t *out_len, int new_hash)
+{
+    size_t         temp_len;
+    unsigned char *temp_buf;
+    int            result;
+
+    if(!in_buf || !out_buf || !out_len) return -1;
+
+    // Heuristic for temp buffer size: twice the output capacity, guarded
+    // against size_t overflow.
+    if(*out_len > SIZE_MAX / 2) return -1;
+    temp_len = *out_len * 2;
+
+    // malloc(0) may return NULL, so always request at least one byte.
+    temp_buf = (unsigned char *)malloc(temp_len ? temp_len : 1);
+    if(!temp_buf) return -1;
+
+    // First, decompress the crunched data.
+    result = arc_decompress_crunch_internal(in_buf, in_len, temp_buf, &temp_len, new_hash);
+
+    // Then, decompress the non-repeat packing.
+    if(result == 0) result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
+
+    free(temp_buf);
+    return result;
+}
+
+// Decompresses crunched data.
+AARU_EXPORT int AARU_CALL arc_decompress_crunch(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
+                                                size_t *out_len)
+{
+    return arc_decompress_crunch_internal(in_buf, in_len, out_buf, out_len, 0);
+}
+
+// Decompresses crunched data with non-repeat packing.
+AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_buf, size_t in_len,
+                                                       unsigned char *out_buf, size_t *out_len)
+{
+    return arc_decompress_crunch_nrpack_internal(in_buf, in_len, out_buf, out_len, 0);
+}
+
+// Decompresses crunched data with non-repeat packing and the new hash function.
+AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len,
+                                                           unsigned char *out_buf, size_t *out_len)
+{
+    return arc_decompress_crunch_nrpack_internal(in_buf, in_len, out_buf, out_len, 1);
+}
diff --git a/library.h b/library.h index 1949018..c1e9058 100644 --- a/library.h +++ b/library.h @@ -119,8 +119,19 @@ AARU_EXPORT int AARU_CALL lh5_decompress(const uint8_t *in_buf, size_t in_len, u AARU_EXPORT uint64_t AARU_CALL AARU_get_acn_version(); // ARC method 3: Stored with non-repeat packing -AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); +AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, + size_t *out_len); // ARC method 4: Huffman squeezing -AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); +AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, + size_t *out_len); +// Method 5: LZW (crunching) +AARU_EXPORT int AARU_CALL arc_decompress_crunch(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, + size_t *out_len); +// Method 6: LZW with non-repeat packing (crunching) +AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_buf, size_t in_len, + unsigned char *out_buf, size_t *out_len); +// Method 7: LZW with non-repeat packing and new hash (Crunching) +AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len, + unsigned char *out_buf, size_t *out_len); #endif // AARU_COMPRESSION_NATIVE_LIBRARY_H
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index af9b4d0..36cf7f6 100644 --- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt @@ -54,5 +54,6 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsqueeze.bin # 'Google_Tests_run' is the target name # 'test1.cpp tests2.cpp' are source files with tests add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp - zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp) + zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp + arc/crunch.cpp) target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native")
diff --git a/tests/arc/crunch.cpp b/tests/arc/crunch.cpp
new file mode 100644
index 0000000..de310f1
--- /dev/null
+++ b/tests/arc/crunch.cpp
@@ -0,0 +1,98 @@
+/*
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2025 Natalia Portillo.
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+
+#include "../../library.h"
+#include "../crc32.h"
+#include "gtest/gtest.h"
+
+#define EXPECTED_CRC32 0x66007dba
+#define COMPRESSED_SIZE 72537    // Size of data/arccrunchnr.bin in bytes.
+#define UNCOMPRESSED_SIZE 152089 // Size of the decompressed data in bytes.
+
+static const uint8_t *buffer = nullptr;
+
+class crunchFixture : public ::testing::Test
+{
+public:
+    crunchFixture()
+    {
+        // initialization;
+        // can also be done in SetUp()
+    }
+
+protected:
+    // Loads data/arccrunchnr.bin (relative to the working directory) into
+    // `buffer`. Any failure aborts the test instead of crashing later.
+    void SetUp() override
+    {
+        char path[PATH_MAX];
+        char filename[PATH_MAX];
+
+        ASSERT_NE(getcwd(path, PATH_MAX), nullptr);
+        snprintf(filename, PATH_MAX, "%s/data/arccrunchnr.bin", path);
+
+        FILE *file = fopen(filename, "rb");
+        ASSERT_NE(file, nullptr) << "Cannot open " << filename;
+
+        auto  *data = (uint8_t *)malloc(COMPRESSED_SIZE);
+        size_t got  = data ? fread(data, 1, COMPRESSED_SIZE, file) : 0;
+        fclose(file);
+
+        // Hand ownership to the global first so TearDown() frees it even if
+        // one of the assertions below fails.
+        buffer = data;
+        ASSERT_NE(data, nullptr);
+        ASSERT_EQ(got, (size_t)COMPRESSED_SIZE) << "Short read from " << filename;
+    }
+
+    void TearDown() override
+    {
+        free((void *)buffer);
+        buffer = nullptr;
+    }
+
+    ~crunchFixture()
+    {
+        // resources cleanup, no exceptions allowed
+    }
+
+    // shared user data
+};
+
+TEST_F(crunchFixture, crunch)
+{
+    size_t destLen = UNCOMPRESSED_SIZE;
+    size_t srcLen  = COMPRESSED_SIZE;
+    auto  *outBuf  = (uint8_t *)malloc(UNCOMPRESSED_SIZE);
+    ASSERT_NE(outBuf, nullptr);
+
+    auto err = arc_decompress_crunch_nrpack(buffer, srcLen, outBuf, &destLen);
+
+    EXPECT_EQ(err, 0);
+    EXPECT_EQ(destLen, (size_t)UNCOMPRESSED_SIZE);
+
+    auto crc = crc32_data(outBuf, UNCOMPRESSED_SIZE);
+
+    free(outBuf);
+
+    EXPECT_EQ(crc, EXPECTED_CRC32);
+}
diff --git a/tests/data/arccrunchnr.bin b/tests/data/arccrunchnr.bin
new file mode 100755
index 0000000..21f9901
Binary files /dev/null and b/tests/data/arccrunchnr.bin differ