diff --git a/CMakeLists.txt b/CMakeLists.txt
index d56c4e8..0a6d8a9 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -142,7 +142,8 @@ add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h a
         zoo/maketbl.c
         arc/pack.c
         arc/squeeze.c
-        arc/crunch.c)
+        arc/crunch.c
+        arc/lzw.c)
 
 include(3rdparty/bzip2.cmake)
 include(3rdparty/flac.cmake)
diff --git a/arc/lzw.c b/arc/lzw.c
new file mode 100644
index 0000000..05417d3
--- /dev/null
+++ b/arc/lzw.c
@@ -0,0 +1,290 @@
+/*
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2025 Natalia Portillo.
+ * Copyright © 2018-2019 David Ryskalczyk
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include "../library.h"
+
+#define CRBITS 12                   // Max bits for crunching.
+#define SQBITS 13                   // Max bits for squashing.
+#define INIT_BITS 9                 // Initial number of bits per code.
+#define MAXCODE(n) ((1 << (n)) - 1) // Macro to calculate max code for n bits.
+#define FIRST 257                   // First available code.
+#define CLEAR 256                   // Code to clear the dictionary.
+
+// LZW decompression state variables.
+// NOTE: this state is file-scope, so the decoder is neither reentrant nor thread-safe.
+static int             Bits;
+static int             max_maxcode;
+static int             n_bits;
+static int             maxcode;
+static int             clear_flg;
+static int             free_ent;
+static unsigned short *prefix;
+static unsigned char  *suffix;
+static unsigned char  *stack;
+
+// Buffer management variables.
+static const unsigned char *in_buf_ptr;
+static size_t               in_len_rem;
+static int                  offset;   // Bit offset of the next code inside buf.
+static int                  buf_bits; // Bit offset limit: codes are taken from buf while offset < buf_bits.
+static unsigned char        buf[SQBITS];
+
+// Reads a variable-length code from the input buffer.
+// Returns the code, or -1 once the input is exhausted.
+static int getcode(void)
+{
+    int                  code;
+    int                  r_off, bits;
+    const unsigned char *bp = buf;
+
+    // Check if we need to increase code size or handle a clear flag.
+    if(clear_flg > 0 || offset >= buf_bits || free_ent > maxcode)
+    {
+        if(free_ent > maxcode)
+        {
+            n_bits++;
+            if(n_bits == Bits)
+                maxcode = max_maxcode;
+            else
+                maxcode = MAXCODE(n_bits);
+        }
+        if(clear_flg > 0)
+        {
+            maxcode   = MAXCODE(n_bits = INIT_BITS);
+            clear_flg = 0;
+        }
+
+        // Read up to n_bits bytes (one group of eight codes) into the buffer.
+        int count = 0;
+        while(count < n_bits && in_len_rem > 0)
+        {
+            buf[count++] = *in_buf_ptr++;
+            in_len_rem--;
+        }
+        if(count <= 0)
+        {
+            buf_bits = 0;
+            return -1; // End of input.
+        }
+
+        // Zero the unread tail so that no stale bits from an earlier group can leak into a code.
+        memset(buf + count, 0, sizeof(buf) - (size_t)count);
+
+        offset   = 0;
+        buf_bits = (count << 3) - (n_bits - 1);
+    }
+    r_off = offset;
+    bits  = n_bits;
+
+    // Extract the code from the buffer.
+    bp += (r_off >> 3);
+    r_off &= 7;
+
+    code = (*bp++ >> r_off);
+    bits -= 8 - r_off;
+    r_off = 8 - r_off;
+
+    if(bits >= 8)
+    {
+        code |= *bp++ << r_off;
+        r_off += 8;
+        bits -= 8;
+    }
+    // The last code of a 13-bit group ends exactly at the end of buf: only read another byte if bits remain.
+    if(bits > 0) code |= (*bp & ((1 << bits) - 1)) << r_off;
+    offset += n_bits;
+
+    return code;
+}
+
+// Main LZW decompression logic.
+// On entry *out_len holds the capacity of out_buf; on return it holds the number of bytes written.
+// Decoded bytes that do not fit in out_buf are discarded.
+// Returns 0 on success (a stream that turns out corrupt part-way is decoded up to that point), or -1 on
+// invalid arguments, a wrong crunch header, an invalid first code or an allocation failure.
+static int arc_decompress_lzw(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len,
+                              int squash)
+{
+    int            finchar, oldcode, incode, code;
+    int            result  = 0;
+    size_t         out_pos = 0;
+    unsigned char *stackp;
+    unsigned char *stack_end;
+
+    // Basic validation of pointers.
+    if(!in_buf || !out_buf || !out_len) { return -1; }
+
+    // Initialize buffer pointers and lengths.
+    in_buf_ptr = in_buf;
+    in_len_rem = in_len;
+
+    // Set parameters based on whether we're unsquashing or uncrunching.
+    if(squash) { Bits = SQBITS; }
+    else
+    {
+        Bits = CRBITS;
+        if(in_len_rem > 0)
+        {
+            // Crunch format has a header byte indicating max bits.
+            if(*in_buf_ptr != CRBITS) return -1;
+            in_buf_ptr++;
+            in_len_rem--;
+        }
+    }
+
+    if(in_len_rem == 0)
+    {
+        *out_len = 0;
+        return 0;
+    }
+
+    // Initialize LZW parameters.
+    max_maxcode = 1 << Bits;
+    clear_flg   = 0;
+    n_bits      = INIT_BITS;
+    maxcode     = MAXCODE(n_bits);
+    free_ent    = FIRST;
+    offset      = 0;
+    buf_bits    = 0; // Drop bits left over from a previous run that stopped in the middle of a group.
+
+    // Allocate the LZW tables. The dictionary is zero-filled so that a corrupt stream can never make the
+    // decoder follow an indeterminate prefix outside the table.
+    prefix = (unsigned short *)calloc((size_t)max_maxcode, sizeof(unsigned short));
+    suffix = (unsigned char *)calloc((size_t)max_maxcode, sizeof(unsigned char));
+    stack  = (unsigned char *)malloc((size_t)max_maxcode * sizeof(unsigned char));
+
+    if(!prefix || !suffix || !stack)
+    {
+        free(prefix);
+        free(suffix);
+        free(stack);
+        return -1;
+    }
+
+    // Initialize the first 256 entries of the dictionary (their prefixes are already zero).
+    for(code = 255; code >= 0; code--) { suffix[code] = (unsigned char)code; }
+
+    stackp    = stack;
+    stack_end = stack + max_maxcode;
+
+    // The first code of a stream is always a literal byte.
+    finchar = oldcode = getcode();
+    if(oldcode == -1) goto cleanup;
+    if(oldcode > 255)
+    {
+        result = -1;
+        goto cleanup;
+    }
+
+    if(out_pos < *out_len) { out_buf[out_pos++] = (unsigned char)finchar; }
+
+    // Main decompression loop.
+    while((code = getcode()) > -1)
+    {
+        if(code == CLEAR)
+        {
+            // Clear the dictionary.
+            memset(prefix, 0, 256 * sizeof(unsigned short));
+            clear_flg = 1;
+            free_ent  = FIRST - 1;
+            if((code = getcode()) == -1) break;
+        }
+        incode = code;
+        // Handle KwKwK case.
+        if(code >= free_ent)
+        {
+            if(code > free_ent)
+            {
+                // Error: invalid code.
+                break;
+            }
+            *stackp++ = (unsigned char)finchar;
+            code      = oldcode;
+        }
+        // Decode the string by traversing the dictionary. A valid chain is always shorter than the stack, so
+        // running out of room means the stream is corrupt (e.g. a prefix cycle).
+        while(code >= 256 && stackp < stack_end - 1)
+        {
+            *stackp++ = suffix[code];
+            code      = prefix[code];
+        }
+        if(code >= 256) break;
+        *stackp++ = finchar = suffix[code];
+
+        // Write the decoded string to the output buffer.
+        do {
+            stackp--;
+            if(out_pos < *out_len) { out_buf[out_pos++] = *stackp; } // Discard if output buffer is full.
+        } while(stackp > stack);
+
+        // Add the new string to the dictionary.
+        if((code = free_ent) < max_maxcode)
+        {
+            prefix[code] = (unsigned short)oldcode;
+            suffix[code] = (unsigned char)finchar;
+            free_ent     = code + 1;
+        }
+        oldcode = incode;
+    }
+
+cleanup:
+    // Clean up and return.
+    *out_len = out_pos;
+    free(prefix);
+    free(suffix);
+    free(stack);
+    return result;
+}
+
+// Decompresses squashed data.
+AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
+                                                size_t *out_len)
+{
+    return arc_decompress_lzw(in_buf, in_len, out_buf, out_len, 1);
+}
+
+// Decompresses crunched data.
+AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
+                                                        unsigned char *out_buf, size_t *out_len)
+{
+    // Validate the pointers before *out_len is dereferenced below.
+    if(!in_buf || !out_buf || !out_len) return -1;
+
+    // Allocate a temporary buffer for the intermediate LZW output that arc_decompress_pack then unpacks.
+    if(*out_len > SIZE_MAX / 2) return -1; // Doubling would overflow.
+    size_t temp_len = *out_len * 2;        // Heuristic.
+
+    // malloc(0) is allowed to return NULL, so always request at least one byte.
+    unsigned char *temp_buf = (unsigned char *)malloc(temp_len > 0 ? temp_len : 1);
+    if(!temp_buf) return -1;
+
+    // Decompress crunched data.
+    int result = arc_decompress_lzw(in_buf, in_len, temp_buf, &temp_len, 0);
+    if(result == 0)
+    {
+        // Decompress non-repeat packing.
+        result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
+    }
+
+    free(temp_buf);
+    return result;
+}
diff --git a/library.h b/library.h
index c1e9058..0ab8dcd 100644
--- a/library.h
+++ b/library.h
@@ -134,4 +134,12 @@ AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_b
 AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len,
                                                            unsigned char *out_buf, size_t *out_len);
 
+// Method 8: Dynamic LZW (crunching)
+AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
+                                                        unsigned char *out_buf, size_t *out_len);
+
+// Method 9: Dynamic LZW with 13 bits (squashing)
+AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
+                                                size_t *out_len);
+
 #endif // AARU_COMPRESSION_NATIVE_LIBRARY_H
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index 36cf7f6..103394c 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -51,9 +51,18 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcpack.bin
 file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsqueeze.bin
         DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
 
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunchnr.bin
+        DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
+
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunch_dynamic.bin
+        DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
+
+file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsquash.bin
+        DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
+
 # 'Google_Tests_run' is the target name
 # 'test1.cpp tests2.cpp' are source files with tests
 add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp
-        zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp
-        arc/crunch.cpp)
+        zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp arc/crunch.cpp
+        arc/squash.cpp)
 target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native")
diff --git a/tests/arc/crunch.cpp b/tests/arc/crunch.cpp
index de310f1..be22e2b 100644
--- a/tests/arc/crunch.cpp
+++ b/tests/arc/crunch.cpp
@@ -77,5 +77,57 @@ TEST_F(crunchFixture, crunch)
 
     free(outBuf);
 
+    EXPECT_EQ(crc, EXPECTED_CRC32);
+}
+
+class crunchDynamicFixture : public ::testing::Test
+{
+public:
+    crunchDynamicFixture()
+    {
+        // initialization;
+        // can also be done in SetUp()
+    }
+
+protected:
+    void SetUp()
+    {
+        char path[PATH_MAX];
+        char filename[PATH_MAX];
+
+        getcwd(path, PATH_MAX);
+        snprintf(filename, PATH_MAX, "%s/data/arccrunch_dynamic.bin", path);
+
+        FILE *file = fopen(filename, "rb");
+        buffer     = (const uint8_t *)malloc(73189);
+        fread((void *)buffer, 1, 73189, file);
+        fclose(file);
+    }
+
+    void TearDown() { free((void *)buffer); }
+
+    ~crunchDynamicFixture()
+    {
+        // resources cleanup, no exceptions allowed
+    }
+
+    // shared user data
+};
+
+TEST_F(crunchDynamicFixture, crunchDynamic)
+{
+    size_t destLen = 152089;
+    size_t srcLen  = 73189;
+    auto  *outBuf  = (uint8_t *)malloc(152089);
+
+    auto err = arc_decompress_crunch_dynamic(buffer, srcLen, outBuf, &destLen);
+
+    EXPECT_EQ(err, 0);
+    EXPECT_EQ(destLen, 152089);
+
+    auto crc = crc32_data(outBuf, 152089);
+
+    free(outBuf);
+
     EXPECT_EQ(crc, EXPECTED_CRC32);
 }
\ No newline at end of file
diff --git a/tests/arc/squash.cpp b/tests/arc/squash.cpp
new file mode 100644
index 0000000..cd1819c
--- /dev/null
+++ b/tests/arc/squash.cpp
@@ -0,0 +1,81 @@
+/*
+ * This file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2025 Natalia Portillo.
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+
+#include "../../library.h"
+#include "../crc32.h"
+#include "gtest/gtest.h"
+
+#define EXPECTED_CRC32 0x66007dba
+
+static const uint8_t *buffer;
+
+class squashFixture : public ::testing::Test
+{
+public:
+    squashFixture()
+    {
+        // initialization;
+        // can also be done in SetUp()
+    }
+
+protected:
+    void SetUp()
+    {
+        char path[PATH_MAX];
+        char filename[PATH_MAX];
+
+        getcwd(path, PATH_MAX);
+        snprintf(filename, PATH_MAX, "%s/data/arcsquash.bin", path);
+
+        FILE *file = fopen(filename, "rb");
+        buffer     = (const uint8_t *)malloc(67308);
+        fread((void *)buffer, 1, 67308, file);
+        fclose(file);
+    }
+
+    void TearDown() { free((void *)buffer); }
+
+    ~squashFixture()
+    {
+        // resources cleanup, no exceptions allowed
+    }
+
+    // shared user data
+};
+
+TEST_F(squashFixture, squash)
+{
+    size_t destLen = 152089;
+    size_t srcLen  = 67308;
+    auto  *outBuf  = (uint8_t *)malloc(152089);
+
+    auto err = arc_decompress_squash(buffer, srcLen, outBuf, &destLen);
+
+    EXPECT_EQ(err, 0);
+    EXPECT_EQ(destLen, 152089);
+
+    auto crc = crc32_data(outBuf, 152089);
+
+    free(outBuf);
+
+    EXPECT_EQ(crc, EXPECTED_CRC32);
+}
\ No newline at end of file
diff --git a/tests/data/arccrunch_dynamic.bin b/tests/data/arccrunch_dynamic.bin
new file mode 100755
index 0000000..ed281a7
Binary files /dev/null and b/tests/data/arccrunch_dynamic.bin differ
diff --git a/tests/data/arcsquash.bin b/tests/data/arcsquash.bin
new file mode 100755
index 0000000..da67335
Binary files /dev/null and b/tests/data/arcsquash.bin differ