diff --git a/CMakeLists.txt b/CMakeLists.txt index c7f0c41..892a6cb 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -140,7 +140,8 @@ add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h a zoo/lzh.h zoo/ar.h zoo/maketbl.c - arc/pack.c) + arc/pack.c + arc/squeeze.c) include(3rdparty/bzip2.cmake) include(3rdparty/flac.cmake) diff --git a/arc/squeeze.c b/arc/squeeze.c new file mode 100644 index 0000000..1ab2d34 --- /dev/null +++ b/arc/squeeze.c @@ -0,0 +1,148 @@ +/* + * This file is part of the Aaru Data Preservation Suite. + * Copyright (c) 2019-2025 Natalia Portillo. + * Copyright © 2018-2019 David Ryskalczyk + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include +#include +#include +#include "../library.h" + +#define SPEOF 256 // Special end-of-file token. +#define NUMVALS 257 // Number of values in the Huffman tree (256 chars + SPEOF). + +// Node structure for the Huffman decoding tree. +struct nd +{ + int child[2]; // Children of the node. +}; + +// Static variables for the decompression state. +static struct nd nodes[NUMVALS]; // The Huffman tree. +static int numnodes; // Number of nodes in the tree. + +static int bpos; // Bit position in the current byte. +static unsigned char curin; // Current byte being read. + +// Pointers for buffer management. +static const unsigned char *in_buf_ptr; +static size_t in_len_rem; +static unsigned char *out_buf_ptr; +static size_t out_len_rem; + +// Reads a byte from the input buffer. +static int get_byte() +{ + if(in_len_rem == 0) { return EOF; } + in_len_rem--; + return *in_buf_ptr++; +} + +static int arc_decompress_huffman(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len) +{ + // Basic validation of pointers. + if(!in_buf || !out_buf || !out_len) { return -1; } + + // Initialize buffer pointers and lengths. + in_buf_ptr = in_buf; + in_len_rem = in_len; + out_buf_ptr = out_buf; + out_len_rem = *out_len; + + bpos = 99; // Force initial read. + + // Read the number of nodes in the Huffman tree. + if(in_len_rem < 2) return -1; + numnodes = get_byte(); + numnodes |= get_byte() << 8; + + if(numnodes < 0 || numnodes >= NUMVALS) + { + return -1; // Invalid tree. + } + + // ARC: initialize for possible empty tree (SPEOF only) + nodes[0].child[0] = -(SPEOF + 1); + nodes[0].child[1] = -(SPEOF + 1); + + // Read the Huffman tree from the input buffer, sign-extend 16-bit values + for(int i = 0; i < numnodes; ++i) + { + if(in_len_rem < 4) return -1; + uint8_t b0 = get_byte(); + uint8_t b1 = get_byte(); + uint8_t b2 = get_byte(); + uint8_t b3 = get_byte(); + nodes[i].child[0] = (int16_t)((b0) | (b1 << 8)); + nodes[i].child[1] = (int16_t)((b2) | (b3 << 8)); + } + + size_t written = 0; + // bpos is already 99 from init + + while(written < *out_len) + { + int i = 0; + // follow bit stream in tree to a leaf + while(i >= 0) + { + if(++bpos > 7) + { + int c = get_byte(); + if(c == EOF) + { + *out_len = written; + return 0; // End of input + } + curin = c; + bpos = 0; + // move a level deeper in tree + i = nodes[i].child[curin & 1]; + } + else { i = nodes[i].child[1 & (curin >>= 1)]; } + } + + // decode fake node index to original data value + int value = -(i + 1); + + if(value == SPEOF) + { + break; // End of data + } + + *out_buf_ptr++ = value; + written++; + } + + *out_len = written; + return 0; +} + +// Decompresses data using Huffman squeezing. +AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, + size_t *out_len) +{ + size_t temp_len = *out_len * 2; + unsigned char *temp_buf = malloc(temp_len); + if(!temp_buf) return -1; + + int result = arc_decompress_huffman(in_buf, in_len, temp_buf, &temp_len); + if(result == 0) { result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len); } + + free(temp_buf); + return result; +} diff --git a/library.h b/library.h index e05e0cf..1949018 100644 --- a/library.h +++ b/library.h @@ -120,5 +120,7 @@ AARU_EXPORT uint64_t AARU_CALL AARU_get_acn_version(); // ARC method 3: Stored with non-repeat packing AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); +// ARC method 4: Huffman squeezing +AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); #endif // AARU_COMPRESSION_NATIVE_LIBRARY_H diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index a61ec8b..af9b4d0 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -48,10 +48,11 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/alice29.lh5 file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcpack.bin DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/) +file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsqueeze.bin + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/) + # 'Google_Tests_run' is the target name # 'test1.cpp tests2.cpp' are source files with tests add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp - zoo/lzd.cpp arc/pack.cpp - lh5.cpp - arc/pack.cpp) + zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp) target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native") diff --git a/tests/arc/squeeze.cpp b/tests/arc/squeeze.cpp new file mode 100644 index 0000000..aee5d58 --- /dev/null +++ b/tests/arc/squeeze.cpp @@ -0,0 +1,81 @@ +/* + * This file is part of the Aaru Data Preservation Suite. + * Copyright (c) 2019-2025 Natalia Portillo. + * + * This library is free software; you can redistribute it and/or modify + * it under the terms of the GNU Lesser General Public License as + * published by the Free Software Foundation; either version 2.1 of the + * License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, see . + */ + +#include +#include +#include + +#include "../../library.h" +#include "../crc32.h" +#include "gtest/gtest.h" + +#define EXPECTED_CRC32 0x66007dba + +static const uint8_t *buffer; + +class squeezeFixture : public ::testing::Test +{ +public: + squeezeFixture() + { + // initialization; + // can also be done in SetUp() + } + +protected: + void SetUp() + { + char path[PATH_MAX]; + char filename[PATH_MAX]; + + getcwd(path, PATH_MAX); + snprintf(filename, PATH_MAX, "%s/data/arcsqueeze.bin", path); + + FILE *file = fopen(filename, "rb"); + buffer = (const uint8_t *)malloc(88044); + fread((void *)buffer, 1, 88044, file); + fclose(file); + } + + void TearDown() { free((void *)buffer); } + + ~squeezeFixture() + { + // resources cleanup, no exceptions allowed + } + + // shared user data +}; + +TEST_F(squeezeFixture, squeeze) +{ + size_t destLen = 152089; + size_t srcLen = 88044; + auto *outBuf = (uint8_t *)malloc(152089); + + auto err = arc_decompress_squeeze(buffer, srcLen, outBuf, &destLen); + + EXPECT_EQ(err, 0); + EXPECT_EQ(destLen, 152089); + + auto crc = crc32_data(outBuf, 152089); + + free(outBuf); + + EXPECT_EQ(crc, EXPECTED_CRC32); +} \ No newline at end of file diff --git a/tests/data/arcsqueeze.bin b/tests/data/arcsqueeze.bin new file mode 100755 index 0000000..23c9fe0 Binary files /dev/null and b/tests/data/arcsqueeze.bin differ diff --git a/tests/zoo/lzd.cpp b/tests/zoo/lzd.cpp index d1ec820..d8d8a1d 100644 --- a/tests/zoo/lzd.cpp +++ b/tests/zoo/lzd.cpp @@ -25,7 +25,7 @@ #include #include -#include "../library.h" +#include "../../library.h" #include "../crc32.h" #include "gtest/gtest.h"