mirror of
https://github.com/aaru-dps/Aaru.Compression.Native.git
synced 2025-12-16 11:14:30 +00:00
Add ARC methods 8 (Crunch) and 9 (squash), 12-bit and 13-bit Dynamic LZW.
This commit is contained in:
@@ -142,7 +142,8 @@ add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h a
|
||||
zoo/maketbl.c
|
||||
arc/pack.c
|
||||
arc/squeeze.c
|
||||
arc/crunch.c)
|
||||
arc/crunch.c
|
||||
arc/lzw.c)
|
||||
|
||||
include(3rdparty/bzip2.cmake)
|
||||
include(3rdparty/flac.cmake)
|
||||
|
||||
270
arc/lzw.c
Normal file
270
arc/lzw.c
Normal file
@@ -0,0 +1,270 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
* Copyright © 2018-2019 David Ryskalczyk
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "../library.h"
|
||||
|
||||
#define CRBITS 12                   // Maximum code width for crunching (ARC method 8).
#define SQBITS 13                   // Maximum code width for squashing (ARC method 9).
#define INIT_BITS 9                 // Code width at stream start and after a CLEAR code.
#define MAXCODE(n) ((1 << (n)) - 1) // Largest code representable in n bits.
#define FIRST 257                   // First dictionary slot available for new strings.
#define CLEAR 256                   // Code that tells the decoder to reset the dictionary.

/*
 * Complete state of one LZW decoding run.
 *
 * The state lives in a caller-owned structure (instead of file-scope
 * variables) so that every run starts from a known state and concurrent or
 * consecutive runs cannot observe each other's leftovers.
 */
typedef struct
{
    int max_bits;    // Maximum code width for this stream (CRBITS or SQBITS).
    int max_maxcode; // 1 << max_bits: dictionary capacity.
    int n_bits;      // Current code width in bits.
    int maxcode;     // Largest code at the current width; exceeding it widens the codes.
    int clear_flg;   // Non-zero right after CLEAR: the next read restarts at INIT_BITS.
    int free_ent;    // Next free dictionary slot.

    const unsigned char *in_ptr; // Next unread input byte.
    size_t               in_rem; // Unread input bytes remaining.

    int           offset;      // Bit offset of the next code inside buf.
    int           size;        // Number of bit offsets in buf at which a whole code starts.
    unsigned char buf[SQBITS]; // One group of up to n_bits bytes (holds 8 codes when full).
} arc_lzw_state;

/*
 * Reads the next variable-width code from the input.
 *
 * Codes are packed least-significant-bit first and consumed in groups of
 * n_bits bytes. A new group is loaded when the current one is exhausted,
 * when the code width has to grow, or after a CLEAR code (the unread rest of
 * the current group is then discarded).
 *
 * Returns the code, or -1 when the input holds no further complete code.
 */
static int getcode(arc_lzw_state *st)
{
    if(st->clear_flg > 0 || st->offset >= st->size || st->free_ent > st->maxcode)
    {
        // The dictionary outgrew the current width: widen the codes by one bit.
        if(st->free_ent > st->maxcode)
        {
            st->n_bits++;
            st->maxcode = st->n_bits == st->max_bits ? st->max_maxcode : MAXCODE(st->n_bits);
        }

        // A CLEAR code was seen: fall back to the initial width.
        if(st->clear_flg > 0)
        {
            st->n_bits    = INIT_BITS;
            st->maxcode   = MAXCODE(INIT_BITS);
            st->clear_flg = 0;
        }

        // Load the next group of up to n_bits bytes.
        int loaded = 0;
        while(loaded < st->n_bits && st->in_rem > 0)
        {
            st->buf[loaded++] = *st->in_ptr++;
            st->in_rem--;
        }

        st->offset = 0;
        st->size   = 0;
        if(loaded == 0) return -1; // End of input.

        // Only offsets at which a whole code still fits are usable.
        st->size = (loaded << 3) - (st->n_bits - 1);

        // A trailing fragment shorter than one code cannot be decoded; using
        // it would mix in stale bytes left over from the previous group.
        if(st->size <= 0) return -1;
    }

    int                  r_off = st->offset;
    int                  bits  = st->n_bits;
    const unsigned char *bp    = st->buf + (r_off >> 3);

    r_off &= 7;

    // Low part: the remaining high bits of the first byte.
    int code = *bp++ >> r_off;
    bits -= 8 - r_off;
    r_off = 8 - r_off;

    // Middle part: one whole byte, if that many bits are still missing.
    if(bits >= 8)
    {
        code |= *bp++ << r_off;
        r_off += 8;
        bits -= 8;
    }

    // High part. Skipped when nothing is left so that a code ending exactly on
    // the last byte of buf never reads one byte past the buffer.
    if(bits > 0) code |= (*bp & ((1 << bits) - 1)) << r_off;

    st->offset += st->n_bits;

    return code;
}

/*
 * Decodes an ARC dynamic-LZW stream (variable 9..N bit codes, CLEAR = 256).
 *
 * in_buf/in_len  Compressed data. When squash is zero the first byte is a
 *                header that must equal CRBITS.
 * out_buf        Destination buffer.
 * out_len        In: capacity of out_buf. Out (on success): bytes written.
 *                Decoded data beyond the capacity is silently discarded.
 * squash         Non-zero: 13-bit codes and no header byte (squashing).
 *                Zero: 12-bit codes preceded by the header byte (crunching).
 *
 * Returns 0 on success (including empty input and streams that stop at an
 * invalid code, in which case the data decoded so far is returned), or -1 on
 * NULL arguments, a bad header byte, a stream that does not start with a
 * literal, or allocation failure. *out_len is left untouched on -1.
 */
static int arc_decompress_lzw(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len,
                              int squash)
{
    if(!in_buf || !out_buf || !out_len) return -1;

    arc_lzw_state st;
    memset(&st, 0, sizeof st);
    st.in_ptr = in_buf;
    st.in_rem = in_len;

    if(squash)
        st.max_bits = SQBITS;
    else
    {
        st.max_bits = CRBITS;
        if(st.in_rem > 0)
        {
            // Crunch format has a header byte indicating max bits.
            if(*st.in_ptr != CRBITS) return -1;
            st.in_ptr++;
            st.in_rem--;
        }
    }

    if(st.in_rem == 0)
    {
        *out_len = 0;
        return 0;
    }

    st.max_maxcode = 1 << st.max_bits;
    st.n_bits      = INIT_BITS;
    st.maxcode     = MAXCODE(INIT_BITS);
    st.free_ent    = FIRST;

    // Dictionary: entry c is the string of entry prefix[c] followed by
    // suffix[c]. Zero-initialised so no path can ever see indeterminate data.
    const size_t    table_size = (size_t)st.max_maxcode;
    unsigned short *prefix     = calloc(table_size, sizeof *prefix);
    unsigned char  *suffix     = calloc(table_size, sizeof *suffix);
    // Strings are produced back to front, so they are staged here and popped.
    unsigned char  *stack      = malloc(table_size);

    const size_t   out_cap = *out_len;
    size_t         out_pos = 0;
    int            result  = -1;
    int            oldcode, finchar, code;
    unsigned char *stackp;

    if(!prefix || !suffix || !stack) goto cleanup;

    // Codes 0..255 stand for themselves.
    for(int i = 0; i < 256; i++) suffix[i] = (unsigned char)i;

    oldcode = getcode(&st);
    if(oldcode == -1)
    {
        // Nothing to decode.
        result = 0;
        goto cleanup;
    }

    // The first code must be a literal: no dictionary string exists yet, and
    // accepting anything else would make later codes walk unset entries.
    if(oldcode >= 256) goto cleanup;

    finchar = oldcode;
    if(out_pos < out_cap) out_buf[out_pos++] = (unsigned char)finchar;

    stackp = stack;

    while((code = getcode(&st)) > -1)
    {
        if(code == CLEAR)
        {
            // Clear the dictionary.
            memset(prefix, 0, 256 * sizeof *prefix);
            st.clear_flg = 1;
            // One below FIRST: the entry added at the end of this iteration
            // pairs the pre-clear code with the new literal and lands in slot
            // 256, which no code can address (256 is CLEAR).
            st.free_ent = FIRST - 1;

            if((code = getcode(&st)) == -1) break;

            // Right after a clear only literals can follow; anything else is
            // a corrupt stream and would reference the discarded dictionary.
            if(code >= 256) break;
        }

        const int incode = code;

        // KwKwK case: the code refers to the entry that is about to be
        // created, i.e. the previous string plus its own first character.
        if(code >= st.free_ent)
        {
            if(code > st.free_ent) break; // Error: invalid code.

            *stackp++ = (unsigned char)finchar;
            code      = oldcode;
        }

        // Walk the prefix chain. Every entry's prefix is a lower, already
        // defined code (guaranteed by the checks above), so the walk ends at
        // a literal and never pushes more than table_size bytes.
        while(code >= 256)
        {
            *stackp++ = suffix[code];
            code      = prefix[code];
        }
        finchar   = suffix[code];
        *stackp++ = (unsigned char)finchar;

        // Pop the string into the output; bytes beyond the capacity are dropped.
        while(stackp > stack)
        {
            const unsigned char byte = *--stackp;
            if(out_pos < out_cap) out_buf[out_pos++] = byte;
        }

        // Add "previous string + first character of this string" to the dictionary.
        if(st.free_ent < st.max_maxcode)
        {
            prefix[st.free_ent] = (unsigned short)oldcode;
            suffix[st.free_ent] = (unsigned char)finchar;
            st.free_ent++;
        }
        oldcode = incode;
    }

    result = 0;

cleanup:
    if(result == 0) *out_len = out_pos;
    free(prefix);
    free(suffix);
    free(stack);
    return result;
}
|
||||
|
||||
// Decompresses squashed data.
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len)
|
||||
{
|
||||
return arc_decompress_lzw(in_buf, in_len, out_buf, out_len, 1);
|
||||
}
|
||||
|
||||
// Decompresses crunched data.
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
|
||||
unsigned char *out_buf, size_t *out_len)
|
||||
{
|
||||
// Allocate a temporary buffer.
|
||||
size_t temp_len = *out_len * 2; // Heuristic.
|
||||
unsigned char *temp_buf = malloc(temp_len);
|
||||
if(!temp_buf) return -1;
|
||||
|
||||
// Decompress crunched data.
|
||||
int result = arc_decompress_lzw(in_buf, in_len, temp_buf, &temp_len, 0);
|
||||
if(result == 0)
|
||||
{
|
||||
// Decompress non-repeat packing.
|
||||
result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
|
||||
}
|
||||
|
||||
free(temp_buf);
|
||||
return result;
|
||||
}
|
||||
@@ -134,4 +134,12 @@ AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_b
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len,
|
||||
unsigned char *out_buf, size_t *out_len);
|
||||
|
||||
// Method 8: Dynamic LZW (crunching)
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
|
||||
unsigned char *out_buf, size_t *out_len);
|
||||
|
||||
// Method 9: Dynamic LZW with 13 bits (squashing)
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len);
|
||||
|
||||
#endif // AARU_COMPRESSION_NATIVE_LIBRARY_H
|
||||
|
||||
@@ -51,9 +51,18 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcpack.bin
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsqueeze.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunchnr.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunch_dynamic.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsquash.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
# 'Google_Tests_run' is the target name
|
||||
# 'test1.cpp tests2.cpp' are source files with tests
|
||||
add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp
|
||||
zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp
|
||||
arc/crunch.cpp)
|
||||
zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp arc/crunch.cpp
|
||||
arc/squash.cpp)
|
||||
target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native")
|
||||
|
||||
@@ -77,5 +77,57 @@ TEST_F(crunchFixture, crunch)
|
||||
|
||||
free(outBuf);
|
||||
|
||||
EXPECT_EQ(crc, EXPECTED_CRC32);
|
||||
}
|
||||
|
||||
class crunchDynamicFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
crunchDynamicFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/arccrunch_dynamic.bin", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(73189);
|
||||
fread((void *)buffer, 1, 73189, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~crunchDynamicFixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the crunched sample and checks return code, length and CRC32.
TEST_F(crunchDynamicFixture, crunchDynamic)
{
    const size_t expectedLen = 152089;
    size_t       destLen     = expectedLen;
    size_t       srcLen      = 73189;

    // Zero-filled so the CRC below never reads indeterminate bytes when the
    // decompressor produces less data than expected.
    auto *outBuf = (uint8_t *)calloc(1, expectedLen);
    ASSERT_TRUE(outBuf != nullptr) << "cannot allocate output buffer";

    auto err = arc_decompress_crunch_dynamic(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, expectedLen);

    auto crc = crc32_data(outBuf, expectedLen);

    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
81
tests/arc/squash.cpp
Normal file
81
tests/arc/squash.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../../library.h"
|
||||
#include "../crc32.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#define EXPECTED_CRC32 0x66007dba
|
||||
|
||||
static const uint8_t *buffer;
|
||||
|
||||
class squashFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
squashFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/arcsquash.bin", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(67308);
|
||||
fread((void *)buffer, 1, 67308, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~squashFixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the squashed sample and checks return code, length and CRC32.
TEST_F(squashFixture, squash)
{
    const size_t expectedLen = 152089;
    size_t       destLen     = expectedLen;
    size_t       srcLen      = 67308;

    // Zero-filled so the CRC below never reads indeterminate bytes when the
    // decompressor produces less data than expected.
    auto *outBuf = (uint8_t *)calloc(1, expectedLen);
    ASSERT_TRUE(outBuf != nullptr) << "cannot allocate output buffer";

    auto err = arc_decompress_squash(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, expectedLen);

    auto crc = crc32_data(outBuf, expectedLen);

    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
BIN
tests/data/arccrunch_dynamic.bin
Executable file
BIN
tests/data/arccrunch_dynamic.bin
Executable file
Binary file not shown.
BIN
tests/data/arcsquash.bin
Executable file
BIN
tests/data/arcsquash.bin
Executable file
Binary file not shown.
Reference in New Issue
Block a user