8 Commits

23 changed files with 5169 additions and 162 deletions

View File

@@ -2,7 +2,7 @@
<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
<metadata>
<id>Aaru.Compression.Native</id>
<version>6.0.0-alpha.11.2</version>
<version>6.0.0-alpha.11.3</version>
<description>C implementation of compression algorithms used by Aaru.</description>
<authors>claunia</authors>
<projectUrl>https://github.com/aaru-dps/Aaru.Compression.Native</projectUrl>

View File

@@ -139,7 +139,11 @@ add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h a
zoo/lh5.c
zoo/lzh.h
zoo/ar.h
zoo/maketbl.c)
zoo/maketbl.c
arc/pack.c
arc/squeeze.c
arc/crunch.c
arc/lzw.c)
include(3rdparty/bzip2.cmake)
include(3rdparty/flac.cmake)

295
arc/crunch.c Normal file
View File

@@ -0,0 +1,295 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
* Copyright © 2018-2019 David Ryskalczyk
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "../library.h"
#define FALSE 0
#define TRUE !FALSE
#define TABSIZE 4096 // Size of the string table.
#define NO_PRED 0xFFFF // Indicates no predecessor in the string table.
#define EMPTY 0xFFFF // Indicates an empty stack.
typedef unsigned char u_char;
typedef unsigned short u_short;
// Entry in the string table.
struct entry
{
char used; // Is this entry in use?
u_char follower; // The character that follows the string.
u_short next; // Next entry in a collision chain.
u_short predecessor; // Code for the preceding string.
};
// Static variables for decompression state.
static struct entry *string_tab;
static u_char *stack;
static int sp;
// Buffer management variables.
static const u_char *in_buf_ptr;
static size_t in_len_rem;
static int inflag;
// Pointer to the hash function to use.
static u_short (*h)(u_short, u_char);
// Original hash function from ARC.
static u_short oldh(u_short pred, u_char foll)
{
long local;
local = ((pred + foll) | 0x0800) & 0xFFFF;
local *= local;
return (local >> 6) & 0x0FFF;
}
// Newer, faster hash function.
static u_short newh(u_short pred, u_char foll) { return (((pred + foll) & 0xFFFF) * 15073) & 0xFFF; }
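// Both hashes fold a (predecessor, follower) pair into the 0x000-0xFFF range of the
// 4096-entry string table; arc_decompress_crunch_internal() below selects between them
// through its new_hash argument.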
// Finds the end of a collision list.
static u_short eolist(u_short index)
{
int temp;
while((temp = string_tab[index].next)) index = temp;
return index;
}
// Hashes a string to find its position in the table.
static u_short hash_it(u_short pred, u_char foll)
{
u_short local, tempnext;
struct entry *ep;
local = (*h)(pred, foll);
if(!string_tab[local].used)
return local;
else
{
local = eolist(local);
tempnext = (local + 101) & 0x0FFF;
ep = &string_tab[tempnext];
while(ep->used)
{
if(++tempnext == TABSIZE)
{
tempnext = 0;
ep = string_tab;
}
else
++ep;
}
string_tab[local].next = tempnext;
return tempnext;
}
}
// Adds a new string to the table.
static void upd_tab(u_short pred, u_short foll)
{
struct entry *ep;
ep = &string_tab[hash_it(pred, foll)];
ep->used = TRUE;
ep->next = 0;
ep->predecessor = pred;
ep->follower = foll;
}
// Initializes the string table.
static void init_tab()
{
memset((char *)string_tab, 0, TABSIZE * sizeof(struct entry));
for(unsigned int i = 0; i < 256; i++) upd_tab(NO_PRED, i);
}
// Reads a 12-bit code from the input buffer.
static int get_code()
{
int code;
if(in_len_rem < 2) return -1;
if((inflag ^= 1))
{
code = (*in_buf_ptr++ << 4);
code |= (*in_buf_ptr >> 4);
in_len_rem--;
}
else
{
code = (*in_buf_ptr++ & 0x0f) << 8;
code |= (*in_buf_ptr++);
in_len_rem -= 2;
}
return code;
}
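// Codes are packed two per three bytes, high nibble first. As an illustrative example
// (bytes made up for this comment, not taken from any test file), the input sequence
// 0x12 0x34 0x56 yields the 12-bit codes 0x123 and 0x456: the first call returns
// (0x12 << 4) | (0x34 >> 4), the second ((0x34 & 0x0F) << 8) | 0x56.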
// Pushes a character onto the decode stack; on overflow, free the tables
// before bailing out so malformed input cannot leak memory.
#define PUSH(c) \
do { \
stack[sp] = ((u_char)(c)); \
if(++sp >= TABSIZE) \
{ \
free(string_tab); \
free(stack); \
return -1; \
} \
} while(0)
// Pops a character from the stack.
#define POP() ((sp > 0) ? (int)stack[--sp] : EMPTY)
// Internal crunch decompression logic.
static int arc_decompress_crunch_internal(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len, int new_hash)
{
// Basic validation of pointers.
if(!in_buf || !out_buf || !out_len) { return -1; }
// Allocate memory for tables.
string_tab = (struct entry *)malloc(TABSIZE * sizeof(struct entry));
stack = (u_char *)malloc(TABSIZE * sizeof(u_char));
if(!string_tab || !stack)
{
if(string_tab) free(string_tab);
if(stack) free(stack);
return -1;
}
// Select the hash function.
if(new_hash)
h = newh;
else
h = oldh;
// Initialize state.
sp = 0;
init_tab();
int code_count = TABSIZE - 256;
in_buf_ptr = in_buf;
in_len_rem = in_len;
inflag = 0;
// Main decompression loop.
int oldcode = get_code();
if(oldcode == -1)
{
*out_len = 0;
free(string_tab);
free(stack);
return 0;
}
int finchar = string_tab[oldcode].follower;
size_t out_pos = 0;
if(out_pos < *out_len) { out_buf[out_pos++] = finchar; }
int newcode;
while((newcode = get_code()) != -1)
{
int code = newcode;
struct entry *ep = &string_tab[code];
// Handle unknown codes and KwKwK case.
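// (the classic KwKwK situation: the only code the encoder can emit that the decoder
// has not yet added is oldcode's string followed by that string's first character)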
if(!ep->used)
{
code = oldcode;
ep = &string_tab[code];
PUSH(finchar);
}
// Decode the string by traversing the table.
while(ep->predecessor != NO_PRED)
{
PUSH(ep->follower);
code = ep->predecessor;
ep = &string_tab[code];
}
PUSH(finchar = ep->follower);
// Add the new string to the table if there's room.
if(code_count)
{
upd_tab(oldcode, finchar);
--code_count;
}
oldcode = newcode;
// Write the decoded string to the output buffer.
while(sp > 0)
{
int c = POP();
if(c == EMPTY) break;
if(out_pos < *out_len) { out_buf[out_pos++] = (unsigned char)c; }
}
}
// Clean up and return.
*out_len = out_pos;
free(string_tab);
free(stack);
return 0;
}
// Decompresses crunched data.
AARU_EXPORT int AARU_CALL arc_decompress_crunch(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len)
{
return arc_decompress_crunch_internal(in_buf, in_len, out_buf, out_len, 0);
}
// Decompresses crunched data with non-repeat packing.
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len)
{
// Allocate a temporary buffer for the intermediate decompressed data.
size_t temp_len = *out_len * 2; // Heuristic for temp buffer size.
unsigned char *temp_buf = malloc(temp_len);
if(!temp_buf) return -1;
// First, decompress the crunched data.
int result = arc_decompress_crunch_internal(in_buf, in_len, temp_buf, &temp_len, 0);
if(result == 0)
{
// Then, decompress the non-repeat packing.
result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
}
free(temp_buf);
return result;
}
// Decompresses crunched data with non-repeat packing and the new hash function.
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len)
{
// Allocate a temporary buffer.
size_t temp_len = *out_len * 2; // Heuristic.
unsigned char *temp_buf = malloc(temp_len);
if(!temp_buf) return -1;
// Decompress crunched data with the new hash.
int result = arc_decompress_crunch_internal(in_buf, in_len, temp_buf, &temp_len, 1);
if(result == 0)
{
// Decompress non-repeat packing.
result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
}
free(temp_buf);
return result;
}

271
arc/lzw.c Normal file
View File

@@ -0,0 +1,271 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
* Copyright © 2018-2019 David Ryskalczyk
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "../library.h"
#define CRBITS 12 // Max bits for crunching.
#define SQBITS 13 // Max bits for squashing.
#define INIT_BITS 9 // Initial number of bits per code.
#define MAXCODE(n) ((1 << (n)) - 1) // Macro to calculate max code for n bits.
#define FIRST 257 // First available code.
#define CLEAR 256 // Code to clear the dictionary.
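// For reference: MAXCODE(INIT_BITS) == 511, MAXCODE(CRBITS) == 4095 and MAXCODE(SQBITS) == 8191;
// codes start INIT_BITS wide and grow one bit at a time up to Bits as the dictionary fills.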
// LZW decompression state variables.
static int Bits;
static int max_maxcode;
static int n_bits;
static int maxcode;
static int clear_flg;
static int free_ent;
static unsigned short *prefix;
static unsigned char *suffix;
static unsigned char *stack;
// Buffer management variables.
static const unsigned char *in_buf_ptr;
static size_t in_len_rem;
static int offset;
static char buf[SQBITS];
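// buf[] holds one group of n_bits input bytes at a time, i.e. exactly 8 codes of n_bits
// bits each; SQBITS is the largest code width ever used, so the buffer fits both modes.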
// Reads a variable-length code from the input buffer.
static int getcode()
{
int code;
static int size = 0;
int r_off, bits;
unsigned char *bp = (unsigned char *)buf;
// Check if we need to increase code size or handle a clear flag.
if(clear_flg > 0 || offset >= size || free_ent > maxcode)
{
if(free_ent > maxcode)
{
n_bits++;
if(n_bits == Bits)
maxcode = max_maxcode;
else
maxcode = MAXCODE(n_bits);
}
if(clear_flg > 0)
{
maxcode = MAXCODE(n_bits = INIT_BITS);
clear_flg = 0;
}
// Read n_bits bytes into the buffer.
for(size = 0; size < n_bits; size++)
{
if(in_len_rem == 0)
{
code = -1;
break;
}
code = *in_buf_ptr++;
in_len_rem--;
buf[size] = (char)code;
}
if(size <= 0) return -1; // End of file.
offset = 0;
size = (size << 3) - (n_bits - 1);
}
r_off = offset;
bits = n_bits;
// Extract the code from the buffer.
bp += (r_off >> 3);
r_off &= 7;
code = (*bp++ >> r_off);
bits -= 8 - r_off;
r_off = 8 - r_off;
if(bits >= 8)
{
code |= *bp++ << r_off;
r_off += 8;
bits -= 8;
}
code |= (*bp & ((1 << bits) - 1)) << r_off;
offset += n_bits;
return code;
}
// Main LZW decompression logic.
static int arc_decompress_lzw(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len,
int squash)
{
// Basic validation of pointers.
if(!in_buf || !out_buf || !out_len) { return -1; }
// Initialize buffer pointers and lengths.
in_buf_ptr = in_buf;
in_len_rem = in_len;
// Set parameters based on whether we're unsquashing or uncrunching.
if(squash) { Bits = SQBITS; }
else
{
Bits = CRBITS;
if(in_len_rem > 0)
{
// Crunch format has a header byte indicating max bits.
if(*in_buf_ptr != CRBITS) return -1;
in_buf_ptr++;
in_len_rem--;
}
}
if(in_len_rem <= 0)
{
*out_len = 0;
return 0;
}
// Initialize LZW parameters.
max_maxcode = 1 << Bits;
clear_flg = 0;
n_bits = INIT_BITS;
maxcode = MAXCODE(n_bits);
// Allocate memory for LZW tables.
prefix = (unsigned short *)malloc(max_maxcode * sizeof(unsigned short));
suffix = (unsigned char *)malloc(max_maxcode * sizeof(unsigned char));
stack = (unsigned char *)malloc(max_maxcode * sizeof(unsigned char));
if(!prefix || !suffix || !stack)
{
if(prefix) free(prefix);
if(suffix) free(suffix);
if(stack) free(stack);
return -1;
}
// Initialize the first 256 entries of the dictionary.
memset(prefix, 0, 256 * sizeof(unsigned short));
for(int code = 255; code >= 0; code--) { suffix[code] = (unsigned char)code; }
free_ent = FIRST;
offset = 0;
// Main decompression loop.
int finchar, oldcode, incode;
finchar = oldcode = getcode();
if(oldcode == -1)
{
*out_len = 0;
free(prefix);
free(suffix);
free(stack);
return 0;
}
size_t out_pos = 0;
if(out_pos < *out_len) { out_buf[out_pos++] = finchar; }
unsigned char *stackp = stack;
int code;
while((code = getcode()) > -1)
{
if(code == CLEAR)
{
// Clear the dictionary.
memset(prefix, 0, 256 * sizeof(unsigned short));
clear_flg = 1;
free_ent = FIRST - 1;
if((code = getcode()) == -1) break;
}
incode = code;
// Handle KwKwK case.
if(code >= free_ent)
{
if(code > free_ent)
{
// Error: invalid code.
break;
}
*stackp++ = finchar;
code = oldcode;
}
// Decode the string by traversing the dictionary.
while(code >= 256)
{
*stackp++ = suffix[code];
code = prefix[code];
}
*stackp++ = finchar = suffix[code];
// Write the decoded string to the output buffer.
do {
if(out_pos < *out_len) { out_buf[out_pos++] = *--stackp; }
else
{
stackp--; // Discard if output buffer is full.
}
} while(stackp > stack);
// Add the new string to the dictionary.
if((code = free_ent) < max_maxcode)
{
prefix[code] = (unsigned short)oldcode;
suffix[code] = finchar;
free_ent = code + 1;
}
oldcode = incode;
}
// Clean up and return.
*out_len = out_pos;
free(prefix);
free(suffix);
free(stack);
return 0;
}
// Decompresses squashed data.
AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len)
{
return arc_decompress_lzw(in_buf, in_len, out_buf, out_len, 1);
}
// Decompresses crunched data.
AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len)
{
// Allocate a temporary buffer.
size_t temp_len = *out_len * 2; // Heuristic.
unsigned char *temp_buf = malloc(temp_len);
if(!temp_buf) return -1;
// Decompress crunched data.
int result = arc_decompress_lzw(in_buf, in_len, temp_buf, &temp_len, 0);
if(result == 0)
{
// Decompress non-repeat packing.
result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
}
free(temp_buf);
return result;
}

78
arc/pack.c Normal file
View File

@@ -0,0 +1,78 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
* Copyright © 2018-2019 David Ryskalczyk
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
#include <string.h>
#include "../library.h"
#define DLE 0x90 // Data Link Escape character, used as a repeat marker.
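// Worked example (illustrative bytes, not taken from the test data):
//   41 42 90 03  ->  "ABBB"   (DLE + count 3: the previous byte occurs 3 times in total)
//   90 00        ->  0x90     (DLE + count 0 encodes a literal DLE byte)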
// Decompresses data using non-repeat packing.
// This algorithm encodes runs of identical bytes.
AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len)
{
// Basic validation of pointers.
if(!in_buf || !out_buf || !out_len) { return -1; }
size_t in_pos = 0;
size_t out_pos = 0;
unsigned char state = 0; // 0 for normal (NOHIST), 1 for in-repeat (INREP).
unsigned char lastc = 0; // Last character seen.
// Loop through the input buffer until it's exhausted or the output buffer is full.
while(in_pos < in_len && out_pos < *out_len)
{
if(state == 1)
{ // We are in a repeat sequence.
if(in_buf[in_pos])
{ // The byte after DLE is the repeat count.
unsigned char count = in_buf[in_pos];
// Emit count-1 further copies: the first occurrence was already written as a literal.
while(--count && out_pos < *out_len) { out_buf[out_pos++] = lastc; }
}
else
{ // A count of 0 means the DLE character itself should be written.
if(out_pos < *out_len) { out_buf[out_pos++] = DLE; }
}
state = 0; // Return to normal state.
in_pos++;
}
else
{ // Normal state.
if(in_buf[in_pos] != DLE)
{ // Not a repeat sequence.
if(out_pos < *out_len)
{
// Copy the character and save it as the last character.
out_buf[out_pos++] = lastc = in_buf[in_pos];
}
}
else
{ // DLE marks the start of a repeat sequence.
state = 1; // Enter repeat state.
}
in_pos++;
}
}
// Update the output length to the number of bytes written.
*out_len = out_pos;
// Return success.
return 0;
}

148
arc/squeeze.c Normal file
View File

@@ -0,0 +1,148 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
* Copyright © 2018-2019 David Ryskalczyk
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "../library.h"
#define SPEOF 256 // Special end-of-file token.
#define NUMVALS 257 // Number of values in the Huffman tree (256 chars + SPEOF).
// Node structure for the Huffman decoding tree.
struct nd
{
int child[2]; // Children of the node.
};
// Static variables for the decompression state.
static struct nd nodes[NUMVALS]; // The Huffman tree.
static int numnodes; // Number of nodes in the tree.
static int bpos; // Bit position in the current byte.
static unsigned char curin; // Current byte being read.
// Pointers for buffer management.
static const unsigned char *in_buf_ptr;
static size_t in_len_rem;
static unsigned char *out_buf_ptr;
static size_t out_len_rem;
// Reads a byte from the input buffer.
static int get_byte()
{
if(in_len_rem == 0) { return EOF; }
in_len_rem--;
return *in_buf_ptr++;
}
static int arc_decompress_huffman(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len)
{
// Basic validation of pointers.
if(!in_buf || !out_buf || !out_len) { return -1; }
// Initialize buffer pointers and lengths.
in_buf_ptr = in_buf;
in_len_rem = in_len;
out_buf_ptr = out_buf;
out_len_rem = *out_len;
bpos = 99; // Force initial read.
// Read the number of nodes in the Huffman tree.
if(in_len_rem < 2) return -1;
numnodes = get_byte();
numnodes |= get_byte() << 8;
if(numnodes < 0 || numnodes >= NUMVALS)
{
return -1; // Invalid tree.
}
// ARC: initialize for possible empty tree (SPEOF only)
nodes[0].child[0] = -(SPEOF + 1);
nodes[0].child[1] = -(SPEOF + 1);
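// Leaves are stored as negative children: a value of -(v + 1) decodes back to symbol v,
// so -(SPEOF + 1) is the end-of-data marker used above and checked for below.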
// Read the Huffman tree from the input buffer, sign-extend 16-bit values
for(int i = 0; i < numnodes; ++i)
{
if(in_len_rem < 4) return -1;
uint8_t b0 = get_byte();
uint8_t b1 = get_byte();
uint8_t b2 = get_byte();
uint8_t b3 = get_byte();
nodes[i].child[0] = (int16_t)((b0) | (b1 << 8));
nodes[i].child[1] = (int16_t)((b2) | (b3 << 8));
}
size_t written = 0;
// bpos is already 99 from init
while(written < *out_len)
{
int i = 0;
// follow bit stream in tree to a leaf
while(i >= 0)
{
if(++bpos > 7)
{
int c = get_byte();
if(c == EOF)
{
*out_len = written;
return 0; // End of input
}
curin = c;
bpos = 0;
// move a level deeper in tree
i = nodes[i].child[curin & 1];
}
else { i = nodes[i].child[1 & (curin >>= 1)]; }
// Guard against corrupt trees whose children point past the node table.
if(i >= numnodes) { *out_len = written; return -1; }
}
// decode fake node index to original data value
int value = -(i + 1);
if(value == SPEOF)
{
break; // End of data
}
*out_buf_ptr++ = value;
written++;
}
*out_len = written;
return 0;
}
// Decompresses data using Huffman squeezing.
AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len)
{
size_t temp_len = *out_len * 2;
unsigned char *temp_buf = malloc(temp_len);
if(!temp_buf) return -1;
int result = arc_decompress_huffman(in_buf, in_len, temp_buf, &temp_len);
if(result == 0) { result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len); }
free(temp_buf);
return result;
}

View File

@@ -179,7 +179,7 @@ docker/dockcross-win-arm cmake -DCMAKE_BUILD_TYPE=Release -DAARU_BUILD_PACKAGE=1
sed -e 's/\-soname,libAaru\.Compression\.Native\.so//g' ./CMakeFiles/Aaru.Compression.Native.dir/link.txt > link.txt
mv link.txt ./CMakeFiles/Aaru.Compression.Native.dir/link.txt
docker/dockcross-win-arm make Aaru.Compression.Native
mv libAaru.Compression.Native.so runtimes/win-arm/native/libAaru.Compression.Native.dll
mv libAaru.Compression.Native.dll runtimes/win-arm/native/
## Windows (ARM64)
# Detected system processor: aarch64
@@ -191,7 +191,7 @@ docker/dockcross-win-arm64 cmake -DCMAKE_BUILD_TYPE=Release -DAARU_BUILD_PACKAGE
sed -e 's/\-soname,libAaru\.Compression\.Native\.so//g' ./CMakeFiles/Aaru.Compression.Native.dir/link.txt > link.txt
mv link.txt ./CMakeFiles/Aaru.Compression.Native.dir/link.txt
docker/dockcross-win-arm64 make Aaru.Compression.Native
mv libAaru.Compression.Native.so runtimes/win-arm64/native/libAaru.Compression.Native.dll
mv libAaru.Compression.Native.dll runtimes/win-arm64/native/
## Windows (AMD64)
# Detected system processor: x86_64

View File

@@ -118,4 +118,28 @@ AARU_EXPORT int AARU_CALL lh5_decompress(const uint8_t *in_buf, size_t in_len, u
AARU_EXPORT uint64_t AARU_CALL AARU_get_acn_version();
// ARC method 3: Stored with non-repeat packing
AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len);
// ARC method 4: Huffman squeezing
AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len);
// Method 5: LZW (crunching)
AARU_EXPORT int AARU_CALL arc_decompress_crunch(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len);
// Method 6: LZW with non-repeat packing (crunching)
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len);
// Method 7: LZW with non-repeat packing and new hash (crunching)
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len);
// Method 8: Dynamic LZW (crunching)
AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len);
// Method 9: Dynamic LZW with 13 bits (squashing)
AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len);
#endif // AARU_COMPRESSION_NATIVE_LIBRARY_H
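All of the new ARC entry points follow the same calling convention as the existing exports: the caller supplies both buffers, *out_len carries the output capacity on entry and the number of bytes actually written on return, and 0 is returned on success (-1 on error). A minimal caller sketch under those assumptions; unsqueeze_entry and the size arguments are hypothetical, not part of the library:

#include <stdlib.h>
#include "library.h"

// Hypothetical helper: unpack one squeezed ARC entry whose compressed and
// uncompressed sizes are already known from the archive header.
static unsigned char *unsqueeze_entry(const unsigned char *packed, size_t packed_len, size_t unpacked_len)
{
    size_t out_len = unpacked_len;             // capacity in, bytes written out
    unsigned char *out = malloc(unpacked_len);
    if(!out) return NULL;
    if(arc_decompress_squeeze(packed, packed_len, out, &out_len) != 0 || out_len != unpacked_len)
    {
        free(out);
        return NULL;
    }
    return out;
}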

View File

@@ -45,9 +45,24 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/alice29.lzd
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/alice29.lh5
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcpack.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsqueeze.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunchnr.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunch_dynamic.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsquash.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
# 'Google_Tests_run' is the target name
# 'test1.cpp tests2.cpp' are source files with tests
add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp
zoo/lzd.cpp
lh5.cpp)
zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp arc/crunch.cpp
arc/squash.cpp)
target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native")

133
tests/arc/crunch.cpp Normal file
View File

@@ -0,0 +1,133 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"
#define EXPECTED_CRC32 0x66007dba
static const uint8_t *buffer;
class crunchFixture : public ::testing::Test
{
public:
crunchFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arccrunchnr.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(72537);
fread((void *)buffer, 1, 72537, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~crunchFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(crunchFixture, crunch)
{
size_t destLen = 152089;
size_t srcLen = 72537;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_crunch_nrpack(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}
class crunchDynamicFixture : public ::testing::Test
{
public:
crunchDynamicFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arccrunch_dynamic.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(73189);
fread((void *)buffer, 1, 73189, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~crunchDynamicFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(crunchDynamicFixture, crunchDynamic)
{
size_t destLen = 152089;
size_t srcLen = 73189;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_crunch_dynamic(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}

82
tests/arc/pack.cpp Normal file
View File

@@ -0,0 +1,82 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"
#define EXPECTED_CRC32 0x66007dba
static const uint8_t *buffer;
class packFixture : public ::testing::Test
{
public:
packFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arcpack.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(149855);
fread((void *)buffer, 1, 149855, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~packFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(packFixture, pack)
{
size_t destLen = 152089;
size_t srcLen = 149855;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_pack(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}

81
tests/arc/squash.cpp Normal file
View File

@@ -0,0 +1,81 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"
#define EXPECTED_CRC32 0x66007dba
static const uint8_t *buffer;
class squashFixture : public ::testing::Test
{
public:
squashFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arcsquash.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(67308);
fread((void *)buffer, 1, 67308, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~squashFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(squashFixture, squash)
{
size_t destLen = 152089;
size_t srcLen = 67308;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_squash(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}

81
tests/arc/squeeze.cpp Normal file
View File

@@ -0,0 +1,81 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"
#define EXPECTED_CRC32 0x66007dba
static const uint8_t *buffer;
class squeezeFixture : public ::testing::Test
{
public:
squeezeFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arcsqueeze.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(88044);
fread((void *)buffer, 1, 88044, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~squeezeFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(squeezeFixture, squeeze)
{
size_t destLen = 152089;
size_t srcLen = 88044;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_squeeze(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}

BIN
tests/data/arccrunch_dynamic.bin Executable file

Binary file not shown.

BIN
tests/data/arccrunchnr.bin Executable file

Binary file not shown.

3628
tests/data/arcpack.bin Executable file

File diff suppressed because it is too large

BIN
tests/data/arcsquash.bin Executable file

Binary file not shown.

BIN
tests/data/arcsqueeze.bin Executable file

Binary file not shown.

View File

@@ -25,7 +25,7 @@
#include <sys/stat.h>
#include <unistd.h>
#include "../library.h"
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"

View File

@@ -3,63 +3,96 @@
/***********************************************************
decode.c
Adapted from "ar" archiver written by Haruhiko Okumura.
Adapted from Haruhiko Okumura's “ar” archiver. This
version has been modified in 2025 by Natalia Portillo
for in-memory decompression.
***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include <limits.h>
#include <stdint.h>
#include <limits.h> // for UCHAR_MAX
#include <stdint.h> // for fixed-width integer types
#include "ar.h"
#include "lzh.h"
#include "ar.h" // archive format constants
#include "lzh.h" // LZH-specific constants (DICSIZ, THRESHOLD, etc.)
extern int decoded; /* from huf.c */
extern int decoded; // flag set by decode_c() when end-of-stream is reached
static int j; /* remaining bytes to copy */
static int j; // number of match bytes still to be copied from the sliding window
/*
* decode_start()
*
* Prepare the decoder for a new file:
* - Initialize the Huffman bitstream (via huf_decode_start())
* - Reset the sliding-window copy counter `j`
* - Clear the end-of-data flag `decoded`
*/
void decode_start()
{
huf_decode_start();
j = 0;
decoded = 0;
huf_decode_start(); // reset bit-reader state
j = 0; // no pending copy runs yet
decoded = 0; // not yet at end-of-stream
}
/*
decodes; returns no. of chars decoded
*/
* decode(count, buffer)
*
* Decode up to `count` bytes (usually DICSIZ) into `buffer[]`.
* Returns the number of bytes actually written; this can be fewer than
* `count` once the end-of-stream marker has been decoded (`decoded` is set).
*
* Sliding-window logic:
* 1. If `j` > 0, we are in the middle of copying a previous match:
* - Copy one byte from `buffer[i]` into `buffer[r]`
* - Advance `i` (circular within DICSIZ) and `r`
* - Decrement `j` and repeat until `j` = 0 or `r` = count
* 2. Otherwise, fetch the next symbol `c = decode_c()`:
* - If `c <= UCHAR_MAX`, it's a literal byte: emit it directly
* - Else it's a match:
* • compute `j = match_length = c - (UCHAR_MAX + 1 - THRESHOLD)`
* • compute `i = (r - match_offset - 1) mod DICSIZ`,
* where match_offset = decode_p()
* • enter copy loop from step 1
*/
int decode(uint32_t count, uint8_t *buffer)
/* The calling function must keep the number of
bytes to be processed. This function decodes
either 'count' bytes or 'DICSIZ' bytes, whichever
is smaller, into the array 'buffer[]' of size
'DICSIZ' or more.
Call decode_start() once for each new file
before calling this function. */
{
static uint32_t i;
uint32_t r, c;
static uint32_t i; // sliding-window read index (circular)
uint32_t r; // write position in buffer
uint32_t c; // symbol or match code
r = 0;
// Step 1: finish any pending copy from a previous match
while(--j >= 0)
{
buffer[r] = buffer[i];
i = (i + 1) & (DICSIZ - 1);
if(++r == count) return r;
buffer[r] = buffer[i]; // copy one byte from history
i = (i + 1) & (DICSIZ - 1); // wrap index within [0, DICSIZ)
if(++r == count) // if output buffer is full
return r; // return bytes written so far
}
// Step 2: decode new symbols until end-of-stream or buffer full
for(;;)
{
c = decode_c();
if(decoded) return r;
c = decode_c(); // get next Huffman symbol
if(decoded) // end-of-stream marker reached
return r; // no more bytes to decode
if(c <= UCHAR_MAX)
{
buffer[r] = c;
// Literal byte: emit it directly
buffer[r] = (uint8_t)c;
if(++r == count) return r;
}
else
{
// Match sequence: compute how many bytes to copy
// j = match length
j = c - (UCHAR_MAX + 1 - THRESHOLD);
// i = start position in sliding window:
// current output position minus offset minus 1, wrapped
i = (r - decode_p() - 1) & (DICSIZ - 1);
// Copy `j` bytes from history
while(--j >= 0)
{
buffer[r] = buffer[i];
@@ -68,4 +101,4 @@ int decode(uint32_t count, uint8_t *buffer)
}
}
}
}
}

159
zoo/huf.c
View File

@@ -1,110 +1,148 @@
/*$Source: /usr/home/dhesi/zoo/RCS/huf.c,v $*/
/*$Id: huf.c,v 1.9 91/07/09 01:39:55 dhesi Exp $*/
/***********************************************************
huf.c -- static Huffman
huf.c -- static Huffman decoding
Adapted from "ar" archiver written by Haruhiko Okumura.
Adapted from Haruhiko Okumura's “ar” archiver.
Modified in 2025 by Natalia Portillo for in-memory I/O.
***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include <limits.h>
#include "ar.h"
#include "lzh.h"
#include <limits.h> // UCHAR_MAX
#include "ar.h" // archive format constants
#include "lzh.h" // LZH algorithm constants (NC, DICBIT, CODE_BIT, etc.)
// NP = number of position codes = DICBIT+1
// NT = number of tree codes = CODE_BIT+3
// PBIT, TBIT = bit-width to transmit NP/NT in header
#define NP (DICBIT + 1)
#define NT (CODE_BIT + 3)
#define PBIT 4 /* smallest integer such that (1U << PBIT) > NP */
#define TBIT 5 /* smallest integer such that (1U << TBIT) > NT */
#define PBIT 4 /* smallest bits so (1<<PBIT)>NP */
#define TBIT 5 /* smallest bits so (1<<TBIT)>NT */
// NPT = max(NP,NT) for prefix-tree lengths
#if NT > NP
#define NPT NT
#else
#define NPT NP
#endif
static void read_pt_len(int, int, int);
static void read_c_len();
// forward declarations of helper routines
static void read_pt_len(int nn, int nbit, int i_special);
static void read_c_len(void);
int decoded; /* for use in decode.c */
int decoded; // flag set when end-of-stream block is seen
uint16_t left[2 * NC - 1], right[2 * NC - 1];
// Huffman tree storage arrays
// left[]/right[] store the binary tree structure for fast decoding
uint16_t left[2 * NC - 1], right[2 * NC - 1];
// c_len[] = code lengths for literal/length tree (NC symbols)
// pt_len[] = code lengths for position-tree / prefix table (NPT symbols)
// buf = temporary buffer pointer used during encoding; unused in decode
static uint8_t *buf, c_len[NC], pt_len[NPT];
// size of buf if used, and remaining symbols in current block
static uint32_t bufsiz = 0, blocksize;
static uint16_t c_freq[2 * NC - 1], c_table[4096], c_code[NC], p_freq[2 * NP - 1], pt_table[256], pt_code[NPT],
t_freq[2 * NT - 1];
/***** decoding *****/
// Frequency, code and decodetable structures
static uint16_t c_freq[2 * NC - 1], // literal/length frequency counts
c_table[4096], // fast-lookup table for literal/length decoding
c_code[NC], // canonical Huffman codes for literals
p_freq[2 * NP - 1], // position frequency counts
pt_table[256], // prefix-tree fast lookup (for reading code lengths)
pt_code[NPT], // canonical codes for the prefix tree
t_freq[2 * NT - 1]; // temporary freq for tree of codelength codes
/***** decoding helper: read prefix-tree code-lengths *****/
static void read_pt_len(int nn, int nbit, int i_special)
{
int i, c, n;
uint32_t mask;
// 1) read how many code-lengths to consume
n = getbits(nbit);
if(n == 0)
{
// special case: all code-lengths are identical
c = getbits(nbit);
for(i = 0; i < nn; i++) pt_len[i] = 0;
for(i = 0; i < 256; i++) pt_table[i] = c;
for(i = 0; i < nn; i++) // zero out lengths
pt_len[i] = 0;
for(i = 0; i < 256; i++) // prefix table always returns 'c'
pt_table[i] = c;
}
else
{
// 2) read code lengths one by one
i = 0;
while(i < n)
{
// peek top 3 bits of bitbuf to guess small lengths
c = bitbuf >> (BITBUFSIZ - 3);
if(c == 7)
{
mask = (unsigned)1 << (BITBUFSIZ - 1 - 3);
// if all three bits are 1, count additional ones
mask = 1U << (BITBUFSIZ - 1 - 3);
while(mask & bitbuf)
{
mask >>= 1;
c++;
mask >>= 1;
}
}
fillbuf((c < 7) ? 3 : c - 3);
// consume the actual length bits
fillbuf((c < 7) ? 3 : (c - 3));
pt_len[i++] = c;
// at special index, read a small run of zeros
if(i == i_special)
{
c = getbits(2);
while(--c >= 0) pt_len[i++] = 0;
while(--c >= 0 && i < nn) pt_len[i++] = 0;
}
}
// any remaining symbols get code-length zero
while(i < nn) pt_len[i++] = 0;
// build fast lookup table from lengths
make_table(nn, pt_len, 8, pt_table);
}
}
static void read_c_len()
/***** decoding helper: read literal/length code-lengths *****/
static void read_c_len(void)
{
int i, c, n;
uint32_t mask;
// 1) how many literal codes?
n = getbits(CBIT);
if(n == 0)
{
// all code-lengths identical
c = getbits(CBIT);
for(i = 0; i < NC; i++) c_len[i] = 0;
for(i = 0; i < 4096; i++) c_table[i] = c;
}
else
{
// 2) read each code length via prefix-tree
i = 0;
while(i < n)
{
// lookup next symbol in prefix table
c = pt_table[bitbuf >> (BITBUFSIZ - 8)];
if(c >= NT)
{
mask = (unsigned)1 << (BITBUFSIZ - 1 - 8);
// if prefix code is non-leaf, walk tree
mask = 1U << (BITBUFSIZ - 1 - 8);
do {
if(bitbuf & mask)
c = right[c];
else
c = left[c];
c = (bitbuf & mask) ? right[c] : left[c];
mask >>= 1;
} while(c >= NT);
}
// consume code-length bits
fillbuf(pt_len[c]);
// c ≤ 2: run-length encoding of zeros
if(c <= 2)
{
if(c == 0)
@@ -113,75 +151,94 @@ static void read_c_len()
c = getbits(4) + 3;
else
c = getbits(CBIT) + 20;
while(--c >= 0) c_len[i++] = 0;
while(--c >= 0 && i < NC) c_len[i++] = 0;
}
else
c_len[i++] = c - 2;
{
// real code-length = c - 2
c_len[i++] = (uint8_t)(c - 2);
}
}
// fill rest with zero lengths
while(i < NC) c_len[i++] = 0;
// build fast lookup for literal/length codes
make_table(NC, c_len, 12, c_table);
}
}
uint32_t decode_c()
/***** decode next literal/length symbol or end-of-block *****/
uint32_t decode_c(void)
{
uint32_t j, mask;
// if starting a new block, read its header
if(blocksize == 0)
{
blocksize = getbits(16);
blocksize = getbits(16); // block size = number of symbols
if(blocksize == 0)
{
#if 0
(void) fprintf(stderr, "block size = 0, decoded\n"); /* debug */
#endif
{ // zero block → end of data
decoded = 1;
return 0;
}
// read three Huffman trees for this block:
// 1) code-length codes for literal tree (NT,TBIT,3)
read_pt_len(NT, TBIT, 3);
// 2) literal/length tree lengths (CBIT)
read_c_len();
// 3) prefix-tree lengths for positions (NP,PBIT,-1)
read_pt_len(NP, PBIT, -1);
}
// consume one symbol from this block
blocksize--;
// fast table lookup: top 12 bits
j = c_table[bitbuf >> (BITBUFSIZ - 12)];
if(j >= NC)
{
mask = (unsigned)1 << (BITBUFSIZ - 1 - 12);
// need to walk tree if overflow
mask = 1U << (BITBUFSIZ - 1 - 12);
do {
if(bitbuf & mask)
j = right[j];
else
j = left[j];
j = (bitbuf & mask) ? right[j] : left[j];
mask >>= 1;
} while(j >= NC);
}
// remove j's code length bits from bitbuf
fillbuf(c_len[j]);
return j;
}
uint32_t decode_p()
/***** decode match-position extra bits *****/
uint32_t decode_p(void)
{
uint32_t j, mask;
// fast table lookup: top 8 bits
j = pt_table[bitbuf >> (BITBUFSIZ - 8)];
if(j >= NP)
{
mask = (unsigned)1 << (BITBUFSIZ - 1 - 8);
// tree walk for long codes
mask = 1U << (BITBUFSIZ - 1 - 8);
do {
if(bitbuf & mask)
j = right[j];
else
j = left[j];
j = (bitbuf & mask) ? right[j] : left[j];
mask >>= 1;
} while(j >= NP);
}
// consume prefix bits
fillbuf(pt_len[j]);
if(j != 0) j = ((unsigned)1 << (j - 1)) + getbits((int)(j - 1));
// if non-zero, read extra bits to form full position
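// e.g. a table symbol of 3 yields values 4..7: base (1 << 2) plus two extra bits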
if(j != 0) j = (1U << (j - 1)) + getbits((int)(j - 1));
return j;
}
void huf_decode_start()
/***** start a new Huffman decode session *****/
void huf_decode_start(void)
{
init_getbits();
blocksize = 0;
init_getbits(); // reset bit buffer & subbitbuf state
blocksize = 0; // force reading a fresh block header
}

112
zoo/io.c
View File

@@ -3,103 +3,127 @@
/***********************************************************
io.c -- input/output (modified for in-memory I/O)
Adapted from "ar" archiver written by Haruhiko Okumura.
This version reads compressed bytes from an input buffer
via mem_getc() and writes output bytes to a buffer via
mem_putc(), removing all FILE* dependencies for decompression.
Adapted from Haruhiko Okumura's “ar” archiver.
This version feeds compressed bytes from a memory buffer
(via mem_getc()) and writes decompressed output to a buffer
(via mem_putc()), eliminating FILE* dependencies.
Modified for in-memory decompression by Natalia Portillo, 2025
***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include <limits.h>
#include <limits.h> // Provides CHAR_BIT for bit-width operations
#include "ar.h"
#include "lzh.h"
#include "ar.h" // Archive format constants (e.g., CODE_BIT, NC)
#include "lh5.h" // Declarations for mem_getc(), mem_putc(), buffer state
#include "lzh.h" // LZH algorithm constants (e.g., BITBUFSIZ, DICSIZ)
#include "lh5.h" /* mem_getc(), mem_putc(), in_ptr/in_left, out_ptr/out_left */
//-----------------------------------------------------------------------------
// Global bit-I/O state
//-----------------------------------------------------------------------------
uint16_t bitbuf;
int unpackable;
size_t compsize, origsize;
uint32_t subbitbuf;
int bitcount;
uint16_t bitbuf; // Accumulates bits shifted in from the input stream
int unpackable; // Not used by the decoder (originally an encoder error flag)
// Byte counters (optional diagnostics; not used to gate decompression)
size_t compsize; // Count of output bytes produced (for compression mode)
size_t origsize; // Count of input bytes consumed (for CRC in file I/O)
uint32_t subbitbuf; // Holds the last byte fetched; bits are consumed from here
int bitcount; // How many valid bits remain in subbitbuf
/*
* fillbuf(n) -- shift bitbuf left by n bits and read in n new bits
* now reads bytes directly from in-memory input buffer
*/
//-----------------------------------------------------------------------------
// fillbuf(n)
// Shift the global bitbuf left by n bits, then read in n new bits
// from the input buffer (in-memory) to replenish bitbuf.
//-----------------------------------------------------------------------------
void fillbuf(int n) /* Shift bitbuf n bits left, read n bits */
{
// Make room for n bits
bitbuf <<= n;
// While we still need more bits than we have in subbitbuf...
while(n > bitcount)
{
// Pull any remaining bits from subbitbuf into bitbuf
bitbuf |= subbitbuf << (n -= bitcount);
/* fetch next compressed byte from in_buf */
// Fetch the next compressed byte from input memory
{
int c = mem_getc();
int c = mem_getc(); // next input byte, or EOF when the buffer is exhausted
subbitbuf = (c == EOF ? 0 : (uint8_t)c);
}
// Reset bitcount: a full new byte is available
bitcount = CHAR_BIT;
}
// Finally, consume the last n bits from subbitbuf into bitbuf
bitbuf |= subbitbuf >> (bitcount -= n);
}
/*
* getbits(n) -- return next n bits from the bit buffer
*/
//-----------------------------------------------------------------------------
// getbits(n)
// Return the next n bits from bitbuf (highest-order bits), then
// call fillbuf(n) to replace them. Useful for reading variable-length codes.
//-----------------------------------------------------------------------------
uint32_t getbits(int n)
{
uint32_t x = bitbuf >> (BITBUFSIZ - n);
fillbuf(n);
uint32_t x = bitbuf >> (BITBUFSIZ - n); // extract top n bits
fillbuf(n); // replenish bitbuf for future reads
return x;
}
/*
* putbits(n,x) -- write the lowest n bits of x to the bit buffer
* now writes bytes directly to in-memory output buffer
*/
//-----------------------------------------------------------------------------
// putbits(n, x)
// Write the lowest n bits of x into the output buffer, packing them
// into bytes via subbitbuf/bitcount and sending full bytes out
// with mem_putc(). Used by the encoder; kept here for completeness.
//-----------------------------------------------------------------------------
void putbits(int n, uint32_t x) /* Write rightmost n bits of x */
{
// If we have enough room in subbitbuf, just pack the bits
if(n < bitcount) { subbitbuf |= x << (bitcount -= n); }
else
{
/* output first byte */
// Output the first full byte when subbitbuf fills
{
int w = (int)(subbitbuf | (x >> (n -= bitcount)));
mem_putc(w);
compsize++;
compsize++; // increment output counter (for compression)
}
// If remaining bits don't fill a full byte, stash them
if(n < CHAR_BIT) { subbitbuf = x << (bitcount = CHAR_BIT - n); }
else
{
/* output second byte */
// Otherwise, flush a second full byte
{
int w2 = (int)(x >> (n - CHAR_BIT));
mem_putc(w2);
compsize++;
}
// And stash any leftover bits beyond two bytes
subbitbuf = x << (bitcount = 2 * CHAR_BIT - n);
}
}
}
/*
* init_getbits -- initialize bit reader state
*/
//-----------------------------------------------------------------------------
// init_getbits()
// Reset the bit-reader state so that fillbuf() will load fresh bits
// from the start of the input buffer.
//-----------------------------------------------------------------------------
void init_getbits()
{
bitbuf = 0;
subbitbuf = 0;
bitcount = 0;
fillbuf(BITBUFSIZ);
bitbuf = 0; // clear accumulated bits
subbitbuf = 0; // no pending byte
bitcount = 0; // no bits available
fillbuf(BITBUFSIZ); // pre-load the bit buffer fully
}
/*
* init_putbits -- initialize bit writer state
*/
//-----------------------------------------------------------------------------
// init_putbits()
// Reset the bit-writer state so subsequent putbits() calls start fresh.
//-----------------------------------------------------------------------------
void init_putbits()
{
bitcount = CHAR_BIT;
subbitbuf = 0;
bitcount = CHAR_BIT; // subbitbuf is empty but ready for CHAR_BIT bits
subbitbuf = 0; // clear any leftover byte data
}

View File

@@ -3,75 +3,128 @@
/***********************************************************
maketbl.c -- make table for decoding
Adapted from "ar" archiver written by Haruhiko Okumura.
Builds a fast lookup table + fallback tree for Huffman
codes given code lengths. Used by decode_c() to map
input bit patterns to symbols efficiently.
Adapted from Haruhiko Okumura's “ar” archiver.
Modified for in-memory decompression by Natalia Portillo, 2025
***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include "ar.h"
#include "lzh.h"
#include <stdio.h>
#include "ar.h" // provides NC, CODE_BIT, etc.
#include "lzh.h" // provides BITBUFSIZ
/*
* make_table(nchar, bitlen, tablebits, table):
*
* nchar = number of symbols
* bitlen[] = array of code lengths for each symbol [0..nchar-1]
* tablebits = number of bits for fast direct lookup
* table[] = output table of size (1<<tablebits), entries are:
* - symbol index if code length ≤ tablebits
* - zero or tree node index to follow for longer codes
*
* Algorithm steps:
* 1) Count how many codes of each length (count[1..16]).
* 2) Compute 'start' offsets for each length in a 16-bit code space.
* 3) Normalize starts to 'tablebits' prefix domain, build 'weight'.
* 4) Fill direct-mapped entries for short codes.
* 5) Build binary tree (using left[]/right[]) for codes longer than tablebits.
*/
void make_table(int nchar, uint8_t *bitlen, int tablebits, uint16_t *table)
{
uint16_t count[17], weight[17], start[18], *p;
uint32_t i, k, len, ch, jutbits, avail, nextcode, mask;
uint16_t count[17]; // count[L] = number of symbols with length L
uint16_t weight[17]; // weight[L] = step size in prefix domain for length L
uint16_t start[18]; // start[L] = base code for length L in 16-bit space
uint16_t *p; // pointer into 'table' or tree
uint32_t i, k, len, ch;
uint32_t jutbits; // bits to drop when mapping into tablebits
uint32_t avail; // next free node index for left[]/right[] tree
uint32_t nextcode; // end-of-range code for current length
uint32_t mask; // bitmask for tree insertion
// 1) Zero counts, then tally code-lengths
for(i = 1; i <= 16; i++) count[i] = 0;
for(i = 0; i < nchar; i++) count[bitlen[i]]++;
for(i = 0; i < (uint32_t)nchar; i++) count[bitlen[i]]++;
// 2) Compute cumulative start positions in the 16-bit code space
start[1] = 0;
for(i = 1; i <= 16; i++) start[i + 1] = start[i] + (count[i] << (16 - i));
if(start[17] != (uint16_t)((unsigned)1 << 16)) fprintf(stderr, "Bad decode table\n");
// Validate: sum of all codes must fill 16-bit range
if(start[17] != (uint16_t)(1U << 16)) fprintf(stderr, "make_table: Bad decode table\n");
// Prepare for mapping into tablebits-bit table
jutbits = 16 - tablebits;
for(i = 1; i <= tablebits; i++)
for(i = 1; i <= (uint32_t)tablebits; i++)
{
// Shrink start[i] into prefix domain
start[i] >>= jutbits;
weight[i] = (unsigned)1 << (tablebits - i);
}
while(i <= 16)
{
weight[i] = (unsigned)1 << (16 - i);
i++;
// Weight = 2^(tablebits - i)
weight[i] = (uint16_t)(1U << (tablebits - i));
}
// For lengths > tablebits, weight = 2^(16 - length)
for(; i <= 16; i++) weight[i] = (uint16_t)(1U << (16 - i));
// 3) Clear any unused table slots between last short code and end
i = start[tablebits + 1] >> jutbits;
if(i != (uint16_t)((unsigned)1 << 16))
if(i != (uint16_t)(1U << tablebits))
{
k = 1 << tablebits;
while(i != k) table[i++] = 0;
k = 1U << tablebits;
while(i < k) table[i++] = 0;
}
// Initialize tree node index after the direct table entries
avail = nchar;
mask = (unsigned)1 << (15 - tablebits);
for(ch = 0; ch < nchar; ch++)
// Mask for inspecting bits when building tree
mask = 1U << (15 - tablebits);
// 4) For each symbol, place its codes in table or tree
for(ch = 0; ch < (uint32_t)nchar; ch++)
{
if((len = bitlen[ch]) == 0) continue;
len = bitlen[ch];
if(len == 0) continue; // skip symbols with no code
// Next code range = [start[len], start[len]+weight[len])
nextcode = start[len] + weight[len];
if(len <= tablebits)
{
for(i = start[len]; i < nextcode; i++) table[i] = ch;
// Direct mapping: fill all table slots in this range
for(k = start[len]; k < nextcode; k++) table[k] = (uint16_t)ch;
}
else
{
k = start[len];
p = &table[k >> jutbits];
i = len - tablebits;
while(i != 0)
// Build or extend tree for longer codes
// Start at table index for this prefix
k = start[len];
p = &table[k >> jutbits];
// Number of extra bits beyond tablebits
uint32_t extra = len - tablebits;
// Walk/construct tree nodes bit by bit
while(extra-- > 0)
{
if(*p == 0)
{
right[avail] = left[avail] = 0;
*p = avail++;
// allocate a new node for left[]/right[]
left[avail] = right[avail] = 0;
*p = (uint16_t)avail++;
}
// branch left or right based on current code bit
if(k & mask)
p = &right[*p];
else
p = &left[*p];
// shift to next bit in code
k <<= 1;
i--;
}
*p = ch;
// At leaf: assign symbol
*p = (uint16_t)ch;
}
// Advance start[len] for next code of same length
start[len] = nextcode;
}
}