mirror of
https://github.com/aaru-dps/Aaru.Compression.Native.git
synced 2025-12-16 19:24:31 +00:00
Compare commits
13 Commits
v6.0.0-alp
...
v6.0.0-alp
| Author | SHA1 | Date | |
|---|---|---|---|
|
54410bf59e
|
|||
|
d188ebe02e
|
|||
|
e97dd11da5
|
|||
|
27e2baf54a
|
|||
|
1323bba72f
|
|||
|
a39e6abb97
|
|||
|
afc6f3e2bc
|
|||
|
bfb9a6b524
|
|||
|
750df1cca9
|
|||
|
a336ce953e
|
|||
|
057d0c0242
|
|||
|
0068268c65
|
|||
|
beb8b405db
|
@@ -2,7 +2,7 @@
|
||||
<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
|
||||
<metadata>
|
||||
<id>Aaru.Compression.Native</id>
|
||||
<version>6.0.0-alpha.11.1</version>
|
||||
<version>6.0.0-alpha.11.3</version>
|
||||
<description>C implementation of compression algorithms used by Aaru.</description>
|
||||
<authors>claunia</authors>
|
||||
<projectUrl>https://github.com/aaru-dps/Aaru.Compression.Native</projectUrl>
|
||||
|
||||
@@ -129,8 +129,10 @@ endif()
|
||||
add_subdirectory(3rdparty)
|
||||
|
||||
add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h adc.c adc.h lzip.c flac.c flac.h
|
||||
zoo/lzd.c
|
||||
zoo/lzd.h)
|
||||
zoo/lzd.c zoo/lzd.h zoo/lzh.c zoo/decode.c zoo/huf.c zoo/io.c zoo/lh5.c zoo/lh5.h zoo/lzh.h zoo/ar.h zoo/maketbl.c
|
||||
arc/pack.c arc/squeeze.c arc/crunch.c arc/lzw.c
|
||||
pak/crush.c pak/distill.c pak/bitstream.c pak/bitstream.h pak/lzw.c pak/lzw.h pak/prefixcode.c
|
||||
pak/prefixcode.h)
|
||||
|
||||
include(3rdparty/bzip2.cmake)
|
||||
include(3rdparty/flac.cmake)
|
||||
|
||||
295
arc/crunch.c
Normal file
295
arc/crunch.c
Normal file
@@ -0,0 +1,295 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
* Copyright © 2018-2019 David Ryskalczyk
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "../library.h"
|
||||
|
||||
#define FALSE 0
|
||||
#define TRUE !FALSE
|
||||
#define TABSIZE 4096 // Size of the string table.
|
||||
#define NO_PRED 0xFFFF // Indicates no predecessor in the string table.
|
||||
#define EMPTY 0xFFFF // Indicates an empty stack.
|
||||
|
||||
typedef unsigned char u_char;
|
||||
typedef unsigned short u_short;
|
||||
|
||||
// Entry in the string table.
|
||||
struct entry
|
||||
{
|
||||
char used; // Is this entry in use?
|
||||
u_char follower; // The character that follows the string.
|
||||
u_short next; // Next entry in a collision chain.
|
||||
u_short predecessor; // Code for the preceding string.
|
||||
};
|
||||
|
||||
// Static variables for decompression state.
|
||||
static struct entry *string_tab;
|
||||
static u_char *stack;
|
||||
static int sp;
|
||||
|
||||
// Buffer management variables.
|
||||
static const u_char *in_buf_ptr;
|
||||
static size_t in_len_rem;
|
||||
static int inflag;
|
||||
|
||||
// Pointer to the hash function to use.
|
||||
static u_short (*h)(u_short, u_char);
|
||||
|
||||
// Original hash function from ARC.
|
||||
// Original ARC hash: squares (pred + foll) with bit 11 forced on and keeps bits 6..17 of the
// square as a 12-bit table index.
//
// pred: code of the predecessor string (may be NO_PRED).
// foll: byte that follows the predecessor.
// Returns an index in the range 0..0x0FFF.
static u_short oldh(u_short pred, u_char foll)
{
    // The square can reach 0xFFFF * 0xFFFF, which does not fit in a signed 32-bit long
    // (e.g. on Windows) and would be undefined behaviour there. Unsigned 32-bit arithmetic
    // wraps in a defined way and leaves bits 6..17 of the product unchanged.
    uint32_t local = (uint32_t)((pred + foll) | 0x0800) & 0xFFFFu;

    local *= local;

    return (u_short)((local >> 6) & 0x0FFF);
}
|
||||
|
||||
// Newer, faster hash function.
|
||||
// Multiplicative hash: low 12 bits of ((pred + foll) mod 65536) * 15073.
static u_short newh(u_short pred, u_char foll)
{
    const int sum = (pred + foll) & 0xFFFF;

    return (sum * 15073) & 0xFFF;
}
|
||||
|
||||
// Finds the end of a collision list.
|
||||
// Walks the collision chain that starts at `index` and returns the index of its last slot
// (the first slot whose `next` field is 0).
static u_short eolist(u_short index)
{
    for(;;)
    {
        const int following = string_tab[index].next;

        if(following == 0) return index;

        index = (u_short)following;
    }
}
|
||||
|
||||
// Hashes a string to find its position in the table.
|
||||
// Picks the table slot for the string (pred, foll).
// If the hashed slot is free it is returned directly. Otherwise the tail of that slot's
// collision chain is located, a free slot is searched linearly starting 101 positions after
// the tail (wrapping at TABSIZE), the tail is linked to it, and the free slot is returned.
static u_short hash_it(u_short pred, u_char foll)
{
    const u_short home = (*h)(pred, foll);

    if(!string_tab[home].used) return home;

    const u_short tail  = eolist(home);
    u_short       probe = (tail + 101) & 0x0FFF;

    while(string_tab[probe].used)
    {
        probe++;
        if(probe == TABSIZE) probe = 0;
    }

    string_tab[tail].next = probe;

    return probe;
}
|
||||
|
||||
// Adds a new string to the table.
|
||||
// Stores the string (pred, foll) in the slot chosen by hash_it() and marks it as used.
static void upd_tab(u_short pred, u_short foll)
{
    struct entry *slot = &string_tab[hash_it(pred, foll)];

    slot->predecessor = pred;
    slot->follower    = foll;
    slot->next        = 0;
    slot->used        = TRUE;
}
|
||||
|
||||
// Initializes the string table.
|
||||
static void init_tab()
|
||||
{
|
||||
memset((char *)string_tab, 0, TABSIZE * sizeof(struct entry));
|
||||
for(unsigned int i = 0; i < 256; i++) upd_tab(NO_PRED, i);
|
||||
}
|
||||
|
||||
// Reads a 12-bit code from the input buffer.
|
||||
static int get_code()
|
||||
{
|
||||
int code;
|
||||
if(in_len_rem < 2) return -1;
|
||||
|
||||
if((inflag ^= 1))
|
||||
{
|
||||
code = (*in_buf_ptr++ << 4);
|
||||
code |= (*in_buf_ptr >> 4);
|
||||
in_len_rem--;
|
||||
}
|
||||
else
|
||||
{
|
||||
code = (*in_buf_ptr++ & 0x0f) << 8;
|
||||
code |= (*in_buf_ptr++);
|
||||
in_len_rem -= 2;
|
||||
}
|
||||
return code;
|
||||
}
|
||||
|
||||
// Pushes a character onto the stack.
|
||||
#define PUSH(c) \
|
||||
do { \
|
||||
stack[sp] = ((char)(c)); \
|
||||
if(++sp >= TABSIZE) return -1; \
|
||||
} while(0)
|
||||
|
||||
// Pops a character from the stack.
|
||||
#define POP() ((sp > 0) ? (int)stack[--sp] : EMPTY)
|
||||
|
||||
// Internal crunch decompression logic.
|
||||
static int arc_decompress_crunch_internal(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len, int new_hash)
|
||||
{
|
||||
// Basic validation of pointers.
|
||||
if(!in_buf || !out_buf || !out_len) { return -1; }
|
||||
|
||||
// Allocate memory for tables.
|
||||
string_tab = (struct entry *)malloc(TABSIZE * sizeof(struct entry));
|
||||
stack = (u_char *)malloc(TABSIZE * sizeof(u_char));
|
||||
if(!string_tab || !stack)
|
||||
{
|
||||
if(string_tab) free(string_tab);
|
||||
if(stack) free(stack);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Select the hash function.
|
||||
if(new_hash)
|
||||
h = newh;
|
||||
else
|
||||
h = oldh;
|
||||
|
||||
// Initialize state.
|
||||
sp = 0;
|
||||
init_tab();
|
||||
int code_count = TABSIZE - 256;
|
||||
in_buf_ptr = in_buf;
|
||||
in_len_rem = in_len;
|
||||
inflag = 0;
|
||||
|
||||
// Main decompression loop.
|
||||
int oldcode = get_code();
|
||||
if(oldcode == -1)
|
||||
{
|
||||
*out_len = 0;
|
||||
free(string_tab);
|
||||
free(stack);
|
||||
return 0;
|
||||
}
|
||||
int finchar = string_tab[oldcode].follower;
|
||||
|
||||
size_t out_pos = 0;
|
||||
if(out_pos < *out_len) { out_buf[out_pos++] = finchar; }
|
||||
|
||||
int newcode;
|
||||
while((newcode = get_code()) != -1)
|
||||
{
|
||||
int code = newcode;
|
||||
struct entry *ep = &string_tab[code];
|
||||
|
||||
// Handle unknown codes and KwKwK case.
|
||||
if(!ep->used)
|
||||
{
|
||||
code = oldcode;
|
||||
ep = &string_tab[code];
|
||||
PUSH(finchar);
|
||||
}
|
||||
// Decode the string by traversing the table.
|
||||
while(ep->predecessor != NO_PRED)
|
||||
{
|
||||
PUSH(ep->follower);
|
||||
code = ep->predecessor;
|
||||
ep = &string_tab[code];
|
||||
}
|
||||
PUSH(finchar = ep->follower);
|
||||
|
||||
// Add the new string to the table if there's room.
|
||||
if(code_count)
|
||||
{
|
||||
upd_tab(oldcode, finchar);
|
||||
--code_count;
|
||||
}
|
||||
oldcode = newcode;
|
||||
|
||||
// Write the decoded string to the output buffer.
|
||||
while(sp > 0)
|
||||
{
|
||||
int c = POP();
|
||||
if(c == EMPTY) break;
|
||||
if(out_pos < *out_len) { out_buf[out_pos++] = (unsigned char)c; }
|
||||
}
|
||||
}
|
||||
|
||||
// Clean up and return.
|
||||
*out_len = out_pos;
|
||||
free(string_tab);
|
||||
free(stack);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Decompresses crunched data.
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_crunch(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
                                                size_t *out_len)
{
    // Plain crunching: string table hashed with the original function (oldh), no unpacking pass.
    const int use_new_hash = 0;

    return arc_decompress_crunch_internal(in_buf, in_len, out_buf, out_len, use_new_hash);
}
|
||||
|
||||
// Decompresses crunched data with non-repeat packing.
|
||||
// Decompresses crunched data (old hash) followed by non-repeat unpacking.
//
// in_buf/in_len: compressed input.
// out_buf:       destination buffer.
// out_len:       in: capacity of out_buf; out: number of bytes written.
//
// Returns 0 on success, -1 on NULL arguments, on a capacity too large to size the scratch
// buffer, on allocation failure, or when either decoding stage fails.
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_buf, size_t in_len,
                                                       unsigned char *out_buf, size_t *out_len)
{
    // out_len is dereferenced below to size the scratch buffer, so validate before touching it.
    if(!in_buf || !out_buf || !out_len) return -1;

    // Doubling must not wrap around size_t, or the scratch buffer would be undersized.
    if(*out_len > SIZE_MAX / 2) return -1;

    // Scratch buffer for the intermediate (still packed) data; twice the output capacity is a heuristic.
    size_t temp_len = *out_len * 2;

    // Never request 0 bytes: malloc(0) may legitimately return NULL and would look like a failure.
    unsigned char *temp_buf = malloc(temp_len ? temp_len : 1);
    if(!temp_buf) return -1;

    // First, decompress the crunched data.
    int result = arc_decompress_crunch_internal(in_buf, in_len, temp_buf, &temp_len, 0);

    // Then, decompress the non-repeat packing.
    if(result == 0) { result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len); }

    free(temp_buf);
    return result;
}
|
||||
|
||||
// Decompresses crunched data with non-repeat packing and the new hash function.
|
||||
// Decompresses crunched data (new hash function) followed by non-repeat unpacking.
//
// in_buf/in_len: compressed input.
// out_buf:       destination buffer.
// out_len:       in: capacity of out_buf; out: number of bytes written.
//
// Returns 0 on success, -1 on NULL arguments, on a capacity too large to size the scratch
// buffer, on allocation failure, or when either decoding stage fails.
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len,
                                                           unsigned char *out_buf, size_t *out_len)
{
    // out_len is dereferenced below to size the scratch buffer, so validate before touching it.
    if(!in_buf || !out_buf || !out_len) return -1;

    // Doubling must not wrap around size_t, or the scratch buffer would be undersized.
    if(*out_len > SIZE_MAX / 2) return -1;

    // Scratch buffer for the intermediate (still packed) data; twice the output capacity is a heuristic.
    size_t temp_len = *out_len * 2;

    // Never request 0 bytes: malloc(0) may legitimately return NULL and would look like a failure.
    unsigned char *temp_buf = malloc(temp_len ? temp_len : 1);
    if(!temp_buf) return -1;

    // Decompress crunched data with the new hash.
    int result = arc_decompress_crunch_internal(in_buf, in_len, temp_buf, &temp_len, 1);

    // Decompress non-repeat packing.
    if(result == 0) { result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len); }

    free(temp_buf);
    return result;
}
|
||||
271
arc/lzw.c
Normal file
271
arc/lzw.c
Normal file
@@ -0,0 +1,271 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
* Copyright © 2018-2019 David Ryskalczyk
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "../library.h"
|
||||
|
||||
#define CRBITS 12 // Max bits for crunching.
|
||||
#define SQBITS 13 // Max bits for squashing.
|
||||
#define INIT_BITS 9 // Initial number of bits per code.
|
||||
#define MAXCODE(n) ((1 << (n)) - 1) // Macro to calculate max code for n bits.
|
||||
#define FIRST 257 // First available code.
|
||||
#define CLEAR 256 // Code to clear the dictionary.
|
||||
|
||||
// LZW decompression state variables.
|
||||
static int Bits;
|
||||
static int max_maxcode;
|
||||
static int n_bits;
|
||||
static int maxcode;
|
||||
static int clear_flg;
|
||||
static int free_ent;
|
||||
static unsigned short *prefix;
|
||||
static unsigned char *suffix;
|
||||
static unsigned char *stack;
|
||||
|
||||
// Buffer management variables.
|
||||
static const unsigned char *in_buf_ptr;
|
||||
static size_t in_len_rem;
|
||||
static int offset;
|
||||
static char buf[SQBITS];
|
||||
|
||||
// Reads a variable-length code from the input buffer.
|
||||
static int getcode()
|
||||
{
|
||||
int code;
|
||||
static int size = 0;
|
||||
int r_off, bits;
|
||||
unsigned char *bp = (unsigned char *)buf;
|
||||
|
||||
// Check if we need to increase code size or handle a clear flag.
|
||||
if(clear_flg > 0 || offset >= size || free_ent > maxcode)
|
||||
{
|
||||
if(free_ent > maxcode)
|
||||
{
|
||||
n_bits++;
|
||||
if(n_bits == Bits)
|
||||
maxcode = max_maxcode;
|
||||
else
|
||||
maxcode = MAXCODE(n_bits);
|
||||
}
|
||||
if(clear_flg > 0)
|
||||
{
|
||||
maxcode = MAXCODE(n_bits = INIT_BITS);
|
||||
clear_flg = 0;
|
||||
}
|
||||
// Read n_bits bytes into the buffer.
|
||||
for(size = 0; size < n_bits; size++)
|
||||
{
|
||||
if(in_len_rem == 0)
|
||||
{
|
||||
code = -1;
|
||||
break;
|
||||
}
|
||||
code = *in_buf_ptr++;
|
||||
in_len_rem--;
|
||||
buf[size] = (char)code;
|
||||
}
|
||||
if(size <= 0) return -1; // End of file.
|
||||
|
||||
offset = 0;
|
||||
size = (size << 3) - (n_bits - 1);
|
||||
}
|
||||
r_off = offset;
|
||||
bits = n_bits;
|
||||
|
||||
// Extract the code from the buffer.
|
||||
bp += (r_off >> 3);
|
||||
r_off &= 7;
|
||||
|
||||
code = (*bp++ >> r_off);
|
||||
bits -= 8 - r_off;
|
||||
r_off = 8 - r_off;
|
||||
|
||||
if(bits >= 8)
|
||||
{
|
||||
code |= *bp++ << r_off;
|
||||
r_off += 8;
|
||||
bits -= 8;
|
||||
}
|
||||
code |= (*bp & ((1 << bits) - 1)) << r_off;
|
||||
offset += n_bits;
|
||||
|
||||
return code;
|
||||
}
|
||||
|
||||
// Main LZW decompression logic.
|
||||
static int arc_decompress_lzw(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len,
|
||||
int squash)
|
||||
{
|
||||
// Basic validation of pointers.
|
||||
if(!in_buf || !out_buf || !out_len) { return -1; }
|
||||
|
||||
// Initialize buffer pointers and lengths.
|
||||
in_buf_ptr = in_buf;
|
||||
in_len_rem = in_len;
|
||||
|
||||
// Set parameters based on whether we're unsquashing or uncrushing.
|
||||
if(squash) { Bits = SQBITS; }
|
||||
else
|
||||
{
|
||||
Bits = CRBITS;
|
||||
if(in_len_rem > 0)
|
||||
{
|
||||
// Crunch format has a header byte indicating max bits.
|
||||
if(*in_buf_ptr != CRBITS) return -1;
|
||||
in_buf_ptr++;
|
||||
in_len_rem--;
|
||||
}
|
||||
}
|
||||
|
||||
if(in_len_rem <= 0)
|
||||
{
|
||||
*out_len = 0;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Initialize LZW parameters.
|
||||
max_maxcode = 1 << Bits;
|
||||
clear_flg = 0;
|
||||
n_bits = INIT_BITS;
|
||||
maxcode = MAXCODE(n_bits);
|
||||
|
||||
// Allocate memory for LZW tables.
|
||||
prefix = (unsigned short *)malloc(max_maxcode * sizeof(unsigned short));
|
||||
suffix = (unsigned char *)malloc(max_maxcode * sizeof(unsigned char));
|
||||
stack = (unsigned char *)malloc(max_maxcode * sizeof(unsigned char));
|
||||
|
||||
if(!prefix || !suffix || !stack)
|
||||
{
|
||||
if(prefix) free(prefix);
|
||||
if(suffix) free(suffix);
|
||||
if(stack) free(stack);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Initialize the first 256 entries of the dictionary.
|
||||
memset(prefix, 0, 256 * sizeof(unsigned short));
|
||||
for(int code = 255; code >= 0; code--) { suffix[code] = (unsigned char)code; }
|
||||
|
||||
free_ent = FIRST;
|
||||
offset = 0;
|
||||
|
||||
// Main decompression loop.
|
||||
int finchar, oldcode, incode;
|
||||
finchar = oldcode = getcode();
|
||||
if(oldcode == -1)
|
||||
{
|
||||
*out_len = 0;
|
||||
free(prefix);
|
||||
free(suffix);
|
||||
free(stack);
|
||||
return 0;
|
||||
}
|
||||
|
||||
size_t out_pos = 0;
|
||||
if(out_pos < *out_len) { out_buf[out_pos++] = finchar; }
|
||||
|
||||
unsigned char *stackp = stack;
|
||||
int code;
|
||||
while((code = getcode()) > -1)
|
||||
{
|
||||
if(code == CLEAR)
|
||||
{
|
||||
// Clear the dictionary.
|
||||
memset(prefix, 0, 256 * sizeof(unsigned short));
|
||||
clear_flg = 1;
|
||||
free_ent = FIRST - 1;
|
||||
if((code = getcode()) == -1) break;
|
||||
}
|
||||
incode = code;
|
||||
// Handle KwKwK case.
|
||||
if(code >= free_ent)
|
||||
{
|
||||
if(code > free_ent)
|
||||
{
|
||||
// Error: invalid code.
|
||||
break;
|
||||
}
|
||||
*stackp++ = finchar;
|
||||
code = oldcode;
|
||||
}
|
||||
// Decode the string by traversing the dictionary.
|
||||
while(code >= 256)
|
||||
{
|
||||
*stackp++ = suffix[code];
|
||||
code = prefix[code];
|
||||
}
|
||||
*stackp++ = finchar = suffix[code];
|
||||
|
||||
// Write the decoded string to the output buffer.
|
||||
do {
|
||||
if(out_pos < *out_len) { out_buf[out_pos++] = *--stackp; }
|
||||
else
|
||||
{
|
||||
stackp--; // Discard if output buffer is full.
|
||||
}
|
||||
} while(stackp > stack);
|
||||
|
||||
// Add the new string to the dictionary.
|
||||
if((code = free_ent) < max_maxcode)
|
||||
{
|
||||
prefix[code] = (unsigned short)oldcode;
|
||||
suffix[code] = finchar;
|
||||
free_ent = code + 1;
|
||||
}
|
||||
oldcode = incode;
|
||||
}
|
||||
|
||||
// Clean up and return.
|
||||
*out_len = out_pos;
|
||||
free(prefix);
|
||||
free(suffix);
|
||||
free(stack);
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Decompresses squashed data.
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
                                                size_t *out_len)
{
    // Squashing: LZW with codes up to SQBITS wide and no header byte; the output is used as is.
    const int squash_mode = 1;

    return arc_decompress_lzw(in_buf, in_len, out_buf, out_len, squash_mode);
}
|
||||
|
||||
// Decompresses crunched data.
|
||||
// Decompresses dynamically crunched data: variable-width LZW followed by non-repeat unpacking.
//
// in_buf/in_len: compressed input (starts with the max-bits header byte).
// out_buf:       destination buffer.
// out_len:       in: capacity of out_buf; out: number of bytes written.
//
// Returns 0 on success, -1 on NULL arguments, on a capacity too large to size the scratch
// buffer, on allocation failure, or when either decoding stage fails.
AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
                                                        unsigned char *out_buf, size_t *out_len)
{
    // out_len is dereferenced below to size the scratch buffer, so validate before touching it.
    if(!in_buf || !out_buf || !out_len) return -1;

    // Doubling must not wrap around size_t, or the scratch buffer would be undersized.
    if(*out_len > SIZE_MAX / 2) return -1;

    // Scratch buffer for the intermediate (still packed) data; twice the output capacity is a heuristic.
    size_t temp_len = *out_len * 2;

    // Never request 0 bytes: malloc(0) may legitimately return NULL and would look like a failure.
    unsigned char *temp_buf = malloc(temp_len ? temp_len : 1);
    if(!temp_buf) return -1;

    // Decompress crunched data.
    int result = arc_decompress_lzw(in_buf, in_len, temp_buf, &temp_len, 0);

    // Decompress non-repeat packing.
    if(result == 0) { result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len); }

    free(temp_buf);
    return result;
}
|
||||
78
arc/pack.c
Normal file
78
arc/pack.c
Normal file
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
* Copyright © 2018-2019 David Ryskalczyk
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <string.h>
|
||||
#include "../library.h"
|
||||
|
||||
#define DLE 0x90 // Data Link Escape character, used as a repeat marker.
|
||||
|
||||
// Decompresses data using non-repeat packing.
|
||||
// This algorithm encodes runs of identical bytes.
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len)
{
    // Reject NULL arguments.
    if(!in_buf || !out_buf || !out_len) { return -1; }

    const size_t  capacity  = *out_len; // Room available in out_buf.
    size_t        produced  = 0;        // Bytes written so far.
    int           after_dle = 0;        // Set while the previous input byte was a DLE marker.
    unsigned char previous  = 0;        // Last literal byte copied to the output.

    // Stop when the input is exhausted or the output is full.
    for(size_t consumed = 0; consumed < in_len && produced < capacity; consumed++)
    {
        const unsigned char byte = in_buf[consumed];

        if(!after_dle)
        {
            if(byte == DLE)
                after_dle = 1; // The next byte is a repeat count.
            else
            {
                previous            = byte;
                out_buf[produced++] = byte;
            }
            continue;
        }

        after_dle = 0;

        // A zero count stands for a literal DLE byte (it does not become the repeat byte).
        if(byte == 0)
        {
            out_buf[produced++] = DLE;
            continue;
        }

        // The count includes the copy already written, so emit count - 1 more.
        for(unsigned char extra = (unsigned char)(byte - 1); extra != 0 && produced < capacity; extra--)
        {
            out_buf[produced++] = previous;
        }
    }

    // Report how many bytes were written.
    *out_len = produced;
    return 0;
}
|
||||
148
arc/squeeze.c
Normal file
148
arc/squeeze.c
Normal file
@@ -0,0 +1,148 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
* Copyright © 2018-2019 David Ryskalczyk
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "../library.h"
|
||||
|
||||
#define SPEOF 256 // Special end-of-file token.
|
||||
#define NUMVALS 257 // Number of values in the Huffman tree (256 chars + SPEOF).
|
||||
|
||||
// Node structure for the Huffman decoding tree.
|
||||
struct nd
{
    // child[bit] is the link taken for an input bit of 0 or 1. Non-negative links index
    // another node; negative links are leaves that encode a value as -(value + 1).
    int child[2];
};
|
||||
|
||||
// Static variables for the decompression state.
|
||||
static struct nd nodes[NUMVALS]; // The Huffman tree.
|
||||
static int numnodes; // Number of nodes in the tree.
|
||||
|
||||
static int bpos; // Bit position in the current byte.
|
||||
static unsigned char curin; // Current byte being read.
|
||||
|
||||
// Pointers for buffer management.
|
||||
static const unsigned char *in_buf_ptr;
|
||||
static size_t in_len_rem;
|
||||
static unsigned char *out_buf_ptr;
|
||||
static size_t out_len_rem;
|
||||
|
||||
// Reads a byte from the input buffer.
|
||||
static int get_byte()
|
||||
{
|
||||
if(in_len_rem == 0) { return EOF; }
|
||||
in_len_rem--;
|
||||
return *in_buf_ptr++;
|
||||
}
|
||||
|
||||
static int arc_decompress_huffman(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len)
|
||||
{
|
||||
// Basic validation of pointers.
|
||||
if(!in_buf || !out_buf || !out_len) { return -1; }
|
||||
|
||||
// Initialize buffer pointers and lengths.
|
||||
in_buf_ptr = in_buf;
|
||||
in_len_rem = in_len;
|
||||
out_buf_ptr = out_buf;
|
||||
out_len_rem = *out_len;
|
||||
|
||||
bpos = 99; // Force initial read.
|
||||
|
||||
// Read the number of nodes in the Huffman tree.
|
||||
if(in_len_rem < 2) return -1;
|
||||
numnodes = get_byte();
|
||||
numnodes |= get_byte() << 8;
|
||||
|
||||
if(numnodes < 0 || numnodes >= NUMVALS)
|
||||
{
|
||||
return -1; // Invalid tree.
|
||||
}
|
||||
|
||||
// ARC: initialize for possible empty tree (SPEOF only)
|
||||
nodes[0].child[0] = -(SPEOF + 1);
|
||||
nodes[0].child[1] = -(SPEOF + 1);
|
||||
|
||||
// Read the Huffman tree from the input buffer, sign-extend 16-bit values
|
||||
for(int i = 0; i < numnodes; ++i)
|
||||
{
|
||||
if(in_len_rem < 4) return -1;
|
||||
uint8_t b0 = get_byte();
|
||||
uint8_t b1 = get_byte();
|
||||
uint8_t b2 = get_byte();
|
||||
uint8_t b3 = get_byte();
|
||||
nodes[i].child[0] = (int16_t)((b0) | (b1 << 8));
|
||||
nodes[i].child[1] = (int16_t)((b2) | (b3 << 8));
|
||||
}
|
||||
|
||||
size_t written = 0;
|
||||
// bpos is already 99 from init
|
||||
|
||||
while(written < *out_len)
|
||||
{
|
||||
int i = 0;
|
||||
// follow bit stream in tree to a leaf
|
||||
while(i >= 0)
|
||||
{
|
||||
if(++bpos > 7)
|
||||
{
|
||||
int c = get_byte();
|
||||
if(c == EOF)
|
||||
{
|
||||
*out_len = written;
|
||||
return 0; // End of input
|
||||
}
|
||||
curin = c;
|
||||
bpos = 0;
|
||||
// move a level deeper in tree
|
||||
i = nodes[i].child[curin & 1];
|
||||
}
|
||||
else { i = nodes[i].child[1 & (curin >>= 1)]; }
|
||||
}
|
||||
|
||||
// decode fake node index to original data value
|
||||
int value = -(i + 1);
|
||||
|
||||
if(value == SPEOF)
|
||||
{
|
||||
break; // End of data
|
||||
}
|
||||
|
||||
*out_buf_ptr++ = value;
|
||||
written++;
|
||||
}
|
||||
|
||||
*out_len = written;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Decompresses data using Huffman squeezing.
|
||||
// Decompresses squeezed data: Huffman decoding followed by non-repeat unpacking.
//
// in_buf/in_len: compressed input.
// out_buf:       destination buffer.
// out_len:       in: capacity of out_buf; out: number of bytes written.
//
// Returns 0 on success, -1 on NULL arguments, on a capacity too large to size the scratch
// buffer, on allocation failure, or when either decoding stage fails.
AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
                                                 size_t *out_len)
{
    // out_len is dereferenced below to size the scratch buffer, so validate before touching it.
    if(!in_buf || !out_buf || !out_len) return -1;

    // Doubling must not wrap around size_t, or the scratch buffer would be undersized.
    if(*out_len > SIZE_MAX / 2) return -1;

    // Scratch buffer for the intermediate (still packed) data; twice the output capacity is a heuristic.
    size_t temp_len = *out_len * 2;

    // Never request 0 bytes: malloc(0) may legitimately return NULL and would look like a failure.
    unsigned char *temp_buf = malloc(temp_len ? temp_len : 1);
    if(!temp_buf) return -1;

    int result = arc_decompress_huffman(in_buf, in_len, temp_buf, &temp_len);
    if(result == 0) { result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len); }

    free(temp_buf);
    return result;
}
|
||||
4
build.sh
4
build.sh
@@ -179,7 +179,7 @@ docker/dockcross-win-arm cmake -DCMAKE_BUILD_TYPE=Release -DAARU_BUILD_PACKAGE=1
|
||||
sed -e 's/\-soname,libAaru\.Compression\.Native\.so//g' ./CMakeFiles/Aaru.Compression.Native.dir/link.txt > link.txt
|
||||
mv link.txt ./CMakeFiles/Aaru.Compression.Native.dir/link.txt
|
||||
docker/dockcross-win-arm make Aaru.Compression.Native
|
||||
mv libAaru.Compression.Native.so runtimes/win-arm/native/libAaru.Compression.Native.dll
|
||||
mv libAaru.Compression.Native.dll runtimes/win-arm/native/
|
||||
|
||||
## Windows (ARM64)
|
||||
# Detected system processor: aarch64
|
||||
@@ -191,7 +191,7 @@ docker/dockcross-win-arm64 cmake -DCMAKE_BUILD_TYPE=Release -DAARU_BUILD_PACKAGE
|
||||
sed -e 's/\-soname,libAaru\.Compression\.Native\.so//g' ./CMakeFiles/Aaru.Compression.Native.dir/link.txt > link.txt
|
||||
mv link.txt ./CMakeFiles/Aaru.Compression.Native.dir/link.txt
|
||||
docker/dockcross-win-arm64 make Aaru.Compression.Native
|
||||
mv libAaru.Compression.Native.so runtimes/win-arm64/native/libAaru.Compression.Native.dll
|
||||
mv libAaru.Compression.Native.dll runtimes/win-arm64/native/
|
||||
|
||||
## Windows (AMD64)
|
||||
# Detected system processor: x86_64
|
||||
|
||||
34
library.h
34
library.h
@@ -112,8 +112,42 @@ AARU_EXPORT int AARU_CALL LZD_FeedNative(void *ctx, const unsigned char *data, s
|
||||
|
||||
AARU_EXPORT int AARU_CALL LZD_DrainNative(void *ctx, unsigned char *outBuf, size_t outBufLen, size_t *produced);
|
||||
|
||||
AARU_EXPORT int AARU_CALL lh5_decompress(const uint8_t *in_buf, size_t in_len, uint8_t *out_buf, size_t *out_len);
|
||||
|
||||
#define AARU_CHECKUMS_NATIVE_VERSION 0x06000089
|
||||
|
||||
AARU_EXPORT uint64_t AARU_CALL AARU_get_acn_version();
|
||||
|
||||
// ARC method 3: Stored with non-repeat packing
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len);
|
||||
// ARC method 4: Huffman squeezing
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len);
|
||||
// ARC Method 5: LZW (crunching)
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_crunch(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len);
|
||||
// ARC Method 6: LZW with non-repeat packing (crunching)
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_buf, size_t in_len,
|
||||
unsigned char *out_buf, size_t *out_len);
|
||||
// ARC Method 7: LZW with non-repeat packing and new hash (Crunching)
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len,
|
||||
unsigned char *out_buf, size_t *out_len);
|
||||
|
||||
// ARC Method 8: Dynamic LZW (crunching)
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
|
||||
unsigned char *out_buf, size_t *out_len);
|
||||
|
||||
// ARC Method 9: Dynamic LZW with 13 bits (squashing)
|
||||
AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len);
|
||||
|
||||
// ARC/PAK Method 10: LZW (crush) (unsure why it's different from the others, but even XADMaster uses different code paths)
|
||||
AARU_EXPORT int AARU_CALL pak_decompress_crush(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len);
|
||||
|
||||
// ARC/PAK Method 11: LZSS (distill)
|
||||
AARU_EXPORT int AARU_CALL pak_decompress_distill(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len);
|
||||
|
||||
#endif // AARU_COMPRESSION_NATIVE_LIBRARY_H
|
||||
|
||||
166
pak/bitstream.c
Normal file
166
pak/bitstream.c
Normal file
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
* bitstream.c - Bit stream input implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "bitstream.h"
|
||||
|
||||
// Binds the stream to `data` (`length` bytes) and resets it to the start with an empty bit buffer.
void bitstream_init(BitStream *bs, const uint8_t *data, size_t length)
{
    // Source buffer.
    bs->data   = data;
    bs->length = length;

    // Read cursor and end-of-input flag.
    bs->pos = 0;
    bs->eof = false;

    // No bits buffered yet.
    bs->bitcount  = 0;
    bs->bitbuffer = 0;
}
|
||||
|
||||
// Top up the MSB-first bit buffer with whole input bytes.
//
// Bytes are appended directly below the bits that are still pending, until
// at least 24 bits are buffered or the input runs out. EOF is flagged only
// when the input is exhausted and no buffered bit is left.
static void bitstream_fill_buffer(BitStream *bs)
{
    for(; bs->bitcount < 24 && bs->pos < bs->length; bs->pos++)
    {
        const uint32_t next = bs->data[bs->pos];

        bs->bitbuffer |= next << (24 - bs->bitcount);
        bs->bitcount += 8;
    }

    if(bs->bitcount == 0 && bs->pos >= bs->length) bs->eof = true;
}
|
||||
|
||||
// Read one bit, most significant bit of each byte first.
//
// Returns the bit (0 or 1). Once the input is exhausted the EOF flag is set
// and 0 is returned.
uint32_t bitstream_read_bit(BitStream *bs)
{
    if(bs->eof) return 0;

    if(bs->bitcount == 0)
    {
        bitstream_fill_buffer(bs);

        // Still nothing buffered: the input is used up.
        if(bs->bitcount == 0)
        {
            bs->eof = true;
            return 0;
        }
    }

    // The next bit always sits at the top of the 32-bit buffer.
    const uint32_t top = bs->bitbuffer >> 31;

    bs->bitbuffer <<= 1;
    bs->bitcount -= 1;

    return top;
}
|
||||
|
||||
// Read one bit, least significant bit of each byte first.
//
// The buffer holds at most one byte at a time in this mode. Returns the bit
// (0 or 1); once the input is exhausted the EOF flag is set and 0 is returned.
uint32_t bitstream_read_bit_le(BitStream *bs)
{
    if(bs->eof) return 0;

    if(bs->bitcount == 0)
    {
        if(bs->pos >= bs->length)
        {
            bs->eof = true;
            return 0;
        }

        // Load the next byte; its bits are handed out from bit 0 upwards.
        bs->bitbuffer = bs->data[bs->pos];
        bs->pos++;
        bs->bitcount = 8;
    }

    const uint32_t low = bs->bitbuffer & 1u;

    bs->bitbuffer >>= 1;
    --bs->bitcount;

    return low;
}
|
||||
|
||||
// Read `count` bits MSB first and return them as an integer whose most
// significant bit is the first bit read. A count of zero or less reads nothing
// and yields 0.
uint32_t bitstream_read_bits(BitStream *bs, int count)
{
    uint32_t value = 0;

    for(int left = count; left > 0; left--)
    {
        value <<= 1;
        value |= bitstream_read_bit(bs);
    }

    return value;
}
|
||||
|
||||
// Read `count` bits LSB first and return them as an integer whose bit 0 is
// the first bit read.
//
// A count of zero or less reads nothing and yields 0. The result only has
// room for 32 bits: when more are requested the extra bits are still consumed
// from the stream but discarded, instead of being shifted by 32 or more
// positions (which is undefined behaviour in C). Callers pass widths taken
// from untrusted archive headers, so this has to be safe for any count.
uint32_t bitstream_read_bits_le(BitStream *bs, int count)
{
    uint32_t result = 0;

    for(int i = 0; i < count; i++)
    {
        const uint32_t bit = bitstream_read_bit_le(bs);

        if(i < 32) result |= bit << i;
    }

    return result;
}
|
||||
|
||||
// Return the next `count` bits (MSB first) without consuming them.
//
// The bits are read normally and the complete reader state, EOF flag
// included, is then put back as it was before the call.
uint32_t bitstream_peek_bits(BitStream *bs, int count)
{
    const BitStream snapshot = *bs;
    const uint32_t  peeked   = bitstream_read_bits(bs, count);

    *bs = snapshot;

    return peeked;
}
|
||||
|
||||
// Return the next `count` bits (LSB first) without consuming them.
//
// The bits are read normally and the complete reader state, EOF flag
// included, is then put back as it was before the call.
uint32_t bitstream_peek_bits_le(BitStream *bs, int count)
{
    const BitStream snapshot = *bs;
    const uint32_t  peeked   = bitstream_read_bits_le(bs, count);

    *bs = snapshot;

    return peeked;
}
|
||||
|
||||
// Consume `count` bits (MSB first) and throw the value away.
void bitstream_skip_bits(BitStream *bs, int count)
{
    (void)bitstream_read_bits(bs, count);
}
|
||||
|
||||
// Consume `count` bits (LSB first) and throw the value away.
void bitstream_skip_bits_le(BitStream *bs, int count)
{
    (void)bitstream_read_bits_le(bs, count);
}
|
||||
|
||||
// Read the byte at the current byte position and advance past it.
//
// This works on the byte position only; bits that are already sitting in the
// bit buffer are neither used nor discarded. Returns 0 and sets the EOF flag
// when no byte is left.
uint8_t bitstream_read_byte(BitStream *bs)
{
    if(bs->pos < bs->length) return bs->data[bs->pos++];

    bs->eof = true;
    return 0;
}
|
||||
|
||||
// Read a 16-bit little-endian integer at the current byte position and
// advance past it.
//
// Like bitstream_read_byte this ignores the bit buffer. When fewer than two
// bytes are left the EOF flag is set, the position is not moved and 0 is
// returned.
uint16_t bitstream_read_uint16_le(BitStream *bs)
{
    if(bs->pos + 1 >= bs->length)
    {
        bs->eof = true;
        return 0;
    }

    const uint8_t low  = bs->data[bs->pos];
    const uint8_t high = bs->data[bs->pos + 1];

    bs->pos += 2;

    return (uint16_t)(low | (high << 8));
}
|
||||
|
||||
// Report whether a previous read has run past the end of the input.
bool bitstream_eof(BitStream *bs)
{
    const bool reached_end = bs->eof;

    return reached_end;
}
|
||||
75
pak/bitstream.h
Normal file
75
pak/bitstream.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* bitstream.h - Bit stream input implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef BITSTREAM_H
|
||||
#define BITSTREAM_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// State of a bit reader over a caller-owned byte buffer.
struct BitStream
{
    const uint8_t *data;      // input bytes (not owned by the reader)
    size_t         length;    // number of bytes available at data
    size_t         pos;       // index of the next byte to load
    uint32_t       bitbuffer; // bits loaded from data but not consumed yet
    int            bitcount;  // number of valid bits in bitbuffer
    bool           eof;       // set once a read ran past the end of data
};

typedef struct BitStream BitStream;
|
||||
|
||||
// Initialize bit stream
|
||||
void bitstream_init(BitStream *bs, const uint8_t *data, size_t length);
|
||||
|
||||
// Read a single bit (MSB first)
|
||||
uint32_t bitstream_read_bit(BitStream *bs);
|
||||
|
||||
// Read a single bit (LSB first)
|
||||
uint32_t bitstream_read_bit_le(BitStream *bs);
|
||||
|
||||
// Read multiple bits (MSB first)
|
||||
uint32_t bitstream_read_bits(BitStream *bs, int count);
|
||||
|
||||
// Read multiple bits (LSB first)
|
||||
uint32_t bitstream_read_bits_le(BitStream *bs, int count);
|
||||
|
||||
// Peek at bits without consuming them (MSB first)
|
||||
uint32_t bitstream_peek_bits(BitStream *bs, int count);
|
||||
|
||||
// Peek at bits without consuming them (LSB first)
|
||||
uint32_t bitstream_peek_bits_le(BitStream *bs, int count);
|
||||
|
||||
// Skip previously peeked bits (MSB first)
|
||||
void bitstream_skip_bits(BitStream *bs, int count);
|
||||
|
||||
// Skip previously peeked bits (LSB first)
|
||||
void bitstream_skip_bits_le(BitStream *bs, int count);
|
||||
|
||||
// Read a byte
|
||||
uint8_t bitstream_read_byte(BitStream *bs);
|
||||
|
||||
// Read a 16-bit little endian integer
|
||||
uint16_t bitstream_read_uint16_le(BitStream *bs);
|
||||
|
||||
// Check if end of stream reached
|
||||
bool bitstream_eof(BitStream *bs);
|
||||
|
||||
#endif /* BITSTREAM_H */
|
||||
219
pak/crush.c
Normal file
219
pak/crush.c
Normal file
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
* arc_crush.c - ARC Crush decompression algorithm
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "../library.h"
|
||||
#include "bitstream.h"
|
||||
#include "lzw.h"
|
||||
|
||||
int pak_decompress_crush_internal(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len)
|
||||
{
|
||||
if(!in_buf || !out_buf || !out_len || in_len == 0) { return -1; }
|
||||
|
||||
BitStream bs;
|
||||
bitstream_init(&bs, (const uint8_t *)in_buf, in_len);
|
||||
|
||||
LZW *lzw = lzw_alloc(8192, 1);
|
||||
if(!lzw) { return -1; }
|
||||
|
||||
// Initialize state
|
||||
int symbolsize = 1;
|
||||
int nextsizebump = 2;
|
||||
bool useliteralbit = true;
|
||||
|
||||
int numrecentstrings = 0;
|
||||
int ringindex = 0;
|
||||
bool stringring[500];
|
||||
memset(stringring, 0, sizeof(stringring));
|
||||
|
||||
int usageindex = 0x101;
|
||||
uint8_t usage[8192];
|
||||
memset(usage, 0, sizeof(usage));
|
||||
|
||||
int currbyte = 0;
|
||||
uint8_t buffer[8192];
|
||||
size_t outpos = 0;
|
||||
size_t max_output = *out_len;
|
||||
|
||||
while(!bitstream_eof(&bs) && outpos < max_output)
|
||||
{
|
||||
if(!currbyte)
|
||||
{
|
||||
// Read the next symbol. How depends on the mode we are operating in.
|
||||
int symbol;
|
||||
if(useliteralbit)
|
||||
{
|
||||
// Use codes prefixed by a bit that selects literal or string codes.
|
||||
// Literals are always 8 bits, strings vary.
|
||||
if(bitstream_read_bit_le(&bs)) { symbol = bitstream_read_bits_le(&bs, symbolsize) + 256; }
|
||||
else { symbol = bitstream_read_bits_le(&bs, 8); }
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use same-length codes for both literals and strings.
|
||||
// Due to an optimization quirk in the original decruncher,
|
||||
// literals have their bits inverted.
|
||||
symbol = bitstream_read_bits_le(&bs, symbolsize);
|
||||
if(symbol < 0x100) symbol ^= 0xff;
|
||||
}
|
||||
|
||||
// Code 0x100 is the EOF code.
|
||||
if(symbol == 0x100) { break; }
|
||||
|
||||
// Walk through the LZW tree, and set the usage count of the current
|
||||
// string and all its parents to 4. This is not necessary for literals,
|
||||
// but we do it anyway for simplicity.
|
||||
LZWTreeNode *nodes = lzw_symbols(lzw);
|
||||
int marksymbol = symbol;
|
||||
while(marksymbol >= 0)
|
||||
{
|
||||
if(marksymbol < 8192) { usage[marksymbol] = 4; }
|
||||
marksymbol = nodes[marksymbol].parent;
|
||||
}
|
||||
|
||||
// Adjust the count of recent strings versus literals.
|
||||
// Use a ring buffer of length 500 as a window to keep track
|
||||
// of how many strings have been encountered lately.
|
||||
|
||||
// First, decrease the count if a string leaves the window.
|
||||
if(stringring[ringindex]) numrecentstrings--;
|
||||
|
||||
// Then store the current type of symbol in the window, and
|
||||
// increase the count if the current symbol is a string.
|
||||
if(symbol < 0x100) { stringring[ringindex] = false; }
|
||||
else
|
||||
{
|
||||
stringring[ringindex] = true;
|
||||
numrecentstrings++;
|
||||
}
|
||||
|
||||
// Move the window forward.
|
||||
ringindex = (ringindex + 1) % 500;
|
||||
|
||||
// Check the number of strings. If there have been many literals
|
||||
// lately, bit-prefixed codes should be used. If we need to change
|
||||
// mode, re-calculate the point where we increase the code length.
|
||||
bool manyliterals = numrecentstrings < 375;
|
||||
if(manyliterals != useliteralbit)
|
||||
{
|
||||
useliteralbit = manyliterals;
|
||||
nextsizebump = 1 << symbolsize;
|
||||
if(!useliteralbit) nextsizebump -= 0x100;
|
||||
}
|
||||
|
||||
// Update the LZW tree.
|
||||
if(!lzw_symbol_list_full(lzw))
|
||||
{
|
||||
// If there is space in the tree, just add a new string as usual.
|
||||
if(lzw_next_symbol(lzw, symbol) != LZW_NO_ERROR)
|
||||
{
|
||||
lzw_free(lzw);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Set the usage count of the newly created entry.
|
||||
int count = lzw_symbol_count(lzw);
|
||||
if(count > 0 && count - 1 < 8192) { usage[count - 1] = 2; }
|
||||
}
|
||||
else
|
||||
{
|
||||
// If the tree is full, find a less-used symbol, and replace it.
|
||||
int minindex = 0, minusage = INT_MAX;
|
||||
int index = usageindex;
|
||||
do {
|
||||
index++;
|
||||
if(index == 8192) index = 0x101;
|
||||
|
||||
if(usage[index] < minusage)
|
||||
{
|
||||
minindex = index;
|
||||
minusage = usage[index];
|
||||
}
|
||||
|
||||
usage[index]--;
|
||||
if(usage[index] == 0) break;
|
||||
} while(index != usageindex);
|
||||
|
||||
usageindex = index;
|
||||
|
||||
if(lzw_replace_symbol(lzw, minindex, symbol) != LZW_NO_ERROR)
|
||||
{
|
||||
lzw_free(lzw);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Set the usage count of the replaced entry.
|
||||
if(minindex < 8192) { usage[minindex] = 2; }
|
||||
}
|
||||
|
||||
// Extract the data to output.
|
||||
currbyte = lzw_reverse_output_to_buffer(lzw, buffer);
|
||||
|
||||
// Check if we need to increase the code size. The point at which
|
||||
// to increase varies depending on the coding mode.
|
||||
if(lzw_symbol_count(lzw) - 257 >= nextsizebump)
|
||||
{
|
||||
symbolsize++;
|
||||
nextsizebump = 1 << symbolsize;
|
||||
if(!useliteralbit) nextsizebump -= 0x100;
|
||||
}
|
||||
}
|
||||
|
||||
if(currbyte > 0 && outpos < max_output) { out_buf[outpos++] = (char)buffer[--currbyte]; }
|
||||
else if(currbyte == 0)
|
||||
{
|
||||
// No more bytes in buffer, continue to next symbol
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Output buffer full
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
lzw_free(lzw);
|
||||
*out_len = outpos;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Decompress ARC/PAK method 10 (crush) data.
//
// in_buf/in_len - compressed input
// out_buf       - receives the decompressed bytes
// out_len       - in: capacity of out_buf; out: set by arc_decompress_pack
//
// Decoding runs in two stages: the LZW stage writes into a temporary buffer,
// which is then handed to arc_decompress_pack to produce the final output.
//
// Returns the result of the failing stage, or -1 when out_len is NULL, the
// temporary buffer size would overflow, or memory cannot be allocated.
AARU_EXPORT int AARU_CALL pak_decompress_crush(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
                                               size_t *out_len)
{
    if(!out_len) return -1;

    // The intermediate data is given twice the room of the final output.
    // This is a heuristic, so guard the multiplication against wrap-around.
    if(*out_len > SIZE_MAX / 2) return -1;
    size_t temp_len = *out_len * 2;

    // Never request zero bytes: whether malloc(0) returns NULL is up to the
    // platform, and NULL would be mistaken for an allocation failure.
    unsigned char *temp_buf = malloc(temp_len > 0 ? temp_len : 1);
    if(!temp_buf) return -1;

    // First stage: LZW.
    int result = pak_decompress_crush_internal(in_buf, in_len, temp_buf, &temp_len);

    // Second stage: unpack the intermediate data into the caller's buffer.
    if(result == 0) { result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len); }

    free(temp_buf);
    return result;
}
|
||||
182
pak/distill.c
Normal file
182
pak/distill.c
Normal file
@@ -0,0 +1,182 @@
|
||||
/*
|
||||
* arc_distill.c - ARC Distill decompression algorithm
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "../library.h"
|
||||
#include "bitstream.h"
|
||||
#include "prefixcode.h"
|
||||
|
||||
// Bit length of the prefix code assigned to each of the 64 offset symbols.
static const int offset_lengths[64] = {
    3, 4, 4, 4, 5, 5, 5, 5, //
    5, 5, 5, 5, 6, 6, 6, 6, //
    6, 6, 6, 6, 6, 6, 6, 6, //
    7, 7, 7, 7, 7, 7, 7, 7, //
    7, 7, 7, 7, 7, 7, 7, 7, //
    7, 7, 7, 7, 7, 7, 7, 7, //
    8, 8, 8, 8, 8, 8, 8, 8, //
    8, 8, 8, 8, 8, 8, 8, 8, //
};

// Prefix code of each offset symbol; entry i is offset_lengths[i] bits long
// and is registered low bit first.
static const int offset_codes[64] = {
    0x00, 0x02, 0x04, 0x0c, 0x01, 0x06, 0x0a, 0x0e, //
    0x11, 0x16, 0x1a, 0x1e, 0x05, 0x09, 0x0d, 0x15, //
    0x19, 0x1d, 0x25, 0x29, 0x2d, 0x35, 0x39, 0x3d, //
    0x03, 0x07, 0x0b, 0x13, 0x17, 0x1b, 0x23, 0x27, //
    0x2b, 0x33, 0x37, 0x3b, 0x43, 0x47, 0x4b, 0x53, //
    0x57, 0x5b, 0x63, 0x67, 0x6b, 0x73, 0x77, 0x7b, //
    0x0f, 0x1f, 0x2f, 0x3f, 0x4f, 0x5f, 0x6f, 0x7f, //
    0x8f, 0x9f, 0xaf, 0xbf, 0xcf, 0xdf, 0xef, 0xff, //
};
|
||||
|
||||
static void build_code_from_tree(PrefixCode *code, int *tree, int node, int numnodes, int depth)
|
||||
{
|
||||
if(depth > 64)
|
||||
{
|
||||
// Too deep - error
|
||||
return;
|
||||
}
|
||||
|
||||
if(node >= numnodes) { prefix_code_make_leaf_with_value(code, node - numnodes); }
|
||||
else
|
||||
{
|
||||
prefix_code_start_zero_branch(code);
|
||||
build_code_from_tree(code, tree, tree[node], numnodes, depth + 1);
|
||||
prefix_code_start_one_branch(code);
|
||||
build_code_from_tree(code, tree, tree[node + 1], numnodes, depth + 1);
|
||||
prefix_code_finish_branches(code);
|
||||
}
|
||||
}
|
||||
|
||||
AARU_EXPORT int AARU_CALL pak_decompress_distill(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
||||
size_t *out_len)
|
||||
{
|
||||
if(!in_buf || !out_buf || !out_len || in_len == 0) { return -1; }
|
||||
|
||||
BitStream bs;
|
||||
bitstream_init(&bs, (const uint8_t *)in_buf, in_len);
|
||||
|
||||
// Read header information
|
||||
int numnodes = bitstream_read_uint16_le(&bs);
|
||||
int codelength = bitstream_read_byte(&bs);
|
||||
|
||||
if(numnodes < 2 || numnodes > 0x274) { return -1; }
|
||||
|
||||
// Read tree nodes
|
||||
int *nodes = malloc(numnodes * sizeof(int));
|
||||
if(!nodes) { return -1; }
|
||||
|
||||
for(int i = 0; i < numnodes; i++) { nodes[i] = bitstream_read_bits_le(&bs, codelength); }
|
||||
|
||||
// Build main code tree
|
||||
PrefixCode *maincode = prefix_code_alloc();
|
||||
if(!maincode)
|
||||
{
|
||||
free(nodes);
|
||||
return -1;
|
||||
}
|
||||
|
||||
prefix_code_start_building_tree(maincode);
|
||||
build_code_from_tree(maincode, nodes, numnodes - 2, numnodes, 0);
|
||||
|
||||
free(nodes);
|
||||
|
||||
// Build offset code tree
|
||||
PrefixCode *offsetcode = prefix_code_alloc();
|
||||
if(!offsetcode)
|
||||
{
|
||||
prefix_code_free(maincode);
|
||||
return -1;
|
||||
}
|
||||
|
||||
for(int i = 0; i < 0x40; i++)
|
||||
{
|
||||
if(prefix_code_add_value_low_bit_first(offsetcode, i, offset_codes[i], offset_lengths[i]) != PREFIX_CODE_OK)
|
||||
{
|
||||
prefix_code_free(maincode);
|
||||
prefix_code_free(offsetcode);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
// LZSS decompression
|
||||
uint8_t window[8192];
|
||||
memset(window, 0, sizeof(window));
|
||||
int windowpos = 0;
|
||||
size_t outpos = 0;
|
||||
size_t max_output = *out_len;
|
||||
|
||||
while(!bitstream_eof(&bs) && outpos < max_output)
|
||||
{
|
||||
int symbol = prefix_code_read_symbol_le(&bs, maincode);
|
||||
if(symbol < 0) break;
|
||||
|
||||
if(symbol < 256)
|
||||
{
|
||||
// Literal byte
|
||||
if(outpos < max_output) { out_buf[outpos++] = (char)symbol; }
|
||||
window[windowpos] = symbol;
|
||||
windowpos = (windowpos + 1) & 0x1fff;
|
||||
}
|
||||
else if(symbol == 256)
|
||||
{
|
||||
// End of stream
|
||||
break;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Match
|
||||
int length = symbol - 0x101 + 3;
|
||||
int offsetsymbol = prefix_code_read_symbol_le(&bs, offsetcode);
|
||||
if(offsetsymbol < 0) break;
|
||||
|
||||
int extralength;
|
||||
if(outpos >= 0x1000 - 0x3c)
|
||||
extralength = 7;
|
||||
else if(outpos >= 0x800 - 0x3c)
|
||||
extralength = 6;
|
||||
else if(outpos >= 0x400 - 0x3c)
|
||||
extralength = 5;
|
||||
else if(outpos >= 0x200 - 0x3c)
|
||||
extralength = 4;
|
||||
else if(outpos >= 0x100 - 0x3c)
|
||||
extralength = 3;
|
||||
else if(outpos >= 0x80 - 0x3c)
|
||||
extralength = 2;
|
||||
else if(outpos >= 0x40 - 0x3c)
|
||||
extralength = 1;
|
||||
else
|
||||
extralength = 0;
|
||||
|
||||
int extrabits = bitstream_read_bits_le(&bs, extralength);
|
||||
int offset = (offsetsymbol << extralength) + extrabits + 1;
|
||||
|
||||
// Copy match
|
||||
for(int i = 0; i < length; i++)
|
||||
{
|
||||
int sourcepos = (windowpos - offset) & 0x1fff;
|
||||
uint8_t byte = window[sourcepos];
|
||||
|
||||
if(outpos < max_output) { out_buf[outpos++] = (char)byte; }
|
||||
|
||||
window[windowpos] = byte;
|
||||
windowpos = (windowpos + 1) & 0x1fff;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
prefix_code_free(maincode);
|
||||
prefix_code_free(offsetcode);
|
||||
|
||||
*out_len = outpos;
|
||||
return 0;
|
||||
}
|
||||
162
pak/lzw.c
Normal file
162
pak/lzw.c
Normal file
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* lzw.c - LZW decompression implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "lzw.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
LZW *lzw_alloc(int maxsymbols, int reservedsymbols)
|
||||
{
|
||||
LZW *self = (LZW *)malloc(sizeof(LZW) + sizeof(LZWTreeNode) * maxsymbols);
|
||||
if(!self) return NULL;
|
||||
|
||||
if(maxsymbols < 256 + reservedsymbols)
|
||||
{
|
||||
free(self);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
self->maxsymbols = maxsymbols;
|
||||
self->reservedsymbols = reservedsymbols;
|
||||
|
||||
self->buffer = NULL;
|
||||
self->buffersize = 0;
|
||||
|
||||
for(int i = 0; i < 256; i++)
|
||||
{
|
||||
self->nodes[i].chr = i;
|
||||
self->nodes[i].parent = -1;
|
||||
}
|
||||
|
||||
lzw_clear_table(self);
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
// Release a decoder created by lzw_alloc together with its scratch buffer.
// Passing NULL is allowed and does nothing.
void lzw_free(LZW *self)
{
    if(self == NULL) return;

    free(self->buffer);
    free(self);
}
|
||||
|
||||
// Reset the dictionary to its initial state: only the literals and the
// reserved codes are defined, and no previous symbol is remembered.
void lzw_clear_table(LZW *self)
{
    self->prevsymbol = -1;
    self->numsymbols = self->reservedsymbols + 256;

    // TODO: technically the starting width depends on reservedsymbols
    self->symbolsize = 9;
}
|
||||
|
||||
// Return the first byte of the string that `symbol` stands for, i.e. the
// character stored in the root of its parent chain.
static uint8_t find_first_byte(LZWTreeNode *nodes, int symbol)
{
    int current = symbol;

    for(int up = nodes[current].parent; up >= 0; up = nodes[current].parent) { current = up; }

    return nodes[current].chr;
}
|
||||
|
||||
// Feed the next code to the decoder and, when there is room, define the new
// dictionary entry "previous string + first byte of this string".
//
// Returns LZW_NO_ERROR on success, LZW_INVALID_CODE_ERROR when the code
// cannot be decoded (negative, beyond the next free slot, or equal to the
// next free slot while the table is full or no previous symbol exists), and
// LZW_TOO_MANY_CODES_ERROR when the code was accepted but the table is full
// so no entry could be added.
//
// Invalid codes leave the decoder state untouched; in particular an
// out-of-range value is never stored as the previous symbol, so later output
// calls cannot index outside the node table.
int lzw_next_symbol(LZW *self, int symbol)
{
    if(symbol < 0) return LZW_INVALID_CODE_ERROR;

    // First code after a reset: nothing to combine it with yet.
    if(self->prevsymbol < 0)
    {
        if(symbol >= self->numsymbols) return LZW_INVALID_CODE_ERROR;
        self->prevsymbol = symbol;
        return LZW_NO_ERROR;
    }

    if(symbol > self->numsymbols) return LZW_INVALID_CODE_ERROR;

    const bool full = lzw_symbol_list_full(self);

    // The code about to be defined can only be used if it can be defined.
    if(full && symbol == self->numsymbols) return LZW_INVALID_CODE_ERROR;

    const int parent = self->prevsymbol;
    self->prevsymbol = symbol;

    if(full) return LZW_TOO_MANY_CODES_ERROR;

    // A code equal to the next free slot refers to the entry being created;
    // its first byte is then the first byte of the previous string.
    const int source      = symbol < self->numsymbols ? symbol : parent;
    const int postfixbyte = find_first_byte(self->nodes, source);

    self->nodes[self->numsymbols].parent = parent;
    self->nodes[self->numsymbols].chr    = (uint8_t)postfixbyte;
    self->numsymbols++;

    // Widen the code size whenever the symbol count reaches a power of two.
    if(!lzw_symbol_list_full(self) && (self->numsymbols & (self->numsymbols - 1)) == 0) { self->symbolsize++; }

    return LZW_NO_ERROR;
}
|
||||
|
||||
// Overwrite dictionary slot `oldsymbol` with the string "previous string +
// first byte of `symbol`", then make `symbol` the previous symbol.
//
// Returns LZW_NO_ERROR on success and LZW_INVALID_CODE_ERROR, without
// touching the table, when `symbol` is not a defined code or `oldsymbol` is
// not a slot of the table.
int lzw_replace_symbol(LZW *self, int oldsymbol, int symbol)
{
    if(symbol < 0 || symbol >= self->numsymbols) return LZW_INVALID_CODE_ERROR;
    if(oldsymbol < 0 || oldsymbol >= self->maxsymbols) return LZW_INVALID_CODE_ERROR;

    // The parent is linked first on purpose: if `symbol` descends from the
    // replaced slot, its first byte is looked up through the new link.
    self->nodes[oldsymbol].parent = self->prevsymbol;
    self->nodes[oldsymbol].chr    = find_first_byte(self->nodes, symbol);

    self->prevsymbol = symbol;

    return LZW_NO_ERROR;
}
|
||||
|
||||
// Number of bytes in the string of the most recent symbol (0 if there is
// none), obtained by counting the nodes on its parent chain.
int lzw_output_length(LZW *self)
{
    int length = 0;

    for(int node = self->prevsymbol; node >= 0; node = self->nodes[node].parent) { length++; }

    return length;
}
|
||||
|
||||
// Write the string of the most recent symbol to `buffer` in natural order
// (first byte first) and return its length. The caller provides a buffer of
// at least lzw_output_length(self) bytes.
int lzw_output_to_buffer(LZW *self, uint8_t *buffer)
{
    const int total = lzw_output_length(self);
    int       slot  = total;

    // The chain is walked from the last byte to the first, so fill backwards.
    for(int node = self->prevsymbol; node >= 0; node = self->nodes[node].parent)
    {
        buffer[--slot] = self->nodes[node].chr;
    }

    return total;
}
|
||||
|
||||
// Write the string of the most recent symbol to `buffer` in reverse order
// (last byte first) and return its length. The caller provides a buffer large
// enough for the whole string.
int lzw_reverse_output_to_buffer(LZW *self, uint8_t *buffer)
{
    int written = 0;

    for(int node = self->prevsymbol; node >= 0; node = self->nodes[node].parent)
    {
        buffer[written++] = self->nodes[node].chr;
    }

    return written;
}
|
||||
83
pak/lzw.h
Normal file
83
pak/lzw.h
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* lzw.h - LZW decompression implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef LZW_H
|
||||
#define LZW_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define LZW_NO_ERROR 0
|
||||
#define LZW_INVALID_CODE_ERROR 1
|
||||
#define LZW_TOO_MANY_CODES_ERROR 2
|
||||
|
||||
// One dictionary entry: the string of `parent` followed by the byte `chr`.
struct LZWTreeNode
{
    uint8_t chr;    // last byte of this entry's string
    int     parent; // index of the prefix entry, negative for a root
};

typedef struct LZWTreeNode LZWTreeNode;
|
||||
|
||||
typedef struct LZW
|
||||
{
|
||||
int numsymbols;
|
||||
int maxsymbols;
|
||||
int reservedsymbols;
|
||||
int prevsymbol;
|
||||
int symbolsize;
|
||||
|
||||
uint8_t *buffer;
|
||||
int buffersize;
|
||||
|
||||
LZWTreeNode nodes[]; // Flexible array member (C99)
|
||||
} LZW;
|
||||
|
||||
// Allocate LZW structure
|
||||
LZW *lzw_alloc(int maxsymbols, int reservedsymbols);
|
||||
|
||||
// Free LZW structure
|
||||
void lzw_free(LZW *self);
|
||||
|
||||
// Clear/reset LZW table
|
||||
void lzw_clear_table(LZW *self);
|
||||
|
||||
// Process next symbol
|
||||
int lzw_next_symbol(LZW *self, int symbol);
|
||||
|
||||
// Replace a symbol
|
||||
int lzw_replace_symbol(LZW *self, int oldsymbol, int symbol);
|
||||
|
||||
// Get output length
|
||||
int lzw_output_length(LZW *self);
|
||||
|
||||
// Output to buffer (normal order)
|
||||
int lzw_output_to_buffer(LZW *self, uint8_t *buffer);
|
||||
|
||||
// Output to buffer (reverse order)
|
||||
int lzw_reverse_output_to_buffer(LZW *self, uint8_t *buffer);
|
||||
|
||||
// Inline helper functions
|
||||
static inline int lzw_symbol_count(LZW *self) { return self->numsymbols; }
|
||||
|
||||
static inline bool lzw_symbol_list_full(LZW *self) { return self->numsymbols == self->maxsymbols; }
|
||||
|
||||
static inline LZWTreeNode *lzw_symbols(LZW *self) { return self->nodes; }
|
||||
|
||||
#endif /* LZW_H */
|
||||
539
pak/prefixcode.c
Normal file
539
pak/prefixcode.c
Normal file
@@ -0,0 +1,539 @@
|
||||
/*
|
||||
* prefixcode.c - Prefix code tree implementation
|
||||
*
|
||||
 * Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "prefixcode.h"
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Safe realloc that frees original pointer on failure
|
||||
// realloc wrapper that does not leave the old block behind on failure.
//
// Returns the resized block. If the allocation fails for a non-zero size the
// original block is freed and NULL is returned, so the caller must not use
// (or free) `ptr` afterwards.
static void *safe_realloc(void *ptr, size_t newsize)
{
    void *resized = realloc(ptr, newsize);

    if(resized != NULL || newsize == 0) return resized;

    free(ptr);
    return NULL;
}
|
||||
|
||||
// Inline helper functions
|
||||
// Accessors for the nodes of the code tree. A node is addressed by its index
// in self->tree and has two branch slots: 0 (left) and 1 (right).

// Address of node `node`.
static inline CodeTreeNode *node_pointer(PrefixCode *self, int node)
{
    return self->tree + node;
}

// Content of branch slot `bit` of `node`.
static inline int branch(PrefixCode *self, int node, int bit)
{
    const CodeTreeNode *entry = node_pointer(self, node);

    return entry->branches[bit];
}

// Store `nextnode` in branch slot `bit` of `node`.
static inline void set_branch(PrefixCode *self, int node, int bit, int nextnode)
{
    CodeTreeNode *entry = node_pointer(self, node);

    entry->branches[bit] = nextnode;
}

// Content of the zero-bit branch.
static inline int left_branch(PrefixCode *self, int node)
{
    return branch(self, node, 0);
}

// Content of the one-bit branch.
static inline int right_branch(PrefixCode *self, int node)
{
    return branch(self, node, 1);
}

// Store `nextnode` in the zero-bit branch.
static inline void set_left_branch(PrefixCode *self, int node, int nextnode)
{
    set_branch(self, node, 0, nextnode);
}

// Store `nextnode` in the one-bit branch.
static inline void set_right_branch(PrefixCode *self, int node, int nextnode)
{
    set_branch(self, node, 1, nextnode);
}
|
||||
|
||||
// Value carried by a leaf; a leaf keeps its value in both branch slots and
// it is read back from the left one.
static inline int leaf_value(PrefixCode *self, int node)
{
    return left_branch(self, node);
}

// Turn `node` into a leaf by storing `value` in both branch slots.
static inline void set_leaf_value(PrefixCode *self, int node, int value)
{
    set_right_branch(self, node, value);
    set_left_branch(self, node, value);
}

// Mark `node` as empty. The two slots get different negative values (-1 and
// -2) so that an empty node is not mistaken for a leaf, whose slots are equal.
static inline void set_empty_node(PrefixCode *self, int node)
{
    set_right_branch(self, node, -2);
    set_left_branch(self, node, -1);
}
|
||||
|
||||
// True when `node` is not a usable node index (negative).
static inline bool is_invalid_node(PrefixCode *self, int node)
{
    (void)self; // not needed for the check; kept for a uniform helper signature

    return node < 0;
}

// True when branch slot `bit` of `node` does not lead to a node yet.
static inline bool is_open_branch(PrefixCode *self, int node, int bit)
{
    const int target = branch(self, node, bit);

    return is_invalid_node(self, target);
}

// True when `node` still carries the empty marker (-1 left, -2 right).
static inline bool is_empty_node(PrefixCode *self, int node)
{
    if(left_branch(self, node) != -1) return false;

    return right_branch(self, node) == -2;
}

// True when both branch slots of `node` hold the same value.
static inline bool is_leaf_node(PrefixCode *self, int node)
{
    const int zero = left_branch(self, node);
    const int one  = right_branch(self, node);

    return zero == one;
}
|
||||
|
||||
// Append one empty node to the code tree.
//
// Returns the index of the new node, or -1 when the tree cannot be grown.
//
// The tree is grown with plain realloc rather than safe_realloc: on failure
// realloc leaves the existing array untouched, so self->tree stays valid and
// still owned by `self`. Freeing it here (as safe_realloc would) while the
// pointer remains stored in `self` would leave a dangling pointer behind for
// whoever uses or frees the code afterwards.
static int new_node(PrefixCode *self)
{
    const size_t  wanted = ((size_t)self->numentries + 1) * sizeof(CodeTreeNode);
    CodeTreeNode *grown  = realloc(self->tree, wanted);
    if(!grown) return -1;

    self->tree = grown;
    set_empty_node(self, self->numentries);

    return self->numentries++;
}
|
||||
|
||||
// Stack implementation for tree building
|
||||
static PrefixCodeStack *prefix_code_stack_alloc(void)
|
||||
{
|
||||
PrefixCodeStack *stack = malloc(sizeof(PrefixCodeStack));
|
||||
if(!stack) return NULL;
|
||||
|
||||
stack->data = malloc(16 * sizeof(int));
|
||||
if(!stack->data)
|
||||
{
|
||||
free(stack);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
stack->count = 0;
|
||||
stack->capacity = 16;
|
||||
return stack;
|
||||
}
|
||||
|
||||
// Release a stack and its storage. Passing NULL is allowed and does nothing.
static void prefix_code_stack_free(PrefixCodeStack *stack)
{
    if(stack != NULL)
    {
        free(stack->data);
        free(stack);
    }
}
|
||||
|
||||
// Push `value` onto the stack, doubling the storage when it is full.
//
// Returns 0 on success and -1 when the storage cannot be grown; in that case
// the stack is left exactly as it was.
//
// The storage is grown with plain realloc rather than safe_realloc: on
// failure realloc keeps the old array alive, so stack->data stays valid and
// prefix_code_stack_free can still release it. Freeing it here while the
// pointer remains stored in the stack would lead to a double free later.
static int prefix_code_stack_push(PrefixCodeStack *stack, int value)
{
    if(stack->count >= stack->capacity)
    {
        int  newcapacity = stack->capacity * 2;
        int *newdata     = realloc(stack->data, (size_t)newcapacity * sizeof(int));
        if(!newdata) return -1;

        stack->data     = newdata;
        stack->capacity = newcapacity;
    }

    stack->data[stack->count++] = value;
    return 0;
}
|
||||
|
||||
// Removes and returns the most recently pushed value, or -1 when the stack
// is empty.
static int prefix_code_stack_pop(PrefixCodeStack *stack)
{
    if(stack->count == 0) return -1;

    stack->count -= 1;
    return stack->data[stack->count];
}
|
||||
|
||||
// Forgets every stored entry; the allocated storage is kept for reuse.
static void prefix_code_stack_clear(PrefixCodeStack *stack)
{
    stack->count = 0;
}
|
||||
|
||||
// Bit reversal functions
|
||||
// Reverses the order of all 32 bits of `val` (bit 0 <-> bit 31, ...), by
// swapping progressively larger groups: single bits, pairs, nibbles, bytes
// and finally the two 16-bit halves.
static uint32_t reverse_32(uint32_t val)
{
    val = ((val >> 1) & 0x55555555) | ((val & 0x55555555) << 1);
    val = ((val >> 2) & 0x33333333) | ((val & 0x33333333) << 2);
    val = ((val >> 4) & 0x0F0F0F0F) | ((val & 0x0F0F0F0F) << 4);
    val = ((val >> 8) & 0x00FF00FF) | ((val & 0x00FF00FF) << 8);
    return (val >> 16) | (val << 16);
}

// Reverses the lowest `length` bits of `val`; the result occupies the lowest
// `length` bits of the return value.
//
// Shifting a 32-bit value by 32 or by a negative amount is undefined in C, so
// the two boundary cases are handled explicitly: a code of zero (or negative)
// length reverses to 0, and 32 or more bits means "reverse the whole word".
static uint32_t reverse_n(uint32_t val, int length)
{
    if(length <= 0) return 0;
    if(length >= 32) return reverse_32(val);

    return reverse_32(val) >> (32 - length);
}
|
||||
|
||||
// Table construction functions
|
||||
#define TABLE_MAX_SIZE 10
|
||||
|
||||
// Recursively fills the high-bit-first lookup table for the subtree rooted at
// `node`.
//
// `table` points at the slice of the table covered by this subtree, which is
// 1 << (maxdepth - depth) entries wide. Each entry receives:
//   length = -1           no code passes through here (open branch)
//   length = depth        a leaf was reached after `depth` bits; value = symbol
//   length = maxdepth + 1 the code is longer than the table; value = node
//                         index from which decoding continues bit by bit
static void make_table(PrefixCode *code, int node, CodeTableEntry *table, int depth, int maxdepth)
{
    const int span = 1 << (maxdepth - depth);

    if(is_invalid_node(code, node))
    {
        for(int slot = 0; slot < span; slot++) table[slot].length = -1;
        return;
    }

    if(is_leaf_node(code, node))
    {
        for(int slot = 0; slot < span; slot++)
        {
            table[slot].length = depth;
            table[slot].value  = leaf_value(code, node);
        }
        return;
    }

    if(depth == maxdepth)
    {
        // Interior node at the table's resolution limit: hand over to the tree walk.
        table[0].length = maxdepth + 1;
        table[0].value  = node;
        return;
    }

    // The left subtree takes the lower half of the slice, the right subtree the upper half.
    make_table(code, left_branch(code, node), table, depth + 1, maxdepth);
    make_table(code, right_branch(code, node), table + span / 2, depth + 1, maxdepth);
}
|
||||
|
||||
// Low-bit-first counterpart of make_table().
//
// Because the first bit read is the least significant index bit, the entries
// belonging to one subtree are not contiguous: they are spaced 1 << depth
// apart, and the right subtree starts one stride after the left one.
// Entry encoding (length / value) is the same as in make_table().
static void make_table_le(PrefixCode *code, int node, CodeTableEntry *table, int depth, int maxdepth)
{
    const int entries = 1 << (maxdepth - depth);
    const int stride  = 1 << depth;

    if(is_invalid_node(code, node))
    {
        for(int k = 0; k < entries; k++) table[k * stride].length = -1;
        return;
    }

    if(is_leaf_node(code, node))
    {
        for(int k = 0; k < entries; k++)
        {
            table[k * stride].length = depth;
            table[k * stride].value  = leaf_value(code, node);
        }
        return;
    }

    if(depth == maxdepth)
    {
        // Code longer than the table: store the node to continue walking from.
        table[0].length = maxdepth + 1;
        table[0].value  = node;
        return;
    }

    make_table_le(code, left_branch(code, node), table, depth + 1, maxdepth);
    make_table_le(code, right_branch(code, node), table + stride, depth + 1, maxdepth);
}
|
||||
|
||||
// Lazily builds the high-bit-first lookup table (table1).
//
// The table is indexed by `tablesize` bits: the longest recorded code length,
// capped at TABLE_MAX_SIZE. When no code length has been recorded at all
// (maxlength < minlength) TABLE_MAX_SIZE is used.
// Returns PREFIX_CODE_OK, or PREFIX_CODE_INVALID if the table cannot be allocated.
static int prefix_code_make_table(PrefixCode *self)
{
    if(self->table1) return PREFIX_CODE_OK; // already built

    int bits = self->maxlength;
    if(self->maxlength < self->minlength || self->maxlength >= TABLE_MAX_SIZE) bits = TABLE_MAX_SIZE;
    self->tablesize = bits;

    self->table1 = malloc(sizeof(CodeTableEntry) * (1 << self->tablesize));
    if(!self->table1) return PREFIX_CODE_INVALID;

    make_table(self, 0, self->table1, 0, self->tablesize);
    return PREFIX_CODE_OK;
}
|
||||
|
||||
// Lazily builds the low-bit-first lookup table (table2).
//
// Sizing follows the same rule as prefix_code_make_table(): the longest
// recorded code length capped at TABLE_MAX_SIZE, or TABLE_MAX_SIZE when no
// length has been recorded.
// Returns PREFIX_CODE_OK, or PREFIX_CODE_INVALID if the table cannot be allocated.
static int prefix_code_make_table_le(PrefixCode *self)
{
    if(self->table2) return PREFIX_CODE_OK; // already built

    int bits = self->maxlength;
    if(self->maxlength < self->minlength || self->maxlength >= TABLE_MAX_SIZE) bits = TABLE_MAX_SIZE;
    self->tablesize = bits;

    self->table2 = malloc(sizeof(CodeTableEntry) * (1 << self->tablesize));
    if(!self->table2) return PREFIX_CODE_INVALID;

    make_table_le(self, 0, self->table2, 0, self->tablesize);
    return PREFIX_CODE_OK;
}
|
||||
|
||||
// Public functions
|
||||
|
||||
PrefixCode *prefix_code_alloc(void)
|
||||
{
|
||||
PrefixCode *self = malloc(sizeof(PrefixCode));
|
||||
if(!self) return NULL;
|
||||
|
||||
self->tree = malloc(sizeof(CodeTreeNode));
|
||||
if(!self->tree)
|
||||
{
|
||||
free(self);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
set_empty_node(self, 0);
|
||||
self->numentries = 1;
|
||||
self->minlength = INT_MAX;
|
||||
self->maxlength = INT_MIN;
|
||||
self->isstatic = false;
|
||||
|
||||
self->stack = NULL;
|
||||
self->table1 = self->table2 = NULL;
|
||||
self->tablesize = 0;
|
||||
self->currnode = 0;
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
PrefixCode *prefix_code_alloc_with_static_table(int (*statictable)[2])
|
||||
{
|
||||
PrefixCode *self = malloc(sizeof(PrefixCode));
|
||||
if(!self) return NULL;
|
||||
|
||||
self->tree = (CodeTreeNode *)statictable; // TODO: fix the ugly cast
|
||||
self->isstatic = true;
|
||||
|
||||
self->stack = NULL;
|
||||
self->table1 = self->table2 = NULL;
|
||||
self->tablesize = 0;
|
||||
self->currnode = 0;
|
||||
self->numentries = 0;
|
||||
self->minlength = INT_MAX;
|
||||
self->maxlength = INT_MIN;
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
PrefixCode *prefix_code_alloc_with_lengths(const int *lengths, int numsymbols, int maxcodelength, bool zeros)
|
||||
{
|
||||
PrefixCode *self = prefix_code_alloc();
|
||||
if(!self) return NULL;
|
||||
|
||||
int code = 0, symbolsleft = numsymbols;
|
||||
|
||||
for(int length = 1; length <= maxcodelength; length++)
|
||||
{
|
||||
for(int i = 0; i < numsymbols; i++)
|
||||
{
|
||||
if(lengths[i] != length) continue;
|
||||
// Instead of reversing to get a low-bit-first code, we shift and use high-bit-first.
|
||||
int result;
|
||||
if(zeros) { result = prefix_code_add_value_high_bit_first(self, i, code, length); }
|
||||
else { result = prefix_code_add_value_high_bit_first(self, i, ~code, length); }
|
||||
if(result != PREFIX_CODE_OK)
|
||||
{
|
||||
prefix_code_free(self);
|
||||
return NULL;
|
||||
}
|
||||
code++;
|
||||
if(--symbolsleft == 0) return self; // early exit if all codes have been handled
|
||||
}
|
||||
code <<= 1;
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
// Releases a prefix code together with its lookup tables and builder stack.
// A borrowed (static) tree is left alone. Passing NULL is allowed.
void prefix_code_free(PrefixCode *self)
{
    if(self == NULL) return;

    if(self->stack) prefix_code_stack_free(self->stack);

    free(self->table2);
    free(self->table1);

    if(!self->isstatic) free(self->tree);

    free(self);
}
|
||||
|
||||
// Adds `value` under the `length`-bit code `code`, most significant bit first.
// Passing `length` as the repeat position selects "no repeating bit" in the
// general routine. Returns PREFIX_CODE_OK or PREFIX_CODE_INVALID.
int prefix_code_add_value_high_bit_first(PrefixCode *self, int value, uint32_t code, int length)
{
    const int no_repeat = length;

    return prefix_code_add_value_high_bit_first_repeat(self, value, code, length, no_repeat);
}
|
||||
|
||||
// Adds `value` to the tree under the `length`-bit code `code`, consuming the
// code from its most significant bit downwards.
//
// repeatpos  position (counted from the first bit of the code) of a bit that
//            may repeat any number of times before the next, opposite bit
//            terminates the run; pass `length` for an ordinary code.
//
// Returns PREFIX_CODE_OK on success. Returns PREFIX_CODE_INVALID when:
//   - `self` is NULL or wraps a static table,
//   - `length` is outside 0..32 or `repeatpos` is negative (either would
//     shift the 32-bit code out of range),
//   - the repeat position is the last bit, or is not followed by the
//     opposite bit,
//   - the code collides with one already present (prefix or duplicate),
//   - a node cannot be allocated.
// On the argument errors the code object is left untouched; on the later
// errors the tree may already have been extended.
int prefix_code_add_value_high_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos)
{
    if(!self || self->isstatic) return PREFIX_CODE_INVALID;

    // `code` holds at most 32 bits; anything else would make the shifts below undefined.
    if(length < 0 || length > 32 || repeatpos < 0) return PREFIX_CODE_INVALID;

    // Convert to a bit index counted from the least significant bit
    // (-1 or lower now means "no repeat").
    repeatpos = length - 1 - repeatpos;
    if(repeatpos == 0) return PREFIX_CODE_INVALID; // no terminating bit would follow
    if(repeatpos > 0)
    {
        // The repeating bit and the bit after it must differ (01 or 10).
        uint32_t pair = (code >> (repeatpos - 1)) & 3;
        if(pair == 0 || pair == 3) return PREFIX_CODE_INVALID;
    }

    // The tree is about to change: cached lookup tables are stale.
    free(self->table1);
    free(self->table2);
    self->table1 = self->table2 = NULL;

    if(length > self->maxlength) self->maxlength = length;
    if(length < self->minlength) self->minlength = length;

    int lastnode = 0;
    for(int bitpos = length - 1; bitpos >= 0; bitpos--)
    {
        int bit = (code >> bitpos) & 1;

        // An existing code is a prefix of the new one.
        if(is_leaf_node(self, lastnode)) return PREFIX_CODE_INVALID;

        if(bitpos == repeatpos)
        {
            if(!is_open_branch(self, lastnode, bit)) return PREFIX_CODE_INVALID;

            int repeatnode = new_node(self);
            int nextnode   = new_node(self);
            if(repeatnode < 0 || nextnode < 0) return PREFIX_CODE_INVALID;

            // repeatnode loops onto itself for further `bit`s and leaves
            // through the opposite bit.
            set_branch(self, lastnode, bit, repeatnode);
            set_branch(self, repeatnode, bit, repeatnode);
            set_branch(self, repeatnode, bit ^ 1, nextnode);
            lastnode = nextnode;

            // The terminating bit (at bitpos - 1) is already wired in above,
            // so step over it. The loop counts down, hence the decrement; an
            // increment here would cancel the loop step and never terminate.
            bitpos--;
        }
        else
        {
            if(is_open_branch(self, lastnode, bit))
            {
                int newnode = new_node(self);
                if(newnode < 0) return PREFIX_CODE_INVALID;
                set_branch(self, lastnode, bit, newnode);
            }
            lastnode = branch(self, lastnode, bit);
        }
    }

    // The final node must be unused: neither a leaf nor an interior node.
    if(!is_empty_node(self, lastnode)) return PREFIX_CODE_INVALID;

    set_leaf_value(self, lastnode, value);
    return PREFIX_CODE_OK;
}
|
||||
|
||||
// Adds `value` under a code whose first bit is its least significant bit.
// The code is bit-reversed over `length` bits and then added high-bit-first.
// Returns PREFIX_CODE_OK or PREFIX_CODE_INVALID.
int prefix_code_add_value_low_bit_first(PrefixCode *self, int value, uint32_t code, int length)
{
    const uint32_t flipped = reverse_n(code, length);

    return prefix_code_add_value_high_bit_first(self, value, flipped, length);
}
|
||||
|
||||
// Low-bit-first variant of prefix_code_add_value_high_bit_first_repeat():
// the code is bit-reversed over `length` bits and `repeatpos` is forwarded
// unchanged. Returns PREFIX_CODE_OK or PREFIX_CODE_INVALID.
int prefix_code_add_value_low_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos)
{
    const uint32_t flipped = reverse_n(code, length);

    return prefix_code_add_value_high_bit_first_repeat(self, value, flipped, length, repeatpos);
}
|
||||
|
||||
// Prepares the incremental tree builder: the cursor returns to the root and
// the parent stack is emptied, or created on first use.
// NOTE(review): an allocation failure leaves self->stack NULL and is not reported.
void prefix_code_start_building_tree(PrefixCode *self)
{
    if(self == NULL) return;

    self->currnode = 0;

    if(self->stack)
        prefix_code_stack_clear(self->stack);
    else
        self->stack = prefix_code_stack_alloc();
}
|
||||
|
||||
// Incremental builder: creates a child on the 0-branch of the current node
// and descends into it, remembering the parent so that
// prefix_code_finish_branches() can return to it.
//
// Does nothing when `self` is NULL, when the builder has no stack
// (prefix_code_start_building_tree() was not called or could not allocate
// one), or when memory runs out; in those cases the tree and the cursor are
// left as they were.
void prefix_code_start_zero_branch(PrefixCode *self)
{
    if(!self || !self->stack) return;

    // Save the parent first so a failed push cannot leave a half-linked child behind.
    if(prefix_code_stack_push(self->stack, self->currnode) != 0) return;

    int child = new_node(self);
    if(child < 0)
    {
        prefix_code_stack_pop(self->stack); // undo the push
        return;
    }

    set_branch(self, self->currnode, 0, child);
    self->currnode = child;
}
|
||||
|
||||
// Incremental builder: creates a child on the 1-branch of the current node
// and descends into it, remembering the parent so that
// prefix_code_finish_branches() can return to it.
//
// Does nothing when `self` is NULL, when the builder has no stack
// (prefix_code_start_building_tree() was not called or could not allocate
// one), or when memory runs out; in those cases the tree and the cursor are
// left as they were.
void prefix_code_start_one_branch(PrefixCode *self)
{
    if(!self || !self->stack) return;

    // Save the parent first so a failed push cannot leave a half-linked child behind.
    if(prefix_code_stack_push(self->stack, self->currnode) != 0) return;

    int child = new_node(self);
    if(child < 0)
    {
        prefix_code_stack_pop(self->stack); // undo the push
        return;
    }

    set_branch(self, self->currnode, 1, child);
    self->currnode = child;
}
|
||||
|
||||
// Incremental builder: moves the cursor back to the most recently saved
// parent. With an empty stack (or no stack at all) the cursor stays put.
void prefix_code_finish_branches(PrefixCode *self)
{
    if(self == NULL || self->stack == NULL) return;

    const int parent = prefix_code_stack_pop(self->stack);
    if(parent < 0) return; // nothing was saved

    self->currnode = parent;
}
|
||||
|
||||
// Incremental builder: turns the current node into a leaf holding `value`,
// then returns the cursor to the saved parent.
void prefix_code_make_leaf_with_value(PrefixCode *self, int value)
{
    if(self != NULL)
    {
        set_leaf_value(self, self->currnode, value);
        prefix_code_finish_branches(self);
    }
}
|
||||
|
||||
// BitStream interface functions
|
||||
|
||||
// Decodes one symbol from `bs`, reading bits most-significant-first.
//
// The next `tablesize` bits are peeked and looked up in table1 (built on
// first use). Short codes are resolved directly from the table; longer codes
// continue from the node stored in the table entry, one bit at a time.
// Returns the symbol, or PREFIX_CODE_INVALID when `code` is NULL, the table
// cannot be built, or the bits do not form a known code.
int prefix_code_read_symbol(BitStream *bs, PrefixCode *code)
{
    if(!code) return PREFIX_CODE_INVALID;

    if(!code->table1 && prefix_code_make_table(code) != PREFIX_CODE_OK) return PREFIX_CODE_INVALID;

    const int index = bitstream_peek_bits(bs, code->tablesize);

    int length = code->table1[index].length;
    int value  = code->table1[index].value;

    if(length < 0) return PREFIX_CODE_INVALID;

    if(length > code->tablesize)
    {
        // Code is longer than the table: `value` is the node to resume from.
        bitstream_skip_bits(bs, code->tablesize);

        int node = value;
        for(;;)
        {
            if(is_leaf_node(code, node)) break;

            int bit = bitstream_read_bit(bs);
            if(is_open_branch(code, node, bit)) return PREFIX_CODE_INVALID;
            node = branch(code, node, bit);
        }
        return leaf_value(code, node);
    }

    // Fully resolved by the table: consume exactly the bits of this code.
    bitstream_skip_bits(bs, length);
    return value;
}
|
||||
|
||||
// Decodes one symbol from `bs`, reading bits least-significant-first.
//
// Mirrors prefix_code_read_symbol() but uses table2 and the *_le bit stream
// primitives. Returns the symbol, or PREFIX_CODE_INVALID when `code` is NULL,
// the table cannot be built, or the bits do not form a known code.
int prefix_code_read_symbol_le(BitStream *bs, PrefixCode *code)
{
    if(!code) return PREFIX_CODE_INVALID;

    if(!code->table2 && prefix_code_make_table_le(code) != PREFIX_CODE_OK) return PREFIX_CODE_INVALID;

    const int index = bitstream_peek_bits_le(bs, code->tablesize);

    int length = code->table2[index].length;
    int value  = code->table2[index].value;

    if(length < 0) return PREFIX_CODE_INVALID;

    if(length > code->tablesize)
    {
        // Code is longer than the table: `value` is the node to resume from.
        bitstream_skip_bits_le(bs, code->tablesize);

        int node = value;
        for(;;)
        {
            if(is_leaf_node(code, node)) break;

            int bit = bitstream_read_bit_le(bs);
            if(is_open_branch(code, node, bit)) return PREFIX_CODE_INVALID;
            node = branch(code, node, bit);
        }
        return leaf_value(code, node);
    }

    // Fully resolved by the table: consume exactly the bits of this code.
    bitstream_skip_bits_le(bs, length);
    return value;
}
|
||||
89
pak/prefixcode.h
Normal file
89
pak/prefixcode.h
Normal file
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* prefixcode.h - Prefix code tree implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef PREFIXCODE_H
|
||||
#define PREFIXCODE_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "bitstream.h"
|
||||
|
||||
// Error codes
|
||||
#define PREFIX_CODE_OK 0
|
||||
#define PREFIX_CODE_INVALID -1
|
||||
|
||||
// One node of the binary code tree.
// Each slot holds the index of the child node reached by one bit value.
// In prefixcode.c a negative slot marks a branch that is still open, and two
// equal slots mark a leaf whose symbol is that shared value.
// NOTE(review): presumably slot 0 is the zero/left branch and slot 1 the
// one/right branch - confirm against the accessors in prefixcode.c.
typedef struct CodeTreeNode
{
    int branches[2];
} CodeTreeNode;
|
||||
|
||||
// One entry of the fast lookup tables built from the code tree.
typedef struct CodeTableEntry
{
    // Number of bits in the matched code; -1 when no code matches; a value
    // larger than the table's index width when the code is longer than the
    // table. Signed because -1 is stored here and the decoder tests
    // `length < 0` (the size and layout match the former uint32_t field).
    int32_t length;
    // Decoded symbol, or - for codes longer than the table - the index of
    // the tree node from which decoding continues bit by bit.
    int32_t value;
} CodeTableEntry;
|
||||
|
||||
// Simple stack implementation for tree building
|
||||
// Growable stack of node indices used while a tree is built incrementally.
typedef struct PrefixCodeStack
{
    int *data;     // heap storage for the entries
    int  count;    // number of entries currently stored
    int  capacity; // number of entries `data` has room for
} PrefixCodeStack;
|
||||
|
||||
// A prefix (Huffman-style) code: the code tree plus cached decoding tables
// and the state of the incremental tree builder.
typedef struct PrefixCode
{
    CodeTreeNode *tree;       // node array; node 0 is the root
    int           numentries; // nodes in `tree` (set to 0 for a borrowed static table)
    int           minlength;  // shortest code length added so far (INT_MAX if none)
    int           maxlength;  // longest code length added so far (INT_MIN if none)
    bool          isstatic;   // true: `tree` is borrowed from the caller and never freed or grown

    int              currnode; // builder cursor: node currently being filled in
    PrefixCodeStack *stack;    // builder: parents of `currnode`; NULL until building starts

    int             tablesize; // number of bits used to index table1 / table2
    CodeTableEntry *table1;    // high-bit-first lookup table, built on demand (NULL until then)
    CodeTableEntry *table2;    // low-bit-first lookup table, built on demand (NULL until then)
} PrefixCode;
|
||||
|
||||
// Function declarations
|
||||
// Creates an empty code with a single empty root node; NULL on allocation failure.
PrefixCode *prefix_code_alloc(void);
// Builds a code from per-symbol code lengths, assigning codes by increasing
// length and then by symbol index. When shortestCodeIsZeros is false every
// code is bit-complemented. Returns NULL on failure.
PrefixCode *prefix_code_alloc_with_lengths(const int *lengths, int numsymbols, int maxlength, bool shortestCodeIsZeros);
// Wraps a caller-owned tree table without copying it; the table is not freed
// by prefix_code_free() and values cannot be added to such a code.
PrefixCode *prefix_code_alloc_with_static_table(int (*statictable)[2]);
// Releases a code and everything it owns. NULL is accepted.
void prefix_code_free(PrefixCode *self);

// The add_value functions insert `value` under the given `length`-bit code
// and return PREFIX_CODE_OK or PREFIX_CODE_INVALID. "high_bit_first" consumes
// the code from its most significant bit, "low_bit_first" bit-reverses it
// first. The "_repeat" variants additionally take the position of a bit that
// may repeat.
int prefix_code_add_value_high_bit_first(PrefixCode *self, int value, uint32_t code, int length);
int prefix_code_add_value_high_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos);
int prefix_code_add_value_low_bit_first(PrefixCode *self, int value, uint32_t code, int length);
int prefix_code_add_value_low_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos);

// Incremental tree construction: start at the root, open a zero or one
// branch to descend into a new child, turn the current node into a leaf, and
// step back to the parent.
void prefix_code_start_building_tree(PrefixCode *self);
void prefix_code_start_zero_branch(PrefixCode *self);
void prefix_code_start_one_branch(PrefixCode *self);
void prefix_code_finish_branches(PrefixCode *self);
void prefix_code_make_leaf_with_value(PrefixCode *self, int value);

// BitStream interface functions
// Decode one symbol from the bit stream (most- or least-significant bit
// first, respectively). Return the symbol or PREFIX_CODE_INVALID.
int prefix_code_read_symbol(BitStream *bs, PrefixCode *code);
int prefix_code_read_symbol_le(BitStream *bs, PrefixCode *code);
|
||||
|
||||
#endif /* PREFIXCODE_H */
|
||||
@@ -42,8 +42,35 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/data.bin
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/alice29.lzd
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/alice29.lh5
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcpack.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsqueeze.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunchnr.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunch_dynamic.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsquash.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/pak_crush.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/pak_distill.bin
|
||||
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
|
||||
|
||||
# 'tests_run' is the target name
|
||||
# The files listed below are the source files containing the tests
|
||||
add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp
|
||||
zoo/lzd.cpp)
|
||||
zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp arc/crunch.cpp
|
||||
arc/squash.cpp
|
||||
pak/crush.cpp
|
||||
pak/distill.cpp)
|
||||
target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native")
|
||||
|
||||
133
tests/arc/crunch.cpp
Normal file
133
tests/arc/crunch.cpp
Normal file
@@ -0,0 +1,133 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../../library.h"
|
||||
#include "../crc32.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#define EXPECTED_CRC32 0x66007dba
|
||||
|
||||
static const uint8_t *buffer;
|
||||
|
||||
class crunchFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
crunchFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/arccrunchnr.bin", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(72537);
|
||||
fread((void *)buffer, 1, 72537, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~crunchFixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the crunch sample and checks status, output size and CRC32.
TEST_F(crunchFixture, crunch)
{
    size_t srcLen  = 72537;
    size_t destLen = 152089;

    auto *outBuf = (uint8_t *)malloc(152089);
    auto  err    = arc_decompress_crunch_nrpack(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, 152089);

    // Take the checksum before the output buffer is released.
    auto crc = crc32_data(outBuf, 152089);
    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
|
||||
class crunchDynamicFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
crunchDynamicFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/arccrunch_dynamic.bin", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(73189);
|
||||
fread((void *)buffer, 1, 73189, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~crunchDynamicFixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the dynamic crunch sample and checks status, output size and CRC32.
TEST_F(crunchDynamicFixture, crunchDynamic)
{
    size_t srcLen  = 73189;
    size_t destLen = 152089;

    auto *outBuf = (uint8_t *)malloc(152089);
    auto  err    = arc_decompress_crunch_dynamic(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, 152089);

    // Take the checksum before the output buffer is released.
    auto crc = crc32_data(outBuf, 152089);
    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
82
tests/arc/pack.cpp
Normal file
82
tests/arc/pack.cpp
Normal file
@@ -0,0 +1,82 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../../library.h"
|
||||
#include "../crc32.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#define EXPECTED_CRC32 0x66007dba
|
||||
|
||||
static const uint8_t *buffer;
|
||||
|
||||
class packFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
packFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/arcpack.bin", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(149855);
|
||||
fread((void *)buffer, 1, 149855, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~packFixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the pack sample and checks status, output size and CRC32.
// (The unused `params` array left over from another test has been removed.)
TEST_F(packFixture, pack)
{
    size_t destLen = 152089;
    size_t srcLen  = 149855;
    auto  *outBuf  = (uint8_t *)malloc(152089);
    ASSERT_NE(outBuf, nullptr);

    auto err = arc_decompress_pack(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, 152089);

    // Take the checksum before the output buffer is released.
    auto crc = crc32_data(outBuf, 152089);

    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
81
tests/arc/squash.cpp
Normal file
81
tests/arc/squash.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../../library.h"
|
||||
#include "../crc32.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#define EXPECTED_CRC32 0x66007dba
|
||||
|
||||
static const uint8_t *buffer;
|
||||
|
||||
class squashFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
squashFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/arcsquash.bin", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(67308);
|
||||
fread((void *)buffer, 1, 67308, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~squashFixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the squash sample and checks status, output size and CRC32.
TEST_F(squashFixture, squash)
{
    size_t srcLen  = 67308;
    size_t destLen = 152089;

    auto *outBuf = (uint8_t *)malloc(152089);
    auto  err    = arc_decompress_squash(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, 152089);

    // Take the checksum before the output buffer is released.
    auto crc = crc32_data(outBuf, 152089);
    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
81
tests/arc/squeeze.cpp
Normal file
81
tests/arc/squeeze.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../../library.h"
|
||||
#include "../crc32.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#define EXPECTED_CRC32 0x66007dba
|
||||
|
||||
static const uint8_t *buffer;
|
||||
|
||||
class squeezeFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
squeezeFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/arcsqueeze.bin", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(88044);
|
||||
fread((void *)buffer, 1, 88044, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~squeezeFixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the squeeze sample and checks status, output size and CRC32.
TEST_F(squeezeFixture, squeeze)
{
    size_t srcLen  = 88044;
    size_t destLen = 152089;

    auto *outBuf = (uint8_t *)malloc(152089);
    auto  err    = arc_decompress_squeeze(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, 152089);

    // Take the checksum before the output buffer is released.
    auto crc = crc32_data(outBuf, 152089);
    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
BIN
tests/data/alice29.lh5
Normal file
BIN
tests/data/alice29.lh5
Normal file
Binary file not shown.
BIN
tests/data/alice29.lzd
Normal file
BIN
tests/data/alice29.lzd
Normal file
Binary file not shown.
BIN
tests/data/arccrunch_dynamic.bin
Executable file
BIN
tests/data/arccrunch_dynamic.bin
Executable file
Binary file not shown.
BIN
tests/data/arccrunchnr.bin
Executable file
BIN
tests/data/arccrunchnr.bin
Executable file
Binary file not shown.
3628
tests/data/arcpack.bin
Executable file
3628
tests/data/arcpack.bin
Executable file
File diff suppressed because it is too large
Load Diff
BIN
tests/data/arcsquash.bin
Executable file
BIN
tests/data/arcsquash.bin
Executable file
Binary file not shown.
BIN
tests/data/arcsqueeze.bin
Executable file
BIN
tests/data/arcsqueeze.bin
Executable file
Binary file not shown.
BIN
tests/data/pak_crush.bin
Normal file
BIN
tests/data/pak_crush.bin
Normal file
Binary file not shown.
BIN
tests/data/pak_distill.bin
Normal file
BIN
tests/data/pak_distill.bin
Normal file
Binary file not shown.
84
tests/lh5.cpp
Normal file
84
tests/lh5.cpp
Normal file
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "../zoo/lh5.h"
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../library.h"
|
||||
#include "crc32.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#define EXPECTED_CRC32 0x66007dba
|
||||
|
||||
static const uint8_t *buffer;
|
||||
|
||||
class lh5Fixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
lh5Fixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/alice29.lh5", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(59104);
|
||||
fread((void *)buffer, 1, 59104, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~lh5Fixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the LH5 sample loaded by the fixture and verifies the output
// length and CRC32 against the known values for the uncompressed file.
TEST_F(lh5Fixture, lh5)
{
    const size_t expectedLen = 152089; // size of the uncompressed sample
    size_t       destLen     = expectedLen;
    size_t       srcLen      = 59104;
    auto        *outBuf      = (uint8_t *)malloc(expectedLen);

    ASSERT_TRUE(outBuf != nullptr) << "out of memory allocating the output buffer";

    auto err = lh5_decompress(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, expectedLen);

    auto crc = crc32_data(outBuf, expectedLen);

    // Release the buffer before the final expectation so it is never leaked.
    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
81
tests/pak/crush.cpp
Normal file
81
tests/pak/crush.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../../library.h"
|
||||
#include "../crc32.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#define EXPECTED_CRC32 0x66007dba
|
||||
|
||||
static const uint8_t *buffer;
|
||||
|
||||
class crushFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
crushFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/pak_crush.bin", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(63282);
|
||||
fread((void *)buffer, 1, 63282, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~crushFixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the PAK "crush" sample loaded by the fixture and verifies the
// output length and CRC32 against the known values for the uncompressed file.
TEST_F(crushFixture, crush)
{
    const size_t expectedLen = 152089; // size of the uncompressed sample
    size_t       destLen     = expectedLen;
    size_t       srcLen      = 63282;
    auto        *outBuf      = (uint8_t *)malloc(expectedLen);

    ASSERT_TRUE(outBuf != nullptr) << "out of memory allocating the output buffer";

    auto err = pak_decompress_crush(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, expectedLen);

    auto crc = crc32_data(outBuf, expectedLen);

    // Release the buffer before the final expectation so it is never leaked.
    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
81
tests/pak/distill.cpp
Normal file
81
tests/pak/distill.cpp
Normal file
@@ -0,0 +1,81 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include <climits>
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
|
||||
#include "../../library.h"
|
||||
#include "../crc32.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
#define EXPECTED_CRC32 0x66007dba
|
||||
|
||||
static const uint8_t *buffer;
|
||||
|
||||
class distillFixture : public ::testing::Test
|
||||
{
|
||||
public:
|
||||
distillFixture()
|
||||
{
|
||||
// initialization;
|
||||
// can also be done in SetUp()
|
||||
}
|
||||
|
||||
protected:
|
||||
void SetUp()
|
||||
{
|
||||
char path[PATH_MAX];
|
||||
char filename[PATH_MAX];
|
||||
|
||||
getcwd(path, PATH_MAX);
|
||||
snprintf(filename, PATH_MAX, "%s/data/pak_distill.bin", path);
|
||||
|
||||
FILE *file = fopen(filename, "rb");
|
||||
buffer = (const uint8_t *)malloc(60540);
|
||||
fread((void *)buffer, 1, 60540, file);
|
||||
fclose(file);
|
||||
}
|
||||
|
||||
void TearDown() { free((void *)buffer); }
|
||||
|
||||
~distillFixture()
|
||||
{
|
||||
// resources cleanup, no exceptions allowed
|
||||
}
|
||||
|
||||
// shared user data
|
||||
};
|
||||
|
||||
// Decompresses the PAK "distill" sample loaded by the fixture and verifies the
// output length and CRC32 against the known values for the uncompressed file.
TEST_F(distillFixture, distill)
{
    const size_t expectedLen = 152089; // size of the uncompressed sample
    size_t       destLen     = expectedLen;
    size_t       srcLen      = 60540;
    auto        *outBuf      = (uint8_t *)malloc(expectedLen);

    ASSERT_TRUE(outBuf != nullptr) << "out of memory allocating the output buffer";

    auto err = pak_decompress_distill(buffer, srcLen, outBuf, &destLen);

    EXPECT_EQ(err, 0);
    EXPECT_EQ(destLen, expectedLen);

    auto crc = crc32_data(outBuf, expectedLen);

    // Release the buffer before the final expectation so it is never leaked.
    free(outBuf);

    EXPECT_EQ(crc, EXPECTED_CRC32);
}
|
||||
@@ -25,7 +25,7 @@
|
||||
#include <sys/stat.h>
|
||||
#include <unistd.h>
|
||||
|
||||
#include "../library.h"
|
||||
#include "../../library.h"
|
||||
#include "../crc32.h"
|
||||
#include "gtest/gtest.h"
|
||||
|
||||
|
||||
40
zoo/ar.h
Normal file
40
zoo/ar.h
Normal file
@@ -0,0 +1,40 @@
|
||||
/*$Source: /usr/home/dhesi/zoo/RCS/ar.h,v $*/
|
||||
/*$Id: ar.h,v 1.17 91/07/09 01:39:50 dhesi Exp $*/
|
||||
/***********************************************************
|
||||
ar.h
|
||||
|
||||
Adapted from "ar" archiver written by Haruhiko Okumura.
|
||||
***********************************************************/
|
||||
// Modified for in-memory decompression by Natalia Portillo, 2025
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
|
||||
/* all the prototypes follow here for all files */
|
||||
|
||||
/* DECODE.C */
|
||||
void decode_start();
|
||||
int decode(uint32_t count, uint8_t *buffer);
|
||||
|
||||
/* HUF.C */
|
||||
void output(uint32_t c, uint32_t p);
|
||||
uint32_t decode_c(void);
|
||||
uint32_t decode_p(void);
|
||||
void huf_decode_start(void);
|
||||
|
||||
/* IO.C */
|
||||
void fillbuf(int n);
|
||||
uint32_t getbits(int n);
|
||||
void putbits(int n, uint32_t x);
|
||||
void init_getbits(void);
|
||||
void init_putbits(void);
|
||||
|
||||
/* MAKETBL.C */
|
||||
void make_table(int nchar, uint8_t bitlen[], int tablebits, uint16_t table[]);
|
||||
|
||||
/* MAKETREE.C */
|
||||
int make_tree(int nparm, uint16_t freqparm[], uint8_t lenparm[], uint16_t codeparm[]);
|
||||
|
||||
/* for lzh modules and also for ar.c to use in defining buffer size */
|
||||
#define DICBIT 13 /* 12(-lh4-) or 13(-lh5-) */
|
||||
#define DICSIZ ((unsigned)1 << DICBIT)
|
||||
104
zoo/decode.c
Normal file
104
zoo/decode.c
Normal file
@@ -0,0 +1,104 @@
|
||||
/*$Source: /usr/home/dhesi/zoo/RCS/decode.c,v $*/
|
||||
/*$Id: decode.c,v 1.6 91/07/09 01:39:49 dhesi Exp $*/
|
||||
/***********************************************************
|
||||
decode.c
|
||||
|
||||
Adapted from Haruhiko Okumura’s “ar” archiver. This
|
||||
version has been modified in 2025 by Natalia Portillo
|
||||
for in-memory decompression.
|
||||
***********************************************************/
|
||||
|
||||
#include <limits.h> // for UCHAR_MAX
|
||||
#include <stdint.h> // for fixed-width integer types
|
||||
|
||||
#include "ar.h" // archive format constants
|
||||
#include "lzh.h" // LZH-specific constants (DICSIZ, THRESHOLD, etc.)
|
||||
|
||||
extern int decoded; // flag set by decode_c() when end-of-stream is reached
|
||||
|
||||
static int j; // number of literal/copy runs remaining from a match
|
||||
|
||||
/*
|
||||
* decode_start()
|
||||
*
|
||||
* Prepare the decoder for a new file:
|
||||
* - Initialize the Huffman bitstream (via huf_decode_start())
|
||||
* - Reset the sliding-window copy counter `j`
|
||||
* - Clear the end-of-data flag `decoded`
|
||||
*/
|
||||
void decode_start()
|
||||
{
|
||||
huf_decode_start(); // reset bit-reader state
|
||||
j = 0; // no pending copy runs yet
|
||||
decoded = 0; // not yet at end-of-stream
|
||||
}
|
||||
|
||||
/*
|
||||
* decode(count, buffer)
|
||||
*
|
||||
* Decode up to `count` bytes (usually DICSIZ) into `buffer[]`.
|
||||
* Returns the number of bytes written; this is less than `count` (possibly 0)
* only when the end-of-stream marker (`decoded`) is reached.
|
||||
*
|
||||
* Sliding‐window logic:
|
||||
* 1. If `j` > 0, we are in the middle of copying a previous match:
|
||||
* - Copy one byte from `buffer[i]` into `buffer[r]`
|
||||
* - Advance `i` (circular within DICSIZ) and `r`
|
||||
* - Decrement `j` and repeat until `j` = 0 or `r` = count
|
||||
* 2. Otherwise, fetch the next symbol `c = decode_c()`:
|
||||
* - If `c <= UCHAR_MAX`, it’s a literal byte: emit it directly
|
||||
* - Else it’s a match:
|
||||
* • compute `j = match_length = c - (UCHAR_MAX + 1 - THRESHOLD)`
|
||||
* • compute `i = (r - match_offset - 1) mod DICSIZ`,
|
||||
* where match_offset = decode_p()
|
||||
* • enter copy loop from step 1
|
||||
*/
|
||||
int decode(uint32_t count, uint8_t *buffer)
|
||||
{
|
||||
static uint32_t i; // sliding-window read index (circular)
|
||||
uint32_t r; // write position in buffer
|
||||
uint32_t c; // symbol or match code
|
||||
|
||||
r = 0;
|
||||
|
||||
// Step 1: finish any pending copy from a previous match
|
||||
while(--j >= 0)
|
||||
{
|
||||
buffer[r] = buffer[i]; // copy one byte from history
|
||||
i = (i + 1) & (DICSIZ - 1); // wrap index within [0, DICSIZ)
|
||||
if(++r == count) // if output buffer is full
|
||||
return r; // return bytes written so far
|
||||
}
|
||||
|
||||
// Step 2: decode new symbols until end-of-stream or buffer full
|
||||
for(;;)
|
||||
{
|
||||
c = decode_c(); // get next Huffman symbol
|
||||
if(decoded) // end-of-stream marker reached
|
||||
return r; // no more bytes to decode
|
||||
|
||||
if(c <= UCHAR_MAX)
|
||||
{
|
||||
// Literal byte: emit it directly
|
||||
buffer[r] = (uint8_t)c;
|
||||
if(++r == count) return r;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Match sequence: compute how many bytes to copy
|
||||
// j = match length
|
||||
j = c - (UCHAR_MAX + 1 - THRESHOLD);
|
||||
|
||||
// i = start position in sliding window:
|
||||
// current output position minus offset minus 1, wrapped
|
||||
i = (r - decode_p() - 1) & (DICSIZ - 1);
|
||||
|
||||
// Copy `j` bytes from history
|
||||
while(--j >= 0)
|
||||
{
|
||||
buffer[r] = buffer[i];
|
||||
i = (i + 1) & (DICSIZ - 1);
|
||||
if(++r == count) return r;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
244
zoo/huf.c
Normal file
244
zoo/huf.c
Normal file
@@ -0,0 +1,244 @@
|
||||
/*$Source: /usr/home/dhesi/zoo/RCS/huf.c,v $*/
|
||||
/*$Id: huf.c,v 1.9 91/07/09 01:39:55 dhesi Exp $*/
|
||||
/***********************************************************
|
||||
huf.c -- static Huffman decoding
|
||||
|
||||
Adapted from Haruhiko Okumura’s “ar” archiver.
|
||||
Modified in 2025 by Natalia Portillo for in-memory I/O.
|
||||
***********************************************************/
|
||||
|
||||
#include <limits.h> // UCHAR_MAX
|
||||
#include "ar.h" // archive format constants
|
||||
#include "lzh.h" // LZH algorithm constants (NC, DICBIT, CODE_BIT, etc.)
|
||||
|
||||
// NP = number of position codes = DICBIT+1
|
||||
// NT = number of tree codes = CODE_BIT+3
|
||||
// PBIT, TBIT = bit‐width to transmit NP/NT in header
|
||||
#define NP (DICBIT + 1)
|
||||
#define NT (CODE_BIT + 3)
|
||||
#define PBIT 4 /* smallest bits so (1<<PBIT)>NP */
|
||||
#define TBIT 5 /* smallest bits so (1<<TBIT)>NT */
|
||||
|
||||
// NPT = max(NP,NT) for prefix‐tree lengths
|
||||
#if NT > NP
|
||||
#define NPT NT
|
||||
#else
|
||||
#define NPT NP
|
||||
#endif
|
||||
|
||||
// forward declarations of helper routines
|
||||
static void read_pt_len(int nn, int nbit, int i_special);
|
||||
static void read_c_len(void);
|
||||
|
||||
int decoded; // flag set when end-of-stream block is seen
|
||||
|
||||
// Huffman tree storage arrays
|
||||
// left[]/right[] store the binary tree structure for fast decoding
|
||||
uint16_t left[2 * NC - 1], right[2 * NC - 1];
|
||||
|
||||
// c_len[] = code lengths for literal/length tree (NC symbols)
|
||||
// pt_len[] = code lengths for position‐tree / prefix table (NPT symbols)
|
||||
// buf = temporary buffer pointer used during encoding; unused in decode
|
||||
static uint8_t *buf, c_len[NC], pt_len[NPT];
|
||||
|
||||
// size of buf if used, and remaining symbols in current block
|
||||
static uint32_t bufsiz = 0, blocksize;
|
||||
|
||||
// Frequency, code and decode‐table structures
|
||||
static uint16_t c_freq[2 * NC - 1], // literal/length frequency counts
|
||||
c_table[4096], // fast‐lookup table for literal/length decoding
|
||||
c_code[NC], // canonical Huffman codes for literals
|
||||
p_freq[2 * NP - 1], // position frequency counts
|
||||
pt_table[256], // prefix‐tree fast lookup (for reading code lengths)
|
||||
pt_code[NPT], // canonical codes for prefix‐tree
|
||||
t_freq[2 * NT - 1]; // temporary freq for tree of code‐length codes
|
||||
|
||||
/***** decoding helper: read prefix‐tree code-lengths *****/
|
||||
static void read_pt_len(int nn, int nbit, int i_special)
|
||||
{
|
||||
int i, c, n;
|
||||
uint32_t mask;
|
||||
|
||||
// 1) read how many code‐lengths to consume
|
||||
n = getbits(nbit);
|
||||
if(n == 0)
|
||||
{
|
||||
// special case: all code‐lengths are identical
|
||||
c = getbits(nbit);
|
||||
for(i = 0; i < nn; i++) // zero out lengths
|
||||
pt_len[i] = 0;
|
||||
for(i = 0; i < 256; i++) // prefix‐table always returns 'c'
|
||||
pt_table[i] = c;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2) read code lengths one by one
|
||||
i = 0;
|
||||
while(i < n)
|
||||
{
|
||||
// peek top 3 bits of bitbuf to guess small lengths
|
||||
c = bitbuf >> (BITBUFSIZ - 3);
|
||||
if(c == 7)
|
||||
{
|
||||
// if all three bits are 1, count additional ones
|
||||
mask = 1U << (BITBUFSIZ - 1 - 3);
|
||||
while(mask & bitbuf)
|
||||
{
|
||||
c++;
|
||||
mask >>= 1;
|
||||
}
|
||||
}
|
||||
// consume the actual length bits
|
||||
fillbuf((c < 7) ? 3 : (c - 3));
|
||||
pt_len[i++] = c;
|
||||
|
||||
// at special index, read a small run of zeros
|
||||
if(i == i_special)
|
||||
{
|
||||
c = getbits(2);
|
||||
while(--c >= 0 && i < nn) pt_len[i++] = 0;
|
||||
}
|
||||
}
|
||||
// any remaining symbols get code‐length zero
|
||||
while(i < nn) pt_len[i++] = 0;
|
||||
|
||||
// build fast lookup table from lengths
|
||||
make_table(nn, pt_len, 8, pt_table);
|
||||
}
|
||||
}
|
||||
|
||||
/***** decoding helper: read literal/length code‐lengths *****/
|
||||
static void read_c_len(void)
|
||||
{
|
||||
int i, c, n;
|
||||
uint32_t mask;
|
||||
|
||||
// 1) how many literal codes?
|
||||
n = getbits(CBIT);
|
||||
if(n == 0)
|
||||
{
|
||||
// all code‐lengths identical
|
||||
c = getbits(CBIT);
|
||||
for(i = 0; i < NC; i++) c_len[i] = 0;
|
||||
for(i = 0; i < 4096; i++) c_table[i] = c;
|
||||
}
|
||||
else
|
||||
{
|
||||
// 2) read each code length via prefix‐tree
|
||||
i = 0;
|
||||
while(i < n)
|
||||
{
|
||||
// lookup next symbol in prefix‐table
|
||||
c = pt_table[bitbuf >> (BITBUFSIZ - 8)];
|
||||
if(c >= NT)
|
||||
{
|
||||
// if prefix code is non-leaf, walk tree
|
||||
mask = 1U << (BITBUFSIZ - 1 - 8);
|
||||
do {
|
||||
c = (bitbuf & mask) ? right[c] : left[c];
|
||||
mask >>= 1;
|
||||
} while(c >= NT);
|
||||
}
|
||||
// consume code‐length bits
|
||||
fillbuf(pt_len[c]);
|
||||
|
||||
// c ≤ 2: run-length encoding of zeros
|
||||
if(c <= 2)
|
||||
{
|
||||
if(c == 0)
|
||||
c = 1;
|
||||
else if(c == 1)
|
||||
c = getbits(4) + 3;
|
||||
else
|
||||
c = getbits(CBIT) + 20;
|
||||
while(--c >= 0 && i < NC) c_len[i++] = 0;
|
||||
}
|
||||
else
|
||||
{
|
||||
// real code-length = c−2
|
||||
c_len[i++] = (uint8_t)(c - 2);
|
||||
}
|
||||
}
|
||||
// fill rest with zero lengths
|
||||
while(i < NC) c_len[i++] = 0;
|
||||
|
||||
// build fast lookup for literal/length codes
|
||||
make_table(NC, c_len, 12, c_table);
|
||||
}
|
||||
}
|
||||
|
||||
/***** decode next literal/length symbol or end-of-block *****/
|
||||
uint32_t decode_c(void)
|
||||
{
|
||||
uint32_t j, mask;
|
||||
|
||||
// if starting a new block, read its header
|
||||
if(blocksize == 0)
|
||||
{
|
||||
blocksize = getbits(16); // block size = number of symbols
|
||||
if(blocksize == 0)
|
||||
{ // zero block → end of data
|
||||
decoded = 1;
|
||||
return 0;
|
||||
}
|
||||
// read three Huffman trees for this block:
|
||||
// 1) code-length codes for literal tree (NT,TBIT,3)
|
||||
read_pt_len(NT, TBIT, 3);
|
||||
// 2) literal/length tree lengths (CBIT)
|
||||
read_c_len();
|
||||
// 3) prefix-tree lengths for positions (NP,PBIT,-1)
|
||||
read_pt_len(NP, PBIT, -1);
|
||||
}
|
||||
|
||||
// consume one symbol from this block
|
||||
blocksize--;
|
||||
|
||||
// fast table lookup: top 12 bits
|
||||
j = c_table[bitbuf >> (BITBUFSIZ - 12)];
|
||||
if(j >= NC)
|
||||
{
|
||||
// need to walk tree if overflow
|
||||
mask = 1U << (BITBUFSIZ - 1 - 12);
|
||||
do {
|
||||
j = (bitbuf & mask) ? right[j] : left[j];
|
||||
mask >>= 1;
|
||||
} while(j >= NC);
|
||||
}
|
||||
|
||||
// remove j’s code length bits from bitbuf
|
||||
fillbuf(c_len[j]);
|
||||
return j;
|
||||
}
|
||||
|
||||
/***** decode match-position extra bits *****/
|
||||
uint32_t decode_p(void)
|
||||
{
|
||||
uint32_t j, mask;
|
||||
|
||||
// fast table lookup: top 8 bits
|
||||
j = pt_table[bitbuf >> (BITBUFSIZ - 8)];
|
||||
if(j >= NP)
|
||||
{
|
||||
// tree walk for long codes
|
||||
mask = 1U << (BITBUFSIZ - 1 - 8);
|
||||
do {
|
||||
j = (bitbuf & mask) ? right[j] : left[j];
|
||||
mask >>= 1;
|
||||
} while(j >= NP);
|
||||
}
|
||||
|
||||
// consume prefix bits
|
||||
fillbuf(pt_len[j]);
|
||||
|
||||
// if non-zero, read extra bits to form full position
|
||||
if(j != 0) j = (1U << (j - 1)) + getbits((int)(j - 1));
|
||||
|
||||
return j;
|
||||
}
|
||||
|
||||
/***** start a new Huffman decode session *****/
|
||||
void huf_decode_start(void)
|
||||
{
|
||||
init_getbits(); // reset bit buffer & subbitbuf state
|
||||
blocksize = 0; // force reading a fresh block header
|
||||
}
|
||||
129
zoo/io.c
Normal file
129
zoo/io.c
Normal file
@@ -0,0 +1,129 @@
|
||||
/*$Source: /usr/home/dhesi/zoo/RCS/io.c,v $*/
|
||||
/*$Id: io.c,v 1.14 91/07/09 01:39:54 dhesi Exp $*/
|
||||
/***********************************************************
|
||||
io.c -- input/output (modified for in-memory I/O)
|
||||
|
||||
Adapted from Haruhiko Okumura’s “ar” archiver.
|
||||
This version feeds compressed bytes from a memory buffer
|
||||
(via mem_getc()) and writes decompressed output to a buffer
|
||||
(via mem_putc()), eliminating FILE* dependencies.
|
||||
Modified for in-memory decompression by Natalia Portillo, 2025
|
||||
***********************************************************/
|
||||
|
||||
#include <limits.h> // Provides CHAR_BIT for bit-width operations
|
||||
|
||||
#include "ar.h" // Archive format constants (e.g., CODE_BIT, NC)
|
||||
#include "lh5.h" // Declarations for mem_getc(), mem_putc(), buffer state
|
||||
#include "lzh.h" // LZH algorithm constants (e.g., BITBUFSIZ, DICSIZ)
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// Global bit-I/O state
|
||||
//-----------------------------------------------------------------------------
|
||||
|
||||
uint16_t bitbuf; // Accumulates bits shifted in from the input stream
|
||||
int unpackable; // Unused in decompression here (was for encode error)
|
||||
// Byte counters (optional diagnostics; not used to gate decompression)
|
||||
size_t compsize; // Count of output bytes produced (for compression mode)
|
||||
size_t origsize; // Count of input bytes consumed (for CRC in file I/O)
|
||||
uint32_t subbitbuf; // Holds the last byte fetched; bits are consumed from here
|
||||
int bitcount; // How many valid bits remain in subbitbuf
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// fillbuf(n)
|
||||
// Shift the global bitbuf left by n bits, then read in n new bits
|
||||
// from the input buffer (in-memory) to replenish bitbuf.
|
||||
//-----------------------------------------------------------------------------
|
||||
void fillbuf(int n) /* Shift bitbuf n bits left, read n bits */
|
||||
{
|
||||
// Make room for n bits
|
||||
bitbuf <<= n;
|
||||
|
||||
// While we still need more bits than we have in subbitbuf...
|
||||
while(n > bitcount)
|
||||
{
|
||||
// Pull any remaining bits from subbitbuf into bitbuf
|
||||
bitbuf |= subbitbuf << (n -= bitcount);
|
||||
|
||||
// Fetch the next compressed byte from input memory
|
||||
{
|
||||
int c = mem_getc(); // read one byte or 0 at EOF
|
||||
subbitbuf = (c == EOF ? 0 : (uint8_t)c);
|
||||
}
|
||||
|
||||
// Reset bitcount: a full new byte is available
|
||||
bitcount = CHAR_BIT;
|
||||
}
|
||||
|
||||
// Finally, consume the last n bits from subbitbuf into bitbuf
|
||||
bitbuf |= subbitbuf >> (bitcount -= n);
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// getbits(n)
|
||||
// Return the next n bits from bitbuf (highest-order bits), then
|
||||
// call fillbuf(n) to replace them. Useful for reading variable-length codes.
|
||||
//-----------------------------------------------------------------------------
|
||||
uint32_t getbits(int n)
|
||||
{
|
||||
uint32_t x = bitbuf >> (BITBUFSIZ - n); // extract top n bits
|
||||
fillbuf(n); // replenish bitbuf for future reads
|
||||
return x;
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// putbits(n, x)
|
||||
// Write the lowest n bits of x into the output buffer, packing them
|
||||
// into bytes via subbitbuf/bitcount and sending full bytes out
|
||||
// with mem_putc(). Used by the encoder; kept here for completeness.
|
||||
//-----------------------------------------------------------------------------
|
||||
void putbits(int n, uint32_t x) /* Write rightmost n bits of x */
|
||||
{
|
||||
// If we have enough room in subbitbuf, just pack the bits
|
||||
if(n < bitcount) { subbitbuf |= x << (bitcount -= n); }
|
||||
else
|
||||
{
|
||||
// Output the first full byte when subbitbuf fills
|
||||
{
|
||||
int w = (int)(subbitbuf | (x >> (n -= bitcount)));
|
||||
mem_putc(w);
|
||||
compsize++; // increment output counter (for compression)
|
||||
}
|
||||
|
||||
// If remaining bits don't fill a full byte, stash them
|
||||
if(n < CHAR_BIT) { subbitbuf = x << (bitcount = CHAR_BIT - n); }
|
||||
else
|
||||
{
|
||||
// Otherwise, flush a second full byte
|
||||
{
|
||||
int w2 = (int)(x >> (n - CHAR_BIT));
|
||||
mem_putc(w2);
|
||||
compsize++;
|
||||
}
|
||||
// And stash any leftover bits beyond two bytes
|
||||
subbitbuf = x << (bitcount = 2 * CHAR_BIT - n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// init_getbits()
|
||||
// Reset the bit-reader state so that fillbuf() will load fresh bits
|
||||
// from the start of the input buffer.
|
||||
//-----------------------------------------------------------------------------
|
||||
void init_getbits()
|
||||
{
|
||||
bitbuf = 0; // clear accumulated bits
|
||||
subbitbuf = 0; // no pending byte
|
||||
bitcount = 0; // no bits available
|
||||
fillbuf(BITBUFSIZ); // pre-load the bit buffer fully
|
||||
}
|
||||
|
||||
//-----------------------------------------------------------------------------
|
||||
// init_putbits()
|
||||
// Reset the bit-writer state so subsequent putbits() calls start fresh.
|
||||
//-----------------------------------------------------------------------------
|
||||
void init_putbits()
|
||||
{
|
||||
bitcount = CHAR_BIT; // subbitbuf is empty but ready for CHAR_BIT bits
|
||||
subbitbuf = 0; // clear any leftover byte data
|
||||
}
|
||||
94
zoo/lh5.c
Normal file
94
zoo/lh5.c
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/* lh5.c
|
||||
*
|
||||
* In-memory I/O glue for LH5 decompression.
|
||||
* Implements mem_getc(), mem_putc(), and the top-level
|
||||
* lh5_decompress() entry point.
|
||||
*/
|
||||
|
||||
#include "lh5.h"
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include "../library.h"
|
||||
|
||||
/* Forward-declaration of the decompression driver in lzh.c */
|
||||
extern int lzh_decode(void);
|
||||
|
||||
/* Buffer I/O state (see lh5.h for externs) */
|
||||
const uint8_t *in_ptr;
|
||||
size_t in_left;
|
||||
uint8_t *out_ptr;
|
||||
size_t out_left;
|
||||
int mem_error;
|
||||
|
||||
/*
|
||||
* mem_getc(): return next compressed byte, or 0 when in_left==0.
|
||||
* Never sets mem_error on input underflow.
|
||||
*/
|
||||
int mem_getc(void)
|
||||
{
|
||||
if(in_left == 0) { return 0; /* mimic feof → subbitbuf = 0 */ }
|
||||
int c = *in_ptr++;
|
||||
in_left--;
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* mem_putc(): write one output byte, set mem_error on overflow.
|
||||
*/
|
||||
int mem_putc(int c)
|
||||
{
|
||||
if(out_left == 0)
|
||||
{
|
||||
mem_error = 1;
|
||||
return EOF;
|
||||
}
|
||||
*out_ptr++ = (uint8_t)c;
|
||||
out_left--;
|
||||
return c;
|
||||
}
|
||||
|
||||
/*
|
||||
* Top-level in-memory decompressor.
|
||||
*
|
||||
* in_buf points to 'in_len' bytes of compressed data.
|
||||
* out_buf must have at least *out_len bytes available.
|
||||
* On return *out_len is set to the number of bytes written.
|
||||
*
|
||||
* Returns 0 on success
|
||||
* -1 on error (bad stream or output too small)
|
||||
*/
|
||||
/*
 * Decompress an LH5 stream held entirely in memory.
 *
 *   in_buf   compressed data (may be NULL only when in_len is 0)
 *   in_len   number of compressed bytes
 *   out_buf  destination buffer (may be NULL only when *out_len is 0)
 *   out_len  in: capacity of out_buf; out: bytes written (updated on success only)
 *
 * Returns 0 on success, -1 on invalid arguments, on a decoding error or when
 * the output buffer is too small.
 *
 * The I/O state lives in file-scope variables, so concurrent calls are not
 * supported.
 */
AARU_EXPORT int AARU_CALL lh5_decompress(const uint8_t *in_buf, size_t in_len, uint8_t *out_buf, size_t *out_len)
{
    /* Reject arguments that would be dereferenced as NULL further down */
    if(out_len == NULL) return -1;
    if(in_buf == NULL && in_len != 0) return -1;
    if(out_buf == NULL && *out_len != 0) return -1;

    /* Initialize buffer pointers and error flag */
    in_ptr    = in_buf;
    in_left   = in_len;
    out_ptr   = out_buf;
    out_left  = *out_len;
    mem_error = 0;

    /* Invoke the core LH5 decode routine (buffer-based) */
    if(lzh_decode() != 0 || mem_error) return -1;

    /* Compute actual output size */
    *out_len = (size_t)(out_ptr - out_buf);
    return 0;
}
|
||||
54
zoo/lh5.h
Normal file
54
zoo/lh5.h
Normal file
@@ -0,0 +1,54 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU Lesser General Public License as
|
||||
* published by the Free Software Foundation; either version 2.1 of the
|
||||
* License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful, but
|
||||
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
/* lh5.h
|
||||
*
|
||||
* In-memory I/O glue for LH5 decompression.
|
||||
* Defines the mem_getc()/mem_putc() buffer readers/writers
|
||||
* and declares the lh5_decompress() entry point.
|
||||
*/
|
||||
#ifndef AARU_COMPRESSION_NATIVE_LH5_H
|
||||
#define AARU_COMPRESSION_NATIVE_LH5_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* --------------------------------------------------------------------------
|
||||
* State for in-memory I/O
|
||||
* --------------------------------------------------------------------------
|
||||
* in_ptr/in_left: where to read next compressed byte
|
||||
* out_ptr/out_left: where to write next decompressed byte
|
||||
* mem_error: set to 1 on output overflow (input underflow is not an error)
|
||||
*/
|
||||
extern const uint8_t *in_ptr;
|
||||
extern size_t in_left;
|
||||
extern uint8_t *out_ptr;
|
||||
extern size_t out_left;
|
||||
extern int mem_error;
|
||||
|
||||
/* --------------------------------------------------------------------------
|
||||
* Fetch one byte from in_buf; returns 0 (not EOF) once the input is exhausted
|
||||
* -------------------------------------------------------------------------- */
|
||||
int mem_getc(void);
|
||||
|
||||
/* --------------------------------------------------------------------------
|
||||
* Write one byte into out_buf; returns c or EOF on overflow
|
||||
* -------------------------------------------------------------------------- */
|
||||
int mem_putc(int c);
|
||||
|
||||
#endif // AARU_COMPRESSION_NATIVE_LH5_H
|
||||
63
zoo/lzd.c
63
zoo/lzd.c
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* This file is part of the Aaru Data Preservation Suite.
|
||||
* Copyright (c) 2019-2025 Natalia Portillo.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or modify
|
||||
@@ -19,73 +19,83 @@
|
||||
// Lempel-Ziv-Davis compression implementation based on the public domain code from
|
||||
// Rahul Dhesi from zoo
|
||||
|
||||
#include "lzd.h"
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "lzd.h"
|
||||
#include "../library.h"
|
||||
|
||||
// Reset the dictionary to its initial state
|
||||
// Reset the code width and the dictionary allocation state to their initial values.
static void init_dict(LZDContext *ctx)
{
    ctx->nbits     = 9;          // start with 9-bit codes
    ctx->max_code  = 1u << 9;    // maximum code value for current nbits
    ctx->free_code = FIRST_FREE; // next free dictionary slot
    ctx->have_old  = 0;          // no "previous code" yet
}
|
||||
|
||||
// Follow the head[] chain until you get the first literal byte of a code
|
||||
// Follow the head[] chain of `code` down to its first literal byte.
// Returns that byte (0-255), or -1 when the chain leaves the valid code
// range or does not terminate within MAXMAX steps.
static int firstch(LZDContext *ctx, int code)
{
    int steps = 0;
    while(code > 255)
    {                                          // follow links until you hit a literal (0-255)
        if((unsigned)code > MAXMAX) return -1; // invalid code range
        if(++steps > (int)MAXMAX) return -1;   // prevent infinite loop
        code = ctx->head[code];
    }
    return code;
}
|
||||
|
||||
// Ensure there are at least nbits available in the bit buffer
|
||||
static int fill_bits(LZDContext *ctx)
|
||||
{
|
||||
while(ctx->bitcount < (int)ctx->nbits)
|
||||
{
|
||||
if(ctx->in_pos >= ctx->in_len) return -1;
|
||||
if(ctx->in_pos >= ctx->in_len) return -1; // no more input available
|
||||
// pull a byte from the input stream into bitbuf at current position
|
||||
ctx->bitbuf |= (uint64_t)ctx->in_ptr[ctx->in_pos++] << ctx->bitcount;
|
||||
ctx->bitcount += 8;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Read the next code of nbits from the bit buffer
|
||||
static int read_code(LZDContext *ctx)
|
||||
{
|
||||
if(fill_bits(ctx) < 0) return -1;
|
||||
int code = (int)(ctx->bitbuf & masks[ctx->nbits]);
|
||||
ctx->bitbuf >>= ctx->nbits;
|
||||
if(fill_bits(ctx) < 0) return -1; // top up bits if needed
|
||||
int code = (int)(ctx->bitbuf & masks[ctx->nbits]); // mask off the low nbits
|
||||
ctx->bitbuf >>= ctx->nbits; // consume bits
|
||||
ctx->bitcount -= ctx->nbits;
|
||||
return code;
|
||||
}
|
||||
|
||||
// Initialise a decompression context: allocate tables, set up initial dictionary
|
||||
LZDStatus LZD_Init(LZDContext *ctx)
|
||||
{
|
||||
memset(ctx, 0, sizeof *ctx);
|
||||
// allocate head, tail and stack arrays to hold dictionary links and output stack
|
||||
ctx->head = malloc((MAXMAX + 1) * sizeof *ctx->head);
|
||||
ctx->tail = malloc((MAXMAX + 1) * sizeof *ctx->tail);
|
||||
ctx->stack = malloc((MAXMAX + 1) * sizeof *ctx->stack);
|
||||
if(!ctx->head || !ctx->tail || !ctx->stack) return LZD_NEED_INPUT;
|
||||
|
||||
// initialise first 256 dictionary entries to literal bytes
|
||||
for(int i = 0; i < 256; i++)
|
||||
{
|
||||
ctx->head[i] = -1;
|
||||
ctx->tail[i] = (uint8_t)i;
|
||||
}
|
||||
// set stack pointers to empty
|
||||
ctx->stack_lim = ctx->stack + (MAXMAX + 1);
|
||||
ctx->stack_ptr = ctx->stack_lim;
|
||||
init_dict(ctx);
|
||||
init_dict(ctx); // reset code size/free_code
|
||||
ctx->bitbuf = 0;
|
||||
ctx->bitcount = 0;
|
||||
return LZD_OK;
|
||||
}
|
||||
|
||||
// Point the context at a new input buffer
|
||||
LZDStatus LZD_Feed(LZDContext *ctx, const unsigned char *in, size_t in_len)
|
||||
{
|
||||
ctx->in_ptr = in;
|
||||
@@ -94,26 +104,31 @@ LZDStatus LZD_Feed(LZDContext *ctx, const unsigned char *in, size_t in_len)
|
||||
return LZD_OK;
|
||||
}
|
||||
|
||||
// Pull decompressed bytes into `out` up to out_len or until input is exhausted
|
||||
LZDStatus LZD_Drain(LZDContext *ctx, unsigned char *out, size_t out_len, size_t *out_produced)
|
||||
{
|
||||
size_t outpos = 0;
|
||||
|
||||
while(outpos < out_len)
|
||||
{
|
||||
// If there are bytes on the stack (from expanding a code), emit them first
|
||||
if(ctx->stack_ptr < ctx->stack_lim)
|
||||
{
|
||||
out[outpos++] = (uint8_t)*ctx->stack_ptr++;
|
||||
continue;
|
||||
}
|
||||
|
||||
// Otherwise, read the next code from the bitstream
|
||||
int raw = read_code(ctx);
|
||||
if(raw < 0)
|
||||
{
|
||||
*out_produced = outpos;
|
||||
// If we emitted something, signal OK; otherwise tell caller we need more input
|
||||
return outpos > 0 ? LZD_OK : LZD_NEED_INPUT;
|
||||
}
|
||||
unsigned code = (unsigned)raw;
|
||||
|
||||
// Special code: CLEAR – reset the dictionary and read a fresh literal
|
||||
if(code == CLEAR)
|
||||
{
|
||||
init_dict(ctx);
|
||||
@@ -128,6 +143,7 @@ LZDStatus LZD_Drain(LZDContext *ctx, unsigned char *out, size_t out_len, size_t
|
||||
out[outpos++] = (uint8_t)lit;
|
||||
continue;
|
||||
}
|
||||
// Special code: Z_EOF – end of compressed stream
|
||||
if(code == Z_EOF)
|
||||
{
|
||||
*out_produced = outpos;
|
||||
@@ -135,15 +151,17 @@ LZDStatus LZD_Drain(LZDContext *ctx, unsigned char *out, size_t out_len, size_t
|
||||
}
|
||||
|
||||
unsigned in_code = code;
|
||||
// Handle KwKwK case: code not yet in the dictionary
|
||||
if(code >= ctx->free_code)
|
||||
{
|
||||
if(!ctx->have_old) return LZD_DONE;
|
||||
int fc = firstch(ctx, ctx->old_code);
|
||||
int fc = firstch(ctx, ctx->old_code); // get first character of previous code
|
||||
if(fc < 0) return LZD_DONE;
|
||||
*--ctx->stack_ptr = (char)fc;
|
||||
code = ctx->old_code;
|
||||
}
|
||||
|
||||
// Walk backwards through dictionary, pushing bytes onto the stack
|
||||
while(code > 255)
|
||||
{
|
||||
*--ctx->stack_ptr = (char)ctx->tail[code];
|
||||
@@ -152,11 +170,13 @@ LZDStatus LZD_Drain(LZDContext *ctx, unsigned char *out, size_t out_len, size_t
|
||||
uint8_t first_byte = (uint8_t)code;
|
||||
*--ctx->stack_ptr = (char)first_byte;
|
||||
|
||||
// Add new sequence to dictionary if we have a valid previous code
|
||||
if(ctx->have_old && ctx->free_code <= MAXMAX)
|
||||
{
|
||||
ctx->tail[ctx->free_code] = first_byte;
|
||||
ctx->head[ctx->free_code] = (int)ctx->old_code;
|
||||
ctx->free_code++;
|
||||
// Increase code width when table fills up to current max_code
|
||||
if(ctx->free_code >= ctx->max_code && ctx->nbits < MAXBITS)
|
||||
{
|
||||
ctx->nbits++;
|
||||
@@ -171,6 +191,7 @@ LZDStatus LZD_Drain(LZDContext *ctx, unsigned char *out, size_t out_len, size_t
|
||||
return LZD_OK;
|
||||
}
|
||||
|
||||
// Free dynamic allocations inside an LZDContext
|
||||
void LZD_Destroy(LZDContext *ctx)
|
||||
{
|
||||
if(!ctx) return;
|
||||
@@ -179,12 +200,14 @@ void LZD_Destroy(LZDContext *ctx)
|
||||
free(ctx->stack);
|
||||
}
|
||||
|
||||
// Public API: allocate+initialise a new context
|
||||
AARU_EXPORT void AARU_CALL *CreateLZDContext(void)
|
||||
{
|
||||
LZDContext *c = malloc(sizeof *c);
|
||||
return c && LZD_Init(c) == LZD_OK ? c : (free(c), NULL);
|
||||
}
|
||||
|
||||
// Public API: destroy and free a context
|
||||
AARU_EXPORT void AARU_CALL DestroyLZDContext(void *ctx)
|
||||
{
|
||||
if(ctx)
|
||||
@@ -194,12 +217,14 @@ AARU_EXPORT void AARU_CALL DestroyLZDContext(void *ctx)
|
||||
}
|
||||
}
|
||||
|
||||
// Public API wrapper to feed new compressed data
|
||||
AARU_EXPORT int AARU_CALL LZD_FeedNative(void *ctx, const unsigned char *data, size_t length)
|
||||
{
|
||||
return (int)LZD_Feed(ctx, data, length);
|
||||
return LZD_Feed(ctx, data, length);
|
||||
}
|
||||
|
||||
// Public API wrapper to drain decompressed data
|
||||
AARU_EXPORT int AARU_CALL LZD_DrainNative(void *ctx, unsigned char *outBuf, size_t outBufLen, size_t *produced)
|
||||
{
|
||||
return (int)LZD_Drain(ctx, outBuf, outBufLen, produced);
|
||||
}
|
||||
return LZD_Drain(ctx, outBuf, outBufLen, produced);
|
||||
}
|
||||
31
zoo/lzh.c
Normal file
31
zoo/lzh.c
Normal file
@@ -0,0 +1,31 @@
|
||||
/* $Id: lzh.c,v 1.15 91/07/06 19:18:51 dhesi Exp $ */
|
||||
// Modified for in-memory decompression by Natalia Portillo, 2025
|
||||
|
||||
#include "lzh.h" /* prototypes for encode(), lzh_decode() */
|
||||
#include <stdint.h>
|
||||
#include "ar.h"
|
||||
#include "lh5.h" /* mem_getc(), mem_putc(), in_ptr/in_left, out_ptr/out_left */
|
||||
|
||||
extern int decoded; /* from huf.c */
|
||||
|
||||
/*
|
||||
* lzh_decode now reads from in_buf/in_len and writes into out_buf/out_len
|
||||
* entirely in memory, via mem_getc()/mem_putc().
|
||||
*/
|
||||
int lzh_decode(void)
|
||||
{
|
||||
int n, i;
|
||||
uint8_t buffer[DICSIZ];
|
||||
|
||||
/* Initialize the Huffman bit reader and sliding-window state */
|
||||
decode_start();
|
||||
|
||||
/* Decode blocks of up to DICSIZ bytes until end‐of‐stream */
|
||||
while(!decoded)
|
||||
{
|
||||
n = decode(DICSIZ, buffer);
|
||||
for(i = 0; i < n; i++) { mem_putc(buffer[i]); }
|
||||
}
|
||||
|
||||
return mem_error ? -1 : 0;
|
||||
}
|
||||
31
zoo/lzh.h
Normal file
31
zoo/lzh.h
Normal file
@@ -0,0 +1,31 @@
|
||||
/*$Source: /usr/home/dhesi/zoo/RCS/lzh.h,v $*/
|
||||
/*$Id: lzh.h,v 1.3 91/07/09 01:39:23 dhesi Exp $*/
|
||||
|
||||
/*
|
||||
Adapted from "ar" archiver written by Haruhiko Okumura.
|
||||
*/
|
||||
|
||||
// Modified for in-memory decompression by Natalia Portillo, 2025
|
||||
|
||||
/* io.c */
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
extern uint16_t bitbuf;
|
||||
#define BITBUFSIZ (CHAR_BIT * sizeof bitbuf)
|
||||
|
||||
/* encode.c and decode.c */
|
||||
|
||||
#define MATCHBIT 8 /* bits for MAXMATCH - THRESHOLD */
|
||||
#define MAXMATCH 256 /* formerly F (not more than UCHAR_MAX + 1) */
|
||||
#define THRESHOLD 3 /* choose optimal value */
|
||||
#define PERC_FLAG ((unsigned)0x8000)
|
||||
|
||||
/* huf.c */
|
||||
|
||||
#define NC (UCHAR_MAX + MAXMATCH + 2 - THRESHOLD)
|
||||
/* alphabet = {0, 1, 2, ..., NC - 1} */
|
||||
#define CBIT 9 /* $\lfloor \log_2 NC \rfloor + 1$ */
|
||||
#define CODE_BIT 16 /* codeword length */
|
||||
|
||||
extern uint16_t left[], right[];
|
||||
130
zoo/maketbl.c
Normal file
130
zoo/maketbl.c
Normal file
@@ -0,0 +1,130 @@
|
||||
/*$Source: /usr/home/dhesi/zoo/RCS/maketbl.c,v $*/
|
||||
/*$Id: maketbl.c,v 1.8 91/07/09 01:39:52 dhesi Exp $*/
|
||||
/***********************************************************
|
||||
maketbl.c -- make table for decoding
|
||||
|
||||
Builds a fast lookup table + fallback tree for Huffman
|
||||
codes given code lengths. Used by decode_c() to map
|
||||
input bit patterns to symbols efficiently.
|
||||
|
||||
Adapted from Haruhiko Okumura’s “ar” archiver.
|
||||
Modified for in-memory decompression by Natalia Portillo, 2025
|
||||
***********************************************************/
|
||||
|
||||
#include <stdio.h>
|
||||
#include "ar.h" // provides NC, CODE_BIT, etc.
|
||||
#include "lzh.h" // provides BITBUFSIZ
|
||||
|
||||
/*
|
||||
* make_table(nchar, bitlen, tablebits, table):
|
||||
*
|
||||
* nchar = number of symbols
|
||||
* bitlen[] = array of code lengths for each symbol [0..nchar-1]
|
||||
* tablebits = number of bits for fast direct lookup
|
||||
* table[] = output table of size (1<<tablebits), entries are:
|
||||
* - symbol index if code length ≤ tablebits
|
||||
* - zero or tree node index to follow for longer codes
|
||||
*
|
||||
* Algorithm steps:
|
||||
* 1) Count how many codes of each length (count[1..16]).
|
||||
* 2) Compute 'start' offsets for each length in a 16-bit code space.
|
||||
* 3) Normalize starts to 'tablebits' prefix domain, build 'weight'.
|
||||
* 4) Fill direct-mapped entries for short codes.
|
||||
* 5) Build binary tree (using left[]/right[]) for codes longer than tablebits.
|
||||
*/
|
||||
void make_table(int nchar, uint8_t *bitlen, int tablebits, uint16_t *table)
|
||||
{
|
||||
uint16_t count[17]; // count[L] = number of symbols with length L
|
||||
uint16_t weight[17]; // weight[L] = step size in prefix domain for length L
|
||||
uint16_t start[18]; // start[L] = base code for length L in 16-bit space
|
||||
uint16_t *p; // pointer into 'table' or tree
|
||||
uint32_t i, k, len, ch;
|
||||
uint32_t jutbits; // bits to drop when mapping into tablebits
|
||||
uint32_t avail; // next free node index for left[]/right[] tree
|
||||
uint32_t nextcode; // end-of-range code for current length
|
||||
uint32_t mask; // bitmask for tree insertion
|
||||
|
||||
// 1) Zero counts, then tally code-lengths
|
||||
for(i = 1; i <= 16; i++) count[i] = 0;
|
||||
for(i = 0; i < (uint32_t)nchar; i++) count[bitlen[i]]++;
|
||||
|
||||
// 2) Compute cumulative start positions in the 16-bit code space
|
||||
start[1] = 0;
|
||||
for(i = 1; i <= 16; i++) start[i + 1] = start[i] + (count[i] << (16 - i));
|
||||
|
||||
// Validate: sum of all codes must fill 16-bit range
|
||||
if(start[17] != (uint16_t)(1U << 16)) fprintf(stderr, "make_table: Bad decode table\n");
|
||||
|
||||
// Prepare for mapping into tablebits-bit table
|
||||
jutbits = 16 - tablebits;
|
||||
for(i = 1; i <= (uint32_t)tablebits; i++)
|
||||
{
|
||||
// Shrink start[i] into prefix domain
|
||||
start[i] >>= jutbits;
|
||||
// Weight = 2^(tablebits - i)
|
||||
weight[i] = (uint16_t)(1U << (tablebits - i));
|
||||
}
|
||||
// For lengths > tablebits, weight = 2^(16 - length)
|
||||
for(; i <= 16; i++) weight[i] = (uint16_t)(1U << (16 - i));
|
||||
|
||||
// 3) Clear any unused table slots between last short code and end
|
||||
i = start[tablebits + 1] >> jutbits;
|
||||
if(i != (uint16_t)(1U << tablebits))
|
||||
{
|
||||
k = 1U << tablebits;
|
||||
while(i < k) table[i++] = 0;
|
||||
}
|
||||
|
||||
// Initialize tree node index after the direct table entries
|
||||
avail = nchar;
|
||||
// Mask for inspecting bits when building tree
|
||||
mask = 1U << (15 - tablebits);
|
||||
|
||||
// 4) For each symbol, place its codes in table or tree
|
||||
for(ch = 0; ch < (uint32_t)nchar; ch++)
|
||||
{
|
||||
len = bitlen[ch];
|
||||
if(len == 0) continue; // skip symbols with no code
|
||||
|
||||
// Next code range = [start[len], start[len]+weight[len])
|
||||
nextcode = start[len] + weight[len];
|
||||
|
||||
if(len <= tablebits)
|
||||
{
|
||||
// Direct mapping: fill all table slots in this range
|
||||
for(k = start[len]; k < nextcode; k++) table[k] = (uint16_t)ch;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Build or extend tree for longer codes
|
||||
// Start at table index for this prefix
|
||||
k = start[len];
|
||||
p = &table[k >> jutbits];
|
||||
// Number of extra bits beyond tablebits
|
||||
uint32_t extra = len - tablebits;
|
||||
|
||||
// Walk/construct tree nodes bit by bit
|
||||
while(extra-- > 0)
|
||||
{
|
||||
if(*p == 0)
|
||||
{
|
||||
// allocate a new node for left[]/right[]
|
||||
left[avail] = right[avail] = 0;
|
||||
*p = (uint16_t)avail++;
|
||||
}
|
||||
// branch left or right based on current code bit
|
||||
if(k & mask)
|
||||
p = &right[*p];
|
||||
else
|
||||
p = &left[*p];
|
||||
|
||||
// shift to next bit in code
|
||||
k <<= 1;
|
||||
}
|
||||
// At leaf: assign symbol
|
||||
*p = (uint16_t)ch;
|
||||
}
|
||||
// Advance start[len] for next code of same length
|
||||
start[len] = nextcode;
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user