mirror of
https://github.com/aaru-dps/Aaru.Compression.Native.git
synced 2025-12-16 11:14:30 +00:00
272 lines
7.7 KiB
C
272 lines
7.7 KiB
C
/*
|
|
* This file is part of the Aaru Data Preservation Suite.
|
|
* Copyright (c) 2019-2025 Natalia Portillo.
|
|
* Copyright © 2018-2019 David Ryskalczyk
|
|
*
|
|
* This library is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License as
|
|
* published by the Free Software Foundation; either version 2.1 of the
|
|
* License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include "../library.h"
|
|
|
|
#define CRBITS 12 // Max bits for crunching.
|
|
#define SQBITS 13 // Max bits for squashing.
|
|
#define INIT_BITS 9 // Initial number of bits per code.
|
|
#define MAXCODE(n) ((1 << (n)) - 1) // Macro to calculate max code for n bits.
|
|
#define FIRST 257 // First available code.
|
|
#define CLEAR 256 // Code to clear the dictionary.
|
|
|
|
// LZW decompression state variables.
|
|
static int Bits;
|
|
static int max_maxcode;
|
|
static int n_bits;
|
|
static int maxcode;
|
|
static int clear_flg;
|
|
static int free_ent;
|
|
static unsigned short *prefix;
|
|
static unsigned char *suffix;
|
|
static unsigned char *stack;
|
|
|
|
// Buffer management variables.
|
|
static const unsigned char *in_buf_ptr;
|
|
static size_t in_len_rem;
|
|
static int offset;
|
|
static char buf[SQBITS];
|
|
|
|
// Reads a variable-length code from the input buffer.
|
|
static int getcode()
|
|
{
|
|
int code;
|
|
static int size = 0;
|
|
int r_off, bits;
|
|
unsigned char *bp = (unsigned char *)buf;
|
|
|
|
// Check if we need to increase code size or handle a clear flag.
|
|
if(clear_flg > 0 || offset >= size || free_ent > maxcode)
|
|
{
|
|
if(free_ent > maxcode)
|
|
{
|
|
n_bits++;
|
|
if(n_bits == Bits)
|
|
maxcode = max_maxcode;
|
|
else
|
|
maxcode = MAXCODE(n_bits);
|
|
}
|
|
if(clear_flg > 0)
|
|
{
|
|
maxcode = MAXCODE(n_bits = INIT_BITS);
|
|
clear_flg = 0;
|
|
}
|
|
// Read n_bits bytes into the buffer.
|
|
for(size = 0; size < n_bits; size++)
|
|
{
|
|
if(in_len_rem == 0)
|
|
{
|
|
code = -1;
|
|
break;
|
|
}
|
|
code = *in_buf_ptr++;
|
|
in_len_rem--;
|
|
buf[size] = (char)code;
|
|
}
|
|
if(size <= 0) return -1; // End of file.
|
|
|
|
offset = 0;
|
|
size = (size << 3) - (n_bits - 1);
|
|
}
|
|
r_off = offset;
|
|
bits = n_bits;
|
|
|
|
// Extract the code from the buffer.
|
|
bp += (r_off >> 3);
|
|
r_off &= 7;
|
|
|
|
code = (*bp++ >> r_off);
|
|
bits -= 8 - r_off;
|
|
r_off = 8 - r_off;
|
|
|
|
if(bits >= 8)
|
|
{
|
|
code |= *bp++ << r_off;
|
|
r_off += 8;
|
|
bits -= 8;
|
|
}
|
|
code |= (*bp & ((1 << bits) - 1)) << r_off;
|
|
offset += n_bits;
|
|
|
|
return code;
|
|
}
|
|
|
|
// Main LZW decompression logic.
|
|
static int arc_decompress_lzw(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len,
|
|
int squash)
|
|
{
|
|
// Basic validation of pointers.
|
|
if(!in_buf || !out_buf || !out_len) { return -1; }
|
|
|
|
// Initialize buffer pointers and lengths.
|
|
in_buf_ptr = in_buf;
|
|
in_len_rem = in_len;
|
|
|
|
// Set parameters based on whether we're unsquashing or uncrushing.
|
|
if(squash) { Bits = SQBITS; }
|
|
else
|
|
{
|
|
Bits = CRBITS;
|
|
if(in_len_rem > 0)
|
|
{
|
|
// Crunch format has a header byte indicating max bits.
|
|
if(*in_buf_ptr != CRBITS) return -1;
|
|
in_buf_ptr++;
|
|
in_len_rem--;
|
|
}
|
|
}
|
|
|
|
if(in_len_rem <= 0)
|
|
{
|
|
*out_len = 0;
|
|
return 0;
|
|
}
|
|
|
|
// Initialize LZW parameters.
|
|
max_maxcode = 1 << Bits;
|
|
clear_flg = 0;
|
|
n_bits = INIT_BITS;
|
|
maxcode = MAXCODE(n_bits);
|
|
|
|
// Allocate memory for LZW tables.
|
|
prefix = (unsigned short *)malloc(max_maxcode * sizeof(unsigned short));
|
|
suffix = (unsigned char *)malloc(max_maxcode * sizeof(unsigned char));
|
|
stack = (unsigned char *)malloc(max_maxcode * sizeof(unsigned char));
|
|
|
|
if(!prefix || !suffix || !stack)
|
|
{
|
|
if(prefix) free(prefix);
|
|
if(suffix) free(suffix);
|
|
if(stack) free(stack);
|
|
return -1;
|
|
}
|
|
|
|
// Initialize the first 256 entries of the dictionary.
|
|
memset(prefix, 0, 256 * sizeof(unsigned short));
|
|
for(int code = 255; code >= 0; code--) { suffix[code] = (unsigned char)code; }
|
|
|
|
free_ent = FIRST;
|
|
offset = 0;
|
|
|
|
// Main decompression loop.
|
|
int finchar, oldcode, incode;
|
|
finchar = oldcode = getcode();
|
|
if(oldcode == -1)
|
|
{
|
|
*out_len = 0;
|
|
free(prefix);
|
|
free(suffix);
|
|
free(stack);
|
|
return 0;
|
|
}
|
|
|
|
size_t out_pos = 0;
|
|
if(out_pos < *out_len) { out_buf[out_pos++] = finchar; }
|
|
|
|
unsigned char *stackp = stack;
|
|
int code;
|
|
while((code = getcode()) > -1)
|
|
{
|
|
if(code == CLEAR)
|
|
{
|
|
// Clear the dictionary.
|
|
memset(prefix, 0, 256 * sizeof(unsigned short));
|
|
clear_flg = 1;
|
|
free_ent = FIRST - 1;
|
|
if((code = getcode()) == -1) break;
|
|
}
|
|
incode = code;
|
|
// Handle KwKwK case.
|
|
if(code >= free_ent)
|
|
{
|
|
if(code > free_ent)
|
|
{
|
|
// Error: invalid code.
|
|
break;
|
|
}
|
|
*stackp++ = finchar;
|
|
code = oldcode;
|
|
}
|
|
// Decode the string by traversing the dictionary.
|
|
while(code >= 256)
|
|
{
|
|
*stackp++ = suffix[code];
|
|
code = prefix[code];
|
|
}
|
|
*stackp++ = finchar = suffix[code];
|
|
|
|
// Write the decoded string to the output buffer.
|
|
do {
|
|
if(out_pos < *out_len) { out_buf[out_pos++] = *--stackp; }
|
|
else
|
|
{
|
|
stackp--; // Discard if output buffer is full.
|
|
}
|
|
} while(stackp > stack);
|
|
|
|
// Add the new string to the dictionary.
|
|
if((code = free_ent) < max_maxcode)
|
|
{
|
|
prefix[code] = (unsigned short)oldcode;
|
|
suffix[code] = finchar;
|
|
free_ent = code + 1;
|
|
}
|
|
oldcode = incode;
|
|
}
|
|
|
|
// Clean up and return.
|
|
*out_len = out_pos;
|
|
free(prefix);
|
|
free(suffix);
|
|
free(stack);
|
|
return 0;
|
|
}
|
|
|
|
// Decompresses squashed data.
|
|
AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
|
size_t *out_len)
|
|
{
|
|
return arc_decompress_lzw(in_buf, in_len, out_buf, out_len, 1);
|
|
}
|
|
|
|
// Decompresses crunched data.
|
|
AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
|
|
unsigned char *out_buf, size_t *out_len)
|
|
{
|
|
// Allocate a temporary buffer.
|
|
size_t temp_len = *out_len * 2; // Heuristic.
|
|
unsigned char *temp_buf = malloc(temp_len);
|
|
if(!temp_buf) return -1;
|
|
|
|
// Decompress crunched data.
|
|
int result = arc_decompress_lzw(in_buf, in_len, temp_buf, &temp_len, 0);
|
|
if(result == 0)
|
|
{
|
|
// Decompress non-repeat packing.
|
|
result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
|
|
}
|
|
|
|
free(temp_buf);
|
|
return result;
|
|
}
|