mirror of
https://github.com/aaru-dps/Aaru.Compression.Native.git
synced 2025-12-16 19:24:31 +00:00
Add PAK methods 10 (crush) and 11 (distill).
This commit is contained in:
166
pak/bitstream.c
Normal file
166
pak/bitstream.c
Normal file
@@ -0,0 +1,166 @@
|
||||
/*
|
||||
* bitstream.c - Bit stream input implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "bitstream.h"
|
||||
|
||||
/*
 * Attach a bit stream to an in-memory buffer.
 *
 * bs     - stream state to (re)initialise
 * data   - input bytes; the pointer is stored, the bytes are not copied
 * length - number of readable bytes at data
 *
 * Reading starts at byte 0 with no bits buffered and the EOF flag cleared.
 */
void bitstream_init(BitStream *bs, const uint8_t *data, size_t length)
{
    /* Input window. */
    bs->pos    = 0;
    bs->length = length;
    bs->data   = data;

    /* Bit-level state: nothing buffered yet, end of input not seen. */
    bs->eof       = false;
    bs->bitcount  = 0;
    bs->bitbuffer = 0;
}
|
||||
|
||||
/*
 * Top up the MSB-first bit buffer from the input.
 *
 * Whole bytes are appended directly below the bits already buffered until at
 * least 24 bits are held or the input runs out. If the input is exhausted and
 * no bits are buffered at all, the EOF flag is raised.
 */
static void bitstream_fill_buffer(BitStream *bs)
{
    for(;;)
    {
        if(bs->bitcount >= 24) break;
        if(bs->pos >= bs->length) break;

        uint32_t next = bs->data[bs->pos];
        bs->bitbuffer |= next << (24 - bs->bitcount);
        bs->pos += 1;
        bs->bitcount += 8;
    }

    if(bs->bitcount == 0 && bs->pos >= bs->length) bs->eof = true;
}
|
||||
|
||||
/*
 * Read one bit, most significant bit of each byte first.
 *
 * Returns the bit (0 or 1). Once the input is exhausted the EOF flag is set
 * and 0 is returned for this and every later call.
 */
uint32_t bitstream_read_bit(BitStream *bs)
{
    if(bs->eof) return 0;

    if(bs->bitcount == 0)
    {
        bitstream_fill_buffer(bs);

        /* Still empty after a refill attempt: there is no more input. */
        if(bs->bitcount == 0)
        {
            bs->eof = true;
            return 0;
        }
    }

    /* The next bit to deliver always sits in bit 31 of the buffer. */
    uint32_t top = bs->bitbuffer >> 31;
    bs->bitcount -= 1;
    bs->bitbuffer <<= 1;

    return top;
}
|
||||
|
||||
/*
 * Read one bit, least significant bit of each byte first.
 *
 * The buffer holds at most one byte here; a fresh byte is loaded whenever the
 * previous one has been fully consumed. Returns the bit (0 or 1). When no
 * input is left the EOF flag is set and 0 is returned.
 */
uint32_t bitstream_read_bit_le(BitStream *bs)
{
    if(bs->eof) return 0;

    if(bs->bitcount == 0)
    {
        if(bs->pos >= bs->length)
        {
            bs->eof = true;
            return 0;
        }

        /* Load the next input byte; its bit 0 is delivered first. */
        bs->bitbuffer = bs->data[bs->pos];
        bs->pos++;
        bs->bitcount = 8;
    }

    const uint32_t low = bs->bitbuffer & 1;
    bs->bitbuffer >>= 1;
    --bs->bitcount;

    return low;
}
|
||||
|
||||
/*
 * Read `count` bits MSB-first and return them as an integer whose most
 * significant bit is the first bit read. A non-positive count reads nothing
 * and returns 0. Bits past the end of input read as 0.
 */
uint32_t bitstream_read_bits(BitStream *bs, int count)
{
    uint32_t value = 0;

    for(int left = count; left > 0; left--)
    {
        value <<= 1;
        value |= bitstream_read_bit(bs);
    }

    return value;
}
|
||||
|
||||
/*
 * Read `count` bits LSB-first and return them as an integer whose bit 0 is
 * the first bit read. A non-positive count reads nothing and returns 0.
 * Bits past the end of input read as 0.
 *
 * The result only has room for 32 bits. If more are requested, the extra
 * bits are still consumed from the stream but are not stored: shifting a
 * 32-bit value by 32 or more is undefined behaviour in C, and callers may
 * pass a count taken straight from untrusted input.
 */
uint32_t bitstream_read_bits_le(BitStream *bs, int count)
{
    uint32_t result = 0;

    for(int i = 0; i < count; i++)
    {
        uint32_t bit = bitstream_read_bit_le(bs);
        if(i < 32) { result |= bit << i; }
    }

    return result;
}
|
||||
|
||||
/*
 * Return the next `count` bits (MSB-first) without consuming them.
 *
 * Implemented as snapshot / read / rollback: every field a read can modify
 * (position, bit buffer, bit count, EOF flag) is saved and put back.
 */
uint32_t bitstream_peek_bits(BitStream *bs, int count)
{
    /* Snapshot the mutable reader state. */
    const size_t   pos_before      = bs->pos;
    const uint32_t buffer_before   = bs->bitbuffer;
    const int      bitcount_before = bs->bitcount;
    const bool     eof_before      = bs->eof;

    uint32_t peeked = bitstream_read_bits(bs, count);

    /* Roll back so the same bits are returned by the next read. */
    bs->pos       = pos_before;
    bs->bitbuffer = buffer_before;
    bs->bitcount  = bitcount_before;
    bs->eof       = eof_before;

    return peeked;
}
|
||||
|
||||
/*
 * Return the next `count` bits (LSB-first) without consuming them.
 *
 * Implemented as snapshot / read / rollback: every field a read can modify
 * (position, bit buffer, bit count, EOF flag) is saved and put back.
 */
uint32_t bitstream_peek_bits_le(BitStream *bs, int count)
{
    /* Snapshot the mutable reader state. */
    const size_t   pos_before      = bs->pos;
    const uint32_t buffer_before   = bs->bitbuffer;
    const int      bitcount_before = bs->bitcount;
    const bool     eof_before      = bs->eof;

    uint32_t peeked = bitstream_read_bits_le(bs, count);

    /* Roll back so the same bits are returned by the next read. */
    bs->pos       = pos_before;
    bs->bitbuffer = buffer_before;
    bs->bitcount  = bitcount_before;
    bs->eof       = eof_before;

    return peeked;
}
|
||||
|
||||
/* Discard the next `count` bits (MSB-first order) by reading and ignoring them. */
void bitstream_skip_bits(BitStream *bs, int count)
{
    (void)bitstream_read_bits(bs, count);
}
|
||||
|
||||
/* Discard the next `count` bits (LSB-first order) by reading and ignoring them. */
void bitstream_skip_bits_le(BitStream *bs, int count)
{
    (void)bitstream_read_bits_le(bs, count);
}
|
||||
|
||||
/*
 * Read the next whole byte straight from the input.
 *
 * This bypasses the bit buffer: bits already buffered are neither used nor
 * discarded. Returns 0 and sets the EOF flag when no byte is left.
 */
uint8_t bitstream_read_byte(BitStream *bs)
{
    if(bs->pos < bs->length)
    {
        uint8_t value = bs->data[bs->pos];
        bs->pos++;
        return value;
    }

    bs->eof = true;
    return 0;
}
|
||||
|
||||
/*
 * Read a 16-bit little-endian integer straight from the input.
 *
 * This bypasses the bit buffer. If fewer than two bytes remain, nothing is
 * consumed, the EOF flag is set and 0 is returned.
 */
uint16_t bitstream_read_uint16_le(BitStream *bs)
{
    if(bs->pos + 1 < bs->length)
    {
        const uint8_t *bytes = bs->data + bs->pos;
        bs->pos += 2;

        /* Low byte first, high byte second. */
        return (uint16_t)(bytes[0] | (bytes[1] << 8));
    }

    bs->eof = true;
    return 0;
}
|
||||
|
||||
/* Report whether a read has already run past the end of the input. */
bool bitstream_eof(BitStream *bs)
{
    return bs->eof;
}
|
||||
75
pak/bitstream.h
Normal file
75
pak/bitstream.h
Normal file
@@ -0,0 +1,75 @@
|
||||
/*
|
||||
* bitstream.h - Bit stream input implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef BITSTREAM_H
|
||||
#define BITSTREAM_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
/* Reader state for pulling bits and bytes out of an in-memory buffer. */
struct BitStream
{
    const uint8_t *data;      /* input bytes (not owned by the stream) */
    size_t         length;    /* number of bytes available at data */
    size_t         pos;       /* index of the next byte to fetch from data */
    uint32_t       bitbuffer; /* bits fetched from data but not yet delivered */
    int            bitcount;  /* number of valid bits held in bitbuffer */
    bool           eof;       /* set once a read ran past the end of data */
};

typedef struct BitStream BitStream;
|
||||
|
||||
// Initialize bit stream
|
||||
void bitstream_init(BitStream *bs, const uint8_t *data, size_t length);
|
||||
|
||||
// Read a single bit (MSB first)
|
||||
uint32_t bitstream_read_bit(BitStream *bs);
|
||||
|
||||
// Read a single bit (LSB first)
|
||||
uint32_t bitstream_read_bit_le(BitStream *bs);
|
||||
|
||||
// Read multiple bits (MSB first)
|
||||
uint32_t bitstream_read_bits(BitStream *bs, int count);
|
||||
|
||||
// Read multiple bits (LSB first)
|
||||
uint32_t bitstream_read_bits_le(BitStream *bs, int count);
|
||||
|
||||
// Peek at bits without consuming them (MSB first)
|
||||
uint32_t bitstream_peek_bits(BitStream *bs, int count);
|
||||
|
||||
// Peek at bits without consuming them (LSB first)
|
||||
uint32_t bitstream_peek_bits_le(BitStream *bs, int count);
|
||||
|
||||
// Skip previously peeked bits (MSB first)
|
||||
void bitstream_skip_bits(BitStream *bs, int count);
|
||||
|
||||
// Skip previously peeked bits (LSB first)
|
||||
void bitstream_skip_bits_le(BitStream *bs, int count);
|
||||
|
||||
// Read a byte
|
||||
uint8_t bitstream_read_byte(BitStream *bs);
|
||||
|
||||
// Read a 16-bit little endian integer
|
||||
uint16_t bitstream_read_uint16_le(BitStream *bs);
|
||||
|
||||
// Check if end of stream reached
|
||||
bool bitstream_eof(BitStream *bs);
|
||||
|
||||
#endif /* BITSTREAM_H */
|
||||
219
pak/crush.c
Normal file
219
pak/crush.c
Normal file
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
 * crush.c - ARC Crush decompression algorithm
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <stdint.h>
|
||||
#include "../library.h"
|
||||
#include "bitstream.h"
|
||||
#include "lzw.h"
|
||||
|
||||
int pak_decompress_crush_internal(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len)
|
||||
{
|
||||
if(!in_buf || !out_buf || !out_len || in_len == 0) { return -1; }
|
||||
|
||||
BitStream bs;
|
||||
bitstream_init(&bs, (const uint8_t *)in_buf, in_len);
|
||||
|
||||
LZW *lzw = lzw_alloc(8192, 1);
|
||||
if(!lzw) { return -1; }
|
||||
|
||||
// Initialize state
|
||||
int symbolsize = 1;
|
||||
int nextsizebump = 2;
|
||||
bool useliteralbit = true;
|
||||
|
||||
int numrecentstrings = 0;
|
||||
int ringindex = 0;
|
||||
bool stringring[500];
|
||||
memset(stringring, 0, sizeof(stringring));
|
||||
|
||||
int usageindex = 0x101;
|
||||
uint8_t usage[8192];
|
||||
memset(usage, 0, sizeof(usage));
|
||||
|
||||
int currbyte = 0;
|
||||
uint8_t buffer[8192];
|
||||
size_t outpos = 0;
|
||||
size_t max_output = *out_len;
|
||||
|
||||
while(!bitstream_eof(&bs) && outpos < max_output)
|
||||
{
|
||||
if(!currbyte)
|
||||
{
|
||||
// Read the next symbol. How depends on the mode we are operating in.
|
||||
int symbol;
|
||||
if(useliteralbit)
|
||||
{
|
||||
// Use codes prefixed by a bit that selects literal or string codes.
|
||||
// Literals are always 8 bits, strings vary.
|
||||
if(bitstream_read_bit_le(&bs)) { symbol = bitstream_read_bits_le(&bs, symbolsize) + 256; }
|
||||
else { symbol = bitstream_read_bits_le(&bs, 8); }
|
||||
}
|
||||
else
|
||||
{
|
||||
// Use same-length codes for both literals and strings.
|
||||
// Due to an optimization quirk in the original decruncher,
|
||||
// literals have their bits inverted.
|
||||
symbol = bitstream_read_bits_le(&bs, symbolsize);
|
||||
if(symbol < 0x100) symbol ^= 0xff;
|
||||
}
|
||||
|
||||
// Code 0x100 is the EOF code.
|
||||
if(symbol == 0x100) { break; }
|
||||
|
||||
// Walk through the LZW tree, and set the usage count of the current
|
||||
// string and all its parents to 4. This is not necessary for literals,
|
||||
// but we do it anyway for simplicity.
|
||||
LZWTreeNode *nodes = lzw_symbols(lzw);
|
||||
int marksymbol = symbol;
|
||||
while(marksymbol >= 0)
|
||||
{
|
||||
if(marksymbol < 8192) { usage[marksymbol] = 4; }
|
||||
marksymbol = nodes[marksymbol].parent;
|
||||
}
|
||||
|
||||
// Adjust the count of recent strings versus literals.
|
||||
// Use a ring buffer of length 500 as a window to keep track
|
||||
// of how many strings have been encountered lately.
|
||||
|
||||
// First, decrease the count if a string leaves the window.
|
||||
if(stringring[ringindex]) numrecentstrings--;
|
||||
|
||||
// Then store the current type of symbol in the window, and
|
||||
// increase the count if the current symbol is a string.
|
||||
if(symbol < 0x100) { stringring[ringindex] = false; }
|
||||
else
|
||||
{
|
||||
stringring[ringindex] = true;
|
||||
numrecentstrings++;
|
||||
}
|
||||
|
||||
// Move the window forward.
|
||||
ringindex = (ringindex + 1) % 500;
|
||||
|
||||
// Check the number of strings. If there have been many literals
|
||||
// lately, bit-prefixed codes should be used. If we need to change
|
||||
// mode, re-calculate the point where we increase the code length.
|
||||
bool manyliterals = numrecentstrings < 375;
|
||||
if(manyliterals != useliteralbit)
|
||||
{
|
||||
useliteralbit = manyliterals;
|
||||
nextsizebump = 1 << symbolsize;
|
||||
if(!useliteralbit) nextsizebump -= 0x100;
|
||||
}
|
||||
|
||||
// Update the LZW tree.
|
||||
if(!lzw_symbol_list_full(lzw))
|
||||
{
|
||||
// If there is space in the tree, just add a new string as usual.
|
||||
if(lzw_next_symbol(lzw, symbol) != LZW_NO_ERROR)
|
||||
{
|
||||
lzw_free(lzw);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Set the usage count of the newly created entry.
|
||||
int count = lzw_symbol_count(lzw);
|
||||
if(count > 0 && count - 1 < 8192) { usage[count - 1] = 2; }
|
||||
}
|
||||
else
|
||||
{
|
||||
// If the tree is full, find a less-used symbol, and replace it.
|
||||
int minindex = 0, minusage = INT_MAX;
|
||||
int index = usageindex;
|
||||
do {
|
||||
index++;
|
||||
if(index == 8192) index = 0x101;
|
||||
|
||||
if(usage[index] < minusage)
|
||||
{
|
||||
minindex = index;
|
||||
minusage = usage[index];
|
||||
}
|
||||
|
||||
usage[index]--;
|
||||
if(usage[index] == 0) break;
|
||||
} while(index != usageindex);
|
||||
|
||||
usageindex = index;
|
||||
|
||||
if(lzw_replace_symbol(lzw, minindex, symbol) != LZW_NO_ERROR)
|
||||
{
|
||||
lzw_free(lzw);
|
||||
return -1;
|
||||
}
|
||||
|
||||
// Set the usage count of the replaced entry.
|
||||
if(minindex < 8192) { usage[minindex] = 2; }
|
||||
}
|
||||
|
||||
// Extract the data to output.
|
||||
currbyte = lzw_reverse_output_to_buffer(lzw, buffer);
|
||||
|
||||
// Check if we need to increase the code size. The point at which
|
||||
// to increase varies depending on the coding mode.
|
||||
if(lzw_symbol_count(lzw) - 257 >= nextsizebump)
|
||||
{
|
||||
symbolsize++;
|
||||
nextsizebump = 1 << symbolsize;
|
||||
if(!useliteralbit) nextsizebump -= 0x100;
|
||||
}
|
||||
}
|
||||
|
||||
if(currbyte > 0 && outpos < max_output) { out_buf[outpos++] = (char)buffer[--currbyte]; }
|
||||
else if(currbyte == 0)
|
||||
{
|
||||
// No more bytes in buffer, continue to next symbol
|
||||
continue;
|
||||
}
|
||||
else
|
||||
{
|
||||
// Output buffer full
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
lzw_free(lzw);
|
||||
*out_len = outpos;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
 * Decompress PAK method 10 (crush) data.
 *
 * The data is first LZW-decoded into a temporary buffer and the result is
 * then run through arc_decompress_pack() to undo the non-repeat packing.
 *
 * in_buf / in_len - compressed input
 * out_buf         - receives the decompressed data
 * out_len         - in: capacity of out_buf; the value left in it on return
 *                   is whatever arc_decompress_pack() stores there
 *
 * Returns the result of the last stage that ran (0 from the LZW stage means
 * success), or -1 on invalid arguments or allocation failure.
 */
AARU_EXPORT int AARU_CALL pak_decompress_crush(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
                                               size_t *out_len)
{
    // Validate before *out_len is read to size the temporary buffer.
    if(!in_buf || !out_buf || !out_len || in_len == 0) return -1;

    // The doubling below must not wrap around.
    if(*out_len > SIZE_MAX / 2) return -1;

    // Allocate a temporary buffer.
    size_t         temp_len = *out_len * 2; // Heuristic.
    unsigned char *temp_buf = malloc(temp_len);
    if(!temp_buf) return -1;

    // Decompress crunched data.
    int result = pak_decompress_crush_internal(in_buf, in_len, temp_buf, &temp_len);
    if(result == 0)
    {
        // Decompress non-repeat packing.
        result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
    }

    free(temp_buf);
    return result;
}
|
||||
182
pak/distill.c
Normal file
182
pak/distill.c
Normal file
@@ -0,0 +1,182 @@
|
||||
/*
|
||||
 * distill.c - ARC Distill decompression algorithm
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "../library.h"
|
||||
#include "bitstream.h"
|
||||
#include "prefixcode.h"
|
||||
|
||||
/* Bit length of the prefix code for each of the 64 offset symbols. */
static const int offset_lengths[0x40] = {
    /* symbol 0: 3 bits */
    3,
    /* symbols 1-3: 4 bits */
    4, 4, 4,
    /* symbols 4-11: 5 bits */
    5, 5, 5, 5, 5, 5, 5, 5,
    /* symbols 12-23: 6 bits */
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    /* symbols 24-47: 7 bits */
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    /* symbols 48-63: 8 bits */
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
};
|
||||
|
||||
/*
 * Prefix code bits for each of the 64 offset symbols, registered low bit
 * first; rows are grouped by code length.
 */
static const int offset_codes[0x40] = {
    /* 3-bit code */
    0x00,
    /* 4-bit codes */
    0x02, 0x04, 0x0c,
    /* 5-bit codes */
    0x01, 0x06, 0x0a, 0x0e, 0x11, 0x16, 0x1a, 0x1e,
    /* 6-bit codes */
    0x05, 0x09, 0x0d, 0x15, 0x19, 0x1d, 0x25, 0x29, 0x2d, 0x35, 0x39, 0x3d,
    /* 7-bit codes */
    0x03, 0x07, 0x0b, 0x13, 0x17, 0x1b, 0x23, 0x27, 0x2b, 0x33, 0x37, 0x3b,
    0x43, 0x47, 0x4b, 0x53, 0x57, 0x5b, 0x63, 0x67, 0x6b, 0x73, 0x77, 0x7b,
    /* 8-bit codes */
    0x0f, 0x1f, 0x2f, 0x3f, 0x4f, 0x5f, 0x6f, 0x7f,
    0x8f, 0x9f, 0xaf, 0xbf, 0xcf, 0xdf, 0xef, 0xff,
};
|
||||
|
||||
/*
 * Recursive worker for build_code_from_tree().
 *
 * visited has one flag per table slot and records which internal nodes have
 * already been expanded. In a well-formed tree every internal node is reached
 * exactly once, so a second visit means the table contains a cycle or a
 * shared subtree; refusing it keeps the total work linear in numnodes instead
 * of exponential in the depth limit.
 */
static void build_code_subtree(PrefixCode *code, const int *tree, int node, int numnodes, int depth,
                               unsigned char *visited)
{
    // Too deep - error
    if(depth > 64) { return; }

    // Negative values cannot index the table.
    if(node < 0) { return; }

    if(node >= numnodes)
    {
        // Values at or above numnodes encode leaves.
        prefix_code_make_leaf_with_value(code, node - numnodes);
        return;
    }

    // An internal node occupies two consecutive slots, node and node + 1;
    // both must lie inside the table.
    if(node + 1 >= numnodes) { return; }

    if(visited[node]) { return; }
    visited[node] = 1;

    prefix_code_start_zero_branch(code);
    build_code_subtree(code, tree, tree[node], numnodes, depth + 1, visited);
    prefix_code_start_one_branch(code);
    build_code_subtree(code, tree, tree[node + 1], numnodes, depth + 1, visited);
    prefix_code_finish_branches(code);
}

/*
 * Build a prefix code from a flat node table.
 *
 * code     - prefix code being built
 * tree     - table of numnodes entries; an internal node at index n stores
 *            its zero-branch child in tree[n] and its one-branch child in
 *            tree[n + 1]; a value >= numnodes is a leaf with value
 *            (value - numnodes)
 * node     - node to start from
 * numnodes - number of entries in tree
 * depth    - current depth; branches deeper than 64 levels are abandoned
 *
 * The table comes from untrusted input. Invalid references (negative, or
 * pointing at the last slot so that the second child would lie outside the
 * table) and repeated visits are abandoned in the same way as overly deep
 * branches: the function returns without completing that branch. If the
 * bookkeeping array cannot be allocated, nothing is built.
 */
static void build_code_from_tree(PrefixCode *code, int *tree, int node, int numnodes, int depth)
{
    size_t         slots   = numnodes > 0 ? (size_t)numnodes : 1;
    unsigned char *visited = calloc(slots, 1);
    if(!visited) { return; }

    build_code_subtree(code, tree, node, numnodes, depth, visited);

    free(visited);
}
|
||||
|
||||
/*
 * Decompress PAK method 11 (distill) data.
 *
 * Stream layout as read here: a 16-bit little-endian node count, one byte
 * giving the bit width of each node entry, the node table of the main prefix
 * code, then the LZSS payload. Main symbols below 256 are literals, 256 ends
 * the stream, and higher symbols are matches of length (symbol - 0x101 + 3)
 * into an 8192-byte sliding window.
 *
 * in_buf / in_len - compressed input
 * out_buf         - receives the decompressed data
 * out_len         - in: capacity of out_buf; out: number of bytes written
 *
 * Returns 0 on success and -1 on invalid arguments, a malformed or truncated
 * header, or allocation failure.
 */
AARU_EXPORT int AARU_CALL pak_decompress_distill(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
                                                 size_t *out_len)
{
    if(!in_buf || !out_buf || !out_len || in_len == 0) { return -1; }

    BitStream bs;
    bitstream_init(&bs, (const uint8_t *)in_buf, in_len);

    // Read header information
    int numnodes   = bitstream_read_uint16_le(&bs);
    int codelength = bitstream_read_byte(&bs);

    if(numnodes < 2 || numnodes > 0x274) { return -1; }

    // A width of 0 yields an all-zero (self-referencing) node table, and a
    // width above 31 cannot be stored in a non-negative int.
    if(codelength < 1 || codelength > 31) { return -1; }

    // Read tree nodes
    int *nodes = malloc(numnodes * sizeof(int));
    if(!nodes) { return -1; }

    for(int i = 0; i < numnodes; i++) { nodes[i] = bitstream_read_bits_le(&bs, codelength); }

    // The EOF flag is only raised when a read went past the end of the input,
    // so at this point it means the header or node table was truncated.
    if(bitstream_eof(&bs))
    {
        free(nodes);
        return -1;
    }

    // Build main code tree
    PrefixCode *maincode = prefix_code_alloc();
    if(!maincode)
    {
        free(nodes);
        return -1;
    }

    prefix_code_start_building_tree(maincode);
    build_code_from_tree(maincode, nodes, numnodes - 2, numnodes, 0);

    free(nodes);

    // Build offset code tree
    PrefixCode *offsetcode = prefix_code_alloc();
    if(!offsetcode)
    {
        prefix_code_free(maincode);
        return -1;
    }

    for(int i = 0; i < 0x40; i++)
    {
        if(prefix_code_add_value_low_bit_first(offsetcode, i, offset_codes[i], offset_lengths[i]) != PREFIX_CODE_OK)
        {
            prefix_code_free(maincode);
            prefix_code_free(offsetcode);
            return -1;
        }
    }

    // LZSS decompression
    uint8_t window[8192];
    memset(window, 0, sizeof(window));
    int    windowpos  = 0;
    size_t outpos     = 0;
    size_t max_output = *out_len;

    while(!bitstream_eof(&bs) && outpos < max_output)
    {
        int symbol = prefix_code_read_symbol_le(&bs, maincode);
        if(symbol < 0) break;

        if(symbol < 256)
        {
            // Literal byte. The loop condition guarantees room in out_buf.
            out_buf[outpos++] = (unsigned char)symbol;
            window[windowpos] = (uint8_t)symbol;
            windowpos         = (windowpos + 1) & 0x1fff;
        }
        else if(symbol == 256)
        {
            // End of stream
            break;
        }
        else
        {
            // Match
            int length       = symbol - 0x101 + 3;
            int offsetsymbol = prefix_code_read_symbol_le(&bs, offsetcode);
            if(offsetsymbol < 0) break;

            // The number of extra offset bits grows with the amount of data
            // produced so far.
            int extralength;
            if(outpos >= 0x1000 - 0x3c)
                extralength = 7;
            else if(outpos >= 0x800 - 0x3c)
                extralength = 6;
            else if(outpos >= 0x400 - 0x3c)
                extralength = 5;
            else if(outpos >= 0x200 - 0x3c)
                extralength = 4;
            else if(outpos >= 0x100 - 0x3c)
                extralength = 3;
            else if(outpos >= 0x80 - 0x3c)
                extralength = 2;
            else if(outpos >= 0x40 - 0x3c)
                extralength = 1;
            else
                extralength = 0;

            int extrabits = bitstream_read_bits_le(&bs, extralength);
            int offset    = (offsetsymbol << extralength) + extrabits + 1;

            // Copy match. Stop as soon as the output is full: the outer loop
            // ends then anyway, and a corrupt length must not keep the copy
            // spinning over the window.
            for(int i = 0; i < length && outpos < max_output; i++)
            {
                int     sourcepos = (windowpos - offset) & 0x1fff;
                uint8_t byte      = window[sourcepos];

                out_buf[outpos++] = byte;

                window[windowpos] = byte;
                windowpos         = (windowpos + 1) & 0x1fff;
            }
        }
    }

    prefix_code_free(maincode);
    prefix_code_free(offsetcode);

    *out_len = outpos;
    return 0;
}
|
||||
162
pak/lzw.c
Normal file
162
pak/lzw.c
Normal file
@@ -0,0 +1,162 @@
|
||||
/*
|
||||
* lzw.c - LZW decompression implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "lzw.h"
|
||||
#include <stdlib.h>
|
||||
|
||||
LZW *lzw_alloc(int maxsymbols, int reservedsymbols)
|
||||
{
|
||||
LZW *self = (LZW *)malloc(sizeof(LZW) + sizeof(LZWTreeNode) * maxsymbols);
|
||||
if(!self) return NULL;
|
||||
|
||||
if(maxsymbols < 256 + reservedsymbols)
|
||||
{
|
||||
free(self);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
self->maxsymbols = maxsymbols;
|
||||
self->reservedsymbols = reservedsymbols;
|
||||
|
||||
self->buffer = NULL;
|
||||
self->buffersize = 0;
|
||||
|
||||
for(int i = 0; i < 256; i++)
|
||||
{
|
||||
self->nodes[i].chr = i;
|
||||
self->nodes[i].parent = -1;
|
||||
}
|
||||
|
||||
lzw_clear_table(self);
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
/* Release an LZW state and its scratch buffer. Passing NULL is allowed. */
void lzw_free(LZW *self)
{
    if(!self) return;

    free(self->buffer);
    free(self);
}
|
||||
|
||||
/*
 * Reset the string table: only the 256 literals and the reserved codes
 * remain, there is no previous symbol, and the code size is back to 9 bits.
 */
void lzw_clear_table(LZW *self)
{
    self->prevsymbol = -1;
    self->numsymbols = self->reservedsymbols + 256;
    self->symbolsize = 9; // TODO: technically this depends on reservedsymbols
}
|
||||
|
||||
/*
 * Return the first byte of the string for `symbol`: follow parent links up
 * to the root of the chain and take that node's byte.
 */
static uint8_t find_first_byte(LZWTreeNode *nodes, int symbol)
{
    int current = symbol;

    for(int up = nodes[current].parent; up >= 0; up = nodes[current].parent) { current = up; }

    return nodes[current].chr;
}
|
||||
|
||||
/*
 * Feed the next code to the decoder.
 *
 * The first code after a reset only becomes the previous symbol. Every later
 * code also adds a table entry made of the previous string plus the first
 * byte of the current one. A code equal to the current table size refers to
 * the entry being created, in which case that byte is the first byte of the
 * previous string.
 *
 * Returns LZW_NO_ERROR, LZW_INVALID_CODE_ERROR for a code beyond the table,
 * or LZW_TOO_MANY_CODES_ERROR when the table is already full (the code is
 * still recorded as the previous symbol in that case).
 */
int lzw_next_symbol(LZW *self, int symbol)
{
    const int known    = self->numsymbols;
    const int previous = self->prevsymbol;

    // First code after a reset: nothing to combine it with yet.
    if(previous < 0)
    {
        if(symbol >= known) return LZW_INVALID_CODE_ERROR;

        self->prevsymbol = symbol;
        return LZW_NO_ERROR;
    }

    if(symbol > known) return LZW_INVALID_CODE_ERROR;

    // symbol == known is the "entry being created" case.
    int source      = symbol < known ? symbol : previous;
    int postfixbyte = find_first_byte(self->nodes, source);

    self->prevsymbol = symbol;

    if(lzw_symbol_list_full(self)) return LZW_TOO_MANY_CODES_ERROR;

    self->nodes[known].parent = previous;
    self->nodes[known].chr    = postfixbyte;
    self->numsymbols          = known + 1;

    // Widen the code size whenever the table size reaches a power of two,
    // unless the table has just become full.
    if(!lzw_symbol_list_full(self) && (self->numsymbols & (self->numsymbols - 1)) == 0) { self->symbolsize++; }

    return LZW_NO_ERROR;
}
|
||||
|
||||
/*
 * Overwrite table entry `oldsymbol` with the string "previous string + first
 * byte of symbol", then make `symbol` the previous symbol.
 *
 * Returns LZW_INVALID_CODE_ERROR if `symbol` is not an existing entry,
 * otherwise LZW_NO_ERROR. `oldsymbol` is not range-checked here.
 */
int lzw_replace_symbol(LZW *self, int oldsymbol, int symbol)
{
    if(self->numsymbols <= symbol) return LZW_INVALID_CODE_ERROR;

    // Order matters: the slot is re-parented first, and the first byte of
    // `symbol` is looked up in the table as it stands after that change.
    self->nodes[oldsymbol].parent = self->prevsymbol;

    uint8_t first              = find_first_byte(self->nodes, symbol);
    self->nodes[oldsymbol].chr = first;

    self->prevsymbol = symbol;

    return LZW_NO_ERROR;
}
|
||||
|
||||
/*
 * Length in bytes of the string for the previous symbol, i.e. the number of
 * nodes on its parent chain. Returns 0 if there is no previous symbol.
 */
int lzw_output_length(LZW *self)
{
    int length = 0;

    for(int sym = self->prevsymbol; sym >= 0; sym = self->nodes[sym].parent) { length++; }

    return length;
}
|
||||
|
||||
/*
 * Write the string for the previous symbol to `buffer` in natural order.
 *
 * The chain is stored last byte first, so the buffer is filled from the end
 * towards the start. The caller must provide room for lzw_output_length()
 * bytes. Returns the number of bytes written.
 */
int lzw_output_to_buffer(LZW *self, uint8_t *buffer)
{
    const int length = lzw_output_length(self);
    int       slot   = length;

    for(int sym = self->prevsymbol; sym >= 0; sym = self->nodes[sym].parent)
    {
        slot--;
        buffer[slot] = self->nodes[sym].chr;
    }

    return length;
}
|
||||
|
||||
/*
 * Write the string for the previous symbol to `buffer` in reverse order
 * (last byte of the string first), which is the order the parent chain
 * yields it in. The caller must provide enough room; no bound is checked.
 * Returns the number of bytes written.
 */
int lzw_reverse_output_to_buffer(LZW *self, uint8_t *buffer)
{
    int written = 0;

    for(int sym = self->prevsymbol; sym >= 0; sym = self->nodes[sym].parent)
    {
        buffer[written] = self->nodes[sym].chr;
        written++;
    }

    return written;
}
|
||||
83
pak/lzw.h
Normal file
83
pak/lzw.h
Normal file
@@ -0,0 +1,83 @@
|
||||
/*
|
||||
* lzw.h - LZW decompression implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef LZW_H
|
||||
#define LZW_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#define LZW_NO_ERROR 0
|
||||
#define LZW_INVALID_CODE_ERROR 1
|
||||
#define LZW_TOO_MANY_CODES_ERROR 2
|
||||
|
||||
/* One string-table entry: the string of `parent` followed by the byte `chr`. */
struct LZWTreeNode
{
    uint8_t chr;    /* last byte of this entry's string */
    int     parent; /* entry holding the rest of the string, or -1 for none */
};

typedef struct LZWTreeNode LZWTreeNode;
|
||||
|
||||
typedef struct LZW
|
||||
{
|
||||
int numsymbols;
|
||||
int maxsymbols;
|
||||
int reservedsymbols;
|
||||
int prevsymbol;
|
||||
int symbolsize;
|
||||
|
||||
uint8_t *buffer;
|
||||
int buffersize;
|
||||
|
||||
LZWTreeNode nodes[]; // Flexible array member (C99)
|
||||
} LZW;
|
||||
|
||||
// Allocate LZW structure
|
||||
LZW *lzw_alloc(int maxsymbols, int reservedsymbols);
|
||||
|
||||
// Free LZW structure
|
||||
void lzw_free(LZW *self);
|
||||
|
||||
// Clear/reset LZW table
|
||||
void lzw_clear_table(LZW *self);
|
||||
|
||||
// Process next symbol
|
||||
int lzw_next_symbol(LZW *self, int symbol);
|
||||
|
||||
// Replace a symbol
|
||||
int lzw_replace_symbol(LZW *self, int oldsymbol, int symbol);
|
||||
|
||||
// Get output length
|
||||
int lzw_output_length(LZW *self);
|
||||
|
||||
// Output to buffer (normal order)
|
||||
int lzw_output_to_buffer(LZW *self, uint8_t *buffer);
|
||||
|
||||
// Output to buffer (reverse order)
|
||||
int lzw_reverse_output_to_buffer(LZW *self, uint8_t *buffer);
|
||||
|
||||
// Inline helper functions
|
||||
/* Number of table entries currently in use (literals and reserved codes included). */
static inline int lzw_symbol_count(LZW *self)
{
    return self->numsymbols;
}
|
||||
|
||||
/* True when the number of entries in use equals the table capacity. */
static inline bool lzw_symbol_list_full(LZW *self)
{
    return self->maxsymbols == self->numsymbols;
}
|
||||
|
||||
/* Direct access to the string table. */
static inline LZWTreeNode *lzw_symbols(LZW *self)
{
    return &self->nodes[0];
}
|
||||
|
||||
#endif /* LZW_H */
|
||||
539
pak/prefixcode.c
Normal file
539
pak/prefixcode.c
Normal file
@@ -0,0 +1,539 @@
|
||||
/*
|
||||
* prefixcode.c - Prefix code tree implementation
|
||||
*
|
||||
 * Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#include "prefixcode.h"
|
||||
#include <limits.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Resize an allocation. Unlike plain realloc(), a failed resize to a non-zero
// size frees the original block before NULL is returned, so the caller must
// not touch `ptr` again after a failure.
static void *safe_realloc(void *ptr, size_t newsize)
{
    void *resized = realloc(ptr, newsize);

    // Success, or a zero-size request whose result is passed through as is.
    if(resized != NULL || newsize == 0) return resized;

    free(ptr);
    return NULL;
}
|
||||
|
||||
// Inline helper functions
|
||||
// Address of tree node number `node`.
static inline CodeTreeNode *node_pointer(PrefixCode *self, int node)
{
    return self->tree + node;
}
|
||||
|
||||
// Child reached from `node` when the next code bit is `bit` (0 or 1).
static inline int branch(PrefixCode *self, int node, int bit)
{
    return self->tree[node].branches[bit];
}
|
||||
|
||||
// Make `nextnode` the child reached from `node` on code bit `bit` (0 or 1).
static inline void set_branch(PrefixCode *self, int node, int bit, int nextnode)
{
    self->tree[node].branches[bit] = nextnode;
}
|
||||
|
||||
// Child of `node` on the 0 bit.
static inline int left_branch(PrefixCode *self, int node)
{
    return node_pointer(self, node)->branches[0];
}
|
||||
|
||||
// Child of `node` on the 1 bit.
static inline int right_branch(PrefixCode *self, int node)
{
    return node_pointer(self, node)->branches[1];
}
|
||||
|
||||
// Set the child of `node` on the 0 bit.
static inline void set_left_branch(PrefixCode *self, int node, int nextnode)
{
    node_pointer(self, node)->branches[0] = nextnode;
}
|
||||
|
||||
// Set the child of `node` on the 1 bit.
static inline void set_right_branch(PrefixCode *self, int node, int nextnode)
{
    node_pointer(self, node)->branches[1] = nextnode;
}
|
||||
|
||||
// Value stored in a leaf. A leaf keeps its value in both branch slots; the
// 0-bit slot is the one read back.
static inline int leaf_value(PrefixCode *self, int node)
{
    return branch(self, node, 0);
}
|
||||
|
||||
// Turn `node` into a leaf carrying `value`: both branch slots receive the
// value, and equal slots are what marks a node as a leaf.
static inline void set_leaf_value(PrefixCode *self, int node, int value)
{
    set_branch(self, node, 0, value);
    set_branch(self, node, 1, value);
}
|
||||
|
||||
// Mark `node` as empty. The two branch slots get different negative values
// (-1 and -2), so the node reads as neither a leaf (equal slots) nor as
// having any valid child.
static inline void set_empty_node(PrefixCode *self, int node)
{
    set_branch(self, node, 0, -1);
    set_branch(self, node, 1, -2);
}
|
||||
|
||||
// A negative index never refers to a real node; `self` is accepted only for signature symmetry.
static inline bool is_invalid_node(PrefixCode *self, int node)
{
    (void)self;
    return node < 0;
}
|
||||
|
||||
// True when the `bit` side of `node` does not lead to a node yet.
static inline bool is_open_branch(PrefixCode *self, int node, int bit)
{
    const int child = branch(self, node, bit);
    return is_invalid_node(self, child);
}
|
||||
|
||||
// True when `node` still carries the exact sentinel pair written by set_empty_node().
static inline bool is_empty_node(PrefixCode *self, int node)
{
    const CodeTreeNode *entry = node_pointer(self, node);
    return entry->branches[0] == -1 && entry->branches[1] == -2;
}
|
||||
|
||||
// True when both branch slots hold the same number, which is how leaves are encoded.
static inline bool is_leaf_node(PrefixCode *self, int node)
{
    const CodeTreeNode *entry = node_pointer(self, node);
    return entry->branches[0] == entry->branches[1];
}
|
||||
|
||||
// Append one empty node to the tree.
//
// Returns the index of the new node, or -1 when the tree cannot grow
// (static tree, size overflow, or out of memory). On failure the existing
// tree is left untouched and still owned by `self`.
static int new_node(PrefixCode *self)
{
    // A static tree is borrowed memory and must never be handed to realloc().
    if(self->isstatic) return -1;

    if(self->numentries < 0 || self->numentries == INT_MAX) return -1;
    size_t wanted = (size_t)self->numentries + 1;
    if(wanted > SIZE_MAX / sizeof(CodeTreeNode)) return -1;

    // Plain realloc() on purpose: when it fails the old array stays allocated,
    // so self->tree remains valid and prefix_code_free() releases it exactly
    // once. A helper that frees on failure would leave self->tree dangling.
    CodeTreeNode *grown = realloc(self->tree, wanted * sizeof(CodeTreeNode));
    if(!grown) return -1;

    self->tree = grown;
    set_empty_node(self, self->numentries);
    return self->numentries++;
}
|
||||
|
||||
// Stack implementation for tree building
|
||||
static PrefixCodeStack *prefix_code_stack_alloc(void)
|
||||
{
|
||||
PrefixCodeStack *stack = malloc(sizeof(PrefixCodeStack));
|
||||
if(!stack) return NULL;
|
||||
|
||||
stack->data = malloc(16 * sizeof(int));
|
||||
if(!stack->data)
|
||||
{
|
||||
free(stack);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
stack->count = 0;
|
||||
stack->capacity = 16;
|
||||
return stack;
|
||||
}
|
||||
|
||||
// Release a stack and its storage. Passing NULL is allowed and does nothing.
static void prefix_code_stack_free(PrefixCodeStack *stack)
{
    if(stack != NULL)
    {
        free(stack->data);
        free(stack);
    }
}
|
||||
|
||||
// Push `value` onto the stack, doubling the storage when it is full.
//
// Returns 0 on success and -1 on failure (NULL stack, capacity overflow, or
// out of memory). After an out-of-memory failure the stack is reset to an
// empty, storage-less state that is still safe to push to, pop from and free.
static int prefix_code_stack_push(PrefixCodeStack *stack, int value)
{
    if(!stack) return -1;

    if(stack->count >= stack->capacity)
    {
        // Doubling must not overflow int.
        if(stack->capacity > INT_MAX / 2) return -1;

        // A capacity of 0 only occurs after an earlier failed growth.
        int newcapacity = stack->capacity > 0 ? stack->capacity * 2 : 16;

        int *newdata = safe_realloc(stack->data, (size_t)newcapacity * sizeof(int));
        if(!newdata)
        {
            // safe_realloc() already released the old buffer. Forget the stale
            // pointer so prefix_code_stack_free() cannot free it a second time.
            stack->data     = NULL;
            stack->count    = 0;
            stack->capacity = 0;
            return -1;
        }

        stack->data     = newdata;
        stack->capacity = newcapacity;
    }

    stack->data[stack->count++] = value;
    return 0;
}
|
||||
|
||||
// Remove and return the most recently pushed value, or -1 when the stack is empty.
static int prefix_code_stack_pop(PrefixCodeStack *stack)
{
    if(stack->count != 0)
    {
        stack->count -= 1;
        return stack->data[stack->count];
    }
    return -1;
}
|
||||
|
||||
// Drop every entry while keeping the allocated storage for reuse.
static void prefix_code_stack_clear(PrefixCodeStack *stack)
{
    stack->count = 0;
}
|
||||
|
||||
// Bit reversal functions
|
||||
// Reverse the order of all 32 bits of `val` by swapping progressively larger
// groups: single bits, pairs, nibbles, bytes and finally the two half-words.
static uint32_t reverse_32(uint32_t val)
{
    val = ((val >> 1) & 0x55555555) | ((val & 0x55555555) << 1);
    val = ((val >> 2) & 0x33333333) | ((val & 0x33333333) << 2);
    val = ((val >> 4) & 0x0F0F0F0F) | ((val & 0x0F0F0F0F) << 4);
    val = ((val >> 8) & 0x00FF00FF) | ((val & 0x00FF00FF) << 8);
    return (val >> 16) | (val << 16);
}

// Reverse the lowest `length` bits of `val`; the result occupies the low
// `length` bits of the return value.
//
// A shift by the full width of the type (or by a negative amount) is
// undefined in C, so the edge cases are handled explicitly: a non-positive
// length yields 0 and a length of 32 or more reverses the whole word.
static uint32_t reverse_n(uint32_t val, int length)
{
    if(length <= 0) return 0;
    if(length >= 32) return reverse_32(val);
    return reverse_32(val) >> (32 - length);
}
|
||||
|
||||
// Table construction functions
|
||||
#define TABLE_MAX_SIZE 10
|
||||
|
||||
static void make_table(PrefixCode *code, int node, CodeTableEntry *table, int depth, int maxdepth)
|
||||
{
|
||||
int currtablesize = 1 << (maxdepth - depth);
|
||||
|
||||
if(is_invalid_node(code, node))
|
||||
{
|
||||
for(int i = 0; i < currtablesize; i++) table[i].length = -1;
|
||||
}
|
||||
else if(is_leaf_node(code, node))
|
||||
{
|
||||
for(int i = 0; i < currtablesize; i++)
|
||||
{
|
||||
table[i].length = depth;
|
||||
table[i].value = leaf_value(code, node);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(depth == maxdepth)
|
||||
{
|
||||
table[0].length = maxdepth + 1;
|
||||
table[0].value = node;
|
||||
}
|
||||
else
|
||||
{
|
||||
make_table(code, left_branch(code, node), table, depth + 1, maxdepth);
|
||||
make_table(code, right_branch(code, node), table + currtablesize / 2, depth + 1, maxdepth);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Fill the entries of a low-bit-first lookup table that belong to the subtree
// rooted at `node`.
//
// Unlike make_table(), the entries of a subtree are interleaved: they start at
// `table` and are spaced 1 << depth apart, and the 1-branch starts one stride
// after the 0-branch. Entry contents follow the same rules as make_table():
// length -1 for a missing subtree, `depth` plus the value for a leaf, and
// maxdepth + 1 plus the node index for an inner node at the depth limit.
static void make_table_le(PrefixCode *code, int node, CodeTableEntry *table, int depth, int maxdepth)
{
    const int slots = 1 << (maxdepth - depth);
    const int step  = 1 << depth;

    if(is_invalid_node(code, node))
    {
        for(int n = 0, at = 0; n < slots; n++, at += step) table[at].length = -1;
        return;
    }

    if(is_leaf_node(code, node))
    {
        for(int n = 0, at = 0; n < slots; n++, at += step)
        {
            table[at].length = depth;
            table[at].value  = leaf_value(code, node);
        }
        return;
    }

    if(depth == maxdepth)
    {
        table->length = maxdepth + 1;
        table->value  = node;
        return;
    }

    make_table_le(code, left_branch(code, node), table, depth + 1, maxdepth);
    make_table_le(code, right_branch(code, node), table + step, depth + 1, maxdepth);
}
|
||||
|
||||
// Build the high-bit-first lookup table (table1) if it does not exist yet.
//
// The table is indexed by `tablesize` bits. That is the longest recorded code
// length, capped at TABLE_MAX_SIZE; TABLE_MAX_SIZE is also used when no code
// length has been recorded at all. Returns PREFIX_CODE_OK, or
// PREFIX_CODE_INVALID when the table cannot be allocated.
static int prefix_code_make_table(PrefixCode *self)
{
    if(self->table1 != NULL) return PREFIX_CODE_OK;

    const bool nolengths = self->maxlength < self->minlength; // no code lengths recorded
    int        width     = self->maxlength;
    if(nolengths || width >= TABLE_MAX_SIZE) width = TABLE_MAX_SIZE;
    self->tablesize = width;

    self->table1 = malloc(sizeof(CodeTableEntry) * (1 << self->tablesize));
    if(self->table1 == NULL) return PREFIX_CODE_INVALID;

    make_table(self, 0, self->table1, 0, self->tablesize);
    return PREFIX_CODE_OK;
}
|
||||
|
||||
// Build the low-bit-first lookup table (table2) if it does not exist yet.
//
// The table width is chosen exactly as for the high-bit-first table: the
// longest recorded code length, capped at TABLE_MAX_SIZE, or TABLE_MAX_SIZE
// when no length has been recorded. Returns PREFIX_CODE_OK, or
// PREFIX_CODE_INVALID when the table cannot be allocated.
static int prefix_code_make_table_le(PrefixCode *self)
{
    if(self->table2 != NULL) return PREFIX_CODE_OK;

    const bool nolengths = self->maxlength < self->minlength; // no code lengths recorded
    int        width     = self->maxlength;
    if(nolengths || width >= TABLE_MAX_SIZE) width = TABLE_MAX_SIZE;
    self->tablesize = width;

    self->table2 = malloc(sizeof(CodeTableEntry) * (1 << self->tablesize));
    if(self->table2 == NULL) return PREFIX_CODE_INVALID;

    make_table_le(self, 0, self->table2, 0, self->tablesize);
    return PREFIX_CODE_OK;
}
|
||||
|
||||
// Public functions
|
||||
|
||||
PrefixCode *prefix_code_alloc(void)
|
||||
{
|
||||
PrefixCode *self = malloc(sizeof(PrefixCode));
|
||||
if(!self) return NULL;
|
||||
|
||||
self->tree = malloc(sizeof(CodeTreeNode));
|
||||
if(!self->tree)
|
||||
{
|
||||
free(self);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
set_empty_node(self, 0);
|
||||
self->numentries = 1;
|
||||
self->minlength = INT_MAX;
|
||||
self->maxlength = INT_MIN;
|
||||
self->isstatic = false;
|
||||
|
||||
self->stack = NULL;
|
||||
self->table1 = self->table2 = NULL;
|
||||
self->tablesize = 0;
|
||||
self->currnode = 0;
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
PrefixCode *prefix_code_alloc_with_static_table(int (*statictable)[2])
|
||||
{
|
||||
PrefixCode *self = malloc(sizeof(PrefixCode));
|
||||
if(!self) return NULL;
|
||||
|
||||
self->tree = (CodeTreeNode *)statictable; // TODO: fix the ugly cast
|
||||
self->isstatic = true;
|
||||
|
||||
self->stack = NULL;
|
||||
self->table1 = self->table2 = NULL;
|
||||
self->tablesize = 0;
|
||||
self->currnode = 0;
|
||||
self->numentries = 0;
|
||||
self->minlength = INT_MAX;
|
||||
self->maxlength = INT_MIN;
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
PrefixCode *prefix_code_alloc_with_lengths(const int *lengths, int numsymbols, int maxcodelength, bool zeros)
|
||||
{
|
||||
PrefixCode *self = prefix_code_alloc();
|
||||
if(!self) return NULL;
|
||||
|
||||
int code = 0, symbolsleft = numsymbols;
|
||||
|
||||
for(int length = 1; length <= maxcodelength; length++)
|
||||
{
|
||||
for(int i = 0; i < numsymbols; i++)
|
||||
{
|
||||
if(lengths[i] != length) continue;
|
||||
// Instead of reversing to get a low-bit-first code, we shift and use high-bit-first.
|
||||
int result;
|
||||
if(zeros) { result = prefix_code_add_value_high_bit_first(self, i, code, length); }
|
||||
else { result = prefix_code_add_value_high_bit_first(self, i, ~code, length); }
|
||||
if(result != PREFIX_CODE_OK)
|
||||
{
|
||||
prefix_code_free(self);
|
||||
return NULL;
|
||||
}
|
||||
code++;
|
||||
if(--symbolsleft == 0) return self; // early exit if all codes have been handled
|
||||
}
|
||||
code <<= 1;
|
||||
}
|
||||
|
||||
return self;
|
||||
}
|
||||
|
||||
// Release a prefix code together with its lookup tables and build stack.
// The tree is released only when it is owned (not static). NULL is accepted.
void prefix_code_free(PrefixCode *self)
{
    if(self == NULL) return;

    // prefix_code_stack_free() tolerates a NULL stack, so no guard is needed.
    prefix_code_stack_free(self->stack);
    free(self->table2);
    free(self->table1);
    if(!self->isstatic) free(self->tree);

    free(self);
}
|
||||
|
||||
// Add `value` under the `length`-bit code `code`, taking the most significant
// of those bits first. Returns PREFIX_CODE_OK or PREFIX_CODE_INVALID.
int prefix_code_add_value_high_bit_first(PrefixCode *self, int value, uint32_t code, int length)
{
    // A repeat position equal to the code length selects no bit of the code,
    // so no repeating node is created.
    const int norepeat = length;
    return prefix_code_add_value_high_bit_first_repeat(self, value, code, length, norepeat);
}
|
||||
|
||||
// Add `value` under the `length`-bit code `code` (most significant bit first),
// optionally inserting a self-looping "repeat" node at one bit position.
//
//   repeatpos  position of the repeat bit counted from the first code bit;
//              passing `length` disables the repeat node.
//
// Any cached lookup tables are discarded because the tree is about to change.
//
// Returns PREFIX_CODE_OK, or PREFIX_CODE_INVALID when:
//   - `self` is NULL or wraps a static table,
//   - `length` is outside 0..32 (the code is only 32 bits wide),
//   - the repeat position is the last bit, or the repeat bit and the bit that
//     follows it are equal,
//   - the code runs through an existing leaf, would overwrite a non-empty
//     node, or the repeat branch is already taken,
//   - a node cannot be allocated.
int prefix_code_add_value_high_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos)
{
    if(!self || self->isstatic) return PREFIX_CODE_INVALID;

    // Shifting a 32-bit code by 32 or more is undefined, and a negative length
    // would end up in maxlength and later be used as a table width.
    if(length < 0 || length > 32) return PREFIX_CODE_INVALID;

    free(self->table1);
    free(self->table2);
    self->table1 = self->table2 = NULL;

    if(length > self->maxlength) self->maxlength = length;
    if(length < self->minlength) self->minlength = length;

    // Convert from "counted from the first bit" to a bit index inside `code`.
    int repeatbit = length - 1 - repeatpos;
    if(repeatbit == 0) return PREFIX_CODE_INVALID;
    if(repeatbit > 0)
    {
        // Repeat bit together with the bit after it. Once the shift reaches
        // the word width every bit is gone, which is treated as the pair 00.
        int      shift = repeatbit - 1;
        uint32_t pair  = shift < 32 ? (code >> shift) & 3 : 0;
        if(pair == 0 || pair == 3) return PREFIX_CODE_INVALID;
    }

    int lastnode = 0;
    for(int bitpos = length - 1; bitpos >= 0; bitpos--)
    {
        int bit = (code >> bitpos) & 1;

        if(is_leaf_node(self, lastnode)) return PREFIX_CODE_INVALID;

        if(bitpos == repeatbit)
        {
            if(!is_open_branch(self, lastnode, bit)) return PREFIX_CODE_INVALID;

            int repeatnode = new_node(self);
            int nextnode   = new_node(self);
            if(repeatnode < 0 || nextnode < 0) return PREFIX_CODE_INVALID;

            // The repeat node loops back to itself on `bit` and leaves on the opposite bit.
            set_branch(self, lastnode, bit, repeatnode);
            set_branch(self, repeatnode, bit, repeatnode);
            set_branch(self, repeatnode, bit ^ 1, nextnode);
            lastnode = nextnode;

            bitpos++; // terminating bit already handled, skip it
        }
        else
        {
            if(is_open_branch(self, lastnode, bit))
            {
                int newnode = new_node(self);
                if(newnode < 0) return PREFIX_CODE_INVALID;
                set_branch(self, lastnode, bit, newnode);
            }
            lastnode = branch(self, lastnode, bit);
        }
    }

    if(!is_empty_node(self, lastnode)) return PREFIX_CODE_INVALID;
    set_leaf_value(self, lastnode, value);
    return PREFIX_CODE_OK;
}
|
||||
|
||||
// Add `value` under a code given least significant bit first: the low
// `length` bits are reversed and then added as a high-bit-first code.
int prefix_code_add_value_low_bit_first(PrefixCode *self, int value, uint32_t code, int length)
{
    const uint32_t flipped = reverse_n(code, length);
    return prefix_code_add_value_high_bit_first(self, value, flipped, length);
}
|
||||
|
||||
// Repeat-node variant of prefix_code_add_value_low_bit_first(): the low
// `length` bits of `code` are reversed, while `repeatpos` is passed through
// unchanged to the high-bit-first implementation.
int prefix_code_add_value_low_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos)
{
    const uint32_t flipped = reverse_n(code, length);
    return prefix_code_add_value_high_bit_first_repeat(self, value, flipped, length, repeatpos);
}
|
||||
|
||||
// Prepare for building the tree node by node: move the cursor to the root and
// provide an empty stack of parent nodes (created on first use, cleared
// afterwards). If the stack cannot be allocated, self->stack stays NULL.
void prefix_code_start_building_tree(PrefixCode *self)
{
    if(self == NULL) return;

    self->currnode = 0;

    if(self->stack != NULL)
    {
        prefix_code_stack_clear(self->stack);
        return;
    }
    self->stack = prefix_code_stack_alloc();
}
|
||||
|
||||
// Create a child on the 0 side of the current node and descend into it,
// remembering the current node so prefix_code_finish_branches() can return.
//
// Does nothing when `self` is NULL, when tree building has not been started
// (no stack, e.g. because its allocation failed), when the tree is a static
// table, or when memory runs out. In the last case the current node and its
// branches are left as they were.
void prefix_code_start_zero_branch(PrefixCode *self)
{
    if(!self || !self->stack || self->isstatic) return;

    int child = new_node(self);
    if(child < 0) return;

    // Record the parent before linking: if the push fails the tree stays
    // consistent and the freshly created node is simply left unreferenced.
    if(prefix_code_stack_push(self->stack, self->currnode) != 0) return;

    set_branch(self, self->currnode, 0, child);
    self->currnode = child;
}
|
||||
|
||||
// Create a child on the 1 side of the current node and descend into it,
// remembering the current node so prefix_code_finish_branches() can return.
//
// Does nothing when `self` is NULL, when tree building has not been started
// (no stack, e.g. because its allocation failed), when the tree is a static
// table, or when memory runs out. In the last case the current node and its
// branches are left as they were.
void prefix_code_start_one_branch(PrefixCode *self)
{
    if(!self || !self->stack || self->isstatic) return;

    int child = new_node(self);
    if(child < 0) return;

    // Record the parent before linking: if the push fails the tree stays
    // consistent and the freshly created node is simply left unreferenced.
    if(prefix_code_stack_push(self->stack, self->currnode) != 0) return;

    set_branch(self, self->currnode, 1, child);
    self->currnode = child;
}
|
||||
|
||||
// Step back to the most recently remembered parent node. Nothing happens when
// there is no code, no stack, or the pop does not yield a valid node index.
void prefix_code_finish_branches(PrefixCode *self)
{
    if(self == NULL || self->stack == NULL) return;

    const int parent = prefix_code_stack_pop(self->stack);
    if(parent < 0) return;

    self->currnode = parent;
}
|
||||
|
||||
// Turn the current node into a leaf carrying `value`, then step back to its parent.
void prefix_code_make_leaf_with_value(PrefixCode *self, int value)
{
    if(self != NULL)
    {
        set_leaf_value(self, self->currnode, value);
        prefix_code_finish_branches(self);
    }
}
|
||||
|
||||
// BitStream interface functions
|
||||
|
||||
// Decode one symbol from a high-bit-first bit stream.
//
// The next `tablesize` bits are peeked and looked up in table1 (built on
// demand). A direct hit consumes only the bits of that code. Otherwise the
// table entry names the tree node to continue from, and the rest of the code
// is resolved one bit at a time.
//
// Returns the symbol value, or PREFIX_CODE_INVALID when `code` is NULL, the
// table cannot be built, or the bits do not correspond to any code.
int prefix_code_read_symbol(BitStream *bs, PrefixCode *code)
{
    if(code == NULL) return PREFIX_CODE_INVALID;
    if(code->table1 == NULL && prefix_code_make_table(code) != PREFIX_CODE_OK) return PREFIX_CODE_INVALID;

    const int index = bitstream_peek_bits(bs, code->tablesize);

    const CodeTableEntry *entry  = &code->table1[index];
    const int             length = entry->length;
    const int             value  = entry->value;

    if(length < 0) return PREFIX_CODE_INVALID;

    // Fast path: the whole code fits in the table.
    if(length <= code->tablesize)
    {
        bitstream_skip_bits(bs, length);
        return value;
    }

    // Slow path: the table only covered a prefix; `value` is the node reached so far.
    bitstream_skip_bits(bs, code->tablesize);

    int node = value;
    for(;;)
    {
        if(is_leaf_node(code, node)) break;

        int bit = bitstream_read_bit(bs);
        if(is_open_branch(code, node, bit)) return PREFIX_CODE_INVALID;
        node = branch(code, node, bit);
    }
    return leaf_value(code, node);
}
|
||||
|
||||
// Decode one symbol from a low-bit-first bit stream.
//
// The next `tablesize` bits are peeked and looked up in table2 (built on
// demand). A direct hit consumes only the bits of that code. Otherwise the
// table entry names the tree node to continue from, and the rest of the code
// is resolved one bit at a time.
//
// Returns the symbol value, or PREFIX_CODE_INVALID when `code` is NULL, the
// table cannot be built, or the bits do not correspond to any code.
int prefix_code_read_symbol_le(BitStream *bs, PrefixCode *code)
{
    if(code == NULL) return PREFIX_CODE_INVALID;
    if(code->table2 == NULL && prefix_code_make_table_le(code) != PREFIX_CODE_OK) return PREFIX_CODE_INVALID;

    const int index = bitstream_peek_bits_le(bs, code->tablesize);

    const CodeTableEntry *entry  = &code->table2[index];
    const int             length = entry->length;
    const int             value  = entry->value;

    if(length < 0) return PREFIX_CODE_INVALID;

    // Fast path: the whole code fits in the table.
    if(length <= code->tablesize)
    {
        bitstream_skip_bits_le(bs, length);
        return value;
    }

    // Slow path: the table only covered a prefix; `value` is the node reached so far.
    bitstream_skip_bits_le(bs, code->tablesize);

    int node = value;
    for(;;)
    {
        if(is_leaf_node(code, node)) break;

        int bit = bitstream_read_bit_le(bs);
        if(is_open_branch(code, node, bit)) return PREFIX_CODE_INVALID;
        node = branch(code, node, bit);
    }
    return leaf_value(code, node);
}
|
||||
89
pak/prefixcode.h
Normal file
89
pak/prefixcode.h
Normal file
@@ -0,0 +1,89 @@
|
||||
/*
|
||||
* prefixcode.h - Prefix code tree implementation
|
||||
*
|
||||
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
||||
*
|
||||
* This library is free software; you can redistribute it and/or
|
||||
* modify it under the terms of the GNU Lesser General Public
|
||||
* License as published by the Free Software Foundation; either
|
||||
* version 2.1 of the License, or (at your option) any later version.
|
||||
*
|
||||
* This library is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
* Lesser General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU Lesser General Public
|
||||
* License along with this library; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
* MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
#ifndef PREFIXCODE_H
|
||||
#define PREFIXCODE_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdint.h>
|
||||
#include "bitstream.h"
|
||||
|
||||
// Status codes returned by the prefix code functions.
#define PREFIX_CODE_OK 0
// Parenthesised so the negative literal stays a single operand wherever it is expanded.
#define PREFIX_CODE_INVALID (-1)
|
||||
|
||||
// One node of the code tree.
// branches[0] / branches[1] hold the index of the next node for a 0 / 1 bit.
// A negative entry marks a branch that is not connected yet, and a node whose
// two entries are equal is a leaf that stores its symbol value in both.
typedef struct CodeTreeNode
{
    int branches[2];
} CodeTreeNode;
|
||||
|
||||
// One entry of the fast lookup table used when reading symbols.
typedef struct CodeTableEntry
{
    // Number of bits of the matched code. -1 marks bit patterns that belong
    // to no code, and a value larger than the table width means that `value`
    // is a tree node index to continue decoding from. Signed because -1 is
    // stored here and readers test for `length < 0`.
    int32_t length;
    // Decoded symbol, or the tree node index when `length` exceeds the table width.
    int32_t value;
} CodeTableEntry;
|
||||
|
||||
// Growable stack of node indices used while building a tree node by node.
typedef struct PrefixCodeStack
{
    int *data;     // heap storage for the entries
    int  count;    // number of entries currently stored
    int  capacity; // number of entries `data` has room for
} PrefixCodeStack;
|
||||
|
||||
typedef struct PrefixCode
|
||||
{
|
||||
CodeTreeNode *tree;
|
||||
int numentries;
|
||||
int minlength;
|
||||
int maxlength;
|
||||
bool isstatic;
|
||||
|
||||
int currnode;
|
||||
PrefixCodeStack *stack;
|
||||
|
||||
int tablesize;
|
||||
CodeTableEntry *table1;
|
||||
CodeTableEntry *table2;
|
||||
} PrefixCode;
|
||||
|
||||
// Function declarations
|
||||
PrefixCode *prefix_code_alloc(void);
|
||||
PrefixCode *prefix_code_alloc_with_lengths(const int *lengths, int numsymbols, int maxlength, bool shortestCodeIsZeros);
|
||||
PrefixCode *prefix_code_alloc_with_static_table(int (*statictable)[2]);
|
||||
void prefix_code_free(PrefixCode *self);
|
||||
|
||||
int prefix_code_add_value_high_bit_first(PrefixCode *self, int value, uint32_t code, int length);
|
||||
int prefix_code_add_value_high_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos);
|
||||
int prefix_code_add_value_low_bit_first(PrefixCode *self, int value, uint32_t code, int length);
|
||||
int prefix_code_add_value_low_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos);
|
||||
|
||||
void prefix_code_start_building_tree(PrefixCode *self);
|
||||
void prefix_code_start_zero_branch(PrefixCode *self);
|
||||
void prefix_code_start_one_branch(PrefixCode *self);
|
||||
void prefix_code_finish_branches(PrefixCode *self);
|
||||
void prefix_code_make_leaf_with_value(PrefixCode *self, int value);
|
||||
|
||||
// BitStream interface functions
|
||||
int prefix_code_read_symbol(BitStream *bs, PrefixCode *code);
|
||||
int prefix_code_read_symbol_le(BitStream *bs, PrefixCode *code);
|
||||
|
||||
#endif /* PREFIXCODE_H */
|
||||
Reference in New Issue
Block a user