diff --git a/CMakeLists.txt b/CMakeLists.txt index 0a6d8a9..892de39 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -129,21 +129,10 @@ endif() add_subdirectory(3rdparty) add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h adc.c adc.h lzip.c flac.c flac.h - zoo/lzd.c - zoo/lzd.h - zoo/lzh.c - zoo/decode.c - zoo/huf.c - zoo/io.c - zoo/lh5.h - zoo/lh5.c - zoo/lzh.h - zoo/ar.h - zoo/maketbl.c - arc/pack.c - arc/squeeze.c - arc/crunch.c - arc/lzw.c) + zoo/lzd.c zoo/lzd.h zoo/lzh.c zoo/decode.c zoo/huf.c zoo/io.c zoo/lh5.c zoo/lh5.h zoo/lzh.h zoo/ar.h zoo/maketbl.c + arc/pack.c arc/squeeze.c arc/crunch.c arc/lzw.c + pak/crush.c pak/distill.c pak/bitstream.c pak/bitstream.h pak/lzw.c pak/lzw.h pak/prefixcode.c + pak/prefixcode.h) include(3rdparty/bzip2.cmake) include(3rdparty/flac.cmake) diff --git a/library.h b/library.h index 0ab8dcd..5b2f375 100644 --- a/library.h +++ b/library.h @@ -124,22 +124,30 @@ AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_ // ARC method 4: Huffman squeezing AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); -// Method 5: LZW (crunching) +// ARC Method 5: LZW (crunching) AARU_EXPORT int AARU_CALL arc_decompress_crunch(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); -// Method 6: LZW with non-repeat packing (crunching) +// ARC Method 6: LZW with non-repeat packing (crunching) AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); -// Method 7: LZW with non-repeat packing and new hash (Crunching) +// ARC Method 7: LZW with non-repeat packing and new hash (Crunching) AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); -// Method 8: Dynamic LZW (crunching) +// ARC Method 8: Dynamic LZW 
(crunching) AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); -// Method 9: Dynamic LZW with 13 bits (squashing) +// ARC Method 9: Dynamic LZW with 13 bits (squashing) AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len); +// ARC/PAK Method 10: LZW (crush) (unsure why it differs from the others, but even XADMaster uses a different code path) +AARU_EXPORT int AARU_CALL pak_decompress_crush(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, + size_t *out_len); + +// ARC/PAK Method 11: LZSS (distill) +AARU_EXPORT int AARU_CALL pak_decompress_distill(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, + size_t *out_len); + #endif // AARU_COMPRESSION_NATIVE_LIBRARY_H diff --git a/pak/bitstream.c b/pak/bitstream.c new file mode 100644 index 0000000..dd9ea69 --- /dev/null +++ b/pak/bitstream.c @@ -0,0 +1,166 @@ +/* + * bitstream.c - Bit stream input implementation + * + * Copyright (c) 2017-present, MacPaw Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#include "bitstream.h" + +void bitstream_init(BitStream *bs, const uint8_t *data, size_t length) +{ + bs->data = data; + bs->length = length; + bs->pos = 0; + bs->bitbuffer = 0; + bs->bitcount = 0; + bs->eof = false; +} + +static void bitstream_fill_buffer(BitStream *bs) +{ + while(bs->bitcount < 24 && bs->pos < bs->length) + { + bs->bitbuffer |= (uint32_t)bs->data[bs->pos] << (24 - bs->bitcount); + bs->bitcount += 8; + bs->pos++; + } + if(bs->pos >= bs->length && bs->bitcount == 0) { bs->eof = true; } +} + +uint32_t bitstream_read_bit(BitStream *bs) +{ + if(bs->eof) return 0; + + if(bs->bitcount == 0) { bitstream_fill_buffer(bs); } + + if(bs->bitcount == 0) + { + bs->eof = true; + return 0; + } + + uint32_t bit = (bs->bitbuffer >> 31) & 1; + bs->bitbuffer <<= 1; + bs->bitcount--; + + return bit; +} + +uint32_t bitstream_read_bit_le(BitStream *bs) +{ + if(bs->eof) return 0; + + if(bs->bitcount == 0) + { + if(bs->pos >= bs->length) + { + bs->eof = true; + return 0; + } + bs->bitbuffer = bs->data[bs->pos++]; + bs->bitcount = 8; + } + + uint32_t bit = bs->bitbuffer & 1; + bs->bitbuffer >>= 1; + bs->bitcount--; + + return bit; +} + +uint32_t bitstream_read_bits(BitStream *bs, int count) +{ + uint32_t result = 0; + for(int i = 0; i < count; i++) { result = (result << 1) | bitstream_read_bit(bs); } + return result; +} + +uint32_t bitstream_read_bits_le(BitStream *bs, int count) +{ + uint32_t result = 0; + for(int i = 0; i < count; i++) { result |= bitstream_read_bit_le(bs) << i; } + return result; +} + +uint32_t bitstream_peek_bits(BitStream *bs, int count) +{ + // Save current state + uint32_t saved_buffer = bs->bitbuffer; + int saved_bitcount = bs->bitcount; + size_t saved_pos = bs->pos; + bool saved_eof = bs->eof; + + // Read the bits 
+ uint32_t result = bitstream_read_bits(bs, count); + + // Restore state + bs->bitbuffer = saved_buffer; + bs->bitcount = saved_bitcount; + bs->pos = saved_pos; + bs->eof = saved_eof; + + return result; +} + +uint32_t bitstream_peek_bits_le(BitStream *bs, int count) +{ + // Save current state + uint32_t saved_buffer = bs->bitbuffer; + int saved_bitcount = bs->bitcount; + size_t saved_pos = bs->pos; + bool saved_eof = bs->eof; + + // Read the bits + uint32_t result = bitstream_read_bits_le(bs, count); + + // Restore state + bs->bitbuffer = saved_buffer; + bs->bitcount = saved_bitcount; + bs->pos = saved_pos; + bs->eof = saved_eof; + + return result; +} + +void bitstream_skip_bits(BitStream *bs, int count) { bitstream_read_bits(bs, count); } + +void bitstream_skip_bits_le(BitStream *bs, int count) { bitstream_read_bits_le(bs, count); } + +uint8_t bitstream_read_byte(BitStream *bs) +{ + if(bs->pos >= bs->length) + { + bs->eof = true; + return 0; + } + return bs->data[bs->pos++]; +} + +uint16_t bitstream_read_uint16_le(BitStream *bs) +{ + if(bs->pos + 1 >= bs->length) + { + bs->eof = true; + return 0; + } + uint16_t result = bs->data[bs->pos] | (bs->data[bs->pos + 1] << 8); + bs->pos += 2; + return result; +} + +bool bitstream_eof(BitStream *bs) { return bs->eof; } diff --git a/pak/bitstream.h b/pak/bitstream.h new file mode 100644 index 0000000..08c77d5 --- /dev/null +++ b/pak/bitstream.h @@ -0,0 +1,75 @@ +/* + * bitstream.h - Bit stream input implementation + * + * Copyright (c) 2017-present, MacPaw Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#ifndef BITSTREAM_H +#define BITSTREAM_H + +#include +#include +#include + +typedef struct BitStream +{ + const uint8_t *data; + size_t length; + size_t pos; + uint32_t bitbuffer; + int bitcount; + bool eof; +} BitStream; + +// Initialize bit stream +void bitstream_init(BitStream *bs, const uint8_t *data, size_t length); + +// Read a single bit (MSB first) +uint32_t bitstream_read_bit(BitStream *bs); + +// Read a single bit (LSB first) +uint32_t bitstream_read_bit_le(BitStream *bs); + +// Read multiple bits (MSB first) +uint32_t bitstream_read_bits(BitStream *bs, int count); + +// Read multiple bits (LSB first) +uint32_t bitstream_read_bits_le(BitStream *bs, int count); + +// Peek at bits without consuming them (MSB first) +uint32_t bitstream_peek_bits(BitStream *bs, int count); + +// Peek at bits without consuming them (LSB first) +uint32_t bitstream_peek_bits_le(BitStream *bs, int count); + +// Skip previously peeked bits (MSB first) +void bitstream_skip_bits(BitStream *bs, int count); + +// Skip previously peeked bits (LSB first) +void bitstream_skip_bits_le(BitStream *bs, int count); + +// Read a byte +uint8_t bitstream_read_byte(BitStream *bs); + +// Read a 16-bit little endian integer +uint16_t bitstream_read_uint16_le(BitStream *bs); + +// Check if end of stream reached +bool bitstream_eof(BitStream *bs); + +#endif /* BITSTREAM_H */ diff --git a/pak/crush.c b/pak/crush.c new file mode 100644 index 0000000..125960b --- /dev/null +++ b/pak/crush.c @@ -0,0 +1,219 @@ +/* + 
* arc_crush.c - ARC Crush decompression algorithm + * + * Copyright (c) 2017-present, MacPaw Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#include +#include +#include +#include +#include "../library.h" +#include "bitstream.h" +#include "lzw.h" + +int pak_decompress_crush_internal(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len) +{ + if(!in_buf || !out_buf || !out_len || in_len == 0) { return -1; } + + BitStream bs; + bitstream_init(&bs, (const uint8_t *)in_buf, in_len); + + LZW *lzw = lzw_alloc(8192, 1); + if(!lzw) { return -1; } + + // Initialize state + int symbolsize = 1; + int nextsizebump = 2; + bool useliteralbit = true; + + int numrecentstrings = 0; + int ringindex = 0; + bool stringring[500]; + memset(stringring, 0, sizeof(stringring)); + + int usageindex = 0x101; + uint8_t usage[8192]; + memset(usage, 0, sizeof(usage)); + + int currbyte = 0; + uint8_t buffer[8192]; + size_t outpos = 0; + size_t max_output = *out_len; + + while(!bitstream_eof(&bs) && outpos < max_output) + { + if(!currbyte) + { + // Read the next symbol. How depends on the mode we are operating in. + int symbol; + if(useliteralbit) + { + // Use codes prefixed by a bit that selects literal or string codes. 
+ // Literals are always 8 bits, strings vary. + if(bitstream_read_bit_le(&bs)) { symbol = bitstream_read_bits_le(&bs, symbolsize) + 256; } + else { symbol = bitstream_read_bits_le(&bs, 8); } + } + else + { + // Use same-length codes for both literals and strings. + // Due to an optimization quirk in the original decruncher, + // literals have their bits inverted. + symbol = bitstream_read_bits_le(&bs, symbolsize); + if(symbol < 0x100) symbol ^= 0xff; + } + + // Code 0x100 is the EOF code. + if(symbol == 0x100) { break; } + + // Walk through the LZW tree, and set the usage count of the current + // string and all its parents to 4. This is not necessary for literals, + // but we do it anyway for simplicity. + LZWTreeNode *nodes = lzw_symbols(lzw); + int marksymbol = symbol; + while(marksymbol >= 0) + { + if(marksymbol < 8192) { usage[marksymbol] = 4; } + marksymbol = nodes[marksymbol].parent; + } + + // Adjust the count of recent strings versus literals. + // Use a ring buffer of length 500 as a window to keep track + // of how many strings have been encountered lately. + + // First, decrease the count if a string leaves the window. + if(stringring[ringindex]) numrecentstrings--; + + // Then store the current type of symbol in the window, and + // increase the count if the current symbol is a string. + if(symbol < 0x100) { stringring[ringindex] = false; } + else + { + stringring[ringindex] = true; + numrecentstrings++; + } + + // Move the window forward. + ringindex = (ringindex + 1) % 500; + + // Check the number of strings. If there have been many literals + // lately, bit-prefixed codes should be used. If we need to change + // mode, re-calculate the point where we increase the code length. + bool manyliterals = numrecentstrings < 375; + if(manyliterals != useliteralbit) + { + useliteralbit = manyliterals; + nextsizebump = 1 << symbolsize; + if(!useliteralbit) nextsizebump -= 0x100; + } + + // Update the LZW tree. 
+ if(!lzw_symbol_list_full(lzw)) + { + // If there is space in the tree, just add a new string as usual. + if(lzw_next_symbol(lzw, symbol) != LZW_NO_ERROR) + { + lzw_free(lzw); + return -1; + } + + // Set the usage count of the newly created entry. + int count = lzw_symbol_count(lzw); + if(count > 0 && count - 1 < 8192) { usage[count - 1] = 2; } + } + else + { + // If the tree is full, find a less-used symbol, and replace it. + int minindex = 0, minusage = INT_MAX; + int index = usageindex; + do { + index++; + if(index == 8192) index = 0x101; + + if(usage[index] < minusage) + { + minindex = index; + minusage = usage[index]; + } + + usage[index]--; + if(usage[index] == 0) break; + } while(index != usageindex); + + usageindex = index; + + if(lzw_replace_symbol(lzw, minindex, symbol) != LZW_NO_ERROR) + { + lzw_free(lzw); + return -1; + } + + // Set the usage count of the replaced entry. + if(minindex < 8192) { usage[minindex] = 2; } + } + + // Extract the data to output. + currbyte = lzw_reverse_output_to_buffer(lzw, buffer); + + // Check if we need to increase the code size. The point at which + // to increase varies depending on the coding mode. + if(lzw_symbol_count(lzw) - 257 >= nextsizebump) + { + symbolsize++; + nextsizebump = 1 << symbolsize; + if(!useliteralbit) nextsizebump -= 0x100; + } + } + + if(currbyte > 0 && outpos < max_output) { out_buf[outpos++] = (char)buffer[--currbyte]; } + else if(currbyte == 0) + { + // No more bytes in buffer, continue to next symbol + continue; + } + else + { + // Output buffer full + break; + } + } + + lzw_free(lzw); + *out_len = outpos; + return 0; +} + +AARU_EXPORT int AARU_CALL pak_decompress_crush(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, + size_t *out_len) +{ + // Allocate a temporary buffer. + size_t temp_len = *out_len * 2; // Heuristic. + unsigned char *temp_buf = malloc(temp_len); + if(!temp_buf) return -1; + + // Decompress crunched data. 
+ int result = pak_decompress_crush_internal(in_buf, in_len, temp_buf, &temp_len); + if(result == 0) + { + // Decompress non-repeat packing. + result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len); + } + + free(temp_buf); + return result; +} \ No newline at end of file diff --git a/pak/distill.c b/pak/distill.c new file mode 100644 index 0000000..4196bb8 --- /dev/null +++ b/pak/distill.c @@ -0,0 +1,182 @@ +/* + * arc_distill.c - ARC Distill decompression algorithm + * + * Copyright (c) 2017-present, MacPaw Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#include +#include +#include +#include "../library.h" +#include "bitstream.h" +#include "prefixcode.h" + +static const int offset_lengths[0x40] = { + 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, + 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, +}; + +static const int offset_codes[0x40] = { + 0x00, 0x02, 0x04, 0x0c, 0x01, 0x06, 0x0a, 0x0e, 0x11, 0x16, 0x1a, 0x1e, 0x05, 0x09, 0x0d, 0x15, + 0x19, 0x1d, 0x25, 0x29, 0x2d, 0x35, 0x39, 0x3d, 0x03, 0x07, 0x0b, 0x13, 0x17, 0x1b, 0x23, 0x27, + 0x2b, 0x33, 0x37, 0x3b, 0x43, 0x47, 0x4b, 0x53, 0x57, 0x5b, 0x63, 0x67, 0x6b, 0x73, 0x77, 0x7b, + 0x0f, 0x1f, 0x2f, 0x3f, 0x4f, 0x5f, 0x6f, 0x7f, 0x8f, 0x9f, 0xaf, 0xbf, 0xcf, 0xdf, 0xef, 0xff, +}; + +static void build_code_from_tree(PrefixCode *code, int *tree, int node, int numnodes, int depth) +{ + if(depth > 64) + { + // Too deep - error + return; + } + + if(node >= numnodes) { prefix_code_make_leaf_with_value(code, node - numnodes); } + else + { + prefix_code_start_zero_branch(code); + build_code_from_tree(code, tree, tree[node], numnodes, depth + 1); + prefix_code_start_one_branch(code); + build_code_from_tree(code, tree, tree[node + 1], numnodes, depth + 1); + prefix_code_finish_branches(code); + } +} + +AARU_EXPORT int AARU_CALL pak_decompress_distill(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, + size_t *out_len) +{ + if(!in_buf || !out_buf || !out_len || in_len == 0) { return -1; } + + BitStream bs; + bitstream_init(&bs, (const uint8_t *)in_buf, in_len); + + // Read header information + int numnodes = bitstream_read_uint16_le(&bs); + int codelength = bitstream_read_byte(&bs); + + if(numnodes < 2 || numnodes > 0x274) { 
return -1; } + + // Read tree nodes + int *nodes = malloc(numnodes * sizeof(int)); + if(!nodes) { return -1; } + + for(int i = 0; i < numnodes; i++) { nodes[i] = bitstream_read_bits_le(&bs, codelength); } + + // Build main code tree + PrefixCode *maincode = prefix_code_alloc(); + if(!maincode) + { + free(nodes); + return -1; + } + + prefix_code_start_building_tree(maincode); + build_code_from_tree(maincode, nodes, numnodes - 2, numnodes, 0); + + free(nodes); + + // Build offset code tree + PrefixCode *offsetcode = prefix_code_alloc(); + if(!offsetcode) + { + prefix_code_free(maincode); + return -1; + } + + for(int i = 0; i < 0x40; i++) + { + if(prefix_code_add_value_low_bit_first(offsetcode, i, offset_codes[i], offset_lengths[i]) != PREFIX_CODE_OK) + { + prefix_code_free(maincode); + prefix_code_free(offsetcode); + return -1; + } + } + + // LZSS decompression + uint8_t window[8192]; + memset(window, 0, sizeof(window)); + int windowpos = 0; + size_t outpos = 0; + size_t max_output = *out_len; + + while(!bitstream_eof(&bs) && outpos < max_output) + { + int symbol = prefix_code_read_symbol_le(&bs, maincode); + if(symbol < 0) break; + + if(symbol < 256) + { + // Literal byte + if(outpos < max_output) { out_buf[outpos++] = (char)symbol; } + window[windowpos] = symbol; + windowpos = (windowpos + 1) & 0x1fff; + } + else if(symbol == 256) + { + // End of stream + break; + } + else + { + // Match + int length = symbol - 0x101 + 3; + int offsetsymbol = prefix_code_read_symbol_le(&bs, offsetcode); + if(offsetsymbol < 0) break; + + int extralength; + if(outpos >= 0x1000 - 0x3c) + extralength = 7; + else if(outpos >= 0x800 - 0x3c) + extralength = 6; + else if(outpos >= 0x400 - 0x3c) + extralength = 5; + else if(outpos >= 0x200 - 0x3c) + extralength = 4; + else if(outpos >= 0x100 - 0x3c) + extralength = 3; + else if(outpos >= 0x80 - 0x3c) + extralength = 2; + else if(outpos >= 0x40 - 0x3c) + extralength = 1; + else + extralength = 0; + + int extrabits = 
bitstream_read_bits_le(&bs, extralength); + int offset = (offsetsymbol << extralength) + extrabits + 1; + + // Copy match + for(int i = 0; i < length; i++) + { + int sourcepos = (windowpos - offset) & 0x1fff; + uint8_t byte = window[sourcepos]; + + if(outpos < max_output) { out_buf[outpos++] = (char)byte; } + + window[windowpos] = byte; + windowpos = (windowpos + 1) & 0x1fff; + } + } + } + + prefix_code_free(maincode); + prefix_code_free(offsetcode); + + *out_len = outpos; + return 0; +} diff --git a/pak/lzw.c b/pak/lzw.c new file mode 100644 index 0000000..645ad76 --- /dev/null +++ b/pak/lzw.c @@ -0,0 +1,162 @@ +/* + * lzw.c - LZW decompression implementation + * + * Copyright (c) 2017-present, MacPaw Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#include "lzw.h" +#include + +LZW *lzw_alloc(int maxsymbols, int reservedsymbols) +{ + LZW *self = (LZW *)malloc(sizeof(LZW) + sizeof(LZWTreeNode) * maxsymbols); + if(!self) return NULL; + + if(maxsymbols < 256 + reservedsymbols) + { + free(self); + return NULL; + } + + self->maxsymbols = maxsymbols; + self->reservedsymbols = reservedsymbols; + + self->buffer = NULL; + self->buffersize = 0; + + for(int i = 0; i < 256; i++) + { + self->nodes[i].chr = i; + self->nodes[i].parent = -1; + } + + lzw_clear_table(self); + + return self; +} + +void lzw_free(LZW *self) +{ + if(self) + { + free(self->buffer); + free(self); + } +} + +void lzw_clear_table(LZW *self) +{ + self->numsymbols = 256 + self->reservedsymbols; + self->prevsymbol = -1; + self->symbolsize = 9; // TODO: technically this depends on reservedsymbols +} + +static uint8_t find_first_byte(LZWTreeNode *nodes, int symbol) +{ + while(nodes[symbol].parent >= 0) symbol = nodes[symbol].parent; + return nodes[symbol].chr; +} + +int lzw_next_symbol(LZW *self, int symbol) +{ + if(self->prevsymbol < 0) + { + if(symbol >= self->numsymbols) return LZW_INVALID_CODE_ERROR; + self->prevsymbol = symbol; + return LZW_NO_ERROR; + } + + int postfixbyte; + if(symbol < self->numsymbols) { postfixbyte = find_first_byte(self->nodes, symbol); } + else if(symbol == self->numsymbols) { postfixbyte = find_first_byte(self->nodes, self->prevsymbol); } + else { return LZW_INVALID_CODE_ERROR; } + + int parent = self->prevsymbol; + self->prevsymbol = symbol; + + if(!lzw_symbol_list_full(self)) + { + self->nodes[self->numsymbols].parent = parent; + self->nodes[self->numsymbols].chr = postfixbyte; + self->numsymbols++; + + if(!lzw_symbol_list_full(self)) + { + if((self->numsymbols & (self->numsymbols - 1)) == 0) 
{ self->symbolsize++; } + } + + return LZW_NO_ERROR; + } + else { return LZW_TOO_MANY_CODES_ERROR; } +} + +int lzw_replace_symbol(LZW *self, int oldsymbol, int symbol) +{ + if(symbol >= self->numsymbols) return LZW_INVALID_CODE_ERROR; + + self->nodes[oldsymbol].parent = self->prevsymbol; + self->nodes[oldsymbol].chr = find_first_byte(self->nodes, symbol); + + self->prevsymbol = symbol; + + return LZW_NO_ERROR; +} + +int lzw_output_length(LZW *self) +{ + int symbol = self->prevsymbol; + int n = 0; + + while(symbol >= 0) + { + symbol = self->nodes[symbol].parent; + n++; + } + + return n; +} + +int lzw_output_to_buffer(LZW *self, uint8_t *buffer) +{ + int symbol = self->prevsymbol; + int n = lzw_output_length(self); + buffer += n; + + while(symbol >= 0) + { + *--buffer = self->nodes[symbol].chr; + symbol = self->nodes[symbol].parent; + } + + return n; +} + +int lzw_reverse_output_to_buffer(LZW *self, uint8_t *buffer) +{ + int symbol = self->prevsymbol; + int n = 0; + + while(symbol >= 0) + { + *buffer++ = self->nodes[symbol].chr; + symbol = self->nodes[symbol].parent; + n++; + } + + return n; +} diff --git a/pak/lzw.h b/pak/lzw.h new file mode 100644 index 0000000..3a22a76 --- /dev/null +++ b/pak/lzw.h @@ -0,0 +1,83 @@ +/* + * lzw.h - LZW decompression implementation + * + * Copyright (c) 2017-present, MacPaw Inc. All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#ifndef LZW_H +#define LZW_H + +#include +#include + +#define LZW_NO_ERROR 0 +#define LZW_INVALID_CODE_ERROR 1 +#define LZW_TOO_MANY_CODES_ERROR 2 + +typedef struct LZWTreeNode +{ + uint8_t chr; + int parent; +} LZWTreeNode; + +typedef struct LZW +{ + int numsymbols; + int maxsymbols; + int reservedsymbols; + int prevsymbol; + int symbolsize; + + uint8_t *buffer; + int buffersize; + + LZWTreeNode nodes[]; // Flexible array member (C99) +} LZW; + +// Allocate LZW structure +LZW *lzw_alloc(int maxsymbols, int reservedsymbols); + +// Free LZW structure +void lzw_free(LZW *self); + +// Clear/reset LZW table +void lzw_clear_table(LZW *self); + +// Process next symbol +int lzw_next_symbol(LZW *self, int symbol); + +// Replace a symbol +int lzw_replace_symbol(LZW *self, int oldsymbol, int symbol); + +// Get output length +int lzw_output_length(LZW *self); + +// Output to buffer (normal order) +int lzw_output_to_buffer(LZW *self, uint8_t *buffer); + +// Output to buffer (reverse order) +int lzw_reverse_output_to_buffer(LZW *self, uint8_t *buffer); + +// Inline helper functions +static inline int lzw_symbol_count(LZW *self) { return self->numsymbols; } + +static inline bool lzw_symbol_list_full(LZW *self) { return self->numsymbols == self->maxsymbols; } + +static inline LZWTreeNode *lzw_symbols(LZW *self) { return self->nodes; } + +#endif /* LZW_H */ diff --git a/pak/prefixcode.c b/pak/prefixcode.c new file mode 100644 index 0000000..67392d1 --- /dev/null +++ b/pak/prefixcode.c @@ -0,0 +1,539 @@ +/* + * prefixcode.c - Prefix code tree implementation + * + * Copyright (c) 2017-pstatic inline bool is_invalid_node(PrefixCode *self, int node) { + (void)self; // Suppress unused parameter warning + return (node < 0); +}ent, MacPaw Inc. 
All rights reserved. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + * MA 02110-1301 USA + */ + +#include "prefixcode.h" +#include +#include +#include + +// Safe realloc that frees original pointer on failure +static void *safe_realloc(void *ptr, size_t newsize) +{ + void *newptr = realloc(ptr, newsize); + if(!newptr && newsize > 0) + { + free(ptr); + return NULL; + } + return newptr; +} + +// Inline helper functions +static inline CodeTreeNode *node_pointer(PrefixCode *self, int node) { return &self->tree[node]; } + +static inline int branch(PrefixCode *self, int node, int bit) { return node_pointer(self, node)->branches[bit]; } + +static inline void set_branch(PrefixCode *self, int node, int bit, int nextnode) +{ + node_pointer(self, node)->branches[bit] = nextnode; +} + +static inline int left_branch(PrefixCode *self, int node) { return branch(self, node, 0); } + +static inline int right_branch(PrefixCode *self, int node) { return branch(self, node, 1); } + +static inline void set_left_branch(PrefixCode *self, int node, int nextnode) { set_branch(self, node, 0, nextnode); } + +static inline void set_right_branch(PrefixCode *self, int node, int nextnode) { set_branch(self, node, 1, nextnode); } + +static inline int leaf_value(PrefixCode *self, int node) { return left_branch(self, 
node); }
+
+// Turns `node` into a leaf by writing `value` through both branch setters; is_leaf_node() recognises exactly
+// this "both branches equal" shape.
+static inline void set_leaf_value(PrefixCode *self, int node, int value)
+{
+    set_left_branch(self, node, value);
+    set_right_branch(self, node, value);
+}
+
+// Marks `node` as empty with the sentinel pair (-1, -2). The two values differ so that an empty node never
+// compares as a leaf, and both are negative so that both branches count as open.
+static inline void set_empty_node(PrefixCode *self, int node)
+{
+    set_left_branch(self, node, -1);
+    set_right_branch(self, node, -2);
+}
+
+// A negative node index means "no node".
+static inline bool is_invalid_node(PrefixCode *self, int node) { return node < 0; }
+
+// True when the `bit` branch of `node` does not lead to a node yet.
+static inline bool is_open_branch(PrefixCode *self, int node, int bit)
+{
+    return is_invalid_node(self, branch(self, node, bit));
+}
+
+// True when `node` still carries the (-1, -2) sentinels written by set_empty_node().
+static inline bool is_empty_node(PrefixCode *self, int node)
+{
+    return left_branch(self, node) == -1 && right_branch(self, node) == -2;
+}
+
+// True when both branches of `node` hold the same value, i.e. the node was written by set_leaf_value().
+static inline bool is_leaf_node(PrefixCode *self, int node)
+{
+    return left_branch(self, node) == right_branch(self, node);
+}
+
+// Appends one empty node to the tree array and returns its index, or -1 if the array could not be grown.
+// Must not be called on a static tree: the array is passed to safe_realloc().
+static int new_node(PrefixCode *self)
+{
+    CodeTreeNode *newtree = safe_realloc(self->tree, (self->numentries + 1) * sizeof(CodeTreeNode));
+    if(!newtree) return -1;
+
+    self->tree = newtree;
+    set_empty_node(self, self->numentries);
+    return self->numentries++;
+}
+
+// Stack implementation for tree building
+
+// Allocates an empty stack with room for 16 entries. Returns NULL on allocation failure.
+static PrefixCodeStack *prefix_code_stack_alloc(void)
+{
+    PrefixCodeStack *stack = malloc(sizeof(PrefixCodeStack));
+    if(!stack) return NULL;
+
+    stack->data = malloc(16 * sizeof(int));
+    if(!stack->data)
+    {
+        free(stack);
+        return NULL;
+    }
+
+    stack->count = 0;
+    stack->capacity = 16;
+    return stack;
+}
+
+// Releases a stack and its storage. NULL is accepted.
+static void prefix_code_stack_free(PrefixCodeStack *stack)
+{
+    if(!stack) return;
+    free(stack->data);
+    free(stack);
+}
+
+// Pushes `value`, doubling the capacity when full. Returns 0 on success, -1 if the storage could not grow
+// (the stack is left unchanged in that case).
+static int prefix_code_stack_push(PrefixCodeStack *stack, int value)
+{
+    if(stack->count >= stack->capacity)
+    {
+        int newcapacity = stack->capacity * 2;
+        int *newdata = safe_realloc(stack->data, newcapacity * sizeof(int));
+        if(!newdata) return -1;
+
+        stack->data = newdata;
+        stack->capacity = newcapacity;
+    }
+
+    stack->data[stack->count++] = value;
+    return 0;
+}
+
+// Pops and returns the top value, or -1 when the stack is empty.
+static int prefix_code_stack_pop(PrefixCodeStack *stack)
+{
+    if(stack->count == 0) return -1;
+    return stack->data[--stack->count];
+}
+
+// Forgets all entries but keeps the allocated storage.
+static void prefix_code_stack_clear(PrefixCodeStack *stack) { stack->count = 0; }
+
+// Bit reversal functions
+
+// Reverses the order of all 32 bits of `val` by swapping ever larger groups (1, 2, 4, 8, 16 bits).
+static uint32_t reverse_32(uint32_t val)
+{
+    val = ((val >> 1) & 0x55555555) | ((val & 0x55555555) << 1);
+    val = ((val >> 2) & 0x33333333) | ((val & 0x33333333) << 2);
+    val = ((val >> 4) & 0x0F0F0F0F) | ((val & 0x0F0F0F0F) << 4);
+    val = ((val >> 8) & 0x00FF00FF) | ((val & 0x00FF00FF) << 8);
+    return (val >> 16) | (val << 16);
+}
+
+// Reverses the low `length` bits of `val`. A length of 0 (or less) yields 0 and a length of 32 or more
+// reverses the whole word; both cases are handled explicitly because shifting a 32-bit value by 32 or by a
+// negative amount is undefined in C.
+static uint32_t reverse_n(uint32_t val, int length)
+{
+    if(length <= 0) return 0;
+    if(length >= 32) return reverse_32(val);
+    return reverse_32(val) >> (32 - length);
+}
+
+// Table construction functions
+#define TABLE_MAX_SIZE 10
+
+// Fills the high-bit-first lookup table for the subtree rooted at `node`.
+// `table` points at the slice of 1 << (maxdepth - depth) entries that share the prefix leading to `node`.
+// Entry encoding: length -1 = no code here; length == depth of a leaf = symbol in `value`;
+// length maxdepth + 1 = code is longer than the table, `value` is the tree node to continue walking from.
+static void make_table(PrefixCode *code, int node, CodeTableEntry *table, int depth, int maxdepth)
+{
+    int currtablesize = 1 << (maxdepth - depth);
+
+    if(is_invalid_node(code, node))
+    {
+        for(int i = 0; i < currtablesize; i++) table[i].length = -1;
+    }
+    else if(is_leaf_node(code, node))
+    {
+        for(int i = 0; i < currtablesize; i++)
+        {
+            table[i].length = depth;
+            table[i].value = leaf_value(code, node);
+        }
+    }
+    else
+    {
+        if(depth == maxdepth)
+        {
+            table[0].length = maxdepth + 1;
+            table[0].value = node;
+        }
+        else
+        {
+            // First half of the slice is the 0 branch, second half the 1 branch.
+            make_table(code, left_branch(code, node), table, depth + 1, maxdepth);
+            make_table(code, right_branch(code, node), table + currtablesize / 2, depth + 1, maxdepth);
+        }
+    }
+}
+
+// Same as make_table() but for low-bit-first streams: the bit decided at `depth` is bit number `depth` of the
+// table index, so entries sharing a prefix are spaced 1 << depth apart instead of being contiguous.
+static void make_table_le(PrefixCode *code, int node, CodeTableEntry *table, int depth, int maxdepth)
+{
+    int currtablesize = 1 << (maxdepth - depth);
+    int currstride = 1 << depth;
+
+    if(is_invalid_node(code, node))
+    {
+        for(int i = 0; i < currtablesize; i++) table[i * currstride].length = -1;
+    }
+    else if(is_leaf_node(code, node))
+    {
+        for(int i = 0; i < currtablesize; i++)
+        {
+            table[i * currstride].length = depth;
+            table[i * currstride].value = leaf_value(code, node);
+        }
+    }
+    else
+    {
+        if(depth == maxdepth)
+        {
+            table[0].length = maxdepth + 1;
+            table[0].value = node;
+        }
+        else
+        {
+            make_table_le(code, left_branch(code, node), table, depth + 1, maxdepth);
+            make_table_le(code, right_branch(code, node), table + currstride, depth + 1, maxdepth);
+        }
+    }
+}
+
+// Number of index bits used by both lookup tables: the longest recorded code length, capped at
+// TABLE_MAX_SIZE. TABLE_MAX_SIZE is also used when no length was recorded (maxlength < minlength).
+static int lookup_table_bits(const PrefixCode *self)
+{
+    if(self->maxlength < self->minlength) return TABLE_MAX_SIZE; // no code lengths recorded
+    if(self->maxlength >= TABLE_MAX_SIZE) return TABLE_MAX_SIZE;
+    return self->maxlength;
+}
+
+// Builds the high-bit-first table (table1) if it does not exist yet.
+// Returns PREFIX_CODE_OK, or PREFIX_CODE_INVALID when the table cannot be allocated.
+static int prefix_code_make_table(PrefixCode *self)
+{
+    if(self->table1) return PREFIX_CODE_OK;
+
+    self->tablesize = lookup_table_bits(self);
+
+    self->table1 = malloc(sizeof(CodeTableEntry) * ((size_t)1 << self->tablesize));
+    if(!self->table1) return PREFIX_CODE_INVALID;
+
+    make_table(self, 0, self->table1, 0, self->tablesize);
+    return PREFIX_CODE_OK;
+}
+
+// Builds the low-bit-first table (table2) if it does not exist yet.
+// Returns PREFIX_CODE_OK, or PREFIX_CODE_INVALID when the table cannot be allocated.
+static int prefix_code_make_table_le(PrefixCode *self)
+{
+    if(self->table2) return PREFIX_CODE_OK;
+
+    self->tablesize = lookup_table_bits(self);
+
+    self->table2 = malloc(sizeof(CodeTableEntry) * ((size_t)1 << self->tablesize));
+    if(!self->table2) return PREFIX_CODE_INVALID;
+
+    make_table_le(self, 0, self->table2, 0, self->tablesize);
+    return PREFIX_CODE_OK;
+}
+
+// Public functions
+
+// Creates an empty, growable prefix code consisting of a single empty root node.
+// Returns NULL on allocation failure. Release with prefix_code_free().
+PrefixCode *prefix_code_alloc(void)
+{
+    PrefixCode *self = malloc(sizeof(PrefixCode));
+    if(!self) return NULL;
+
+    self->tree = malloc(sizeof(CodeTreeNode));
+    if(!self->tree)
+    {
+        free(self);
+        return NULL;
+    }
+
+    set_empty_node(self, 0);
+    self->numentries = 1;
+    self->minlength = INT_MAX;
+    self->maxlength = INT_MIN;
+    self->isstatic = false;
+
+    self->stack = NULL;
+    self->table1 = self->table2 = NULL;
+    self->tablesize = 0;
+    self->currnode = 0;
+
+    return self;
+}
+
+// Wraps a caller-owned, prebuilt tree. The table is used in place (not copied, never freed here), so it must
+// outlive the returned object. Functions that modify the tree refuse to operate on such a code.
+// Returns NULL on allocation failure.
+PrefixCode *prefix_code_alloc_with_static_table(int (*statictable)[2])
+{
+    PrefixCode *self = malloc(sizeof(PrefixCode));
+    if(!self) return NULL;
+
+    self->tree = (CodeTreeNode *)statictable; // TODO: fix the ugly cast
+    self->isstatic = true;
+
+    self->stack = NULL;
+    self->table1 = self->table2 = NULL;
+    self->tablesize = 0;
+    self->currnode = 0;
+    self->numentries = 0;
+    self->minlength = INT_MAX;
+    self->maxlength = INT_MIN;
+
+    return self;
+}
+
+// Builds a code from per-symbol code lengths: for each length from 1 to `maxcodelength`, the symbols having
+// that length receive consecutive code values in symbol order, and the running code is shifted left by one
+// when moving on to the next length. With `zeros` false the bitwise complement of each code is used instead.
+// Entries of `lengths` outside 1..maxcodelength get no code.
+// Returns NULL on allocation failure or if a code cannot be inserted.
+PrefixCode *prefix_code_alloc_with_lengths(const int *lengths, int numsymbols, int maxcodelength, bool zeros)
+{
+    PrefixCode *self = prefix_code_alloc();
+    if(!self) return NULL;
+
+    int code = 0, symbolsleft = numsymbols;
+
+    for(int length = 1; length <= maxcodelength; length++)
+    {
+        for(int i = 0; i < numsymbols; i++)
+        {
+            if(lengths[i] != length) continue;
+            // Instead of reversing to get a low-bit-first code, we shift and use high-bit-first.
+            int result;
+            if(zeros) { result = prefix_code_add_value_high_bit_first(self, i, code, length); }
+            else { result = prefix_code_add_value_high_bit_first(self, i, ~code, length); }
+            if(result != PREFIX_CODE_OK)
+            {
+                prefix_code_free(self);
+                return NULL;
+            }
+            code++;
+            if(--symbolsleft == 0) return self; // early exit if all codes have been handled
+        }
+        code <<= 1;
+    }
+
+    return self;
+}
+
+// Releases a prefix code, its lookup tables and builder stack. The tree itself is only freed when it is not
+// a static table. NULL is accepted.
+void prefix_code_free(PrefixCode *self)
+{
+    if(!self) return;
+
+    if(!self->isstatic) free(self->tree);
+    free(self->table1);
+    free(self->table2);
+    if(self->stack) prefix_code_stack_free(self->stack);
+    free(self);
+}
+
+// Adds `value` under the `length`-bit code `code`, most significant code bit first, without a repeat node.
+int prefix_code_add_value_high_bit_first(PrefixCode *self, int value, uint32_t code, int length)
+{
+    return prefix_code_add_value_high_bit_first_repeat(self, value, code, length, length);
+}
+
+// Adds `value` under the `length`-bit code `code` (most significant code bit first).
+// `repeatpos` counts from the first code bit; passing `length` (or more) disables the repeat node. At the
+// repeat position a self-looping node is inserted: further bits equal to the code bit stay on that node and
+// the opposite bit leaves it.
+// Returns PREFIX_CODE_OK, or PREFIX_CODE_INVALID for a NULL/static code, a length outside 0..32, an invalid
+// repeat position, a clash with an existing code, or allocation failure. The tree may already have been
+// partially extended when an error is reported.
+int prefix_code_add_value_high_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos)
+{
+    if(!self || self->isstatic) return PREFIX_CODE_INVALID;
+
+    // `code` has 32 bits; anything else would shift out of range below and corrupt min/max length.
+    if(length < 0 || length > 32) return PREFIX_CODE_INVALID;
+
+    // The tree is about to change, so cached lookup tables are stale.
+    free(self->table1);
+    free(self->table2);
+    self->table1 = self->table2 = NULL;
+
+    if(length > self->maxlength) self->maxlength = length;
+    if(length < self->minlength) self->minlength = length;
+
+    // Convert to a bit index (0 = last code bit). The repeat bit needs a following terminating bit of the
+    // opposite value, so index 0 and equal neighbouring bits (00 or 11) are rejected.
+    repeatpos = length - 1 - repeatpos;
+    if(repeatpos == 0 ||
+       (repeatpos >= 0 && (((code >> (repeatpos - 1)) & 3) == 0 || ((code >> (repeatpos - 1)) & 3) == 3)))
+    {
+        return PREFIX_CODE_INVALID;
+    }
+
+    int lastnode = 0;
+    for(int bitpos = length - 1; bitpos >= 0; bitpos--)
+    {
+        int bit = (code >> bitpos) & 1;
+
+        if(is_leaf_node(self, lastnode)) return PREFIX_CODE_INVALID;
+
+        if(bitpos == repeatpos)
+        {
+            if(!is_open_branch(self, lastnode, bit)) return PREFIX_CODE_INVALID;
+
+            int repeatnode = new_node(self);
+            int nextnode = new_node(self);
+            if(repeatnode < 0 || nextnode < 0) return PREFIX_CODE_INVALID;
+
+            set_branch(self, lastnode, bit, repeatnode);
+            set_branch(self, repeatnode, bit, repeatnode);
+            set_branch(self, repeatnode, bit ^ 1, nextnode);
+            lastnode = nextnode;
+
+            // Terminating bit already handled, skip it. This has to move towards bit 0: incrementing here
+            // would cancel the loop's own decrement and process the repeat position forever.
+            bitpos--;
+        }
+        else
+        {
+            if(is_open_branch(self, lastnode, bit))
+            {
+                int newnode = new_node(self);
+                if(newnode < 0) return PREFIX_CODE_INVALID;
+                set_branch(self, lastnode, bit, newnode);
+            }
+            lastnode = branch(self, lastnode, bit);
+        }
+    }
+
+    // The final node must be unused, otherwise this code is a prefix of (or equal to) an existing one.
+    if(!is_empty_node(self, lastnode)) return PREFIX_CODE_INVALID;
+    set_leaf_value(self, lastnode, value);
+    return PREFIX_CODE_OK;
+}
+
+// Like prefix_code_add_value_high_bit_first(), but `code` is given with its first bit in bit 0.
+int prefix_code_add_value_low_bit_first(PrefixCode *self, int value, uint32_t code, int length)
+{
+    return prefix_code_add_value_high_bit_first(self, value, reverse_n(code, length), length);
+}
+
+// Like prefix_code_add_value_high_bit_first_repeat(), but `code` is given with its first bit in bit 0.
+int prefix_code_add_value_low_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos)
+{
+    return prefix_code_add_value_high_bit_first_repeat(self, value, reverse_n(code, length), length, repeatpos);
+}
+
+// Resets the incremental tree builder to the root and creates (or empties) its stack.
+// If the stack cannot be allocated it stays NULL and the branch functions below become no-ops.
+void prefix_code_start_building_tree(PrefixCode *self)
+{
+    if(!self) return;
+
+    self->currnode = 0;
+    if(!self->stack) { self->stack = prefix_code_stack_alloc(); }
+    else { prefix_code_stack_clear(self->stack); }
+}
+
+// Shared body of prefix_code_start_zero_branch() / prefix_code_start_one_branch(): hangs a fresh node off the
+// `bit` branch of the current node, remembers the current node on the stack and descends into the new node.
+// Does nothing for a NULL or static code, when the builder stack is missing, or when memory runs out; a
+// failed step leaves both tree position and stack unchanged.
+static void start_branch(PrefixCode *self, int bit)
+{
+    if(!self || self->isstatic || !self->stack) return;
+
+    // Record the parent first: if this fails nothing has been modified yet.
+    if(prefix_code_stack_push(self->stack, self->currnode) != 0) return;
+
+    int child = new_node(self);
+    if(child < 0)
+    {
+        prefix_code_stack_pop(self->stack); // undo the push above
+        return;
+    }
+
+    set_branch(self, self->currnode, bit, child);
+    self->currnode = child;
+}
+
+// Builder step: create the 0 branch of the current node and descend into it.
+void prefix_code_start_zero_branch(PrefixCode *self) { start_branch(self, 0); }
+
+// Builder step: create the 1 branch of the current node and descend into it.
+void prefix_code_start_one_branch(PrefixCode *self) { start_branch(self, 1); }
+
+// Builder step: return to the node that was current before the matching start_*_branch() call.
+// Does nothing when there is no stack or the stack is empty.
+void prefix_code_finish_branches(PrefixCode *self)
+{
+    if(!self || !self->stack) return;
+
+    int node = prefix_code_stack_pop(self->stack);
+    if(node >= 0) self->currnode = node;
+}
+
+// Builder step: make the current node a leaf carrying `value`, then return to its parent.
+// Ignored for a NULL or static code (a static tree is caller-owned and must not be written to).
+void prefix_code_make_leaf_with_value(PrefixCode *self, int value)
+{
+    if(!self || self->isstatic) return;
+
+    set_leaf_value(self, self->currnode, value);
+    prefix_code_finish_branches(self);
+}
+
+// BitStream interface functions
+
+// Decodes one symbol from a high-bit-first stream.
+// Peeks `tablesize` bits and resolves them through table1 (built on first use); codes longer than the table
+// are finished by walking the tree one bit at a time.
+// Returns the symbol value, or PREFIX_CODE_INVALID (-1) for a NULL code, allocation failure or a bit pattern
+// that matches no code.
+int prefix_code_read_symbol(BitStream *bs, PrefixCode *code)
+{
+    if(!code) return PREFIX_CODE_INVALID;
+    if(!code->table1)
+    {
+        if(prefix_code_make_table(code) != PREFIX_CODE_OK) return PREFIX_CODE_INVALID;
+    }
+
+    int bits = bitstream_peek_bits(bs, code->tablesize);
+
+    // Defensive: the table has exactly 1 << tablesize entries.
+    if(bits < 0 || bits >= (1 << code->tablesize)) return PREFIX_CODE_INVALID;
+
+    int length = code->table1[bits].length;
+    int value = code->table1[bits].value;
+
+    if(length < 0) return PREFIX_CODE_INVALID;
+
+    if(length <= code->tablesize)
+    {
+        bitstream_skip_bits(bs, length);
+        return value;
+    }
+
+    // Long code: `value` is the tree node reached after `tablesize` bits.
+    bitstream_skip_bits(bs, code->tablesize);
+
+    int node = value;
+    while(!is_leaf_node(code, node))
+    {
+        int bit = bitstream_read_bit(bs);
+        if(is_open_branch(code, node, bit)) return PREFIX_CODE_INVALID;
+        node = branch(code, node, bit);
+    }
+    return leaf_value(code, node);
+}
+
+// Decodes one symbol from a low-bit-first stream; identical to prefix_code_read_symbol() except that it uses
+// table2 and the *_le bit stream readers.
+int prefix_code_read_symbol_le(BitStream *bs, PrefixCode *code)
+{
+    if(!code) return PREFIX_CODE_INVALID;
+    if(!code->table2)
+    {
+        if(prefix_code_make_table_le(code) != PREFIX_CODE_OK) return PREFIX_CODE_INVALID;
+    }
+
+    int bits = bitstream_peek_bits_le(bs, code->tablesize);
+
+    // Defensive: the table has exactly 1 << tablesize entries.
+    if(bits < 0 || bits >= (1 << code->tablesize)) return PREFIX_CODE_INVALID;
+
+    int length = code->table2[bits].length;
+    int value = code->table2[bits].value;
+
+    if(length < 0) return PREFIX_CODE_INVALID;
+
+    if(length <= code->tablesize)
+    {
+        bitstream_skip_bits_le(bs, length);
+        return value;
+    }
+
+    // Long code: `value` is the tree node reached after `tablesize` bits.
+    bitstream_skip_bits_le(bs, code->tablesize);
+
+    int node = value;
+    while(!is_leaf_node(code, node))
+    {
+        int bit = bitstream_read_bit_le(bs);
+        if(is_open_branch(code, node, bit)) return PREFIX_CODE_INVALID;
+        node = branch(code, node, bit);
+    }
+    return leaf_value(code, node);
+}
diff --git a/pak/prefixcode.h b/pak/prefixcode.h
new file mode 100644
index 0000000..4a8b368
--- /dev/null
+++ b/pak/prefixcode.h
@@ -0,0 +1,89 @@
+/*
+ * prefixcode.h - Prefix code tree implementation
+ *
+ * Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
+ * MA 02110-1301 USA
+ */
+
+#ifndef PREFIXCODE_H
+#define PREFIXCODE_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "bitstream.h"
+
+// Error codes
+#define PREFIX_CODE_OK 0
+#define PREFIX_CODE_INVALID -1
+
+// One node of the code tree. branches[0] is followed for a 0 bit, branches[1] for a 1 bit; each holds the
+// index of the next node, or a negative value when the branch leads nowhere.
+typedef struct CodeTreeNode
+{
+    int branches[2];
+} CodeTreeNode;
+
+// One entry of a decoding lookup table.
+typedef struct CodeTableEntry
+{
+    // Signed on purpose: -1 marks "no code for this bit pattern". Otherwise the number of bits the code
+    // uses, or tablesize + 1 when the code is longer than the table index.
+    int32_t length;
+    // Decoded symbol, or (for codes longer than the table) the tree node to continue decoding from.
+    int32_t value;
+} CodeTableEntry;
+
+// Simple stack implementation for tree building
+typedef struct PrefixCodeStack
+{
+    int *data;    // stored values
+    int count;    // number of values currently stored
+    int capacity; // number of values `data` has room for
+} PrefixCodeStack;
+
+typedef struct PrefixCode
+{
+    CodeTreeNode *tree; // node array, node 0 is the root
+    int numentries;     // number of nodes in `tree` (left at 0 for static tables)
+    int minlength;      // shortest code length added so far (INT_MAX when none)
+    int maxlength;      // longest code length added so far (INT_MIN when none)
+    bool isstatic;      // `tree` is a caller-owned table: never grown, written or freed
+
+    int currnode;           // node the incremental tree builder is positioned on
+    PrefixCodeStack *stack; // parent nodes of `currnode`, NULL until tree building starts
+
+    int tablesize;          // number of bits used to index table1 / table2
+    CodeTableEntry *table1; // lookup table for high-bit-first streams, built on first use
+    CodeTableEntry *table2; // lookup table for low-bit-first streams, built on first use
+} PrefixCode;
+
+// Function declarations
+
+// Creation and destruction. The alloc functions return NULL on failure.
+PrefixCode *prefix_code_alloc(void);
+PrefixCode *prefix_code_alloc_with_lengths(const int *lengths, int numsymbols, int maxlength, bool shortestCodeIsZeros);
+PrefixCode *prefix_code_alloc_with_static_table(int (*statictable)[2]);
+void prefix_code_free(PrefixCode *self);
+
+// Adding single codes. Each returns PREFIX_CODE_OK or PREFIX_CODE_INVALID.
+int prefix_code_add_value_high_bit_first(PrefixCode *self, int value, uint32_t code, int length);
+int prefix_code_add_value_high_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos);
+int prefix_code_add_value_low_bit_first(PrefixCode *self, int value, uint32_t code, int length);
+int prefix_code_add_value_low_bit_first_repeat(PrefixCode *self, int value, uint32_t code, int length, int repeatpos);
+
+// Incremental tree building: start, then descend with start_*_branch and close with leaves / finish.
+void prefix_code_start_building_tree(PrefixCode *self);
+void prefix_code_start_zero_branch(PrefixCode *self);
+void prefix_code_start_one_branch(PrefixCode *self);
+void prefix_code_finish_branches(PrefixCode *self);
+void prefix_code_make_leaf_with_value(PrefixCode *self, int value);
+ +// BitStream interface functions +int prefix_code_read_symbol(BitStream *bs, PrefixCode *code); +int prefix_code_read_symbol_le(BitStream *bs, PrefixCode *code); + +#endif /* PREFIXCODE_H */ diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 103394c..d6e5d5e 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -60,9 +60,17 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunch_dynamic.bin file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsquash.bin DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/) +file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/pak_crush.bin + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/) + +file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/pak_distill.bin + DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/) + # 'Google_Tests_run' is the target name # 'test1.cpp tests2.cpp' are source files with tests add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp arc/crunch.cpp - arc/squash.cpp) + arc/squash.cpp + pak/crush.cpp + pak/distill.cpp) target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native") diff --git a/tests/data/pak_crush.bin b/tests/data/pak_crush.bin new file mode 100644 index 0000000..0af589d Binary files /dev/null and b/tests/data/pak_crush.bin differ diff --git a/tests/data/pak_distill.bin b/tests/data/pak_distill.bin new file mode 100644 index 0000000..819ef13 Binary files /dev/null and b/tests/data/pak_distill.bin differ diff --git a/tests/pak/crush.cpp b/tests/pak/crush.cpp new file mode 100644 index 0000000..407c3b9 --- /dev/null +++ b/tests/pak/crush.cpp @@ -0,0 +1,81 @@ +/* + * This file is part of the Aaru Data Preservation Suite. + * Copyright (c) 2019-2025 Natalia Portillo. 
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+
+#include "../../library.h"
+#include "../crc32.h"
+#include "gtest/gtest.h"
+
+#define EXPECTED_CRC32 0x66007dba
+
+// Size in bytes of the compressed fixture data/pak_crush.bin.
+static const size_t COMPRESSED_SIZE = 63282;
+// Size in bytes the fixture is expected to expand to.
+static const size_t DECOMPRESSED_SIZE = 152089;
+
+// Compressed fixture contents, loaded by SetUp() and released by TearDown().
+static const uint8_t *buffer;
+
+class crushFixture : public ::testing::Test
+{
+public:
+    crushFixture()
+    {
+        // initialization;
+        // can also be done in SetUp()
+    }
+
+protected:
+    // Reads data/pak_crush.bin (relative to the current working directory) into `buffer`.
+    // Any failure aborts the test with a message instead of crashing on a NULL FILE* or a short buffer.
+    void SetUp()
+    {
+        char path[PATH_MAX];
+        char filename[PATH_MAX];
+
+        buffer = nullptr; // keeps TearDown() safe if a step below fails
+
+        ASSERT_TRUE(getcwd(path, PATH_MAX) != nullptr);
+        snprintf(filename, PATH_MAX, "%s/data/pak_crush.bin", path);
+
+        FILE *file = fopen(filename, "rb");
+        ASSERT_TRUE(file != nullptr) << "cannot open " << filename;
+
+        uint8_t *data = (uint8_t *)malloc(COMPRESSED_SIZE);
+        size_t bytesRead = data ? fread(data, 1, COMPRESSED_SIZE, file) : 0;
+        fclose(file);
+        buffer = data;
+
+        ASSERT_TRUE(data != nullptr);
+        ASSERT_EQ(bytesRead, COMPRESSED_SIZE) << "short read from " << filename;
+    }
+
+    void TearDown() { free((void *)buffer); }
+
+    ~crushFixture()
+    {
+        // resources cleanup, no exceptions allowed
+    }
+
+    // shared user data
+};
+
+// Decompresses the fixture and checks return code, output length and CRC32 of the output.
+TEST_F(crushFixture, crush)
+{
+    size_t destLen = DECOMPRESSED_SIZE;
+    size_t srcLen = COMPRESSED_SIZE;
+    // Zero-initialised so the CRC below never reads indeterminate bytes if decoding stops early.
+    auto *outBuf = (uint8_t *)calloc(DECOMPRESSED_SIZE, 1);
+    ASSERT_TRUE(outBuf != nullptr);
+
+    auto err = pak_decompress_crush(buffer, srcLen, outBuf, &destLen);
+
+    EXPECT_EQ(err, 0);
+    EXPECT_EQ(destLen, DECOMPRESSED_SIZE);
+
+    auto crc = crc32_data(outBuf, DECOMPRESSED_SIZE);
+
+    free(outBuf);
+
+    EXPECT_EQ(crc, EXPECTED_CRC32);
+}
\ No newline at end of file
diff --git a/tests/pak/distill.cpp b/tests/pak/distill.cpp
new file mode 100644
index 0000000..00d4882
--- /dev/null
+++ b/tests/pak/distill.cpp
@@ -0,0 +1,81 @@
+/*
+ * This
file is part of the Aaru Data Preservation Suite.
+ * Copyright (c) 2019-2025 Natalia Portillo.
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of the
+ * License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this library; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <climits>
+#include <cstddef>
+#include <cstdint>
+#include <cstdio>
+#include <cstdlib>
+
+#include "../../library.h"
+#include "../crc32.h"
+#include "gtest/gtest.h"
+
+#define EXPECTED_CRC32 0x66007dba
+
+// Size in bytes of the compressed fixture data/pak_distill.bin.
+static const size_t COMPRESSED_SIZE = 60540;
+// Size in bytes the fixture is expected to expand to.
+static const size_t DECOMPRESSED_SIZE = 152089;
+
+// Compressed fixture contents, loaded by SetUp() and released by TearDown().
+static const uint8_t *buffer;
+
+class distillFixture : public ::testing::Test
+{
+public:
+    distillFixture()
+    {
+        // initialization;
+        // can also be done in SetUp()
+    }
+
+protected:
+    // Reads data/pak_distill.bin (relative to the current working directory) into `buffer`.
+    // Any failure aborts the test with a message instead of crashing on a NULL FILE* or a short buffer.
+    void SetUp()
+    {
+        char path[PATH_MAX];
+        char filename[PATH_MAX];
+
+        buffer = nullptr; // keeps TearDown() safe if a step below fails
+
+        ASSERT_TRUE(getcwd(path, PATH_MAX) != nullptr);
+        snprintf(filename, PATH_MAX, "%s/data/pak_distill.bin", path);
+
+        FILE *file = fopen(filename, "rb");
+        ASSERT_TRUE(file != nullptr) << "cannot open " << filename;
+
+        uint8_t *data = (uint8_t *)malloc(COMPRESSED_SIZE);
+        size_t bytesRead = data ? fread(data, 1, COMPRESSED_SIZE, file) : 0;
+        fclose(file);
+        buffer = data;
+
+        ASSERT_TRUE(data != nullptr);
+        ASSERT_EQ(bytesRead, COMPRESSED_SIZE) << "short read from " << filename;
+    }
+
+    void TearDown() { free((void *)buffer); }
+
+    ~distillFixture()
+    {
+        // resources cleanup, no exceptions allowed
+    }
+
+    // shared user data
+};
+
+// Decompresses the fixture and checks return code, output length and CRC32 of the output.
+TEST_F(distillFixture, distill)
+{
+    size_t destLen = DECOMPRESSED_SIZE;
+    size_t srcLen = COMPRESSED_SIZE;
+    // Zero-initialised so the CRC below never reads indeterminate bytes if decoding stops early.
+    auto *outBuf = (uint8_t *)calloc(DECOMPRESSED_SIZE, 1);
+    ASSERT_TRUE(outBuf != nullptr);
+
+    auto err = pak_decompress_distill(buffer, srcLen, outBuf, &destLen);
+
+    EXPECT_EQ(err, 0);
+    EXPECT_EQ(destLen, DECOMPRESSED_SIZE);
+
+    auto crc = crc32_data(outBuf, DECOMPRESSED_SIZE);
+
+    free(outBuf);
+
+    EXPECT_EQ(crc, EXPECTED_CRC32);
+}
\ No newline at end of file