mirror of
https://github.com/aaru-dps/Aaru.Compression.Native.git
synced 2025-12-16 19:24:31 +00:00
219 lines
7.5 KiB
C
219 lines
7.5 KiB
C
/*
|
|
* arc_crush.c - ARC Crush decompression algorithm
|
|
*
|
|
* Copyright (c) 2017-present, MacPaw Inc. All rights reserved.
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, write to the Free Software
|
|
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
* MA 02110-1301 USA
|
|
*/
|
|
|
|
#include <limits.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
#include <stdint.h>
|
|
#include "../library.h"
|
|
#include "bitstream.h"
|
|
#include "lzw.h"
|
|
|
|
int pak_decompress_crush_internal(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len)
|
|
{
|
|
if(!in_buf || !out_buf || !out_len || in_len == 0) { return -1; }
|
|
|
|
BitStream bs;
|
|
bitstream_init(&bs, (const uint8_t *)in_buf, in_len);
|
|
|
|
LZW *lzw = lzw_alloc(8192, 1);
|
|
if(!lzw) { return -1; }
|
|
|
|
// Initialize state
|
|
int symbolsize = 1;
|
|
int nextsizebump = 2;
|
|
bool useliteralbit = true;
|
|
|
|
int numrecentstrings = 0;
|
|
int ringindex = 0;
|
|
bool stringring[500];
|
|
memset(stringring, 0, sizeof(stringring));
|
|
|
|
int usageindex = 0x101;
|
|
uint8_t usage[8192];
|
|
memset(usage, 0, sizeof(usage));
|
|
|
|
int currbyte = 0;
|
|
uint8_t buffer[8192];
|
|
size_t outpos = 0;
|
|
size_t max_output = *out_len;
|
|
|
|
while(!bitstream_eof(&bs) && outpos < max_output)
|
|
{
|
|
if(!currbyte)
|
|
{
|
|
// Read the next symbol. How depends on the mode we are operating in.
|
|
int symbol;
|
|
if(useliteralbit)
|
|
{
|
|
// Use codes prefixed by a bit that selects literal or string codes.
|
|
// Literals are always 8 bits, strings vary.
|
|
if(bitstream_read_bit_le(&bs)) { symbol = bitstream_read_bits_le(&bs, symbolsize) + 256; }
|
|
else { symbol = bitstream_read_bits_le(&bs, 8); }
|
|
}
|
|
else
|
|
{
|
|
// Use same-length codes for both literals and strings.
|
|
// Due to an optimization quirk in the original decruncher,
|
|
// literals have their bits inverted.
|
|
symbol = bitstream_read_bits_le(&bs, symbolsize);
|
|
if(symbol < 0x100) symbol ^= 0xff;
|
|
}
|
|
|
|
// Code 0x100 is the EOF code.
|
|
if(symbol == 0x100) { break; }
|
|
|
|
// Walk through the LZW tree, and set the usage count of the current
|
|
// string and all its parents to 4. This is not necessary for literals,
|
|
// but we do it anyway for simplicity.
|
|
LZWTreeNode *nodes = lzw_symbols(lzw);
|
|
int marksymbol = symbol;
|
|
while(marksymbol >= 0)
|
|
{
|
|
if(marksymbol < 8192) { usage[marksymbol] = 4; }
|
|
marksymbol = nodes[marksymbol].parent;
|
|
}
|
|
|
|
// Adjust the count of recent strings versus literals.
|
|
// Use a ring buffer of length 500 as a window to keep track
|
|
// of how many strings have been encountered lately.
|
|
|
|
// First, decrease the count if a string leaves the window.
|
|
if(stringring[ringindex]) numrecentstrings--;
|
|
|
|
// Then store the current type of symbol in the window, and
|
|
// increase the count if the current symbol is a string.
|
|
if(symbol < 0x100) { stringring[ringindex] = false; }
|
|
else
|
|
{
|
|
stringring[ringindex] = true;
|
|
numrecentstrings++;
|
|
}
|
|
|
|
// Move the window forward.
|
|
ringindex = (ringindex + 1) % 500;
|
|
|
|
// Check the number of strings. If there have been many literals
|
|
// lately, bit-prefixed codes should be used. If we need to change
|
|
// mode, re-calculate the point where we increase the code length.
|
|
bool manyliterals = numrecentstrings < 375;
|
|
if(manyliterals != useliteralbit)
|
|
{
|
|
useliteralbit = manyliterals;
|
|
nextsizebump = 1 << symbolsize;
|
|
if(!useliteralbit) nextsizebump -= 0x100;
|
|
}
|
|
|
|
// Update the LZW tree.
|
|
if(!lzw_symbol_list_full(lzw))
|
|
{
|
|
// If there is space in the tree, just add a new string as usual.
|
|
if(lzw_next_symbol(lzw, symbol) != LZW_NO_ERROR)
|
|
{
|
|
lzw_free(lzw);
|
|
return -1;
|
|
}
|
|
|
|
// Set the usage count of the newly created entry.
|
|
int count = lzw_symbol_count(lzw);
|
|
if(count > 0 && count - 1 < 8192) { usage[count - 1] = 2; }
|
|
}
|
|
else
|
|
{
|
|
// If the tree is full, find a less-used symbol, and replace it.
|
|
int minindex = 0, minusage = INT_MAX;
|
|
int index = usageindex;
|
|
do {
|
|
index++;
|
|
if(index == 8192) index = 0x101;
|
|
|
|
if(usage[index] < minusage)
|
|
{
|
|
minindex = index;
|
|
minusage = usage[index];
|
|
}
|
|
|
|
usage[index]--;
|
|
if(usage[index] == 0) break;
|
|
} while(index != usageindex);
|
|
|
|
usageindex = index;
|
|
|
|
if(lzw_replace_symbol(lzw, minindex, symbol) != LZW_NO_ERROR)
|
|
{
|
|
lzw_free(lzw);
|
|
return -1;
|
|
}
|
|
|
|
// Set the usage count of the replaced entry.
|
|
if(minindex < 8192) { usage[minindex] = 2; }
|
|
}
|
|
|
|
// Extract the data to output.
|
|
currbyte = lzw_reverse_output_to_buffer(lzw, buffer);
|
|
|
|
// Check if we need to increase the code size. The point at which
|
|
// to increase varies depending on the coding mode.
|
|
if(lzw_symbol_count(lzw) - 257 >= nextsizebump)
|
|
{
|
|
symbolsize++;
|
|
nextsizebump = 1 << symbolsize;
|
|
if(!useliteralbit) nextsizebump -= 0x100;
|
|
}
|
|
}
|
|
|
|
if(currbyte > 0 && outpos < max_output) { out_buf[outpos++] = (char)buffer[--currbyte]; }
|
|
else if(currbyte == 0)
|
|
{
|
|
// No more bytes in buffer, continue to next symbol
|
|
continue;
|
|
}
|
|
else
|
|
{
|
|
// Output buffer full
|
|
break;
|
|
}
|
|
}
|
|
|
|
lzw_free(lzw);
|
|
*out_len = outpos;
|
|
return 0;
|
|
}
|
|
|
|
AARU_EXPORT int AARU_CALL pak_decompress_crush(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
|
|
size_t *out_len)
|
|
{
|
|
// Allocate a temporary buffer.
|
|
size_t temp_len = *out_len * 2; // Heuristic.
|
|
unsigned char *temp_buf = malloc(temp_len);
|
|
if(!temp_buf) return -1;
|
|
|
|
// Decompress crunched data.
|
|
int result = pak_decompress_crush_internal(in_buf, in_len, temp_buf, &temp_len);
|
|
if(result == 0)
|
|
{
|
|
// Decompress non-repeat packing.
|
|
result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
|
|
}
|
|
|
|
free(temp_buf);
|
|
return result;
|
|
} |