mirror of
https://github.com/aaru-dps/Aaru.Compression.Native.git
synced 2026-04-23 06:22:15 +00:00
333 lines
11 KiB
C
333 lines
11 KiB
C
/*
|
|
* This file is part of the Aaru Data Preservation Suite.
|
|
* Copyright (c) 2019-2026 Natalia Portillo.
|
|
*
|
|
* This library is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License as
|
|
* published by the Free Software Foundation; either version 2.1 of the
|
|
* License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
// KenCode decompressor
|
|
// Reverse-engineered from Apple DiskCopy 6.3.3 resource fork hdi2 ID 128
|
|
// Algorithm: LZSS with MSB-first bitstream and variable-length prefix codes
|
|
//
|
|
// Function map from PEF disassembly:
|
|
// 0x0000 plugin_entry Component Manager dispatcher (selectors 0-3)
|
|
// 0x00CC read_match_info Reads one token (match or literal run) from bitstream
|
|
// 0x01AC decompress Main decompression loop
|
|
// 0x03E4 bs_init Initialize bitstream reader
|
|
// 0x03F4 read_unary Count consecutive 1-bits (MSB-first)
|
|
// 0x046C read_bits Read N bits from bitstream
|
|
// 0x0510 read_literals Read N literal bytes from bitstream
|
|
// 0x05C0 decode_lit_count Decode literal run length
|
|
// 0x0750 decode_match_len Decode match length with prefix code
|
|
// 0x0A84 decode_offset Decode match offset (multi-level)
|
|
// 0x0C2C offset_dispatch Select distance class based on output position
|
|
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <string.h>
|
|
|
|
#include "library.h"
|
|
#include "kencode.h"
|
|
|
|
// ============================================================================
|
|
// MSB-first bitstream reader
|
|
// ============================================================================
|
|
|
|
typedef struct
|
|
{
|
|
const uint8_t *data;
|
|
uint32_t bit_pos;
|
|
uint32_t bit_end;
|
|
} kc_bs;
|
|
|
|
static inline void bs_init(kc_bs *bs, const uint8_t *data, size_t size)
|
|
{
|
|
bs->data = data;
|
|
bs->bit_pos = 0;
|
|
bs->bit_end = (uint32_t)(size * 8);
|
|
}
|
|
|
|
// PEF 0x046C: Read N bits MSB-first.
|
|
// The PEF reads a 32-bit big-endian word at (bit_pos / 8), shifts right
|
|
// by (32 - bit_within_byte - n), and masks to n bits.
|
|
static inline uint32_t bs_read(kc_bs *bs, int n)
|
|
{
|
|
if(n <= 0 || bs->bit_pos + (uint32_t)n > bs->bit_end) return 0;
|
|
|
|
uint32_t byte_off = bs->bit_pos >> 3;
|
|
int bit_off = bs->bit_pos & 7;
|
|
int shift = 32 - bit_off - n;
|
|
|
|
// Read a 32-bit big-endian word starting at byte_off
|
|
// Need to handle reads near end of buffer
|
|
uint32_t word = 0;
|
|
size_t max_byte = (bs->bit_end + 7) / 8;
|
|
for(int i = 0; i < 4; i++)
|
|
{
|
|
word <<= 8;
|
|
if(byte_off + i < max_byte) word |= bs->data[byte_off + i];
|
|
}
|
|
|
|
uint32_t mask = ((uint32_t)1 << n) - 1;
|
|
bs->bit_pos += n;
|
|
|
|
if(shift >= 0) return (word >> shift) & mask;
|
|
|
|
// shift < 0: need bits from next word (rare: n > 24 or misaligned)
|
|
word <<= (-shift);
|
|
if(byte_off + 4 < max_byte) word |= (bs->data[byte_off + 4] >> (8 + shift));
|
|
return word & mask;
|
|
}
|
|
|
|
// PEF 0x03F4: Count consecutive 1-bits, stopping at first 0-bit or max_count.
|
|
static inline uint32_t bs_read_unary(kc_bs *bs, int max_count)
|
|
{
|
|
uint32_t count = 0;
|
|
while((int)count < max_count && bs->bit_pos < bs->bit_end)
|
|
{
|
|
uint32_t byte_off = bs->bit_pos >> 3;
|
|
int bit_off = bs->bit_pos & 7;
|
|
bs->bit_pos++;
|
|
if(!(bs->data[byte_off] & (0x80 >> bit_off))) return count; // hit 0
|
|
count++;
|
|
}
|
|
return count;
|
|
}
|
|
|
|
// PEF 0x0510: Read N literal bytes from bitstream. Each byte is 8 bits
|
|
// at the current (possibly unaligned) bit position.
|
|
static void bs_read_bytes(kc_bs *bs, uint8_t *dst, int count)
|
|
{
|
|
uint32_t byte_off = bs->bit_pos >> 3;
|
|
int bit_off = bs->bit_pos & 7;
|
|
int rshift = 8 - bit_off;
|
|
|
|
for(int i = 0; i < count; i++)
|
|
{
|
|
uint16_t w = ((uint16_t)bs->data[byte_off] << 8) | bs->data[byte_off + 1];
|
|
dst[i] = (uint8_t)(w >> rshift);
|
|
byte_off++;
|
|
}
|
|
bs->bit_pos += count * 8;
|
|
}
|
|
|
|
// ============================================================================
|
|
// VLC decoders
|
|
// ============================================================================
|
|
|
|
// PEF 0x05C0: Decode literal run length (1..63)
|
|
static int decode_lit_count(kc_bs *bs)
|
|
{
|
|
// "0" → 1
|
|
if(bs_read(bs, 1) == 0) return 1;
|
|
|
|
// "1" + 2 bits
|
|
uint32_t v = bs_read(bs, 2);
|
|
if(v == 0) return 2;
|
|
if(v == 1) return 3;
|
|
if(v == 2) return (int)bs_read(bs, 2) + 4; // 4-7
|
|
|
|
// v==3: 4 more bits
|
|
uint32_t e = bs_read(bs, 4);
|
|
if(e <= 7) return (int)e + 8; // 8-15
|
|
if(e <= 11) return (int)(e * 4 + bs_read(bs, 2)) - 16; // 16-31
|
|
return (int)(e * 8 + bs_read(bs, 3)) - 64; // 32-63
|
|
}
|
|
|
|
// PEF 0x0750: Decode match length raw value (0..2042+)
|
|
// Value 0 with had_literals=1 → literal run marker
|
|
// Otherwise: actual match length = raw + 2 (or +3 if previous was literals)
|
|
static int decode_match_len(kc_bs *bs)
|
|
{
|
|
uint32_t pfx = bs_read_unary(bs, 10);
|
|
|
|
switch(pfx)
|
|
{
|
|
case 0:
|
|
return (int)bs_read(bs, 1); // 0-1
|
|
case 1:
|
|
{
|
|
uint32_t b = bs_read(bs, 1);
|
|
if(b == 0) return 2;
|
|
return (int)bs_read(bs, 1) + 3; // 3-4
|
|
}
|
|
case 2:
|
|
{
|
|
uint32_t b = bs_read(bs, 1);
|
|
if(b == 0) return (int)bs_read(bs, 1) + 5; // 5-6
|
|
return (int)bs_read(bs, 2) + 7; // 7-10
|
|
}
|
|
case 3:
|
|
return (int)bs_read(bs, 3) + 11; // 11-18
|
|
case 4:
|
|
return (int)bs_read(bs, 3) + 19; // 19-26
|
|
case 5:
|
|
return (int)bs_read(bs, 5) + 27; // 27-58
|
|
case 6:
|
|
return (int)bs_read(bs, 6) + 59; // 59-122
|
|
case 7:
|
|
return (int)bs_read(bs, 7) + 123; // 123-250
|
|
case 8:
|
|
return (int)bs_read(bs, 8) + 251; // 251-506
|
|
case 9:
|
|
return (int)bs_read(bs, 9) + 507; // 507-1018
|
|
default:
|
|
return (int)bs_read(bs, 10) + 1019; // 1019+
|
|
}
|
|
}
|
|
|
|
// PEF 0x0A84: Decode match offset given distance class (dist_bits)
|
|
static int decode_offset(kc_bs *bs, int dist_bits, int hash_size)
|
|
{
|
|
int pw = 1 << dist_bits;
|
|
|
|
// Prefix "0" + dist_bits → short offset
|
|
if(bs_read(bs, 1) == 0) return (int)bs_read(bs, dist_bits) + 1;
|
|
|
|
// Prefix "10" + (dist_bits+2) bits → medium offset
|
|
if(bs_read(bs, 1) == 0) return pw + 1 + (int)bs_read(bs, dist_bits + 2);
|
|
|
|
// Prefix "11" → long offset, escalating levels
|
|
int pw5 = pw * 5;
|
|
|
|
if(hash_size <= pw5 + 2) return pw5 + 1 + (int)bs_read(bs, 1);
|
|
|
|
if(hash_size <= pw5 + 4) return pw5 + 1 + (int)bs_read(bs, 2);
|
|
|
|
// Escalating loop
|
|
int max_level = dist_bits + 4;
|
|
int scale = 4;
|
|
int level = 3;
|
|
int threshold = pw5 + 4;
|
|
|
|
if(max_level >= 3)
|
|
{
|
|
while(level <= max_level)
|
|
{
|
|
threshold += scale;
|
|
// 68K helper at 0x0ABA: clamp ONLY the exact value 0x680 to 0x66C
|
|
int clamped = (threshold == 0x680) ? 0x66C : threshold;
|
|
if(hash_size <= clamped || level == max_level) break;
|
|
scale <<= 1;
|
|
level++;
|
|
}
|
|
}
|
|
|
|
return pw5 + 1 + (int)bs_read(bs, level);
|
|
}
|
|
|
|
// PEF 0x0C2C / 68K 0x0D48: Select distance class based on output position and call decode_offset
|
|
// out_pos determines the distance class, hash_size (from state init) caps it for large positions
|
|
static int offset_dispatch(kc_bs *bs, int out_pos, int hash_size)
|
|
{
|
|
int dist_bits;
|
|
|
|
if(out_pos <= 0xa)
|
|
dist_bits = 0;
|
|
else if(out_pos <= 0x14)
|
|
dist_bits = 1;
|
|
else if(out_pos <= 0x28)
|
|
dist_bits = 2;
|
|
else if(out_pos <= 0x50)
|
|
dist_bits = 3;
|
|
else if(out_pos <= 0xa0)
|
|
dist_bits = 4;
|
|
else if(out_pos <= 0x2a0)
|
|
dist_bits = 5;
|
|
else if(out_pos <= 0x3e8)
|
|
dist_bits = 6;
|
|
else if(out_pos <= 0xa80 || hash_size <= 0x800)
|
|
dist_bits = 7;
|
|
else if(out_pos <= 0x1500 || hash_size <= 0x1000)
|
|
dist_bits = 8;
|
|
else if(out_pos <= 0x2a00 || hash_size <= 0x2000)
|
|
dist_bits = 9;
|
|
else if(out_pos <= 0x5400 || hash_size <= 0x4000)
|
|
dist_bits = 10;
|
|
else if(out_pos <= 0xa800 || hash_size <= 0x8000)
|
|
dist_bits = 11;
|
|
else if(out_pos <= 0x11170 || hash_size <= 0x10000)
|
|
dist_bits = 12;
|
|
else if(out_pos <= 0x2a000 || hash_size <= 0x20000)
|
|
dist_bits = 13;
|
|
else
|
|
dist_bits = 14;
|
|
|
|
// 68K passes (out_pos, dist_bits, bitstream) to decode_offset
|
|
// but decode_offset internally uses out_pos for hash_size comparisons too
|
|
return decode_offset(bs, dist_bits, out_pos);
|
|
}
|
|
|
|
// ============================================================================
|
|
// Main decompressor — PEF 0x01AC
|
|
// ============================================================================
|
|
|
|
AARU_EXPORT int32_t AARU_CALL AARU_kencode_decode_buffer(uint8_t *dst_buffer, size_t *dst_size,
|
|
const uint8_t *src_buffer, size_t src_size)
|
|
{
|
|
if(!dst_buffer || !dst_size || !src_buffer || src_size == 0) return -1;
|
|
|
|
size_t max_out = *dst_size;
|
|
kc_bs bs;
|
|
bs_init(&bs, src_buffer, src_size);
|
|
|
|
size_t out_pos = 0;
|
|
uint8_t had_literals = 1; // PEF 0x01C0-0x01C4
|
|
|
|
// Plugin init: hash_size from clamp_hash_size(0x2800) → 0x2800 (in range [0x800, 0x30000])
|
|
int hash_size = 0x2800;
|
|
|
|
while(out_pos < max_out)
|
|
{
|
|
// PEF 0x00CC: read_match_info
|
|
int raw = decode_match_len(&bs);
|
|
|
|
if(raw <= 0 && had_literals)
|
|
{
|
|
// Literal run
|
|
int lit_count = decode_lit_count(&bs);
|
|
if(out_pos + lit_count > max_out) return -4;
|
|
bs_read_bytes(&bs, dst_buffer + out_pos, lit_count);
|
|
out_pos += lit_count;
|
|
|
|
// 68K 0x03C4-0x03D2: had_literals = (lit_count >= 63) ? 1 : 0
|
|
// PPC 0x0180-0x0194: equivalent via subfc/adde/subfe/clrlwi
|
|
had_literals = (lit_count >= 63) ? 1 : 0;
|
|
}
|
|
else
|
|
{
|
|
// Match reference
|
|
int match_len;
|
|
if(had_literals == 0)
|
|
match_len = raw + 3; // PEF 0x0130: +3 when coming after non-literal
|
|
else
|
|
match_len = raw + 2; // PEF 0x011C: +2 when had_literals is set
|
|
|
|
had_literals = 1; // PEF 0x0140
|
|
|
|
int offset = offset_dispatch(&bs, (int)out_pos, hash_size);
|
|
|
|
if(out_pos + match_len > max_out) return -4;
|
|
if(offset > (int)out_pos) return -4;
|
|
|
|
// Byte-by-byte copy (may overlap)
|
|
for(int i = 0; i < match_len; i++) dst_buffer[out_pos + i] = dst_buffer[out_pos - offset + i];
|
|
out_pos += match_len;
|
|
}
|
|
}
|
|
|
|
*dst_size = out_pos;
|
|
return 0;
|
|
}
|