2025-08-26 01:02:06 +01:00
|
|
|
|
/*$Source: /usr/home/dhesi/zoo/RCS/huf.c,v $*/
|
|
|
|
|
|
/*$Id: huf.c,v 1.9 91/07/09 01:39:55 dhesi Exp $*/
|
|
|
|
|
|
/***********************************************************
|
2025-08-26 02:06:19 +01:00
|
|
|
|
huf.c -- static Huffman decoding
|
2025-08-26 01:02:06 +01:00
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
Adapted from Haruhiko Okumura’s “ar” archiver.
|
|
|
|
|
|
Modified in 2025 by Natalia Portillo for in-memory I/O.
|
2025-08-26 01:02:06 +01:00
|
|
|
|
***********************************************************/
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
#include <limits.h> // UCHAR_MAX
|
|
|
|
|
|
#include "ar.h" // archive format constants
|
|
|
|
|
|
#include "lzh.h" // LZH algorithm constants (NC, DICBIT, CODE_BIT, etc.)
|
2025-08-26 01:02:06 +01:00
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// NP = number of position codes = DICBIT+1
|
|
|
|
|
|
// NT = number of tree codes = CODE_BIT+3
|
|
|
|
|
|
// PBIT, TBIT = bit‐width to transmit NP/NT in header
|
2025-08-26 01:02:06 +01:00
|
|
|
|
/* Alphabet sizes, derived from the LZH constants in lzh.h. */
#define NP (DICBIT + 1)   /* number of position codes */
#define NT (CODE_BIT + 3) /* number of tree (code-length) codes */

/* Widths, in bits, of the count fields that precede each length table. */
#define PBIT 4 /* smallest width with (1 << PBIT) > NP */
#define TBIT 5 /* smallest width with (1 << TBIT) > NT */

/*
 * NPT = max(NP, NT). The pt_* arrays are dimensioned with it so that they
 * can hold either the position alphabet or the tree alphabet.
 */
#if NP >= NT
#define NPT NP
#else
#define NPT NT
#endif
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// forward declarations of helper routines
|
|
|
|
|
|
/* Reads nn code lengths into pt_len[] and rebuilds pt_table[]. */
static void read_pt_len(int nn, int nbit, int i_special);

/* Reads the NC literal/length code lengths into c_len[] and rebuilds c_table[]. */
static void read_c_len(void);
|
|
|
|
|
|
|
|
|
|
|
|
int decoded; // flag set when end-of-stream block is seen
|
2025-08-26 01:02:06 +01:00
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// Huffman tree storage arrays
|
|
|
|
|
|
// left[]/right[] store the binary tree structure for fast decoding
|
|
|
|
|
|
uint16_t left[2 * NC - 1], right[2 * NC - 1];
|
2025-08-26 01:02:06 +01:00
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// c_len[] = code lengths for literal/length tree (NC symbols)
|
|
|
|
|
|
// pt_len[] = code lengths for position‐tree / prefix table (NPT symbols)
|
|
|
|
|
|
// buf = temporary buffer pointer used during encoding; unused in decode
|
2025-08-26 01:02:06 +01:00
|
|
|
|
static uint8_t *buf, c_len[NC], pt_len[NPT];
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// size of buf if used, and remaining symbols in current block
|
2025-08-26 01:02:06 +01:00
|
|
|
|
static uint32_t bufsiz = 0, blocksize;
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// Frequency, code and decode‐table structures
|
|
|
|
|
|
static uint16_t c_freq[2 * NC - 1], // literal/length frequency counts
|
|
|
|
|
|
c_table[4096], // fast‐lookup table for literal/length decoding
|
|
|
|
|
|
c_code[NC], // canonical Huffman codes for literals
|
|
|
|
|
|
p_freq[2 * NP - 1], // position frequency counts
|
|
|
|
|
|
pt_table[256], // prefix‐tree fast lookup (for reading code lengths)
|
|
|
|
|
|
pt_code[NPT], // canonical codes for prefix‐tree
|
|
|
|
|
|
t_freq[2 * NT - 1]; // temporary freq for tree of code‐length codes
|
2025-08-26 01:02:06 +01:00
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
/***** decoding helper: read prefix‐tree code-lengths *****/
|
2025-08-26 01:02:06 +01:00
|
|
|
|
static void read_pt_len(int nn, int nbit, int i_special)
|
|
|
|
|
|
{
|
|
|
|
|
|
int i, c, n;
|
|
|
|
|
|
uint32_t mask;
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// 1) read how many code‐lengths to consume
|
2025-08-26 01:02:06 +01:00
|
|
|
|
n = getbits(nbit);
|
|
|
|
|
|
if(n == 0)
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// special case: all code‐lengths are identical
|
2025-08-26 01:02:06 +01:00
|
|
|
|
c = getbits(nbit);
|
2025-08-26 02:06:19 +01:00
|
|
|
|
for(i = 0; i < nn; i++) // zero out lengths
|
|
|
|
|
|
pt_len[i] = 0;
|
|
|
|
|
|
for(i = 0; i < 256; i++) // prefix‐table always returns 'c'
|
|
|
|
|
|
pt_table[i] = c;
|
2025-08-26 01:02:06 +01:00
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// 2) read code lengths one by one
|
2025-08-26 01:02:06 +01:00
|
|
|
|
i = 0;
|
|
|
|
|
|
while(i < n)
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// peek top 3 bits of bitbuf to guess small lengths
|
2025-08-26 01:02:06 +01:00
|
|
|
|
c = bitbuf >> (BITBUFSIZ - 3);
|
|
|
|
|
|
if(c == 7)
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// if all three bits are 1, count additional ones
|
|
|
|
|
|
mask = 1U << (BITBUFSIZ - 1 - 3);
|
2025-08-26 01:02:06 +01:00
|
|
|
|
while(mask & bitbuf)
|
|
|
|
|
|
{
|
|
|
|
|
|
c++;
|
2025-08-26 02:06:19 +01:00
|
|
|
|
mask >>= 1;
|
2025-08-26 01:02:06 +01:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// consume the actual length bits
|
|
|
|
|
|
fillbuf((c < 7) ? 3 : (c - 3));
|
2025-08-26 01:02:06 +01:00
|
|
|
|
pt_len[i++] = c;
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// at special index, read a small run of zeros
|
2025-08-26 01:02:06 +01:00
|
|
|
|
if(i == i_special)
|
|
|
|
|
|
{
|
|
|
|
|
|
c = getbits(2);
|
2025-08-26 02:06:19 +01:00
|
|
|
|
while(--c >= 0 && i < nn) pt_len[i++] = 0;
|
2025-08-26 01:02:06 +01:00
|
|
|
|
}
|
|
|
|
|
|
}
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// any remaining symbols get code‐length zero
|
2025-08-26 01:02:06 +01:00
|
|
|
|
while(i < nn) pt_len[i++] = 0;
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// build fast lookup table from lengths
|
2025-08-26 01:02:06 +01:00
|
|
|
|
make_table(nn, pt_len, 8, pt_table);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
/***** decoding helper: read literal/length code‐lengths *****/
|
|
|
|
|
|
static void read_c_len(void)
|
2025-08-26 01:02:06 +01:00
|
|
|
|
{
|
|
|
|
|
|
int i, c, n;
|
|
|
|
|
|
uint32_t mask;
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// 1) how many literal codes?
|
2025-08-26 01:02:06 +01:00
|
|
|
|
n = getbits(CBIT);
|
|
|
|
|
|
if(n == 0)
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// all code‐lengths identical
|
2025-08-26 01:02:06 +01:00
|
|
|
|
c = getbits(CBIT);
|
|
|
|
|
|
for(i = 0; i < NC; i++) c_len[i] = 0;
|
|
|
|
|
|
for(i = 0; i < 4096; i++) c_table[i] = c;
|
|
|
|
|
|
}
|
|
|
|
|
|
else
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// 2) read each code length via prefix‐tree
|
2025-08-26 01:02:06 +01:00
|
|
|
|
i = 0;
|
|
|
|
|
|
while(i < n)
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// lookup next symbol in prefix‐table
|
2025-08-26 01:02:06 +01:00
|
|
|
|
c = pt_table[bitbuf >> (BITBUFSIZ - 8)];
|
|
|
|
|
|
if(c >= NT)
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// if prefix code is non-leaf, walk tree
|
|
|
|
|
|
mask = 1U << (BITBUFSIZ - 1 - 8);
|
2025-08-26 01:02:06 +01:00
|
|
|
|
do {
|
2025-08-26 02:06:19 +01:00
|
|
|
|
c = (bitbuf & mask) ? right[c] : left[c];
|
2025-08-26 01:02:06 +01:00
|
|
|
|
mask >>= 1;
|
|
|
|
|
|
} while(c >= NT);
|
|
|
|
|
|
}
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// consume code‐length bits
|
2025-08-26 01:02:06 +01:00
|
|
|
|
fillbuf(pt_len[c]);
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// c ≤ 2: run-length encoding of zeros
|
2025-08-26 01:02:06 +01:00
|
|
|
|
if(c <= 2)
|
|
|
|
|
|
{
|
|
|
|
|
|
if(c == 0)
|
|
|
|
|
|
c = 1;
|
|
|
|
|
|
else if(c == 1)
|
|
|
|
|
|
c = getbits(4) + 3;
|
|
|
|
|
|
else
|
|
|
|
|
|
c = getbits(CBIT) + 20;
|
2025-08-26 02:06:19 +01:00
|
|
|
|
while(--c >= 0 && i < NC) c_len[i++] = 0;
|
2025-08-26 01:02:06 +01:00
|
|
|
|
}
|
|
|
|
|
|
else
|
2025-08-26 02:06:19 +01:00
|
|
|
|
{
|
|
|
|
|
|
// real code-length = c−2
|
|
|
|
|
|
c_len[i++] = (uint8_t)(c - 2);
|
|
|
|
|
|
}
|
2025-08-26 01:02:06 +01:00
|
|
|
|
}
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// fill rest with zero lengths
|
2025-08-26 01:02:06 +01:00
|
|
|
|
while(i < NC) c_len[i++] = 0;
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// build fast lookup for literal/length codes
|
2025-08-26 01:02:06 +01:00
|
|
|
|
make_table(NC, c_len, 12, c_table);
|
|
|
|
|
|
}
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
/***** decode next literal/length symbol or end-of-block *****/
|
|
|
|
|
|
uint32_t decode_c(void)
|
2025-08-26 01:02:06 +01:00
|
|
|
|
{
|
|
|
|
|
|
uint32_t j, mask;
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// if starting a new block, read its header
|
2025-08-26 01:02:06 +01:00
|
|
|
|
if(blocksize == 0)
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
blocksize = getbits(16); // block size = number of symbols
|
2025-08-26 01:02:06 +01:00
|
|
|
|
if(blocksize == 0)
|
2025-08-26 02:06:19 +01:00
|
|
|
|
{ // zero block → end of data
|
2025-08-26 01:02:06 +01:00
|
|
|
|
decoded = 1;
|
|
|
|
|
|
return 0;
|
|
|
|
|
|
}
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// read three Huffman trees for this block:
|
|
|
|
|
|
// 1) code-length codes for literal tree (NT,TBIT,3)
|
2025-08-26 01:02:06 +01:00
|
|
|
|
read_pt_len(NT, TBIT, 3);
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// 2) literal/length tree lengths (CBIT)
|
2025-08-26 01:02:06 +01:00
|
|
|
|
read_c_len();
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// 3) prefix-tree lengths for positions (NP,PBIT,-1)
|
2025-08-26 01:02:06 +01:00
|
|
|
|
read_pt_len(NP, PBIT, -1);
|
|
|
|
|
|
}
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// consume one symbol from this block
|
2025-08-26 01:02:06 +01:00
|
|
|
|
blocksize--;
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// fast table lookup: top 12 bits
|
2025-08-26 01:02:06 +01:00
|
|
|
|
j = c_table[bitbuf >> (BITBUFSIZ - 12)];
|
|
|
|
|
|
if(j >= NC)
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// need to walk tree if overflow
|
|
|
|
|
|
mask = 1U << (BITBUFSIZ - 1 - 12);
|
2025-08-26 01:02:06 +01:00
|
|
|
|
do {
|
2025-08-26 02:06:19 +01:00
|
|
|
|
j = (bitbuf & mask) ? right[j] : left[j];
|
2025-08-26 01:02:06 +01:00
|
|
|
|
mask >>= 1;
|
|
|
|
|
|
} while(j >= NC);
|
|
|
|
|
|
}
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// remove j’s code length bits from bitbuf
|
2025-08-26 01:02:06 +01:00
|
|
|
|
fillbuf(c_len[j]);
|
|
|
|
|
|
return j;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
/***** decode match-position extra bits *****/
|
|
|
|
|
|
uint32_t decode_p(void)
|
2025-08-26 01:02:06 +01:00
|
|
|
|
{
|
|
|
|
|
|
uint32_t j, mask;
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// fast table lookup: top 8 bits
|
2025-08-26 01:02:06 +01:00
|
|
|
|
j = pt_table[bitbuf >> (BITBUFSIZ - 8)];
|
|
|
|
|
|
if(j >= NP)
|
|
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
// tree walk for long codes
|
|
|
|
|
|
mask = 1U << (BITBUFSIZ - 1 - 8);
|
2025-08-26 01:02:06 +01:00
|
|
|
|
do {
|
2025-08-26 02:06:19 +01:00
|
|
|
|
j = (bitbuf & mask) ? right[j] : left[j];
|
2025-08-26 01:02:06 +01:00
|
|
|
|
mask >>= 1;
|
|
|
|
|
|
} while(j >= NP);
|
|
|
|
|
|
}
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// consume prefix bits
|
2025-08-26 01:02:06 +01:00
|
|
|
|
fillbuf(pt_len[j]);
|
2025-08-26 02:06:19 +01:00
|
|
|
|
|
|
|
|
|
|
// if non-zero, read extra bits to form full position
|
|
|
|
|
|
if(j != 0) j = (1U << (j - 1)) + getbits((int)(j - 1));
|
|
|
|
|
|
|
2025-08-26 01:02:06 +01:00
|
|
|
|
return j;
|
|
|
|
|
|
}
|
|
|
|
|
|
|
2025-08-26 02:06:19 +01:00
|
|
|
|
/***** start a new Huffman decode session *****/
|
|
|
|
|
|
void huf_decode_start(void)
|
2025-08-26 01:02:06 +01:00
|
|
|
|
{
|
2025-08-26 02:06:19 +01:00
|
|
|
|
init_getbits(); // reset bit buffer & subbitbuf state
|
|
|
|
|
|
blocksize = 0; // force reading a fresh block header
|
2025-08-26 01:02:06 +01:00
|
|
|
|
}
|