Files

245 lines
7.3 KiB
C
Raw Permalink Normal View History

/*$Source: /usr/home/dhesi/zoo/RCS/huf.c,v $*/
/*$Id: huf.c,v 1.9 91/07/09 01:39:55 dhesi Exp $*/
/***********************************************************
2025-08-26 02:06:19 +01:00
huf.c -- static Huffman decoding
2025-08-26 02:06:19 +01:00
Adapted from Haruhiko Okumuras ar archiver.
Modified in 2025 by Natalia Portillo for in-memory I/O.
***********************************************************/
2025-08-26 02:06:19 +01:00
#include <limits.h> // UCHAR_MAX
#include "ar.h" // archive format constants
#include "lzh.h" // LZH algorithm constants (NC, DICBIT, CODE_BIT, etc.)
2025-08-26 02:06:19 +01:00
// NP = number of position codes = DICBIT+1
// NT = number of tree codes = CODE_BIT+3
// PBIT, TBIT = bitwidth to transmit NP/NT in header
#define NP (DICBIT + 1)
#define NT (CODE_BIT + 3)
2025-08-26 02:06:19 +01:00
#define PBIT 4 /* smallest bits so (1<<PBIT)>NP */
#define TBIT 5 /* smallest bits so (1<<TBIT)>NT */
// NPT = max(NP,NT) for prefixtree lengths
#if NT > NP
#define NPT NT
#else
#define NPT NP
#endif
2025-08-26 02:06:19 +01:00
// forward declarations of helper routines
static void read_pt_len(int nn, int nbit, int i_special);
static void read_c_len(void);
int decoded; // flag set when end-of-stream block is seen
2025-08-26 02:06:19 +01:00
// Huffman tree storage arrays
// left[]/right[] store the binary tree structure for fast decoding
uint16_t left[2 * NC - 1], right[2 * NC - 1];
2025-08-26 02:06:19 +01:00
// c_len[] = code lengths for literal/length tree (NC symbols)
// pt_len[] = code lengths for positiontree / prefix table (NPT symbols)
// buf = temporary buffer pointer used during encoding; unused in decode
static uint8_t *buf, c_len[NC], pt_len[NPT];
2025-08-26 02:06:19 +01:00
// size of buf if used, and remaining symbols in current block
static uint32_t bufsiz = 0, blocksize;
2025-08-26 02:06:19 +01:00
// Frequency, code and decodetable structures
static uint16_t c_freq[2 * NC - 1], // literal/length frequency counts
c_table[4096], // fastlookup table for literal/length decoding
c_code[NC], // canonical Huffman codes for literals
p_freq[2 * NP - 1], // position frequency counts
pt_table[256], // prefixtree fast lookup (for reading code lengths)
pt_code[NPT], // canonical codes for prefixtree
t_freq[2 * NT - 1]; // temporary freq for tree of codelength codes
2025-08-26 02:06:19 +01:00
/***** decoding helper: read prefixtree code-lengths *****/
static void read_pt_len(int nn, int nbit, int i_special)
{
int i, c, n;
uint32_t mask;
2025-08-26 02:06:19 +01:00
// 1) read how many codelengths to consume
n = getbits(nbit);
if(n == 0)
{
2025-08-26 02:06:19 +01:00
// special case: all codelengths are identical
c = getbits(nbit);
2025-08-26 02:06:19 +01:00
for(i = 0; i < nn; i++) // zero out lengths
pt_len[i] = 0;
for(i = 0; i < 256; i++) // prefixtable always returns 'c'
pt_table[i] = c;
}
else
{
2025-08-26 02:06:19 +01:00
// 2) read code lengths one by one
i = 0;
while(i < n)
{
2025-08-26 02:06:19 +01:00
// peek top 3 bits of bitbuf to guess small lengths
c = bitbuf >> (BITBUFSIZ - 3);
if(c == 7)
{
2025-08-26 02:06:19 +01:00
// if all three bits are 1, count additional ones
mask = 1U << (BITBUFSIZ - 1 - 3);
while(mask & bitbuf)
{
c++;
2025-08-26 02:06:19 +01:00
mask >>= 1;
}
}
2025-08-26 02:06:19 +01:00
// consume the actual length bits
fillbuf((c < 7) ? 3 : (c - 3));
pt_len[i++] = c;
2025-08-26 02:06:19 +01:00
// at special index, read a small run of zeros
if(i == i_special)
{
c = getbits(2);
2025-08-26 02:06:19 +01:00
while(--c >= 0 && i < nn) pt_len[i++] = 0;
}
}
2025-08-26 02:06:19 +01:00
// any remaining symbols get codelength zero
while(i < nn) pt_len[i++] = 0;
2025-08-26 02:06:19 +01:00
// build fast lookup table from lengths
make_table(nn, pt_len, 8, pt_table);
}
}
2025-08-26 02:06:19 +01:00
/***** decoding helper: read literal/length codelengths *****/
static void read_c_len(void)
{
int i, c, n;
uint32_t mask;
2025-08-26 02:06:19 +01:00
// 1) how many literal codes?
n = getbits(CBIT);
if(n == 0)
{
2025-08-26 02:06:19 +01:00
// all codelengths identical
c = getbits(CBIT);
for(i = 0; i < NC; i++) c_len[i] = 0;
for(i = 0; i < 4096; i++) c_table[i] = c;
}
else
{
2025-08-26 02:06:19 +01:00
// 2) read each code length via prefixtree
i = 0;
while(i < n)
{
2025-08-26 02:06:19 +01:00
// lookup next symbol in prefixtable
c = pt_table[bitbuf >> (BITBUFSIZ - 8)];
if(c >= NT)
{
2025-08-26 02:06:19 +01:00
// if prefix code is non-leaf, walk tree
mask = 1U << (BITBUFSIZ - 1 - 8);
do {
2025-08-26 02:06:19 +01:00
c = (bitbuf & mask) ? right[c] : left[c];
mask >>= 1;
} while(c >= NT);
}
2025-08-26 02:06:19 +01:00
// consume codelength bits
fillbuf(pt_len[c]);
2025-08-26 02:06:19 +01:00
// c ≤ 2: run-length encoding of zeros
if(c <= 2)
{
if(c == 0)
c = 1;
else if(c == 1)
c = getbits(4) + 3;
else
c = getbits(CBIT) + 20;
2025-08-26 02:06:19 +01:00
while(--c >= 0 && i < NC) c_len[i++] = 0;
}
else
2025-08-26 02:06:19 +01:00
{
// real code-length = c2
c_len[i++] = (uint8_t)(c - 2);
}
}
2025-08-26 02:06:19 +01:00
// fill rest with zero lengths
while(i < NC) c_len[i++] = 0;
2025-08-26 02:06:19 +01:00
// build fast lookup for literal/length codes
make_table(NC, c_len, 12, c_table);
}
}
2025-08-26 02:06:19 +01:00
/***** decode next literal/length symbol or end-of-block *****/
uint32_t decode_c(void)
{
uint32_t j, mask;
2025-08-26 02:06:19 +01:00
// if starting a new block, read its header
if(blocksize == 0)
{
2025-08-26 02:06:19 +01:00
blocksize = getbits(16); // block size = number of symbols
if(blocksize == 0)
2025-08-26 02:06:19 +01:00
{ // zero block → end of data
decoded = 1;
return 0;
}
2025-08-26 02:06:19 +01:00
// read three Huffman trees for this block:
// 1) code-length codes for literal tree (NT,TBIT,3)
read_pt_len(NT, TBIT, 3);
2025-08-26 02:06:19 +01:00
// 2) literal/length tree lengths (CBIT)
read_c_len();
2025-08-26 02:06:19 +01:00
// 3) prefix-tree lengths for positions (NP,PBIT,-1)
read_pt_len(NP, PBIT, -1);
}
2025-08-26 02:06:19 +01:00
// consume one symbol from this block
blocksize--;
2025-08-26 02:06:19 +01:00
// fast table lookup: top 12 bits
j = c_table[bitbuf >> (BITBUFSIZ - 12)];
if(j >= NC)
{
2025-08-26 02:06:19 +01:00
// need to walk tree if overflow
mask = 1U << (BITBUFSIZ - 1 - 12);
do {
2025-08-26 02:06:19 +01:00
j = (bitbuf & mask) ? right[j] : left[j];
mask >>= 1;
} while(j >= NC);
}
2025-08-26 02:06:19 +01:00
// remove js code length bits from bitbuf
fillbuf(c_len[j]);
return j;
}
2025-08-26 02:06:19 +01:00
/***** decode match-position extra bits *****/
uint32_t decode_p(void)
{
uint32_t j, mask;
2025-08-26 02:06:19 +01:00
// fast table lookup: top 8 bits
j = pt_table[bitbuf >> (BITBUFSIZ - 8)];
if(j >= NP)
{
2025-08-26 02:06:19 +01:00
// tree walk for long codes
mask = 1U << (BITBUFSIZ - 1 - 8);
do {
2025-08-26 02:06:19 +01:00
j = (bitbuf & mask) ? right[j] : left[j];
mask >>= 1;
} while(j >= NP);
}
2025-08-26 02:06:19 +01:00
// consume prefix bits
fillbuf(pt_len[j]);
2025-08-26 02:06:19 +01:00
// if non-zero, read extra bits to form full position
if(j != 0) j = (1U << (j - 1)) + getbits((int)(j - 1));
return j;
}
2025-08-26 02:06:19 +01:00
/***** start a new Huffman decode session *****/
void huf_decode_start(void)
{
2025-08-26 02:06:19 +01:00
init_getbits(); // reset bit buffer & subbitbuf state
blocksize = 0; // force reading a fresh block header
}