Added comments from Copilot.

This commit is contained in:
2025-08-26 02:06:19 +01:00
parent a336ce953e
commit 750df1cca9
4 changed files with 322 additions and 155 deletions

View File

@@ -3,63 +3,96 @@
/*********************************************************** /***********************************************************
decode.c decode.c
Adapted from "ar" archiver written by Haruhiko Okumura. Adapted from Haruhiko Okumura's “ar” archiver. This
version has been modified in 2025 by Natalia Portillo
for in-memory decompression.
***********************************************************/ ***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include <limits.h> #include <limits.h> // for UCHAR_MAX
#include <stdint.h> #include <stdint.h> // for fixed-width integer types
#include "ar.h" #include "ar.h" // archive format constants
#include "lzh.h" #include "lzh.h" // LZH-specific constants (DICSIZ, THRESHOLD, etc.)
extern int decoded; /* from huf.c */ extern int decoded; // flag set by decode_c() when end-of-stream is reached
static int j; /* remaining bytes to copy */ static int j; // number of bytes still to copy from the current match
/*
* decode_start()
*
* Prepare the decoder for a new file:
* - Initialize the Huffman bitstream (via huf_decode_start())
* - Reset the sliding-window copy counter `j`
* - Clear the end-of-data flag `decoded`
*/
void decode_start() void decode_start()
{ {
huf_decode_start(); huf_decode_start(); // reset bit-reader state
j = 0; j = 0; // no pending copy runs yet
decoded = 0; decoded = 0; // not yet at end-of-stream
} }
/* /*
decodes; returns no. of chars decoded * decode(count, buffer)
*/ *
* Decode up to `count` bytes (usually DICSIZ) into `buffer[]`.
* Returns the number of bytes written: `count` when the buffer fills, or
* fewer (possibly 0) when the end-of-stream flag `decoded` becomes set.
*
* Sliding-window logic:
* 1. If `j` > 0, we are in the middle of copying a previous match:
* - Copy one byte from `buffer[i]` into `buffer[r]`
* - Advance `i` (circular within DICSIZ) and `r`
* - Decrement `j` and repeat until `j` = 0 or `r` = count
* 2. Otherwise, fetch the next symbol `c = decode_c()`:
* - If `c <= UCHAR_MAX`, it's a literal byte: emit it directly
* - Else it's a match:
* • compute `j = match_length = c - (UCHAR_MAX + 1 - THRESHOLD)`
* • compute `i = (r - match_offset - 1) mod DICSIZ`,
* where match_offset = decode_p()
* • enter copy loop from step 1
*/
int decode(uint32_t count, uint8_t *buffer) int decode(uint32_t count, uint8_t *buffer)
/* The calling function must keep the number of
bytes to be processed. This function decodes
either 'count' bytes or 'DICSIZ' bytes, whichever
is smaller, into the array 'buffer[]' of size
'DICSIZ' or more.
Call decode_start() once for each new file
before calling this function. */
{ {
static uint32_t i; static uint32_t i; // sliding-window read index (circular)
uint32_t r, c; uint32_t r; // write position in buffer
uint32_t c; // symbol or match code
r = 0; r = 0;
// Step 1: finish any pending copy from a previous match
while(--j >= 0) while(--j >= 0)
{ {
buffer[r] = buffer[i]; buffer[r] = buffer[i]; // copy one byte from history
i = (i + 1) & (DICSIZ - 1); i = (i + 1) & (DICSIZ - 1); // wrap index within [0, DICSIZ)
if(++r == count) return r; if(++r == count) // if output buffer is full
return r; // return bytes written so far
} }
// Step 2: decode new symbols until end-of-stream or buffer full
for(;;) for(;;)
{ {
c = decode_c(); c = decode_c(); // get next Huffman symbol
if(decoded) return r; if(decoded) // end-of-stream marker reached
return r; // no more bytes to decode
if(c <= UCHAR_MAX) if(c <= UCHAR_MAX)
{ {
buffer[r] = c; // Literal byte: emit it directly
buffer[r] = (uint8_t)c;
if(++r == count) return r; if(++r == count) return r;
} }
else else
{ {
// Match sequence: compute how many bytes to copy
// j = match length
j = c - (UCHAR_MAX + 1 - THRESHOLD); j = c - (UCHAR_MAX + 1 - THRESHOLD);
// i = start position in sliding window:
// current output position minus offset minus 1, wrapped
i = (r - decode_p() - 1) & (DICSIZ - 1); i = (r - decode_p() - 1) & (DICSIZ - 1);
// Copy `j` bytes from history
while(--j >= 0) while(--j >= 0)
{ {
buffer[r] = buffer[i]; buffer[r] = buffer[i];

157
zoo/huf.c
View File

@@ -1,110 +1,148 @@
/*$Source: /usr/home/dhesi/zoo/RCS/huf.c,v $*/ /*$Source: /usr/home/dhesi/zoo/RCS/huf.c,v $*/
/*$Id: huf.c,v 1.9 91/07/09 01:39:55 dhesi Exp $*/ /*$Id: huf.c,v 1.9 91/07/09 01:39:55 dhesi Exp $*/
/*********************************************************** /***********************************************************
huf.c -- static Huffman huf.c -- static Huffman decoding
Adapted from "ar" archiver written by Haruhiko Okumura. Adapted from Haruhiko Okumura's “ar” archiver.
Modified in 2025 by Natalia Portillo for in-memory I/O.
***********************************************************/ ***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include <limits.h> #include <limits.h> // UCHAR_MAX
#include "ar.h" // archive format constants
#include "ar.h" #include "lzh.h" // LZH algorithm constants (NC, DICBIT, CODE_BIT, etc.)
#include "lzh.h"
// NP = number of position codes = DICBIT+1
// NT = number of tree codes = CODE_BIT+3
// PBIT, TBIT = bit-width used to transmit the NP/NT counts in the header
#define NP (DICBIT + 1) #define NP (DICBIT + 1)
#define NT (CODE_BIT + 3) #define NT (CODE_BIT + 3)
#define PBIT 4 /* smallest integer such that (1U << PBIT) > NP */ #define PBIT 4 /* smallest bits so (1<<PBIT)>NP */
#define TBIT 5 /* smallest integer such that (1U << TBIT) > NT */ #define TBIT 5 /* smallest bits so (1<<TBIT)>NT */
// NPT = max(NP,NT) for prefix-tree lengths
#if NT > NP #if NT > NP
#define NPT NT #define NPT NT
#else #else
#define NPT NP #define NPT NP
#endif #endif
static void read_pt_len(int, int, int); // forward declarations of helper routines
static void read_c_len(); static void read_pt_len(int nn, int nbit, int i_special);
static void read_c_len(void);
int decoded; /* for use in decode.c */ int decoded; // flag set when end-of-stream block is seen
// Huffman tree storage arrays
// left[]/right[] store the binary tree structure for fast decoding
uint16_t left[2 * NC - 1], right[2 * NC - 1]; uint16_t left[2 * NC - 1], right[2 * NC - 1];
// c_len[] = code lengths for literal/length tree (NC symbols)
// pt_len[] = code lengths for position-tree / prefix table (NPT symbols)
// buf = temporary buffer pointer used during encoding; unused in decode
static uint8_t *buf, c_len[NC], pt_len[NPT]; static uint8_t *buf, c_len[NC], pt_len[NPT];
// size of buf if used, and remaining symbols in current block
static uint32_t bufsiz = 0, blocksize; static uint32_t bufsiz = 0, blocksize;
static uint16_t c_freq[2 * NC - 1], c_table[4096], c_code[NC], p_freq[2 * NP - 1], pt_table[256], pt_code[NPT],
t_freq[2 * NT - 1];
/***** decoding *****/ // Frequency, code and decodetable structures
static uint16_t c_freq[2 * NC - 1], // literal/length frequency counts
c_table[4096], // fastlookup table for literal/length decoding
c_code[NC], // canonical Huffman codes for literals
p_freq[2 * NP - 1], // position frequency counts
pt_table[256], // prefixtree fast lookup (for reading code lengths)
pt_code[NPT], // canonical codes for prefixtree
t_freq[2 * NT - 1]; // temporary freq for tree of codelength codes
/***** decoding helper: read prefix-tree code-lengths *****/
static void read_pt_len(int nn, int nbit, int i_special) static void read_pt_len(int nn, int nbit, int i_special)
{ {
int i, c, n; int i, c, n;
uint32_t mask; uint32_t mask;
// 1) read how many code-lengths to consume
n = getbits(nbit); n = getbits(nbit);
if(n == 0) if(n == 0)
{ {
// special case: only one symbol c is used; all code-lengths are zero
c = getbits(nbit); c = getbits(nbit);
for(i = 0; i < nn; i++) pt_len[i] = 0; for(i = 0; i < nn; i++) // zero out lengths
for(i = 0; i < 256; i++) pt_table[i] = c; pt_len[i] = 0;
for(i = 0; i < 256; i++) // prefixtable always returns 'c'
pt_table[i] = c;
} }
else else
{ {
// 2) read code lengths one by one
i = 0; i = 0;
while(i < n) while(i < n)
{ {
// peek top 3 bits of bitbuf to guess small lengths
c = bitbuf >> (BITBUFSIZ - 3); c = bitbuf >> (BITBUFSIZ - 3);
if(c == 7) if(c == 7)
{ {
mask = (unsigned)1 << (BITBUFSIZ - 1 - 3); // if all three bits are 1, count additional ones
mask = 1U << (BITBUFSIZ - 1 - 3);
while(mask & bitbuf) while(mask & bitbuf)
{ {
mask >>= 1;
c++; c++;
mask >>= 1;
} }
} }
fillbuf((c < 7) ? 3 : c - 3); // consume the actual length bits
fillbuf((c < 7) ? 3 : (c - 3));
pt_len[i++] = c; pt_len[i++] = c;
// at special index, read a small run of zeros
if(i == i_special) if(i == i_special)
{ {
c = getbits(2); c = getbits(2);
while(--c >= 0) pt_len[i++] = 0; while(--c >= 0 && i < nn) pt_len[i++] = 0;
} }
} }
// any remaining symbols get code-length zero
while(i < nn) pt_len[i++] = 0; while(i < nn) pt_len[i++] = 0;
// build fast lookup table from lengths
make_table(nn, pt_len, 8, pt_table); make_table(nn, pt_len, 8, pt_table);
} }
} }
static void read_c_len() /***** decoding helper: read literal/length codelengths *****/
static void read_c_len(void)
{ {
int i, c, n; int i, c, n;
uint32_t mask; uint32_t mask;
// 1) how many literal codes?
n = getbits(CBIT); n = getbits(CBIT);
if(n == 0) if(n == 0)
{ {
// special case: every lookup yields the single symbol c (all code-lengths zero)
c = getbits(CBIT); c = getbits(CBIT);
for(i = 0; i < NC; i++) c_len[i] = 0; for(i = 0; i < NC; i++) c_len[i] = 0;
for(i = 0; i < 4096; i++) c_table[i] = c; for(i = 0; i < 4096; i++) c_table[i] = c;
} }
else else
{ {
// 2) read each code length via prefix-tree
i = 0; i = 0;
while(i < n) while(i < n)
{ {
// lookup next symbol in prefix-table
c = pt_table[bitbuf >> (BITBUFSIZ - 8)]; c = pt_table[bitbuf >> (BITBUFSIZ - 8)];
if(c >= NT) if(c >= NT)
{ {
mask = (unsigned)1 << (BITBUFSIZ - 1 - 8); // if prefix code is non-leaf, walk tree
mask = 1U << (BITBUFSIZ - 1 - 8);
do { do {
if(bitbuf & mask) c = (bitbuf & mask) ? right[c] : left[c];
c = right[c];
else
c = left[c];
mask >>= 1; mask >>= 1;
} while(c >= NT); } while(c >= NT);
} }
// consume the pt_len[c] bits of the prefix code just decoded
fillbuf(pt_len[c]); fillbuf(pt_len[c]);
// c ≤ 2: run-length encoding of zeros
if(c <= 2) if(c <= 2)
{ {
if(c == 0) if(c == 0)
@@ -113,75 +151,94 @@ static void read_c_len()
c = getbits(4) + 3; c = getbits(4) + 3;
else else
c = getbits(CBIT) + 20; c = getbits(CBIT) + 20;
while(--c >= 0) c_len[i++] = 0; while(--c >= 0 && i < NC) c_len[i++] = 0;
} }
else else
c_len[i++] = c - 2; {
// real code-length = c - 2
c_len[i++] = (uint8_t)(c - 2);
} }
}
// fill rest with zero lengths
while(i < NC) c_len[i++] = 0; while(i < NC) c_len[i++] = 0;
// build fast lookup for literal/length codes
make_table(NC, c_len, 12, c_table); make_table(NC, c_len, 12, c_table);
} }
} }
uint32_t decode_c() /***** decode next literal/length symbol or end-of-block *****/
uint32_t decode_c(void)
{ {
uint32_t j, mask; uint32_t j, mask;
// if starting a new block, read its header
if(blocksize == 0) if(blocksize == 0)
{ {
blocksize = getbits(16); blocksize = getbits(16); // block size = number of symbols
if(blocksize == 0) if(blocksize == 0)
{ { // zero block → end of data
#if 0
(void) fprintf(stderr, "block size = 0, decoded\n"); /* debug */
#endif
decoded = 1; decoded = 1;
return 0; return 0;
} }
// read three Huffman trees for this block:
// 1) code-length codes for literal tree (NT,TBIT,3)
read_pt_len(NT, TBIT, 3); read_pt_len(NT, TBIT, 3);
// 2) literal/length tree lengths (CBIT)
read_c_len(); read_c_len();
// 3) prefix-tree lengths for positions (NP,PBIT,-1)
read_pt_len(NP, PBIT, -1); read_pt_len(NP, PBIT, -1);
} }
// consume one symbol from this block
blocksize--; blocksize--;
// fast table lookup: top 12 bits
j = c_table[bitbuf >> (BITBUFSIZ - 12)]; j = c_table[bitbuf >> (BITBUFSIZ - 12)];
if(j >= NC) if(j >= NC)
{ {
mask = (unsigned)1 << (BITBUFSIZ - 1 - 12); // need to walk tree if overflow
mask = 1U << (BITBUFSIZ - 1 - 12);
do { do {
if(bitbuf & mask) j = (bitbuf & mask) ? right[j] : left[j];
j = right[j];
else
j = left[j];
mask >>= 1; mask >>= 1;
} while(j >= NC); } while(j >= NC);
} }
// remove j's code length bits from bitbuf
fillbuf(c_len[j]); fillbuf(c_len[j]);
return j; return j;
} }
uint32_t decode_p() /***** decode match-position extra bits *****/
uint32_t decode_p(void)
{ {
uint32_t j, mask; uint32_t j, mask;
// fast table lookup: top 8 bits
j = pt_table[bitbuf >> (BITBUFSIZ - 8)]; j = pt_table[bitbuf >> (BITBUFSIZ - 8)];
if(j >= NP) if(j >= NP)
{ {
mask = (unsigned)1 << (BITBUFSIZ - 1 - 8); // tree walk for long codes
mask = 1U << (BITBUFSIZ - 1 - 8);
do { do {
if(bitbuf & mask) j = (bitbuf & mask) ? right[j] : left[j];
j = right[j];
else
j = left[j];
mask >>= 1; mask >>= 1;
} while(j >= NP); } while(j >= NP);
} }
// consume prefix bits
fillbuf(pt_len[j]); fillbuf(pt_len[j]);
if(j != 0) j = ((unsigned)1 << (j - 1)) + getbits((int)(j - 1));
// if non-zero, read extra bits to form full position
if(j != 0) j = (1U << (j - 1)) + getbits((int)(j - 1));
return j; return j;
} }
void huf_decode_start() /***** start a new Huffman decode session *****/
void huf_decode_start(void)
{ {
init_getbits(); init_getbits(); // reset bit buffer & subbitbuf state
blocksize = 0; blocksize = 0; // force reading a fresh block header
} }

112
zoo/io.c
View File

@@ -3,103 +3,127 @@
/*********************************************************** /***********************************************************
io.c -- input/output (modified for in-memory I/O) io.c -- input/output (modified for in-memory I/O)
Adapted from "ar" archiver written by Haruhiko Okumura. Adapted from Haruhiko Okumura's “ar” archiver.
This version reads compressed bytes from an input buffer This version feeds compressed bytes from a memory buffer
via mem_getc() and writes output bytes to a buffer via (via mem_getc()) and writes decompressed output to a buffer
mem_putc(), removing all FILE* dependencies for decompression. (via mem_putc()), eliminating FILE* dependencies.
Modified for in-memory decompression by Natalia Portillo, 2025
***********************************************************/ ***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include <limits.h> #include <limits.h> // Provides CHAR_BIT for bit-width operations
#include "ar.h" #include "ar.h" // Archive format constants (e.g., CODE_BIT, NC)
#include "lzh.h" #include "lh5.h" // Declarations for mem_getc(), mem_putc(), buffer state
#include "lzh.h" // LZH algorithm constants (e.g., BITBUFSIZ, DICSIZ)
#include "lh5.h" /* mem_getc(), mem_putc(), in_ptr/in_left, out_ptr/out_left */ //-----------------------------------------------------------------------------
// Global bit-I/O state
//-----------------------------------------------------------------------------
uint16_t bitbuf; uint16_t bitbuf; // Accumulates bits shifted in from the input stream
int unpackable; int unpackable; // Unused in decompression here (was for encode error)
size_t compsize, origsize; // Byte counters (optional diagnostics; not used to gate decompression)
uint32_t subbitbuf; size_t compsize; // Count of output bytes produced (for compression mode)
int bitcount; size_t origsize; // Count of input bytes consumed (for CRC in file I/O)
uint32_t subbitbuf; // Holds the last byte fetched; bits are consumed from here
int bitcount; // How many valid bits remain in subbitbuf
/* //-----------------------------------------------------------------------------
* fillbuf(n) -- shift bitbuf left by n bits and read in n new bits // fillbuf(n)
* now reads bytes directly from in-memory input buffer // Shift the global bitbuf left by n bits, then read in n new bits
*/ // from the input buffer (in-memory) to replenish bitbuf.
//-----------------------------------------------------------------------------
void fillbuf(int n) /* Shift bitbuf n bits left, read n bits */ void fillbuf(int n) /* Shift bitbuf n bits left, read n bits */
{ {
// Make room for n bits
bitbuf <<= n; bitbuf <<= n;
// While we still need more bits than we have in subbitbuf...
while(n > bitcount) while(n > bitcount)
{ {
// Pull any remaining bits from subbitbuf into bitbuf
bitbuf |= subbitbuf << (n -= bitcount); bitbuf |= subbitbuf << (n -= bitcount);
/* fetch next compressed byte from in_buf */ // Fetch the next compressed byte from input memory
{ {
int c = mem_getc(); int c = mem_getc(); // read one byte or 0 at EOF
subbitbuf = (c == EOF ? 0 : (uint8_t)c); subbitbuf = (c == EOF ? 0 : (uint8_t)c);
} }
// Reset bitcount: a full new byte is available
bitcount = CHAR_BIT; bitcount = CHAR_BIT;
} }
// Finally, consume the last n bits from subbitbuf into bitbuf
bitbuf |= subbitbuf >> (bitcount -= n); bitbuf |= subbitbuf >> (bitcount -= n);
} }
/* //-----------------------------------------------------------------------------
* getbits(n) -- return next n bits from the bit buffer // getbits(n)
*/ // Return the next n bits from bitbuf (highest-order bits), then
// call fillbuf(n) to replace them. Useful for reading variable-length codes.
//-----------------------------------------------------------------------------
uint32_t getbits(int n) uint32_t getbits(int n)
{ {
uint32_t x = bitbuf >> (BITBUFSIZ - n); uint32_t x = bitbuf >> (BITBUFSIZ - n); // extract top n bits
fillbuf(n); fillbuf(n); // replenish bitbuf for future reads
return x; return x;
} }
/* //-----------------------------------------------------------------------------
* putbits(n,x) -- write the lowest n bits of x to the bit buffer // putbits(n, x)
* now writes bytes directly to in-memory output buffer // Write the lowest n bits of x into the output buffer, packing them
*/ // into bytes via subbitbuf/bitcount and sending full bytes out
// with mem_putc(). Used by the encoder; kept here for completeness.
//-----------------------------------------------------------------------------
void putbits(int n, uint32_t x) /* Write rightmost n bits of x */ void putbits(int n, uint32_t x) /* Write rightmost n bits of x */
{ {
// If we have enough room in subbitbuf, just pack the bits
if(n < bitcount) { subbitbuf |= x << (bitcount -= n); } if(n < bitcount) { subbitbuf |= x << (bitcount -= n); }
else else
{ {
/* output first byte */ // Output the first full byte when subbitbuf fills
{ {
int w = (int)(subbitbuf | (x >> (n -= bitcount))); int w = (int)(subbitbuf | (x >> (n -= bitcount)));
mem_putc(w); mem_putc(w);
compsize++; compsize++; // increment output counter (for compression)
} }
// If remaining bits don't fill a full byte, stash them
if(n < CHAR_BIT) { subbitbuf = x << (bitcount = CHAR_BIT - n); } if(n < CHAR_BIT) { subbitbuf = x << (bitcount = CHAR_BIT - n); }
else else
{ {
/* output second byte */ // Otherwise, flush a second full byte
{ {
int w2 = (int)(x >> (n - CHAR_BIT)); int w2 = (int)(x >> (n - CHAR_BIT));
mem_putc(w2); mem_putc(w2);
compsize++; compsize++;
} }
// And stash any leftover bits beyond two bytes
subbitbuf = x << (bitcount = 2 * CHAR_BIT - n); subbitbuf = x << (bitcount = 2 * CHAR_BIT - n);
} }
} }
} }
/* //-----------------------------------------------------------------------------
* init_getbits -- initialize bit reader state // init_getbits()
*/ // Reset the bit-reader state so that fillbuf() will load fresh bits
// from the start of the input buffer.
//-----------------------------------------------------------------------------
void init_getbits() void init_getbits()
{ {
bitbuf = 0; bitbuf = 0; // clear accumulated bits
subbitbuf = 0; subbitbuf = 0; // no pending byte
bitcount = 0; bitcount = 0; // no bits available
fillbuf(BITBUFSIZ); fillbuf(BITBUFSIZ); // pre-load the bit buffer fully
} }
/* //-----------------------------------------------------------------------------
* init_putbits -- initialize bit writer state // init_putbits()
*/ // Reset the bit-writer state so subsequent putbits() calls start fresh.
//-----------------------------------------------------------------------------
void init_putbits() void init_putbits()
{ {
bitcount = CHAR_BIT; bitcount = CHAR_BIT; // subbitbuf is empty but ready for CHAR_BIT bits
subbitbuf = 0; subbitbuf = 0; // clear any leftover byte data
} }

View File

@@ -3,75 +3,128 @@
/*********************************************************** /***********************************************************
maketbl.c -- make table for decoding maketbl.c -- make table for decoding
Adapted from "ar" archiver written by Haruhiko Okumura. Builds a fast lookup table + fallback tree for Huffman
codes given code lengths. Used by decode_c() to map
input bit patterns to symbols efficiently.
Adapted from Haruhiko Okumura's “ar” archiver.
Modified for in-memory decompression by Natalia Portillo, 2025
***********************************************************/ ***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include "ar.h" #include <stdio.h>
#include "lzh.h" #include "ar.h" // provides NC, CODE_BIT, etc.
#include "lzh.h" // provides BITBUFSIZ
/*
* make_table(nchar, bitlen, tablebits, table):
*
* nchar = number of symbols
* bitlen[] = array of code lengths for each symbol [0..nchar-1]
* tablebits = number of bits for fast direct lookup
* table[] = output table of size (1<<tablebits), entries are:
* - symbol index if code length ≤ tablebits
* - zero or tree node index to follow for longer codes
*
* Algorithm steps:
* 1) Count how many codes of each length (count[1..16]).
* 2) Compute 'start' offsets for each length in a 16-bit code space.
* 3) Normalize starts to 'tablebits' prefix domain, build 'weight'.
* 4) Fill direct-mapped entries for short codes.
* 5) Build binary tree (using left[]/right[]) for codes longer than tablebits.
*/
void make_table(int nchar, uint8_t *bitlen, int tablebits, uint16_t *table) void make_table(int nchar, uint8_t *bitlen, int tablebits, uint16_t *table)
{ {
uint16_t count[17], weight[17], start[18], *p; uint16_t count[17]; // count[L] = number of symbols with length L
uint32_t i, k, len, ch, jutbits, avail, nextcode, mask; uint16_t weight[17]; // weight[L] = step size in prefix domain for length L
uint16_t start[18]; // start[L] = base code for length L in 16-bit space
uint16_t *p; // pointer into 'table' or tree
uint32_t i, k, len, ch;
uint32_t jutbits; // bits to drop when mapping into tablebits
uint32_t avail; // next free node index for left[]/right[] tree
uint32_t nextcode; // end-of-range code for current length
uint32_t mask; // bitmask for tree insertion
// 1) Zero counts, then tally code-lengths
for(i = 1; i <= 16; i++) count[i] = 0; for(i = 1; i <= 16; i++) count[i] = 0;
for(i = 0; i < nchar; i++) count[bitlen[i]]++; for(i = 0; i < (uint32_t)nchar; i++) count[bitlen[i]]++;
// 2) Compute cumulative start positions in the 16-bit code space
start[1] = 0; start[1] = 0;
for(i = 1; i <= 16; i++) start[i + 1] = start[i] + (count[i] << (16 - i)); for(i = 1; i <= 16; i++) start[i + 1] = start[i] + (count[i] << (16 - i));
if(start[17] != (uint16_t)((unsigned)1 << 16)) fprintf(stderr, "Bad decode table\n");
// Validate: sum of all codes must fill 16-bit range
if(start[17] != (uint16_t)(1U << 16)) fprintf(stderr, "make_table: Bad decode table\n");
// Prepare for mapping into tablebits-bit table
jutbits = 16 - tablebits; jutbits = 16 - tablebits;
for(i = 1; i <= tablebits; i++) for(i = 1; i <= (uint32_t)tablebits; i++)
{ {
// Shrink start[i] into prefix domain
start[i] >>= jutbits; start[i] >>= jutbits;
weight[i] = (unsigned)1 << (tablebits - i); // Weight = 2^(tablebits - i)
} weight[i] = (uint16_t)(1U << (tablebits - i));
while(i <= 16)
{
weight[i] = (unsigned)1 << (16 - i);
i++;
} }
// For lengths > tablebits, weight = 2^(16 - length)
for(; i <= 16; i++) weight[i] = (uint16_t)(1U << (16 - i));
// 3) Clear any unused table slots between last short code and end
i = start[tablebits + 1] >> jutbits; i = start[tablebits + 1] >> jutbits;
if(i != (uint16_t)((unsigned)1 << 16)) if(i != (uint16_t)(1U << tablebits))
{ {
k = 1 << tablebits; k = 1U << tablebits;
while(i != k) table[i++] = 0; while(i < k) table[i++] = 0;
} }
// Tree node indices start at nchar, just past the symbol values, so an entry >= nchar denotes a tree node
avail = nchar; avail = nchar;
mask = (unsigned)1 << (15 - tablebits); // Mask for inspecting bits when building tree
for(ch = 0; ch < nchar; ch++) mask = 1U << (15 - tablebits);
// 4) For each symbol, place its codes in table or tree
for(ch = 0; ch < (uint32_t)nchar; ch++)
{ {
if((len = bitlen[ch]) == 0) continue; len = bitlen[ch];
if(len == 0) continue; // skip symbols with no code
// Next code range = [start[len], start[len]+weight[len])
nextcode = start[len] + weight[len]; nextcode = start[len] + weight[len];
if(len <= tablebits) if(len <= tablebits)
{ {
for(i = start[len]; i < nextcode; i++) table[i] = ch; // Direct mapping: fill all table slots in this range
for(k = start[len]; k < nextcode; k++) table[k] = (uint16_t)ch;
} }
else else
{ {
// Build or extend tree for longer codes
// Start at table index for this prefix
k = start[len]; k = start[len];
p = &table[k >> jutbits]; p = &table[k >> jutbits];
i = len - tablebits; // Number of extra bits beyond tablebits
while(i != 0) uint32_t extra = len - tablebits;
// Walk/construct tree nodes bit by bit
while(extra-- > 0)
{ {
if(*p == 0) if(*p == 0)
{ {
right[avail] = left[avail] = 0; // allocate a new node for left[]/right[]
*p = avail++; left[avail] = right[avail] = 0;
*p = (uint16_t)avail++;
} }
// branch left or right based on current code bit
if(k & mask) if(k & mask)
p = &right[*p]; p = &right[*p];
else else
p = &left[*p]; p = &left[*p];
// shift to next bit in code
k <<= 1; k <<= 1;
i--;
} }
*p = ch; // At leaf: assign symbol
*p = (uint16_t)ch;
} }
// Advance start[len] for next code of same length
start[len] = nextcode; start[len] = nextcode;
} }
} }