Added comments from Copilot.

2025-12-16 19:24:31 +00:00 · 2025-08-26 02:06:19 +01:00
parent a336ce953e
commit 750df1cca9
4 changed files with 322 additions and 155 deletions
--- a/zoo/decode.c
+++ b/zoo/decode.c
@@ -3,63 +3,96 @@
 /***********************************************************
    decode.c
-Adapted from "ar" archiver written by Haruhiko Okumura.
+    Adapted from Haruhiko Okumura’s “ar” archiver. This
    version has been modified in 2025 by Natalia Portillo
    for in-memory decompression.
 ***********************************************************/
 // Modified for in-memory decompression by Natalia Portillo, 2025
-#include <limits.h>
+#include <limits.h>  // for UCHAR_MAX
-#include <stdint.h>
+#include <stdint.h>  // for fixed-width integer types
-#include "ar.h"
+#include "ar.h"   // archive format constants
-#include "lzh.h"
+#include "lzh.h"  // LZH-specific constants (DICSIZ, THRESHOLD, etc.)
-extern int decoded; /* from huf.c */
+extern int decoded;  // flag set by decode_c() when end-of-stream is reached
-static int j; /* remaining bytes to copy */
+static int j;  // bytes still to be copied from the current match
 /*
 * decode_start()
 *
 * Prepare the decoder for a new file:
 * - Initialize the Huffman bitstream (via huf_decode_start())
 * - Reset the sliding-window copy counter `j`
 * - Clear the end-of-data flag `decoded`
 */
 void decode_start()
 {
-    huf_decode_start();
+    huf_decode_start();  // reset bit-reader state
-    j       = 0;
+    j       = 0;         // no pending copy runs yet
-    decoded = 0;
+    decoded = 0;         // not yet at end-of-stream
 }
 /*
-decodes; returns no. of chars decoded
+ * decode(count, buffer)
-*/
+ *
-
+ * Decode up to `count` bytes (usually DICSIZ) into `buffer[]`.
 * Returns the number of bytes written; this is less than `count` (possibly 0)
 * when decode_c() reports end-of-stream through `decoded`.
 *
 * Sliding‐window logic:
 * 1. If `j` > 0, we are in the middle of copying a previous match:
 *    - Copy one byte from `buffer[i]` into `buffer[r]`
 *    - Advance `i` (circular within DICSIZ) and `r`
 *    - Decrement `j` and repeat until `j` = 0 or `r` = count
 * 2. Otherwise, fetch the next symbol `c = decode_c()`:
 *    - If `c <= UCHAR_MAX`, it’s a literal byte: emit it directly
 *    - Else it’s a match:
 *        • compute `j = match_length = c - (UCHAR_MAX + 1 - THRESHOLD)`
 *        • compute `i = (r - match_offset - 1) mod DICSIZ`,
 *          where match_offset = decode_p()
 *        • enter copy loop from step 1
 */
 int decode(uint32_t count, uint8_t *buffer)
 /* The calling function must keep the number of
   bytes to be processed.  This function decodes
   either 'count' bytes or 'DICSIZ' bytes, whichever
   is smaller, into the array 'buffer[]' of size
   'DICSIZ' or more.
   Call decode_start() once for each new file
   before calling this function. */
 {
-    static uint32_t i;
+    static uint32_t i;  // sliding-window read index (circular)
-    uint32_t        r, c;
+    uint32_t        r;  // write position in buffer
    uint32_t        c;  // symbol or match code
    r = 0;
    // Step 1: finish any pending copy from a previous match
    while(--j >= 0)
    {
-        buffer[r] = buffer[i];
+        buffer[r] = buffer[i];               // copy one byte from history
-        i         = (i + 1) & (DICSIZ - 1);
+        i         = (i + 1) & (DICSIZ - 1);  // wrap index within [0, DICSIZ)
-        if(++r == count) return r;
+        if(++r == count)                     // if output buffer is full
            return r;                        // return bytes written so far
    }
    // Step 2: decode new symbols until end-of-stream or buffer full
    for(;;)
    {
-        c = decode_c();
+        c = decode_c();  // get next Huffman symbol
-        if(decoded) return r;
+        if(decoded)      // end-of-stream marker reached
            return r;    // no more bytes to decode
        if(c <= UCHAR_MAX)
        {
-            buffer[r] = c;
+            // Literal byte: emit it directly
            buffer[r] = (uint8_t)c;
            if(++r == count) return r;
        }
        else
        {
            // Match sequence: compute how many bytes to copy
            // j = match length
            j = c - (UCHAR_MAX + 1 - THRESHOLD);
            // i = start position in sliding window:
            //    current output position minus offset minus 1, wrapped
            i = (r - decode_p() - 1) & (DICSIZ - 1);
            // Copy `j` bytes from history
            while(--j >= 0)
            {
                buffer[r] = buffer[i];
--- a/zoo/huf.c
+++ b/zoo/huf.c
@@ -1,110 +1,148 @@
 /*$Source: /usr/home/dhesi/zoo/RCS/huf.c,v $*/
 /*$Id: huf.c,v 1.9 91/07/09 01:39:55 dhesi Exp $*/
 /***********************************************************
-    huf.c -- static Huffman
+    huf.c -- static Huffman decoding
-Adapted from "ar" archiver written by Haruhiko Okumura.
+  Adapted from Haruhiko Okumura’s “ar” archiver.
  Modified in 2025 by Natalia Portillo for in-memory I/O.
 ***********************************************************/
 // Modified for in-memory decompression by Natalia Portillo, 2025
-#include <limits.h>
+#include <limits.h>  // UCHAR_MAX
-
+#include "ar.h"      // archive format constants
-#include "ar.h"
+#include "lzh.h"     // LZH algorithm constants (NC, DICBIT, CODE_BIT, etc.)
 #include "lzh.h"
 // NP = number of position codes = DICBIT+1
 // NT = number of tree codes   = CODE_BIT+3
 // PBIT, TBIT = bit‐width to transmit NP/NT in header
 #define NP   (DICBIT + 1)
 #define NT   (CODE_BIT + 3)
-#define PBIT 4 /* smallest integer such that (1U << PBIT) > NP */
+#define PBIT 4 /* smallest bit count such that (1U << PBIT) > NP */
-#define TBIT 5 /* smallest integer such that (1U << TBIT) > NT */
+#define TBIT 5 /* smallest bit count such that (1U << TBIT) > NT */
 // NPT = max(NP,NT) for prefix‐tree lengths
 #if NT > NP
 #define NPT NT
 #else
 #define NPT NP
 #endif
-static void read_pt_len(int, int, int);
+// forward declarations of helper routines
-static void read_c_len();
+static void read_pt_len(int nn, int nbit, int i_special);
 static void read_c_len(void);
-int decoded; /* for use in decode.c */
+int decoded;  // flag set when end-of-stream block is seen
 // Huffman tree storage arrays
 // left[]/right[] store the binary tree structure for fast decoding
 uint16_t left[2 * NC - 1], right[2 * NC - 1];
 // c_len[] = code lengths for literal/length tree (NC symbols)
 // pt_len[] = code lengths for position‐tree / prefix table (NPT symbols)
 // buf     = temporary buffer pointer used during encoding; unused in decode
 static uint8_t *buf, c_len[NC], pt_len[NPT];
 // size of buf if used, and remaining symbols in current block
 static uint32_t bufsiz = 0, blocksize;
 static uint16_t c_freq[2 * NC - 1], c_table[4096], c_code[NC], p_freq[2 * NP - 1], pt_table[256], pt_code[NPT],
    t_freq[2 * NT - 1];
-/***** decoding *****/
+// Frequency, code and decode‐table structures
 static uint16_t c_freq[2 * NC - 1],  // literal/length frequency counts
    c_table[4096],                   // fast‐lookup table for literal/length decoding
    c_code[NC],                      // canonical Huffman codes for literals
    p_freq[2 * NP - 1],              // position frequency counts
    pt_table[256],                   // prefix‐tree fast lookup (for reading code lengths)
    pt_code[NPT],                    // canonical codes for prefix‐tree
    t_freq[2 * NT - 1];              // temporary freq for tree of code‐length codes
 /***** decoding helper: read prefix‐tree code-lengths *****/
 static void read_pt_len(int nn, int nbit, int i_special)
 {
    int      i, c, n;
    uint32_t mask;
    // 1) read how many code‐lengths to consume
    n = getbits(nbit);
    if(n == 0)
    {
        // special case: only one symbol `c` is in use, so every lookup yields it
        c = getbits(nbit);
-        for(i = 0; i < nn; i++) pt_len[i] = 0;
+        for(i = 0; i < nn; i++)  // zero out lengths
-        for(i = 0; i < 256; i++) pt_table[i] = c;
+            pt_len[i] = 0;
        for(i = 0; i < 256; i++)  // prefix‐table always returns 'c'
            pt_table[i] = c;
    }
    else
    {
        // 2) read code lengths one by one
        i = 0;
        while(i < n)
        {
            // peek top 3 bits of bitbuf to guess small lengths
            c = bitbuf >> (BITBUFSIZ - 3);
            if(c == 7)
            {
-                mask = (unsigned)1 << (BITBUFSIZ - 1 - 3);
+                // if all three bits are 1, count additional ones
                mask = 1U << (BITBUFSIZ - 1 - 3);
                while(mask & bitbuf)
                {
                    mask >>= 1;
                    c++;
                    mask >>= 1;
                }
            }
-            fillbuf((c < 7) ? 3 : c - 3);
+            // consume the actual length bits
            fillbuf((c < 7) ? 3 : (c - 3));
            pt_len[i++] = c;
            // at special index, read a small run of zeros
            if(i == i_special)
            {
                c = getbits(2);
-                while(--c >= 0) pt_len[i++] = 0;
+                while(--c >= 0 && i < nn) pt_len[i++] = 0;
            }
        }
        // any remaining symbols get code‐length zero
        while(i < nn) pt_len[i++] = 0;
        // build fast lookup table from lengths
        make_table(nn, pt_len, 8, pt_table);
    }
 }
-static void read_c_len()
+/***** decoding helper: read literal/length code‐lengths *****/
 static void read_c_len(void)
 {
    int      i, c, n;
    uint32_t mask;
    // 1) how many literal codes?
    n = getbits(CBIT);
    if(n == 0)
    {
        // only one symbol `c` is in use: zero all lengths, every lookup yields it
        c = getbits(CBIT);
        for(i = 0; i < NC; i++) c_len[i] = 0;
        for(i = 0; i < 4096; i++) c_table[i] = c;
    }
    else
    {
        // 2) read each code length via prefix‐tree
        i = 0;
        while(i < n)
        {
            // lookup next symbol in prefix‐table
            c = pt_table[bitbuf >> (BITBUFSIZ - 8)];
            if(c >= NT)
            {
-                mask = (unsigned)1 << (BITBUFSIZ - 1 - 8);
+                // if prefix code is non-leaf, walk tree
                mask = 1U << (BITBUFSIZ - 1 - 8);
                do {
-                    if(bitbuf & mask)
+                    c = (bitbuf & mask) ? right[c] : left[c];
                        c = right[c];
                    else
                        c = left[c];
                    mask >>= 1;
                } while(c >= NT);
            }
            // consume code‐length bits
            fillbuf(pt_len[c]);
            // c ≤ 2: run-length encoding of zeros
            if(c <= 2)
            {
                if(c == 0)
@@ -113,75 +151,94 @@ static void read_c_len()
                    c = getbits(4) + 3;
                else
                    c = getbits(CBIT) + 20;
-                while(--c >= 0) c_len[i++] = 0;
+                while(--c >= 0 && i < NC) c_len[i++] = 0;
            }
            else
-                c_len[i++] = c - 2;
+            {
                // real code-length = c−2
                c_len[i++] = (uint8_t)(c - 2);
            }
        }
        // fill rest with zero lengths
        while(i < NC) c_len[i++] = 0;
        // build fast lookup for literal/length codes
        make_table(NC, c_len, 12, c_table);
    }
 }
-uint32_t decode_c()
+/***** decode next literal/length symbol or end-of-block *****/
 uint32_t decode_c(void)
 {
    uint32_t j, mask;
    // if starting a new block, read its header
    if(blocksize == 0)
    {
-        blocksize = getbits(16);
+        blocksize = getbits(16);  // block size = number of symbols
        if(blocksize == 0)
-        {
+        {  // zero block → end of data
 #if 0
 			(void) fprintf(stderr, "block size = 0, decoded\n");  /* debug */
 #endif
            decoded = 1;
            return 0;
        }
        // read three Huffman trees for this block:
        //   1) code-length codes for literal tree  (NT,TBIT,3)
        read_pt_len(NT, TBIT, 3);
        //   2) literal/length tree lengths         (CBIT)
        read_c_len();
        //   3) prefix-tree lengths for positions    (NP,PBIT,-1)
        read_pt_len(NP, PBIT, -1);
    }
    // consume one symbol from this block
    blocksize--;
    // fast table lookup: top 12 bits
    j = c_table[bitbuf >> (BITBUFSIZ - 12)];
    if(j >= NC)
    {
-        mask = (unsigned)1 << (BITBUFSIZ - 1 - 12);
+        // table entry is a tree node (code longer than 12 bits): walk the tree
        mask = 1U << (BITBUFSIZ - 1 - 12);
        do {
-            if(bitbuf & mask)
+            j = (bitbuf & mask) ? right[j] : left[j];
                j = right[j];
            else
                j = left[j];
            mask >>= 1;
        } while(j >= NC);
    }
    // remove j’s code length bits from bitbuf
    fillbuf(c_len[j]);
    return j;
 }
-uint32_t decode_p()
+/***** decode a match position (prefix code plus extra bits) *****/
 uint32_t decode_p(void)
 {
    uint32_t j, mask;
    // fast table lookup: top 8 bits
    j = pt_table[bitbuf >> (BITBUFSIZ - 8)];
    if(j >= NP)
    {
-        mask = (unsigned)1 << (BITBUFSIZ - 1 - 8);
+        // tree walk for long codes
        mask = 1U << (BITBUFSIZ - 1 - 8);
        do {
-            if(bitbuf & mask)
+            j = (bitbuf & mask) ? right[j] : left[j];
                j = right[j];
            else
                j = left[j];
            mask >>= 1;
        } while(j >= NP);
    }
    // consume prefix bits
    fillbuf(pt_len[j]);
-    if(j != 0) j = ((unsigned)1 << (j - 1)) + getbits((int)(j - 1));
+
    // if non-zero, read extra bits to form full position
    if(j != 0) j = (1U << (j - 1)) + getbits((int)(j - 1));
    return j;
 }
-void huf_decode_start()
+/***** start a new Huffman decode session *****/
 void huf_decode_start(void)
 {
-    init_getbits();
+    init_getbits();  // reset bit buffer & subbitbuf state
-    blocksize = 0;
+    blocksize = 0;   // force reading a fresh block header
 }
--- a/zoo/io.c
+++ b/zoo/io.c
@@ -3,103 +3,127 @@
 /***********************************************************
    io.c -- input/output (modified for in-memory I/O)
-Adapted from "ar" archiver written by Haruhiko Okumura.
+    Adapted from Haruhiko Okumura’s “ar” archiver.
-This version reads compressed bytes from an input buffer
+    This version feeds compressed bytes from a memory buffer
-via mem_getc() and writes output bytes to a buffer via
+    (via mem_getc()) and writes decompressed output to a buffer
-mem_putc(), removing all FILE* dependencies for decompression.
+    (via mem_putc()), eliminating FILE* dependencies.
    Modified for in-memory decompression by Natalia Portillo, 2025
 ***********************************************************/
 // Modified for in-memory decompression by Natalia Portillo, 2025
-#include <limits.h>
+#include <limits.h>  // Provides CHAR_BIT for bit-width operations
-#include "ar.h"
+#include "ar.h"   // Archive format constants (e.g., CODE_BIT, NC)
-#include "lzh.h"
+#include "lh5.h"  // Declarations for mem_getc(), mem_putc(), buffer state
 #include "lzh.h"  // LZH algorithm constants (e.g., BITBUFSIZ, DICSIZ)
-#include "lh5.h" /* mem_getc(), mem_putc(), in_ptr/in_left, out_ptr/out_left */
+//-----------------------------------------------------------------------------
 // Global bit-I/O state
 //-----------------------------------------------------------------------------
-uint16_t bitbuf;
+uint16_t bitbuf;      // Accumulates bits shifted in from the input stream
-int      unpackable;
+int      unpackable;  // Unused in decompression here (was for encode error)
-size_t   compsize, origsize;
+// Byte counters (optional diagnostics; not used to gate decompression)
-uint32_t subbitbuf;
+size_t   compsize;   // Count of output bytes produced (for compression mode)
-int      bitcount;
+size_t   origsize;   // Count of input bytes consumed (for CRC in file I/O)
 uint32_t subbitbuf;  // Holds the last byte fetched; bits are consumed from here
 int      bitcount;   // How many valid bits remain in subbitbuf
-/*
+//-----------------------------------------------------------------------------
- * fillbuf(n) -- shift bitbuf left by n bits and read in n new bits
+// fillbuf(n)
- * now reads bytes directly from in-memory input buffer
+//   Shift the global bitbuf left by n bits, then read in n new bits
- */
+//   from the input buffer (in-memory) to replenish bitbuf.
 //-----------------------------------------------------------------------------
 void fillbuf(int n) /* Shift bitbuf n bits left, read n bits */
 {
    // Make room for n bits
    bitbuf <<= n;
    // While we still need more bits than we have in subbitbuf...
    while(n > bitcount)
    {
        // Pull any remaining bits from subbitbuf into bitbuf
        bitbuf |= subbitbuf << (n -= bitcount);
-        /* fetch next compressed byte from in_buf */
+        // Fetch the next compressed byte from input memory
        {
-            int c     = mem_getc();
+            int c     = mem_getc();  // next input byte (an EOF result is replaced by 0 below)
            subbitbuf = (c == EOF ? 0 : (uint8_t)c);
        }
        // Reset bitcount: a full new byte is available
        bitcount = CHAR_BIT;
    }
    // Finally, consume the last n bits from subbitbuf into bitbuf
    bitbuf |= subbitbuf >> (bitcount -= n);
 }
-/*
+//-----------------------------------------------------------------------------
- * getbits(n) -- return next n bits from the bit buffer
+// getbits(n)
- */
+//   Return the next n bits from bitbuf (highest-order bits), then
 //   call fillbuf(n) to replace them. Useful for reading variable-length codes.
 //-----------------------------------------------------------------------------
 uint32_t getbits(int n)
 {
-    uint32_t x = bitbuf >> (BITBUFSIZ - n);
+    uint32_t x = bitbuf >> (BITBUFSIZ - n);  // extract top n bits
-    fillbuf(n);
+    fillbuf(n);                              // replenish bitbuf for future reads
    return x;
 }
-/*
+//-----------------------------------------------------------------------------
- * putbits(n,x) -- write the lowest n bits of x to the bit buffer
+// putbits(n, x)
- * now writes bytes directly to in-memory output buffer
+//   Write the lowest n bits of x into the output buffer, packing them
- */
+//   into bytes via subbitbuf/bitcount and sending full bytes out
 //   with mem_putc(). Used by the encoder; kept here for completeness.
 //-----------------------------------------------------------------------------
 void putbits(int n, uint32_t x) /* Write rightmost n bits of x */
 {
    // If we have enough room in subbitbuf, just pack the bits
    if(n < bitcount) { subbitbuf |= x << (bitcount -= n); }
    else
    {
-        /* output first byte */
+        // Output the first full byte when subbitbuf fills
        {
            int w = (int)(subbitbuf | (x >> (n -= bitcount)));
            mem_putc(w);
-            compsize++;
+            compsize++;  // increment output counter (for compression)
        }
        // If remaining bits don't fill a full byte, stash them
        if(n < CHAR_BIT) { subbitbuf = x << (bitcount = CHAR_BIT - n); }
        else
        {
-            /* output second byte */
+            // Otherwise, flush a second full byte
            {
                int w2 = (int)(x >> (n - CHAR_BIT));
                mem_putc(w2);
                compsize++;
            }
            // And stash any leftover bits beyond two bytes
            subbitbuf = x << (bitcount = 2 * CHAR_BIT - n);
        }
    }
 }
-/*
+//-----------------------------------------------------------------------------
- * init_getbits -- initialize bit reader state
+// init_getbits()
- */
+//   Reset the bit-reader state so that fillbuf() will load fresh bits
 //   from the start of the input buffer.
 //-----------------------------------------------------------------------------
 void init_getbits()
 {
-    bitbuf    = 0;
+    bitbuf    = 0;       // clear accumulated bits
-    subbitbuf = 0;
+    subbitbuf = 0;       // no pending byte
-    bitcount  = 0;
+    bitcount  = 0;       // no bits available
-    fillbuf(BITBUFSIZ);
+    fillbuf(BITBUFSIZ);  // pre-load the bit buffer fully
 }
-/*
+//-----------------------------------------------------------------------------
- * init_putbits -- initialize bit writer state
+// init_putbits()
- */
+//   Reset the bit-writer state so subsequent putbits() calls start fresh.
 //-----------------------------------------------------------------------------
 void init_putbits()
 {
-    bitcount  = CHAR_BIT;
+    bitcount  = CHAR_BIT;  // subbitbuf is empty but ready for CHAR_BIT bits
-    subbitbuf = 0;
+    subbitbuf = 0;         // clear any leftover byte data
 }
--- a/zoo/maketbl.c
+++ b/zoo/maketbl.c
@@ -3,75 +3,128 @@
 /***********************************************************
    maketbl.c -- make table for decoding
-Adapted from "ar" archiver written by Haruhiko Okumura.
+    Builds a fast lookup table + fallback tree for Huffman
    codes given code lengths.  The tables are consumed by decode_c()
    and decode_p() to map input bit patterns to symbols efficiently.
    Adapted from Haruhiko Okumura’s “ar” archiver.
    Modified for in-memory decompression by Natalia Portillo, 2025
 ***********************************************************/
 // Modified for in-memory decompression by Natalia Portillo, 2025
-#include "ar.h"
+#include <stdio.h>
-#include "lzh.h"
+#include "ar.h"   // provides NC, CODE_BIT, etc.
 #include "lzh.h"  // provides BITBUFSIZ
 /*
 * make_table(nchar, bitlen, tablebits, table):
 *
 * nchar     = number of symbols
 * bitlen[]  = array of code lengths for each symbol [0..nchar-1]
 * tablebits = number of bits for fast direct lookup
 * table[]   = output table of size (1<<tablebits), entries are:
 *             - symbol index if code length ≤ tablebits
 *             - zero or tree node index to follow for longer codes
 *
 * Algorithm steps:
 *  1) Count how many codes of each length (count[1..16]).
 *  2) Compute 'start' offsets for each length in a 16-bit code space.
 *  3) Normalize starts to 'tablebits' prefix domain, build 'weight'.
 *  4) Fill direct-mapped entries for short codes.
 *  5) Build binary tree (using left[]/right[]) for codes longer than tablebits.
 */
 void make_table(int nchar, uint8_t *bitlen, int tablebits, uint16_t *table)
 {
-    uint16_t count[17], weight[17], start[18], *p;
+    uint16_t  count[17];   // count[L] = number of symbols with length L
-    uint32_t i, k, len, ch, jutbits, avail, nextcode, mask;
+    uint16_t  weight[17];  // weight[L] = step size in prefix domain for length L
    uint16_t  start[18];   // start[L] = base code for length L in 16-bit space
    uint16_t *p;           // pointer into 'table' or tree
    uint32_t  i, k, len, ch;
    uint32_t  jutbits;   // bits to drop when mapping into tablebits
    uint32_t  avail;     // next free node index for left[]/right[] tree
    uint32_t  nextcode;  // end-of-range code for current length
    uint32_t  mask;      // bitmask for tree insertion
    // 1) Zero counts, then tally code-lengths
    for(i = 1; i <= 16; i++) count[i] = 0;
-    for(i = 0; i < nchar; i++) count[bitlen[i]]++;
+    for(i = 0; i < (uint32_t)nchar; i++) count[bitlen[i]]++;
    // 2) Compute cumulative start positions in the 16-bit code space
    start[1] = 0;
    for(i = 1; i <= 16; i++) start[i + 1] = start[i] + (count[i] << (16 - i));
    if(start[17] != (uint16_t)((unsigned)1 << 16)) fprintf(stderr, "Bad decode table\n");
    // Validate: sum of all codes must fill 16-bit range
    if(start[17] != (uint16_t)(1U << 16)) fprintf(stderr, "make_table: Bad decode table\n");
    // Prepare for mapping into tablebits-bit table
    jutbits = 16 - tablebits;
-    for(i = 1; i <= tablebits; i++)
+    for(i = 1; i <= (uint32_t)tablebits; i++)
    {
        // Shrink start[i] into prefix domain
        start[i] >>= jutbits;
-        weight[i] = (unsigned)1 << (tablebits - i);
+        // Weight = 2^(tablebits - i)
-    }
+        weight[i] = (uint16_t)(1U << (tablebits - i));
    while(i <= 16)
    {
        weight[i] = (unsigned)1 << (16 - i);
        i++;
    }
    // For lengths > tablebits, weight = 2^(16 - length)
    for(; i <= 16; i++) weight[i] = (uint16_t)(1U << (16 - i));
    // 3) Clear any unused table slots between last short code and end
    i = start[tablebits + 1] >> jutbits;
-    if(i != (uint16_t)((unsigned)1 << 16))
+    if(i != (uint16_t)(1U << tablebits))
    {
-        k = 1 << tablebits;
+        k = 1U << tablebits;
-        while(i != k) table[i++] = 0;
+        while(i < k) table[i++] = 0;
    }
    // Tree node indices start at nchar, just above the symbol values 0..nchar-1
    avail = nchar;
-    mask  = (unsigned)1 << (15 - tablebits);
+    // Mask for inspecting bits when building tree
-    for(ch = 0; ch < nchar; ch++)
+    mask  = 1U << (15 - tablebits);
    // 4) For each symbol, place its codes in table or tree
    for(ch = 0; ch < (uint32_t)nchar; ch++)
    {
-        if((len = bitlen[ch]) == 0) continue;
+        len = bitlen[ch];
        if(len == 0) continue;  // skip symbols with no code
        // Next code range = [start[len], start[len]+weight[len])
        nextcode = start[len] + weight[len];
        if(len <= tablebits)
        {
-            for(i = start[len]; i < nextcode; i++) table[i] = ch;
+            // Direct mapping: fill all table slots in this range
            for(k = start[len]; k < nextcode; k++) table[k] = (uint16_t)ch;
        }
        else
        {
            // Build or extend tree for longer codes
            // Start at table index for this prefix
            k              = start[len];
            p              = &table[k >> jutbits];
-            i = len - tablebits;
+            // Number of extra bits beyond tablebits
-            while(i != 0)
+            uint32_t extra = len - tablebits;
            // Walk/construct tree nodes bit by bit
            while(extra-- > 0)
            {
                if(*p == 0)
                {
-                    right[avail] = left[avail] = 0;
+                    // allocate a new node for left[]/right[]
-                    *p                         = avail++;
+                    left[avail] = right[avail] = 0;
                    *p                         = (uint16_t)avail++;
                }
                // branch left or right based on current code bit
                if(k & mask)
                    p = &right[*p];
                else
                    p = &left[*p];
                // shift to next bit in code
                k <<= 1;
                i--;
            }
-            *p = ch;
+            // At leaf: assign symbol
            *p = (uint16_t)ch;
        }
        // Advance start[len] for next code of same length
        start[len] = nextcode;
    }
 }