mirror of
https://github.com/aaru-dps/Aaru.Compression.Native.git
synced 2025-12-16 19:24:31 +00:00
Added comments from Copilot.
This commit is contained in:
113
zoo/maketbl.c
113
zoo/maketbl.c
@@ -3,75 +3,128 @@
|
||||
/***********************************************************
|
||||
maketbl.c -- make table for decoding
|
||||
|
||||
Adapted from "ar" archiver written by Haruhiko Okumura.
|
||||
Builds a fast lookup table + fallback tree for Huffman
|
||||
codes given code lengths. Used by decode_c() to map
|
||||
input bit patterns to symbols efficiently.
|
||||
|
||||
Adapted from Haruhiko Okumura’s “ar” archiver.
|
||||
Modified for in-memory decompression by Natalia Portillo, 2025
|
||||
***********************************************************/
|
||||
// Modified for in-memory decompression by Natalia Portillo, 2025
|
||||
|
||||
#include "ar.h"
|
||||
#include "lzh.h"
|
||||
#include <stdio.h>
|
||||
#include "ar.h" // provides NC, CODE_BIT, etc.
|
||||
#include "lzh.h" // provides BITBUFSIZ
|
||||
|
||||
/*
|
||||
* make_table(nchar, bitlen, tablebits, table):
|
||||
*
|
||||
* nchar = number of symbols
|
||||
* bitlen[] = array of code lengths for each symbol [0..nchar-1]
|
||||
* tablebits = number of bits for fast direct lookup
|
||||
* table[] = output table of size (1<<tablebits), entries are:
|
||||
* - symbol index if code length ≤ tablebits
|
||||
* - zero or tree node index to follow for longer codes
|
||||
*
|
||||
* Algorithm steps:
|
||||
* 1) Count how many codes of each length (count[1..16]).
|
||||
* 2) Compute 'start' offsets for each length in a 16-bit code space.
|
||||
* 3) Normalize starts to 'tablebits' prefix domain, build 'weight'.
|
||||
* 4) Fill direct-mapped entries for short codes.
|
||||
* 5) Build binary tree (using left[]/right[]) for codes longer than tablebits.
|
||||
*/
|
||||
void make_table(int nchar, uint8_t *bitlen, int tablebits, uint16_t *table)
|
||||
{
|
||||
uint16_t count[17], weight[17], start[18], *p;
|
||||
uint32_t i, k, len, ch, jutbits, avail, nextcode, mask;
|
||||
uint16_t count[17]; // count[L] = number of symbols with length L
|
||||
uint16_t weight[17]; // weight[L] = step size in prefix domain for length L
|
||||
uint16_t start[18]; // start[L] = base code for length L in 16-bit space
|
||||
uint16_t *p; // pointer into 'table' or tree
|
||||
uint32_t i, k, len, ch;
|
||||
uint32_t jutbits; // bits to drop when mapping into tablebits
|
||||
uint32_t avail; // next free node index for left[]/right[] tree
|
||||
uint32_t nextcode; // end-of-range code for current length
|
||||
uint32_t mask; // bitmask for tree insertion
|
||||
|
||||
// 1) Zero counts, then tally code-lengths
|
||||
for(i = 1; i <= 16; i++) count[i] = 0;
|
||||
for(i = 0; i < nchar; i++) count[bitlen[i]]++;
|
||||
for(i = 0; i < (uint32_t)nchar; i++) count[bitlen[i]]++;
|
||||
|
||||
// 2) Compute cumulative start positions in the 16-bit code space
|
||||
start[1] = 0;
|
||||
for(i = 1; i <= 16; i++) start[i + 1] = start[i] + (count[i] << (16 - i));
|
||||
if(start[17] != (uint16_t)((unsigned)1 << 16)) fprintf(stderr, "Bad decode table\n");
|
||||
|
||||
// Validate: sum of all codes must fill 16-bit range
|
||||
if(start[17] != (uint16_t)(1U << 16)) fprintf(stderr, "make_table: Bad decode table\n");
|
||||
|
||||
// Prepare for mapping into tablebits-bit table
|
||||
jutbits = 16 - tablebits;
|
||||
for(i = 1; i <= tablebits; i++)
|
||||
for(i = 1; i <= (uint32_t)tablebits; i++)
|
||||
{
|
||||
// Shrink start[i] into prefix domain
|
||||
start[i] >>= jutbits;
|
||||
weight[i] = (unsigned)1 << (tablebits - i);
|
||||
}
|
||||
while(i <= 16)
|
||||
{
|
||||
weight[i] = (unsigned)1 << (16 - i);
|
||||
i++;
|
||||
// Weight = 2^(tablebits - i)
|
||||
weight[i] = (uint16_t)(1U << (tablebits - i));
|
||||
}
|
||||
// For lengths > tablebits, weight = 2^(16 - length)
|
||||
for(; i <= 16; i++) weight[i] = (uint16_t)(1U << (16 - i));
|
||||
|
||||
// 3) Clear any unused table slots between last short code and end
|
||||
i = start[tablebits + 1] >> jutbits;
|
||||
if(i != (uint16_t)((unsigned)1 << 16))
|
||||
if(i != (uint16_t)(1U << tablebits))
|
||||
{
|
||||
k = 1 << tablebits;
|
||||
while(i != k) table[i++] = 0;
|
||||
k = 1U << tablebits;
|
||||
while(i < k) table[i++] = 0;
|
||||
}
|
||||
|
||||
// Initialize tree node index after the direct table entries
|
||||
avail = nchar;
|
||||
mask = (unsigned)1 << (15 - tablebits);
|
||||
for(ch = 0; ch < nchar; ch++)
|
||||
// Mask for inspecting bits when building tree
|
||||
mask = 1U << (15 - tablebits);
|
||||
|
||||
// 4) For each symbol, place its codes in table or tree
|
||||
for(ch = 0; ch < (uint32_t)nchar; ch++)
|
||||
{
|
||||
if((len = bitlen[ch]) == 0) continue;
|
||||
len = bitlen[ch];
|
||||
if(len == 0) continue; // skip symbols with no code
|
||||
|
||||
// Next code range = [start[len], start[len]+weight[len])
|
||||
nextcode = start[len] + weight[len];
|
||||
|
||||
if(len <= tablebits)
|
||||
{
|
||||
for(i = start[len]; i < nextcode; i++) table[i] = ch;
|
||||
// Direct mapping: fill all table slots in this range
|
||||
for(k = start[len]; k < nextcode; k++) table[k] = (uint16_t)ch;
|
||||
}
|
||||
else
|
||||
{
|
||||
k = start[len];
|
||||
p = &table[k >> jutbits];
|
||||
i = len - tablebits;
|
||||
while(i != 0)
|
||||
// Build or extend tree for longer codes
|
||||
// Start at table index for this prefix
|
||||
k = start[len];
|
||||
p = &table[k >> jutbits];
|
||||
// Number of extra bits beyond tablebits
|
||||
uint32_t extra = len - tablebits;
|
||||
|
||||
// Walk/construct tree nodes bit by bit
|
||||
while(extra-- > 0)
|
||||
{
|
||||
if(*p == 0)
|
||||
{
|
||||
right[avail] = left[avail] = 0;
|
||||
*p = avail++;
|
||||
// allocate a new node for left[]/right[]
|
||||
left[avail] = right[avail] = 0;
|
||||
*p = (uint16_t)avail++;
|
||||
}
|
||||
// branch left or right based on current code bit
|
||||
if(k & mask)
|
||||
p = &right[*p];
|
||||
else
|
||||
p = &left[*p];
|
||||
|
||||
// shift to next bit in code
|
||||
k <<= 1;
|
||||
i--;
|
||||
}
|
||||
*p = ch;
|
||||
// At leaf: assign symbol
|
||||
*p = (uint16_t)ch;
|
||||
}
|
||||
// Advance start[len] for next code of same length
|
||||
start[len] = nextcode;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user