8 Commits

23 changed files with 5169 additions and 162 deletions

View File

@@ -2,7 +2,7 @@
<package xmlns="http://schemas.microsoft.com/packaging/2010/07/nuspec.xsd">
<metadata>
<id>Aaru.Compression.Native</id>
<version>6.0.0-alpha.11.2</version>
<version>6.0.0-alpha.11.3</version>
<description>C implementation of compression algorithms used by Aaru.</description>
<authors>claunia</authors>
<projectUrl>https://github.com/aaru-dps/Aaru.Compression.Native</projectUrl>

View File

@@ -139,7 +139,11 @@ add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h a
zoo/lh5.c
zoo/lzh.h
zoo/ar.h
zoo/maketbl.c)
zoo/maketbl.c
arc/pack.c
arc/squeeze.c
arc/crunch.c
arc/lzw.c)
include(3rdparty/bzip2.cmake)
include(3rdparty/flac.cmake)

295
arc/crunch.c Normal file
View File

@@ -0,0 +1,295 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
* Copyright © 2018-2019 David Ryskalczyk
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "../library.h"
#define FALSE 0
#define TRUE !FALSE
#define TABSIZE 4096 // Size of the string table.
#define NO_PRED 0xFFFF // Indicates no predecessor in the string table.
#define EMPTY 0xFFFF // Indicates an empty stack.
typedef unsigned char u_char;
typedef unsigned short u_short;
// Entry in the string table.
struct entry
{
char used; // Is this entry in use?
u_char follower; // The character that follows the string.
u_short next; // Next entry in a collision chain.
u_short predecessor; // Code for the preceding string.
};
// Static variables for decompression state.
static struct entry *string_tab;
static u_char *stack;
static int sp;
// Buffer management variables.
static const u_char *in_buf_ptr;
static size_t in_len_rem;
static int inflag;
// Pointer to the hash function to use.
static u_short (*h)(u_short, u_char);
// Original hash function from ARC.
static u_short oldh(u_short pred, u_char foll)
{
long local;
local = ((pred + foll) | 0x0800) & 0xFFFF;
local *= local;
return (local >> 6) & 0x0FFF;
}
// Newer, faster hash function.
static u_short newh(u_short pred, u_char foll) { return (((pred + foll) & 0xFFFF) * 15073) & 0xFFF; }
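// Both hashes fold a (predecessor, follower) pair into the 0x000-0xFFF range of the
// 4096-entry string table; arc_decompress_crunch_internal() below selects between them
// through its new_hash argument.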
// Finds the end of a collision list.
static u_short eolist(u_short index)
{
int temp;
while((temp = string_tab[index].next)) index = temp;
return index;
}
// Hashes a string to find its position in the table.
static u_short hash_it(u_short pred, u_char foll)
{
u_short local, tempnext;
struct entry *ep;
local = (*h)(pred, foll);
if(!string_tab[local].used)
return local;
else
{
local = eolist(local);
tempnext = (local + 101) & 0x0FFF;
ep = &string_tab[tempnext];
while(ep->used)
{
if(++tempnext == TABSIZE)
{
tempnext = 0;
ep = string_tab;
}
else
++ep;
}
string_tab[local].next = tempnext;
return tempnext;
}
}
// Adds a new string to the table.
static void upd_tab(u_short pred, u_short foll)
{
struct entry *ep;
ep = &string_tab[hash_it(pred, foll)];
ep->used = TRUE;
ep->next = 0;
ep->predecessor = pred;
ep->follower = foll;
}
// Initializes the string table.
static void init_tab()
{
memset((char *)string_tab, 0, TABSIZE * sizeof(struct entry));
for(unsigned int i = 0; i < 256; i++) upd_tab(NO_PRED, i);
}
// Reads a 12-bit code from the input buffer.
static int get_code()
{
int code;
if(in_len_rem < 2) return -1;
if((inflag ^= 1))
{
code = (*in_buf_ptr++ << 4);
code |= (*in_buf_ptr >> 4);
in_len_rem--;
}
else
{
code = (*in_buf_ptr++ & 0x0f) << 8;
code |= (*in_buf_ptr++);
in_len_rem -= 2;
}
return code;
}
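// Codes are packed two per three bytes, high nibble first. As an illustrative example
// (bytes made up for this comment, not taken from any test file), the input sequence
// 0x12 0x34 0x56 yields the 12-bit codes 0x123 and 0x456: the first call returns
// (0x12 << 4) | (0x34 >> 4), the second ((0x34 & 0x0F) << 8) | 0x56.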
// Pushes a character onto the decode stack; on overflow, free the tables
// before bailing out so malformed input cannot leak memory.
#define PUSH(c) \
do { \
stack[sp] = ((u_char)(c)); \
if(++sp >= TABSIZE) \
{ \
free(string_tab); \
free(stack); \
return -1; \
} \
} while(0)
// Pops a character from the stack.
#define POP() ((sp > 0) ? (int)stack[--sp] : EMPTY)
// Internal crunch decompression logic.
static int arc_decompress_crunch_internal(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len, int new_hash)
{
// Basic validation of pointers.
if(!in_buf || !out_buf || !out_len) { return -1; }
// Allocate memory for tables.
string_tab = (struct entry *)malloc(TABSIZE * sizeof(struct entry));
stack = (u_char *)malloc(TABSIZE * sizeof(u_char));
if(!string_tab || !stack)
{
if(string_tab) free(string_tab);
if(stack) free(stack);
return -1;
}
// Select the hash function.
if(new_hash)
h = newh;
else
h = oldh;
// Initialize state.
sp = 0;
init_tab();
int code_count = TABSIZE - 256;
in_buf_ptr = in_buf;
in_len_rem = in_len;
inflag = 0;
// Main decompression loop.
int oldcode = get_code();
if(oldcode == -1)
{
*out_len = 0;
free(string_tab);
free(stack);
return 0;
}
int finchar = string_tab[oldcode].follower;
size_t out_pos = 0;
if(out_pos < *out_len) { out_buf[out_pos++] = finchar; }
int newcode;
while((newcode = get_code()) != -1)
{
int code = newcode;
struct entry *ep = &string_tab[code];
// Handle unknown codes and KwKwK case.
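// (the classic KwKwK situation: the only code the encoder can emit that the decoder
// has not yet added is oldcode's string followed by that string's first character)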
if(!ep->used)
{
code = oldcode;
ep = &string_tab[code];
PUSH(finchar);
}
// Decode the string by traversing the table.
while(ep->predecessor != NO_PRED)
{
PUSH(ep->follower);
code = ep->predecessor;
ep = &string_tab[code];
}
PUSH(finchar = ep->follower);
// Add the new string to the table if there's room.
if(code_count)
{
upd_tab(oldcode, finchar);
--code_count;
}
oldcode = newcode;
// Write the decoded string to the output buffer.
while(sp > 0)
{
int c = POP();
if(c == EMPTY) break;
if(out_pos < *out_len) { out_buf[out_pos++] = (unsigned char)c; }
}
}
// Clean up and return.
*out_len = out_pos;
free(string_tab);
free(stack);
return 0;
}
// Decompresses crunched data.
AARU_EXPORT int AARU_CALL arc_decompress_crunch(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len)
{
return arc_decompress_crunch_internal(in_buf, in_len, out_buf, out_len, 0);
}
// Decompresses crunched data with non-repeat packing.
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len)
{
// Allocate a temporary buffer for the intermediate decompressed data.
size_t temp_len = *out_len * 2; // Heuristic for temp buffer size.
unsigned char *temp_buf = malloc(temp_len);
if(!temp_buf) return -1;
// First, decompress the crunched data.
int result = arc_decompress_crunch_internal(in_buf, in_len, temp_buf, &temp_len, 0);
if(result == 0)
{
// Then, decompress the non-repeat packing.
result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
}
free(temp_buf);
return result;
}
// Decompresses crunched data with non-repeat packing and the new hash function.
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len)
{
// Allocate a temporary buffer.
size_t temp_len = *out_len * 2; // Heuristic.
unsigned char *temp_buf = malloc(temp_len);
if(!temp_buf) return -1;
// Decompress crunched data with the new hash.
int result = arc_decompress_crunch_internal(in_buf, in_len, temp_buf, &temp_len, 1);
if(result == 0)
{
// Decompress non-repeat packing.
result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
}
free(temp_buf);
return result;
}

271
arc/lzw.c Normal file
View File

@@ -0,0 +1,271 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
* Copyright © 2018-2019 David Ryskalczyk
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include "../library.h"
#define CRBITS 12 // Max bits for crunching.
#define SQBITS 13 // Max bits for squashing.
#define INIT_BITS 9 // Initial number of bits per code.
#define MAXCODE(n) ((1 << (n)) - 1) // Macro to calculate max code for n bits.
#define FIRST 257 // First available code.
#define CLEAR 256 // Code to clear the dictionary.
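// For reference: MAXCODE(INIT_BITS) == 511, MAXCODE(CRBITS) == 4095 and MAXCODE(SQBITS) == 8191;
// codes start INIT_BITS wide and grow one bit at a time up to Bits as the dictionary fills.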
// LZW decompression state variables.
static int Bits;
static int max_maxcode;
static int n_bits;
static int maxcode;
static int clear_flg;
static int free_ent;
static unsigned short *prefix;
static unsigned char *suffix;
static unsigned char *stack;
// Buffer management variables.
static const unsigned char *in_buf_ptr;
static size_t in_len_rem;
static int offset;
static char buf[SQBITS];
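// buf[] holds one group of n_bits input bytes at a time, i.e. exactly 8 codes of n_bits
// bits each; SQBITS is the largest code width ever used, so the buffer fits both modes.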
// Reads a variable-length code from the input buffer.
static int getcode()
{
int code;
static int size = 0;
int r_off, bits;
unsigned char *bp = (unsigned char *)buf;
// Check if we need to increase code size or handle a clear flag.
if(clear_flg > 0 || offset >= size || free_ent > maxcode)
{
if(free_ent > maxcode)
{
n_bits++;
if(n_bits == Bits)
maxcode = max_maxcode;
else
maxcode = MAXCODE(n_bits);
}
if(clear_flg > 0)
{
maxcode = MAXCODE(n_bits = INIT_BITS);
clear_flg = 0;
}
// Read n_bits bytes into the buffer.
for(size = 0; size < n_bits; size++)
{
if(in_len_rem == 0)
{
code = -1;
break;
}
code = *in_buf_ptr++;
in_len_rem--;
buf[size] = (char)code;
}
if(size <= 0) return -1; // End of file.
offset = 0;
size = (size << 3) - (n_bits - 1);
}
r_off = offset;
bits = n_bits;
// Extract the code from the buffer.
bp += (r_off >> 3);
r_off &= 7;
code = (*bp++ >> r_off);
bits -= 8 - r_off;
r_off = 8 - r_off;
if(bits >= 8)
{
code |= *bp++ << r_off;
r_off += 8;
bits -= 8;
}
code |= (*bp & ((1 << bits) - 1)) << r_off;
offset += n_bits;
return code;
}
// Main LZW decompression logic.
static int arc_decompress_lzw(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len,
int squash)
{
// Basic validation of pointers.
if(!in_buf || !out_buf || !out_len) { return -1; }
// Initialize buffer pointers and lengths.
in_buf_ptr = in_buf;
in_len_rem = in_len;
// Set parameters based on whether we're unsquashing or uncrunching.
if(squash) { Bits = SQBITS; }
else
{
Bits = CRBITS;
if(in_len_rem > 0)
{
// Crunch format has a header byte indicating max bits.
if(*in_buf_ptr != CRBITS) return -1;
in_buf_ptr++;
in_len_rem--;
}
}
if(in_len_rem <= 0)
{
*out_len = 0;
return 0;
}
// Initialize LZW parameters.
max_maxcode = 1 << Bits;
clear_flg = 0;
n_bits = INIT_BITS;
maxcode = MAXCODE(n_bits);
// Allocate memory for LZW tables.
prefix = (unsigned short *)malloc(max_maxcode * sizeof(unsigned short));
suffix = (unsigned char *)malloc(max_maxcode * sizeof(unsigned char));
stack = (unsigned char *)malloc(max_maxcode * sizeof(unsigned char));
if(!prefix || !suffix || !stack)
{
if(prefix) free(prefix);
if(suffix) free(suffix);
if(stack) free(stack);
return -1;
}
// Initialize the first 256 entries of the dictionary.
memset(prefix, 0, 256 * sizeof(unsigned short));
for(int code = 255; code >= 0; code--) { suffix[code] = (unsigned char)code; }
free_ent = FIRST;
offset = 0;
// Main decompression loop.
int finchar, oldcode, incode;
finchar = oldcode = getcode();
if(oldcode == -1)
{
*out_len = 0;
free(prefix);
free(suffix);
free(stack);
return 0;
}
size_t out_pos = 0;
if(out_pos < *out_len) { out_buf[out_pos++] = finchar; }
unsigned char *stackp = stack;
int code;
while((code = getcode()) > -1)
{
if(code == CLEAR)
{
// Clear the dictionary.
memset(prefix, 0, 256 * sizeof(unsigned short));
clear_flg = 1;
free_ent = FIRST - 1;
if((code = getcode()) == -1) break;
}
incode = code;
// Handle KwKwK case.
if(code >= free_ent)
{
if(code > free_ent)
{
// Error: invalid code.
break;
}
*stackp++ = finchar;
code = oldcode;
}
// Decode the string by traversing the dictionary.
while(code >= 256)
{
*stackp++ = suffix[code];
code = prefix[code];
}
*stackp++ = finchar = suffix[code];
// Write the decoded string to the output buffer.
do {
if(out_pos < *out_len) { out_buf[out_pos++] = *--stackp; }
else
{
stackp--; // Discard if output buffer is full.
}
} while(stackp > stack);
// Add the new string to the dictionary.
if((code = free_ent) < max_maxcode)
{
prefix[code] = (unsigned short)oldcode;
suffix[code] = finchar;
free_ent = code + 1;
}
oldcode = incode;
}
// Clean up and return.
*out_len = out_pos;
free(prefix);
free(suffix);
free(stack);
return 0;
}
// Decompresses squashed data.
AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len)
{
return arc_decompress_lzw(in_buf, in_len, out_buf, out_len, 1);
}
// Decompresses crunched data.
AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len)
{
// Allocate a temporary buffer.
size_t temp_len = *out_len * 2; // Heuristic.
unsigned char *temp_buf = malloc(temp_len);
if(!temp_buf) return -1;
// Decompress crunched data.
int result = arc_decompress_lzw(in_buf, in_len, temp_buf, &temp_len, 0);
if(result == 0)
{
// Decompress non-repeat packing.
result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len);
}
free(temp_buf);
return result;
}

78
arc/pack.c Normal file
View File

@@ -0,0 +1,78 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
* Copyright © 2018-2019 David Ryskalczyk
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
#include <string.h>
#include "../library.h"
#define DLE 0x90 // Data Link Escape character, used as a repeat marker.
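// Worked example (illustrative bytes, not taken from the test data):
//   41 42 90 03  ->  "ABBB"   (DLE + count 3: the previous byte occurs 3 times in total)
//   90 00        ->  0x90     (DLE + count 0 encodes a literal DLE byte)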
// Decompresses data using non-repeat packing.
// This algorithm encodes runs of identical bytes.
AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len)
{
// Basic validation of pointers.
if(!in_buf || !out_buf || !out_len) { return -1; }
size_t in_pos = 0;
size_t out_pos = 0;
unsigned char state = 0; // 0 for normal (NOHIST), 1 for in-repeat (INREP).
unsigned char lastc = 0; // Last character seen.
// Loop through the input buffer until it's exhausted or the output buffer is full.
while(in_pos < in_len && out_pos < *out_len)
{
if(state == 1)
{ // We are in a repeat sequence.
if(in_buf[in_pos])
{ // The byte after DLE is the repeat count.
unsigned char count = in_buf[in_pos];
// Emit count-1 further copies: the first occurrence was already written as a literal.
while(--count && out_pos < *out_len) { out_buf[out_pos++] = lastc; }
}
else
{ // A count of 0 means the DLE character itself should be written.
if(out_pos < *out_len) { out_buf[out_pos++] = DLE; }
}
state = 0; // Return to normal state.
in_pos++;
}
else
{ // Normal state.
if(in_buf[in_pos] != DLE)
{ // Not a repeat sequence.
if(out_pos < *out_len)
{
// Copy the character and save it as the last character.
out_buf[out_pos++] = lastc = in_buf[in_pos];
}
}
else
{ // DLE marks the start of a repeat sequence.
state = 1; // Enter repeat state.
}
in_pos++;
}
}
// Update the output length to the number of bytes written.
*out_len = out_pos;
// Return success.
return 0;
}

148
arc/squeeze.c Normal file
View File

@@ -0,0 +1,148 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
* Copyright © 2018-2019 David Ryskalczyk
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include "../library.h"
#define SPEOF 256 // Special end-of-file token.
#define NUMVALS 257 // Number of values in the Huffman tree (256 chars + SPEOF).
// Node structure for the Huffman decoding tree.
struct nd
{
int child[2]; // Children of the node.
};
// Static variables for the decompression state.
static struct nd nodes[NUMVALS]; // The Huffman tree.
static int numnodes; // Number of nodes in the tree.
static int bpos; // Bit position in the current byte.
static unsigned char curin; // Current byte being read.
// Pointers for buffer management.
static const unsigned char *in_buf_ptr;
static size_t in_len_rem;
static unsigned char *out_buf_ptr;
static size_t out_len_rem;
// Reads a byte from the input buffer.
static int get_byte()
{
if(in_len_rem == 0) { return EOF; }
in_len_rem--;
return *in_buf_ptr++;
}
static int arc_decompress_huffman(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf, size_t *out_len)
{
// Basic validation of pointers.
if(!in_buf || !out_buf || !out_len) { return -1; }
// Initialize buffer pointers and lengths.
in_buf_ptr = in_buf;
in_len_rem = in_len;
out_buf_ptr = out_buf;
out_len_rem = *out_len;
bpos = 99; // Force initial read.
// Read the number of nodes in the Huffman tree.
if(in_len_rem < 2) return -1;
numnodes = get_byte();
numnodes |= get_byte() << 8;
if(numnodes < 0 || numnodes >= NUMVALS)
{
return -1; // Invalid tree.
}
// ARC: initialize for possible empty tree (SPEOF only)
nodes[0].child[0] = -(SPEOF + 1);
nodes[0].child[1] = -(SPEOF + 1);
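// Leaves are stored as negative children: a value of -(v + 1) decodes back to symbol v,
// so -(SPEOF + 1) is the end-of-data marker used above and checked for below.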
// Read the Huffman tree from the input buffer, sign-extend 16-bit values
for(int i = 0; i < numnodes; ++i)
{
if(in_len_rem < 4) return -1;
uint8_t b0 = get_byte();
uint8_t b1 = get_byte();
uint8_t b2 = get_byte();
uint8_t b3 = get_byte();
nodes[i].child[0] = (int16_t)((b0) | (b1 << 8));
nodes[i].child[1] = (int16_t)((b2) | (b3 << 8));
}
size_t written = 0;
// bpos is already 99 from init
while(written < *out_len)
{
int i = 0;
// follow bit stream in tree to a leaf
while(i >= 0)
{
if(++bpos > 7)
{
int c = get_byte();
if(c == EOF)
{
*out_len = written;
return 0; // End of input
}
curin = c;
bpos = 0;
// move a level deeper in tree
i = nodes[i].child[curin & 1];
}
else { i = nodes[i].child[1 & (curin >>= 1)]; }
// Guard against corrupt trees whose children point past the node table.
if(i >= numnodes) { *out_len = written; return -1; }
}
// decode fake node index to original data value
int value = -(i + 1);
if(value == SPEOF)
{
break; // End of data
}
*out_buf_ptr++ = value;
written++;
}
*out_len = written;
return 0;
}
// Decompresses data using Huffman squeezing.
AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len)
{
size_t temp_len = *out_len * 2;
unsigned char *temp_buf = malloc(temp_len);
if(!temp_buf) return -1;
int result = arc_decompress_huffman(in_buf, in_len, temp_buf, &temp_len);
if(result == 0) { result = arc_decompress_pack(temp_buf, temp_len, out_buf, out_len); }
free(temp_buf);
return result;
}

View File

@@ -179,7 +179,7 @@ docker/dockcross-win-arm cmake -DCMAKE_BUILD_TYPE=Release -DAARU_BUILD_PACKAGE=1
sed -e 's/\-soname,libAaru\.Compression\.Native\.so//g' ./CMakeFiles/Aaru.Compression.Native.dir/link.txt > link.txt
mv link.txt ./CMakeFiles/Aaru.Compression.Native.dir/link.txt
docker/dockcross-win-arm make Aaru.Compression.Native
mv libAaru.Compression.Native.so runtimes/win-arm/native/libAaru.Compression.Native.dll
mv libAaru.Compression.Native.dll runtimes/win-arm/native/
## Windows (ARM64)
# Detected system processor: aarch64
@@ -191,7 +191,7 @@ docker/dockcross-win-arm64 cmake -DCMAKE_BUILD_TYPE=Release -DAARU_BUILD_PACKAGE
sed -e 's/\-soname,libAaru\.Compression\.Native\.so//g' ./CMakeFiles/Aaru.Compression.Native.dir/link.txt > link.txt
mv link.txt ./CMakeFiles/Aaru.Compression.Native.dir/link.txt
docker/dockcross-win-arm64 make Aaru.Compression.Native
mv libAaru.Compression.Native.so runtimes/win-arm64/native/libAaru.Compression.Native.dll
mv libAaru.Compression.Native.dll runtimes/win-arm64/native/
## Windows (AMD64)
# Detected system processor: x86_64

View File

@@ -118,4 +118,28 @@ AARU_EXPORT int AARU_CALL lh5_decompress(const uint8_t *in_buf, size_t in_len, u
AARU_EXPORT uint64_t AARU_CALL AARU_get_acn_version();
// ARC method 3: Stored with non-repeat packing
AARU_EXPORT int AARU_CALL arc_decompress_pack(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len);
// ARC method 4: Huffman squeezing
AARU_EXPORT int AARU_CALL arc_decompress_squeeze(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len);
// Method 5: LZW (crunching)
AARU_EXPORT int AARU_CALL arc_decompress_crunch(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len);
// Method 6: LZW with non-repeat packing (crunching)
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len);
// Method 7: LZW with non-repeat packing and new hash (crunching)
AARU_EXPORT int AARU_CALL arc_decompress_crunch_nrpack_new(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len);
// Method 8: Dynamic LZW (crunching)
AARU_EXPORT int AARU_CALL arc_decompress_crunch_dynamic(const unsigned char *in_buf, size_t in_len,
unsigned char *out_buf, size_t *out_len);
// Method 9: Dynamic LZW with 13 bits (squashing)
AARU_EXPORT int AARU_CALL arc_decompress_squash(const unsigned char *in_buf, size_t in_len, unsigned char *out_buf,
size_t *out_len);
#endif // AARU_COMPRESSION_NATIVE_LIBRARY_H
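All of the new ARC entry points follow the same calling convention as the existing exports: the caller supplies both buffers, *out_len carries the output capacity on entry and the number of bytes actually written on return, and 0 is returned on success (-1 on error). A minimal caller sketch under those assumptions; unsqueeze_entry and the size arguments are hypothetical, not part of the library:

#include <stdlib.h>
#include "library.h"

// Hypothetical helper: unpack one squeezed ARC entry whose compressed and
// uncompressed sizes are already known from the archive header.
static unsigned char *unsqueeze_entry(const unsigned char *packed, size_t packed_len, size_t unpacked_len)
{
    size_t out_len = unpacked_len;             // capacity in, bytes written out
    unsigned char *out = malloc(unpacked_len);
    if(!out) return NULL;
    if(arc_decompress_squeeze(packed, packed_len, out, &out_len) != 0 || out_len != unpacked_len)
    {
        free(out);
        return NULL;
    }
    return out;
}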

View File

@@ -45,9 +45,24 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/alice29.lzd
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/alice29.lh5
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcpack.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsqueeze.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunchnr.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arccrunch_dynamic.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/arcsquash.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
# 'Google_Tests_run' is the target name
# 'test1.cpp tests2.cpp' are source files with tests
add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp
zoo/lzd.cpp
lh5.cpp)
zoo/lzd.cpp arc/pack.cpp lh5.cpp arc/squeeze.cpp arc/crunch.cpp
arc/squash.cpp)
target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native")

133
tests/arc/crunch.cpp Normal file
View File

@@ -0,0 +1,133 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"
#define EXPECTED_CRC32 0x66007dba
static const uint8_t *buffer;
class crunchFixture : public ::testing::Test
{
public:
crunchFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arccrunchnr.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(72537);
fread((void *)buffer, 1, 72537, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~crunchFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(crunchFixture, crunch)
{
size_t destLen = 152089;
size_t srcLen = 72537;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_crunch_nrpack(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}
class crunchDynamicFixture : public ::testing::Test
{
public:
crunchDynamicFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arccrunch_dynamic.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(73189);
fread((void *)buffer, 1, 73189, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~crunchDynamicFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(crunchDynamicFixture, crunchDynamic)
{
size_t destLen = 152089;
size_t srcLen = 73189;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_crunch_dynamic(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}

82
tests/arc/pack.cpp Normal file
View File

@@ -0,0 +1,82 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"
#define EXPECTED_CRC32 0x66007dba
static const uint8_t *buffer;
class packFixture : public ::testing::Test
{
public:
packFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arcpack.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(149855);
fread((void *)buffer, 1, 149855, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~packFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(packFixture, pack)
{
size_t destLen = 152089;
size_t srcLen = 149855;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_pack(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}

81
tests/arc/squash.cpp Normal file
View File

@@ -0,0 +1,81 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"
#define EXPECTED_CRC32 0x66007dba
static const uint8_t *buffer;
class squashFixture : public ::testing::Test
{
public:
squashFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arcsquash.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(67308);
fread((void *)buffer, 1, 67308, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~squashFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(squashFixture, squash)
{
size_t destLen = 152089;
size_t srcLen = 67308;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_squash(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}

81
tests/arc/squeeze.cpp Normal file
View File

@@ -0,0 +1,81 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <unistd.h>
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"
#define EXPECTED_CRC32 0x66007dba
static const uint8_t *buffer;
class squeezeFixture : public ::testing::Test
{
public:
squeezeFixture()
{
// initialization;
// can also be done in SetUp()
}
protected:
void SetUp()
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/arcsqueeze.bin", path);
FILE *file = fopen(filename, "rb");
buffer = (const uint8_t *)malloc(88044);
fread((void *)buffer, 1, 88044, file);
fclose(file);
}
void TearDown() { free((void *)buffer); }
~squeezeFixture()
{
// resources cleanup, no exceptions allowed
}
// shared user data
};
TEST_F(squeezeFixture, squeeze)
{
size_t destLen = 152089;
size_t srcLen = 88044;
auto *outBuf = (uint8_t *)malloc(152089);
auto err = arc_decompress_squeeze(buffer, srcLen, outBuf, &destLen);
EXPECT_EQ(err, 0);
EXPECT_EQ(destLen, 152089);
auto crc = crc32_data(outBuf, 152089);
free(outBuf);
EXPECT_EQ(crc, EXPECTED_CRC32);
}

BIN
tests/data/arccrunch_dynamic.bin Executable file

Binary file not shown.

BIN
tests/data/arccrunchnr.bin Executable file

Binary file not shown.

3628
tests/data/arcpack.bin Executable file

File diff suppressed because it is too large

BIN
tests/data/arcsquash.bin Executable file

Binary file not shown.

BIN
tests/data/arcsqueeze.bin Executable file

Binary file not shown.

View File

@@ -25,7 +25,7 @@
#include <sys/stat.h>
#include <unistd.h>
#include "../library.h"
#include "../../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"

View File

@@ -3,63 +3,96 @@
/***********************************************************
decode.c
Adapted from "ar" archiver written by Haruhiko Okumura.
Adapted from Haruhiko Okumura's “ar” archiver. This
version has been modified in 2025 by Natalia Portillo
for in-memory decompression.
***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include <limits.h>
#include <stdint.h>
#include <limits.h> // for UCHAR_MAX
#include <stdint.h> // for fixed-width integer types
#include "ar.h"
#include "lzh.h"
#include "ar.h" // archive format constants
#include "lzh.h" // LZH-specific constants (DICSIZ, THRESHOLD, etc.)
extern int decoded; /* from huf.c */
extern int decoded; // flag set by decode_c() when end-of-stream is reached
static int j; /* remaining bytes to copy */
static int j; // number of match bytes still to be copied from the sliding window
/*
* decode_start()
*
* Prepare the decoder for a new file:
* - Initialize the Huffman bitstream (via huf_decode_start())
* - Reset the sliding-window copy counter `j`
* - Clear the end-of-data flag `decoded`
*/
void decode_start()
{
huf_decode_start();
j = 0;
decoded = 0;
huf_decode_start(); // reset bit-reader state
j = 0; // no pending copy runs yet
decoded = 0; // not yet at end-of-stream
}
/*
decodes; returns no. of chars decoded
*/
* decode(count, buffer)
*
* Decode up to `count` bytes (usually DICSIZ) into `buffer[]`.
* Returns the number of bytes actually written; this can be fewer than
* `count` once the end-of-stream marker has been decoded (`decoded` is set).
*
* Sliding-window logic:
* 1. If `j` > 0, we are in the middle of copying a previous match:
* - Copy one byte from `buffer[i]` into `buffer[r]`
* - Advance `i` (circular within DICSIZ) and `r`
* - Decrement `j` and repeat until `j` = 0 or `r` = count
* 2. Otherwise, fetch the next symbol `c = decode_c()`:
* - If `c <= UCHAR_MAX`, it's a literal byte: emit it directly
* - Else it's a match:
* • compute `j = match_length = c - (UCHAR_MAX + 1 - THRESHOLD)`
* • compute `i = (r - match_offset - 1) mod DICSIZ`,
* where match_offset = decode_p()
* • enter copy loop from step 1
*/
int decode(uint32_t count, uint8_t *buffer)
/* The calling function must keep the number of
bytes to be processed. This function decodes
either 'count' bytes or 'DICSIZ' bytes, whichever
is smaller, into the array 'buffer[]' of size
'DICSIZ' or more.
Call decode_start() once for each new file
before calling this function. */
{
static uint32_t i;
uint32_t r, c;
static uint32_t i; // sliding-window read index (circular)
uint32_t r; // write position in buffer
uint32_t c; // symbol or match code
r = 0;
// Step 1: finish any pending copy from a previous match
while(--j >= 0)
{
buffer[r] = buffer[i];
i = (i + 1) & (DICSIZ - 1);
if(++r == count) return r;
buffer[r] = buffer[i]; // copy one byte from history
i = (i + 1) & (DICSIZ - 1); // wrap index within [0, DICSIZ)
if(++r == count) // if output buffer is full
return r; // return bytes written so far
}
// Step 2: decode new symbols until end-of-stream or buffer full
for(;;)
{
c = decode_c();
if(decoded) return r;
c = decode_c(); // get next Huffman symbol
if(decoded) // end-of-stream marker reached
return r; // no more bytes to decode
if(c <= UCHAR_MAX)
{
buffer[r] = c;
// Literal byte: emit it directly
buffer[r] = (uint8_t)c;
if(++r == count) return r;
}
else
{
// Match sequence: compute how many bytes to copy
// j = match length
j = c - (UCHAR_MAX + 1 - THRESHOLD);
// i = start position in sliding window:
// current output position minus offset minus 1, wrapped
i = (r - decode_p() - 1) & (DICSIZ - 1);
// Copy `j` bytes from history
while(--j >= 0)
{
buffer[r] = buffer[i];
@@ -68,4 +101,4 @@ int decode(uint32_t count, uint8_t *buffer)
}
}
}
}
}

159
zoo/huf.c
View File

@@ -1,110 +1,148 @@
/*$Source: /usr/home/dhesi/zoo/RCS/huf.c,v $*/
/*$Id: huf.c,v 1.9 91/07/09 01:39:55 dhesi Exp $*/
/***********************************************************
huf.c -- static Huffman
huf.c -- static Huffman decoding
Adapted from "ar" archiver written by Haruhiko Okumura.
Adapted from Haruhiko Okumura's “ar” archiver.
Modified in 2025 by Natalia Portillo for in-memory I/O.
***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include <limits.h>
#include "ar.h"
#include "lzh.h"
#include <limits.h> // UCHAR_MAX
#include "ar.h" // archive format constants
#include "lzh.h" // LZH algorithm constants (NC, DICBIT, CODE_BIT, etc.)
// NP = number of position codes = DICBIT+1
// NT = number of tree codes = CODE_BIT+3
// PBIT, TBIT = bit-width to transmit NP/NT in header
#define NP (DICBIT + 1)
#define NT (CODE_BIT + 3)
#define PBIT 4 /* smallest integer such that (1U << PBIT) > NP */
#define TBIT 5 /* smallest integer such that (1U << TBIT) > NT */
#define PBIT 4 /* smallest bits so (1<<PBIT)>NP */
#define TBIT 5 /* smallest bits so (1<<TBIT)>NT */
// NPT = max(NP,NT) for prefix-tree lengths
#if NT > NP
#define NPT NT
#else
#define NPT NP
#endif
static void read_pt_len(int, int, int);
static void read_c_len();
// forward declarations of helper routines
static void read_pt_len(int nn, int nbit, int i_special);
static void read_c_len(void);
int decoded; /* for use in decode.c */
int decoded; // flag set when end-of-stream block is seen
uint16_t left[2 * NC - 1], right[2 * NC - 1];
// Huffman tree storage arrays
// left[]/right[] store the binary tree structure for fast decoding
uint16_t left[2 * NC - 1], right[2 * NC - 1];
// c_len[] = code lengths for literal/length tree (NC symbols)
// pt_len[] = code lengths for position-tree / prefix table (NPT symbols)
// buf = temporary buffer pointer used during encoding; unused in decode
static uint8_t *buf, c_len[NC], pt_len[NPT];
// size of buf if used, and remaining symbols in current block
static uint32_t bufsiz = 0, blocksize;
static uint16_t c_freq[2 * NC - 1], c_table[4096], c_code[NC], p_freq[2 * NP - 1], pt_table[256], pt_code[NPT],
t_freq[2 * NT - 1];
/***** decoding *****/
// Frequency, code and decodetable structures
static uint16_t c_freq[2 * NC - 1], // literal/length frequency counts
c_table[4096], // fast-lookup table for literal/length decoding
c_code[NC], // canonical Huffman codes for literals
p_freq[2 * NP - 1], // position frequency counts
pt_table[256], // prefix-tree fast lookup (for reading code lengths)
pt_code[NPT], // canonical codes for the prefix tree
t_freq[2 * NT - 1]; // temporary freq for tree of codelength codes
/***** decoding helper: read prefix-tree code-lengths *****/
static void read_pt_len(int nn, int nbit, int i_special)
{
int i, c, n;
uint32_t mask;
// 1) read how many code-lengths to consume
n = getbits(nbit);
if(n == 0)
{
// special case: all code-lengths are identical
c = getbits(nbit);
for(i = 0; i < nn; i++) pt_len[i] = 0;
for(i = 0; i < 256; i++) pt_table[i] = c;
for(i = 0; i < nn; i++) // zero out lengths
pt_len[i] = 0;
for(i = 0; i < 256; i++) // prefix table always returns 'c'
pt_table[i] = c;
}
else
{
// 2) read code lengths one by one
i = 0;
while(i < n)
{
// peek top 3 bits of bitbuf to guess small lengths
c = bitbuf >> (BITBUFSIZ - 3);
if(c == 7)
{
mask = (unsigned)1 << (BITBUFSIZ - 1 - 3);
// if all three bits are 1, count additional ones
mask = 1U << (BITBUFSIZ - 1 - 3);
while(mask & bitbuf)
{
mask >>= 1;
c++;
mask >>= 1;
}
}
fillbuf((c < 7) ? 3 : c - 3);
// consume the actual length bits
fillbuf((c < 7) ? 3 : (c - 3));
pt_len[i++] = c;
// at special index, read a small run of zeros
if(i == i_special)
{
c = getbits(2);
while(--c >= 0) pt_len[i++] = 0;
while(--c >= 0 && i < nn) pt_len[i++] = 0;
}
}
// any remaining symbols get code-length zero
while(i < nn) pt_len[i++] = 0;
// build fast lookup table from lengths
make_table(nn, pt_len, 8, pt_table);
}
}
static void read_c_len()
/***** decoding helper: read literal/length code-lengths *****/
static void read_c_len(void)
{
int i, c, n;
uint32_t mask;
// 1) how many literal codes?
n = getbits(CBIT);
if(n == 0)
{
// all code-lengths identical
c = getbits(CBIT);
for(i = 0; i < NC; i++) c_len[i] = 0;
for(i = 0; i < 4096; i++) c_table[i] = c;
}
else
{
// 2) read each code length via prefix-tree
i = 0;
while(i < n)
{
// lookup next symbol in prefix table
c = pt_table[bitbuf >> (BITBUFSIZ - 8)];
if(c >= NT)
{
mask = (unsigned)1 << (BITBUFSIZ - 1 - 8);
// if prefix code is non-leaf, walk tree
mask = 1U << (BITBUFSIZ - 1 - 8);
do {
if(bitbuf & mask)
c = right[c];
else
c = left[c];
c = (bitbuf & mask) ? right[c] : left[c];
mask >>= 1;
} while(c >= NT);
}
// consume code-length bits
fillbuf(pt_len[c]);
// c ≤ 2: run-length encoding of zeros
if(c <= 2)
{
if(c == 0)
@@ -113,75 +151,94 @@ static void read_c_len()
c = getbits(4) + 3;
else
c = getbits(CBIT) + 20;
while(--c >= 0) c_len[i++] = 0;
while(--c >= 0 && i < NC) c_len[i++] = 0;
}
else
c_len[i++] = c - 2;
{
// real code-length = c - 2
c_len[i++] = (uint8_t)(c - 2);
}
}
// fill rest with zero lengths
while(i < NC) c_len[i++] = 0;
// build fast lookup for literal/length codes
make_table(NC, c_len, 12, c_table);
}
}
uint32_t decode_c()
/***** decode next literal/length symbol or end-of-block *****/
uint32_t decode_c(void)
{
uint32_t j, mask;
// if starting a new block, read its header
if(blocksize == 0)
{
blocksize = getbits(16);
blocksize = getbits(16); // block size = number of symbols
if(blocksize == 0)
{
#if 0
(void) fprintf(stderr, "block size = 0, decoded\n"); /* debug */
#endif
{ // zero block → end of data
decoded = 1;
return 0;
}
// read three Huffman trees for this block:
// 1) code-length codes for literal tree (NT,TBIT,3)
read_pt_len(NT, TBIT, 3);
// 2) literal/length tree lengths (CBIT)
read_c_len();
// 3) prefix-tree lengths for positions (NP,PBIT,-1)
read_pt_len(NP, PBIT, -1);
}
// consume one symbol from this block
blocksize--;
// fast table lookup: top 12 bits
j = c_table[bitbuf >> (BITBUFSIZ - 12)];
if(j >= NC)
{
mask = (unsigned)1 << (BITBUFSIZ - 1 - 12);
// need to walk tree if overflow
mask = 1U << (BITBUFSIZ - 1 - 12);
do {
if(bitbuf & mask)
j = right[j];
else
j = left[j];
j = (bitbuf & mask) ? right[j] : left[j];
mask >>= 1;
} while(j >= NC);
}
// remove j's code length bits from bitbuf
fillbuf(c_len[j]);
return j;
}
uint32_t decode_p()
/***** decode match-position extra bits *****/
uint32_t decode_p(void)
{
uint32_t j, mask;
// fast table lookup: top 8 bits
j = pt_table[bitbuf >> (BITBUFSIZ - 8)];
if(j >= NP)
{
mask = (unsigned)1 << (BITBUFSIZ - 1 - 8);
// tree walk for long codes
mask = 1U << (BITBUFSIZ - 1 - 8);
do {
if(bitbuf & mask)
j = right[j];
else
j = left[j];
j = (bitbuf & mask) ? right[j] : left[j];
mask >>= 1;
} while(j >= NP);
}
// consume prefix bits
fillbuf(pt_len[j]);
if(j != 0) j = ((unsigned)1 << (j - 1)) + getbits((int)(j - 1));
// if non-zero, read extra bits to form full position
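// e.g. a table symbol of 3 yields values 4..7: base (1 << 2) plus two extra bits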
if(j != 0) j = (1U << (j - 1)) + getbits((int)(j - 1));
return j;
}
void huf_decode_start()
/***** start a new Huffman decode session *****/
void huf_decode_start(void)
{
init_getbits();
blocksize = 0;
init_getbits(); // reset bit buffer & subbitbuf state
blocksize = 0; // force reading a fresh block header
}

112
zoo/io.c
View File

@@ -3,103 +3,127 @@
/***********************************************************
io.c -- input/output (modified for in-memory I/O)
Adapted from "ar" archiver written by Haruhiko Okumura.
This version reads compressed bytes from an input buffer
via mem_getc() and writes output bytes to a buffer via
mem_putc(), removing all FILE* dependencies for decompression.
Adapted from Haruhiko Okumura's “ar” archiver.
This version feeds compressed bytes from a memory buffer
(via mem_getc()) and writes decompressed output to a buffer
(via mem_putc()), eliminating FILE* dependencies.
Modified for in-memory decompression by Natalia Portillo, 2025
***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include <limits.h>
#include <limits.h> // Provides CHAR_BIT for bit-width operations
#include "ar.h"
#include "lzh.h"
#include "ar.h" // Archive format constants (e.g., CODE_BIT, NC)
#include "lh5.h" // Declarations for mem_getc(), mem_putc(), buffer state
#include "lzh.h" // LZH algorithm constants (e.g., BITBUFSIZ, DICSIZ)
#include "lh5.h" /* mem_getc(), mem_putc(), in_ptr/in_left, out_ptr/out_left */
//-----------------------------------------------------------------------------
// Global bit-I/O state
//-----------------------------------------------------------------------------
uint16_t bitbuf;
int unpackable;
size_t compsize, origsize;
uint32_t subbitbuf;
int bitcount;
uint16_t bitbuf; // Accumulates bits shifted in from the input stream
int unpackable; // Not used by the decoder (originally an encoder error flag)
// Byte counters (optional diagnostics; not used to gate decompression)
size_t compsize; // Count of output bytes produced (for compression mode)
size_t origsize; // Count of input bytes consumed (for CRC in file I/O)
uint32_t subbitbuf; // Holds the last byte fetched; bits are consumed from here
int bitcount; // How many valid bits remain in subbitbuf
/*
* fillbuf(n) -- shift bitbuf left by n bits and read in n new bits
* now reads bytes directly from in-memory input buffer
*/
//-----------------------------------------------------------------------------
// fillbuf(n)
// Shift the global bitbuf left by n bits, then read in n new bits
// from the input buffer (in-memory) to replenish bitbuf.
//-----------------------------------------------------------------------------
void fillbuf(int n) /* Shift bitbuf n bits left, read n bits */
{
// Make room for n bits
bitbuf <<= n;
// While we still need more bits than we have in subbitbuf...
while(n > bitcount)
{
// Pull any remaining bits from subbitbuf into bitbuf
bitbuf |= subbitbuf << (n -= bitcount);
/* fetch next compressed byte from in_buf */
// Fetch the next compressed byte from input memory
{
int c = mem_getc();
int c = mem_getc(); // next input byte, or EOF when the buffer is exhausted
subbitbuf = (c == EOF ? 0 : (uint8_t)c);
}
// Reset bitcount: a full new byte is available
bitcount = CHAR_BIT;
}
// Finally, consume the last n bits from subbitbuf into bitbuf
bitbuf |= subbitbuf >> (bitcount -= n);
}
/*
* getbits(n) -- return next n bits from the bit buffer
*/
//-----------------------------------------------------------------------------
// getbits(n)
// Return the next n bits from bitbuf (highest-order bits), then
// call fillbuf(n) to replace them. Useful for reading variable-length codes.
//-----------------------------------------------------------------------------
uint32_t getbits(int n)
{
uint32_t x = bitbuf >> (BITBUFSIZ - n);
fillbuf(n);
uint32_t x = bitbuf >> (BITBUFSIZ - n); // extract top n bits
fillbuf(n); // replenish bitbuf for future reads
return x;
}
/*
* putbits(n,x) -- write the lowest n bits of x to the bit buffer
* now writes bytes directly to in-memory output buffer
*/
//-----------------------------------------------------------------------------
// putbits(n, x)
// Write the lowest n bits of x into the output buffer, packing them
// into bytes via subbitbuf/bitcount and sending full bytes out
// with mem_putc(). Used by the encoder; kept here for completeness.
//-----------------------------------------------------------------------------
void putbits(int n, uint32_t x) /* Write rightmost n bits of x */
{
// If we have enough room in subbitbuf, just pack the bits
if(n < bitcount) { subbitbuf |= x << (bitcount -= n); }
else
{
/* output first byte */
// Output the first full byte when subbitbuf fills
{
int w = (int)(subbitbuf | (x >> (n -= bitcount)));
mem_putc(w);
compsize++;
compsize++; // increment output counter (for compression)
}
// If remaining bits don't fill a full byte, stash them
if(n < CHAR_BIT) { subbitbuf = x << (bitcount = CHAR_BIT - n); }
else
{
/* output second byte */
// Otherwise, flush a second full byte
{
int w2 = (int)(x >> (n - CHAR_BIT));
mem_putc(w2);
compsize++;
}
// And stash any leftover bits beyond two bytes
subbitbuf = x << (bitcount = 2 * CHAR_BIT - n);
}
}
}
/*
* init_getbits -- initialize bit reader state
*/
//-----------------------------------------------------------------------------
// init_getbits()
// Reset the bit-reader state so that fillbuf() will load fresh bits
// from the start of the input buffer.
//-----------------------------------------------------------------------------
void init_getbits()
{
bitbuf = 0;
subbitbuf = 0;
bitcount = 0;
fillbuf(BITBUFSIZ);
bitbuf = 0; // clear accumulated bits
subbitbuf = 0; // no pending byte
bitcount = 0; // no bits available
fillbuf(BITBUFSIZ); // pre-load the bit buffer fully
}
/*
* init_putbits -- initialize bit writer state
*/
//-----------------------------------------------------------------------------
// init_putbits()
// Reset the bit-writer state so subsequent putbits() calls start fresh.
//-----------------------------------------------------------------------------
void init_putbits()
{
bitcount = CHAR_BIT;
subbitbuf = 0;
bitcount = CHAR_BIT; // subbitbuf is empty but ready for CHAR_BIT bits
subbitbuf = 0; // clear any leftover byte data
}

View File

@@ -3,75 +3,128 @@
/***********************************************************
maketbl.c -- make table for decoding
Adapted from "ar" archiver written by Haruhiko Okumura.
Builds a fast lookup table + fallback tree for Huffman
codes given code lengths. Used by decode_c() to map
input bit patterns to symbols efficiently.
Adapted from Haruhiko Okumura's “ar” archiver.
Modified for in-memory decompression by Natalia Portillo, 2025
***********************************************************/
// Modified for in-memory decompression by Natalia Portillo, 2025
#include "ar.h"
#include "lzh.h"
#include <stdio.h>
#include "ar.h" // provides NC, CODE_BIT, etc.
#include "lzh.h" // provides BITBUFSIZ
/*
* make_table(nchar, bitlen, tablebits, table):
*
* nchar = number of symbols
* bitlen[] = array of code lengths for each symbol [0..nchar-1]
* tablebits = number of bits for fast direct lookup
* table[] = output table of size (1<<tablebits), entries are:
* - symbol index if code length ≤ tablebits
* - zero or tree node index to follow for longer codes
*
* Algorithm steps:
* 1) Count how many codes of each length (count[1..16]).
* 2) Compute 'start' offsets for each length in a 16-bit code space.
* 3) Normalize starts to 'tablebits' prefix domain, build 'weight'.
* 4) Fill direct-mapped entries for short codes.
* 5) Build binary tree (using left[]/right[]) for codes longer than tablebits.
*/
void make_table(int nchar, uint8_t *bitlen, int tablebits, uint16_t *table)
{
uint16_t count[17], weight[17], start[18], *p;
uint32_t i, k, len, ch, jutbits, avail, nextcode, mask;
uint16_t count[17]; // count[L] = number of symbols with length L
uint16_t weight[17]; // weight[L] = step size in prefix domain for length L
uint16_t start[18]; // start[L] = base code for length L in 16-bit space
uint16_t *p; // pointer into 'table' or tree
uint32_t i, k, len, ch;
uint32_t jutbits; // bits to drop when mapping into tablebits
uint32_t avail; // next free node index for left[]/right[] tree
uint32_t nextcode; // end-of-range code for current length
uint32_t mask; // bitmask for tree insertion
// 1) Zero counts, then tally code-lengths
for(i = 1; i <= 16; i++) count[i] = 0;
for(i = 0; i < nchar; i++) count[bitlen[i]]++;
for(i = 0; i < (uint32_t)nchar; i++) count[bitlen[i]]++;
// 2) Compute cumulative start positions in the 16-bit code space
start[1] = 0;
for(i = 1; i <= 16; i++) start[i + 1] = start[i] + (count[i] << (16 - i));
if(start[17] != (uint16_t)((unsigned)1 << 16)) fprintf(stderr, "Bad decode table\n");
// Validate: sum of all codes must fill 16-bit range
if(start[17] != (uint16_t)(1U << 16)) fprintf(stderr, "make_table: Bad decode table\n");
// Prepare for mapping into tablebits-bit table
jutbits = 16 - tablebits;
for(i = 1; i <= tablebits; i++)
for(i = 1; i <= (uint32_t)tablebits; i++)
{
// Shrink start[i] into prefix domain
start[i] >>= jutbits;
weight[i] = (unsigned)1 << (tablebits - i);
}
while(i <= 16)
{
weight[i] = (unsigned)1 << (16 - i);
i++;
// Weight = 2^(tablebits - i)
weight[i] = (uint16_t)(1U << (tablebits - i));
}
// For lengths > tablebits, weight = 2^(16 - length)
for(; i <= 16; i++) weight[i] = (uint16_t)(1U << (16 - i));
// 3) Clear any unused table slots between last short code and end
i = start[tablebits + 1] >> jutbits;
if(i != (uint16_t)((unsigned)1 << 16))
if(i != (uint16_t)(1U << tablebits))
{
k = 1 << tablebits;
while(i != k) table[i++] = 0;
k = 1U << tablebits;
while(i < k) table[i++] = 0;
}
// Initialize tree node index after the direct table entries
avail = nchar;
mask = (unsigned)1 << (15 - tablebits);
for(ch = 0; ch < nchar; ch++)
// Mask for inspecting bits when building tree
mask = 1U << (15 - tablebits);
// 4) For each symbol, place its codes in table or tree
for(ch = 0; ch < (uint32_t)nchar; ch++)
{
if((len = bitlen[ch]) == 0) continue;
len = bitlen[ch];
if(len == 0) continue; // skip symbols with no code
// Next code range = [start[len], start[len]+weight[len])
nextcode = start[len] + weight[len];
if(len <= tablebits)
{
for(i = start[len]; i < nextcode; i++) table[i] = ch;
// Direct mapping: fill all table slots in this range
for(k = start[len]; k < nextcode; k++) table[k] = (uint16_t)ch;
}
else
{
k = start[len];
p = &table[k >> jutbits];
i = len - tablebits;
while(i != 0)
// Build or extend tree for longer codes
// Start at table index for this prefix
k = start[len];
p = &table[k >> jutbits];
// Number of extra bits beyond tablebits
uint32_t extra = len - tablebits;
// Walk/construct tree nodes bit by bit
while(extra-- > 0)
{
if(*p == 0)
{
right[avail] = left[avail] = 0;
*p = avail++;
// allocate a new node for left[]/right[]
left[avail] = right[avail] = 0;
*p = (uint16_t)avail++;
}
// branch left or right based on current code bit
if(k & mask)
p = &right[*p];
else
p = &left[*p];
// shift to next bit in code
k <<= 1;
i--;
}
*p = ch;
// At leaf: assign symbol
*p = (uint16_t)ch;
}
// Advance start[len] for next code of same length
start[len] = nextcode;
}
}