Implement LZD from ZOO (method 1), heavily modified to support .NET streaming mechanisms.

This commit is contained in:
2025-08-23 22:07:11 +01:00
parent 17d4446fb1
commit 6637fb486f
6 changed files with 574 additions and 38 deletions

View File

@@ -128,7 +128,9 @@ endif()
add_subdirectory(3rdparty) add_subdirectory(3rdparty)
add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h adc.c adc.h lzip.c flac.c flac.h) add_library("Aaru.Compression.Native" SHARED library.c apple_rle.c apple_rle.h adc.c adc.h lzip.c flac.c flac.h
zoo/lzd.c
zoo/lzd.h)
include(3rdparty/bzip2.cmake) include(3rdparty/bzip2.cmake)
include(3rdparty/flac.cmake) include(3rdparty/flac.cmake)

129
library.h
View File

@@ -54,55 +54,112 @@
#define FORCE_INLINE static inline __attribute__((always_inline)) #define FORCE_INLINE static inline __attribute__((always_inline))
#endif #endif
AARU_EXPORT int32_t AARU_CALL AARU_adc_decode_buffer(uint8_t *dst_buffer, int32_t dst_size, const uint8_t *src_buffer, AARU_EXPORT int32_t AARU_CALL AARU_adc_decode_buffer(uint8_t * dst_buffer,
int32_t src_size); int32_t dst_size,
const uint8_t *src_buffer,
int32_t src_size);
AARU_EXPORT int32_t AARU_CALL AARU_apple_rle_decode_buffer(uint8_t *dst_buffer, int32_t dst_size, AARU_EXPORT int32_t AARU_CALL AARU_apple_rle_decode_buffer(uint8_t * dst_buffer,
const uint8_t *src_buffer, int32_t src_size); int32_t dst_size,
const uint8_t *src_buffer,
int32_t src_size);
AARU_EXPORT size_t AARU_CALL AARU_flac_decode_redbook_buffer(uint8_t *dst_buffer, size_t dst_size, AARU_EXPORT size_t AARU_CALL AARU_flac_decode_redbook_buffer(uint8_t * dst_buffer,
const uint8_t *src_buffer, size_t src_size); size_t dst_size,
const uint8_t *src_buffer,
size_t src_size);
AARU_EXPORT size_t AARU_CALL AARU_flac_encode_redbook_buffer( AARU_EXPORT size_t AARU_CALL AARU_flac_encode_redbook_buffer(uint8_t * dst_buffer,
uint8_t *dst_buffer, size_t dst_size, const uint8_t *src_buffer, size_t src_size, uint32_t blocksize, size_t dst_size,
int32_t do_mid_side_stereo, int32_t loose_mid_side_stereo, const char *apodization, uint32_t max_lpc_order, const uint8_t *src_buffer,
uint32_t qlp_coeff_precision, int32_t do_qlp_coeff_prec_search, int32_t do_exhaustive_model_search, size_t src_size,
uint32_t min_residual_partition_order, uint32_t max_residual_partition_order, const char *application_id, uint32_t blocksize,
uint32_t application_id_len); int32_t do_mid_side_stereo,
int32_t loose_mid_side_stereo,
const char * apodization,
uint32_t max_lpc_order,
uint32_t qlp_coeff_precision,
int32_t do_qlp_coeff_prec_search,
int32_t do_exhaustive_model_search,
uint32_t min_residual_partition_order,
uint32_t max_residual_partition_order,
const char * application_id,
uint32_t application_id_len);
AARU_EXPORT int32_t AARU_CALL AARU_lzip_decode_buffer(uint8_t *dst_buffer, int32_t dst_size, const uint8_t *src_buffer, AARU_EXPORT int32_t AARU_CALL AARU_lzip_decode_buffer(uint8_t * dst_buffer,
int32_t src_size); int32_t dst_size,
const uint8_t *src_buffer,
int32_t src_size);
AARU_EXPORT int32_t AARU_CALL AARU_lzip_encode_buffer(uint8_t *dst_buffer, int32_t dst_size, const uint8_t *src_buffer, AARU_EXPORT int32_t AARU_CALL AARU_lzip_encode_buffer(uint8_t * dst_buffer,
int32_t src_size, int32_t dictionary_size, int32_t dst_size,
int32_t match_len_limit); const uint8_t *src_buffer,
int32_t src_size,
int32_t dictionary_size,
int32_t match_len_limit);
AARU_EXPORT int32_t AARU_CALL AARU_bzip2_decode_buffer(uint8_t *dst_buffer, uint32_t *dst_size, AARU_EXPORT int32_t AARU_CALL AARU_bzip2_decode_buffer(uint8_t * dst_buffer,
const uint8_t *src_buffer, uint32_t src_size); uint32_t * dst_size,
const uint8_t *src_buffer,
uint32_t src_size);
AARU_EXPORT int32_t AARU_CALL AARU_bzip2_encode_buffer(uint8_t *dst_buffer, uint32_t *dst_size, AARU_EXPORT int32_t AARU_CALL AARU_bzip2_encode_buffer(uint8_t * dst_buffer,
const uint8_t *src_buffer, uint32_t src_size, uint32_t * dst_size,
int32_t blockSize100k); const uint8_t *src_buffer,
uint32_t src_size,
int32_t blockSize100k);
AARU_EXPORT size_t AARU_CALL AARU_lzfse_decode_buffer(uint8_t *dst_buffer, size_t dst_size, const uint8_t *src_buffer, AARU_EXPORT size_t AARU_CALL AARU_lzfse_decode_buffer(uint8_t * dst_buffer,
size_t src_size, void *scratch_buffer); size_t dst_size,
const uint8_t *src_buffer,
size_t src_size,
void * scratch_buffer);
AARU_EXPORT size_t AARU_CALL AARU_lzfse_encode_buffer(uint8_t *dst_buffer, size_t dst_size, const uint8_t *src_buffer, AARU_EXPORT size_t AARU_CALL AARU_lzfse_encode_buffer(uint8_t * dst_buffer,
size_t src_size, void *scratch_buffer); size_t dst_size,
const uint8_t *src_buffer,
size_t src_size,
void * scratch_buffer);
AARU_EXPORT int32_t AARU_CALL AARU_lzma_decode_buffer(uint8_t *dst_buffer, size_t *dst_size, const uint8_t *src_buffer, AARU_EXPORT int32_t AARU_CALL AARU_lzma_decode_buffer(uint8_t * dst_buffer,
size_t *src_size, const uint8_t *props, size_t propsSize); size_t * dst_size,
const uint8_t *src_buffer,
size_t * src_size,
const uint8_t *props,
size_t propsSize);
AARU_EXPORT int32_t AARU_CALL AARU_lzma_encode_buffer(uint8_t *dst_buffer, size_t *dst_size, const uint8_t *src_buffer, AARU_EXPORT int32_t AARU_CALL AARU_lzma_encode_buffer(uint8_t * dst_buffer,
size_t src_size, uint8_t *outProps, size_t *outPropsSize, size_t * dst_size,
int32_t level, uint32_t dictSize, int32_t lc, int32_t lp, const uint8_t *src_buffer,
int32_t pb, int32_t fb, int32_t numThreads); size_t src_size,
uint8_t * outProps,
size_t * outPropsSize,
int32_t level,
uint32_t dictSize,
int32_t lc,
int32_t lp,
int32_t pb,
int32_t fb,
int32_t numThreads);
AARU_EXPORT size_t AARU_CALL AARU_zstd_decode_buffer(void *dst_buffer, size_t dst_size, const void *src_buffer, AARU_EXPORT size_t AARU_CALL AARU_zstd_decode_buffer(void * dst_buffer,
size_t src_size); size_t dst_size,
const void *src_buffer,
size_t src_size);
AARU_EXPORT size_t AARU_CALL AARU_zstd_encode_buffer(void *dst_buffer, size_t dst_size, const void *src_buffer, AARU_EXPORT size_t AARU_CALL AARU_zstd_encode_buffer(void * dst_buffer,
size_t src_size, int32_t compressionLevel); size_t dst_size,
const void *src_buffer,
size_t src_size,
int32_t compressionLevel);
// Allocates and initializes an LZD (zoo method 1) decoder context. Returns NULL on failure.
AARU_EXPORT void * AARU_CALL CreateLZDContext(void);
// Releases a context returned by CreateLZDContext. Passing NULL is a no-op.
AARU_EXPORT void AARU_CALL DestroyLZDContext(void *ctx);
// Hands `length` bytes of compressed input to the decoder. The pointer is stored, not copied,
// so `data` must stay valid while LZD_DrainNative consumes it. Returns an LZDStatus value as int.
AARU_EXPORT int AARU_CALL LZD_FeedNative(void *ctx, const unsigned char *data, size_t length);
// Decodes up to `outBufLen` bytes into `outBuf`, stores the byte count in `*produced` and
// returns an LZDStatus value as int.
AARU_EXPORT int AARU_CALL LZD_DrainNative(void *ctx, unsigned char *outBuf, size_t outBufLen, size_t *produced);
#define AARU_CHECKUMS_NATIVE_VERSION 0x06000089 #define AARU_CHECKUMS_NATIVE_VERSION 0x06000089

View File

@@ -39,7 +39,11 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/audio.bin
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/data.bin file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/data.bin
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/) DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/alice29.lzd
DESTINATION ${CMAKE_CURRENT_BINARY_DIR}/data/)
# 'Google_Tests_run' is the target name # 'Google_Tests_run' is the target name
# 'test1.cpp tests2.cpp' are source files with tests # 'test1.cpp tests2.cpp' are source files with tests
add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp) add_executable(tests_run apple_rle.cpp crc32.c crc32.h adc.cpp bzip2.cpp lzip.cpp lzfse.cpp zstd.cpp lzma.cpp flac.cpp
zoo/lzd.cpp)
target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native") target_link_libraries(tests_run gtest gtest_main "Aaru.Compression.Native")

182
tests/zoo/lzd.cpp Normal file
View File

@@ -0,0 +1,182 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#include <climits>
#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <sys/stat.h>
#include <unistd.h>
#include "../library.h"
#include "../crc32.h"
#include "gtest/gtest.h"
// Size in bytes of the input and output staging buffers used by the test
#define OUTPUT_CHUNK 8192
// Expected size in bytes of the decompressed output
#define EXPECTED_SIZE 152089
// Expected CRC32 of the decompressed output
#define EXPECTED_CRC32_VAL 0x66007dba
// Returns the size in bytes of the file at `path`, or -1 when stat() fails.
static long long filesize_of(const char* path)
{
    struct stat info;
    const bool  found = stat(path, &info) == 0;

    return found ? static_cast<long long>(info.st_size) : -1;
}
// Test fixture that opens data/alice29.lzd (relative to the current working
// directory) for reading and records its on-disk size.
class lzdFixture : public ::testing::Test
{
protected:
    void SetUp() override
    {
        char cwd[PATH_MAX];
        char filename[PATH_MAX];

        // getcwd() leaves the buffer undefined on failure, so do not use it blindly
        ASSERT_NE(getcwd(cwd, PATH_MAX), nullptr) << "Failed to get current working directory";

        // Reject a truncated path instead of silently opening the wrong file
        const int written = snprintf(filename, PATH_MAX, "%s/data/alice29.lzd", cwd);
        ASSERT_TRUE(written > 0 && written < PATH_MAX) << "Input file path is too long";

        inFile = fopen(filename, "rb");
        ASSERT_NE(inFile, nullptr) << "Failed to open input file";

        fileSize = filesize_of(filename);
    }

    void TearDown() override
    {
        if(inFile)
        {
            fclose(inFile);
            inFile = nullptr; // avoid leaving a dangling stream pointer behind
        }
    }

    FILE*     inFile   = nullptr; // compressed input stream, owned by the fixture
    long long fileSize = 0;       // size of the compressed input, or -1 if stat() failed
};
// Status codes returned (as int) by LZD_FeedNative / LZD_DrainNative.
// Test-local copy of the values declared in zoo/lzd.h; the two must be kept in sync.
typedef enum {
LZD_OK = 0,
LZD_NEED_INPUT = 1,
LZD_NEED_OUTPUT = 2,
LZD_DONE = 3
} LZDStatus;
// Streams data/alice29.lzd through the native LZD decoder in OUTPUT_CHUNK-sized
// pieces and checks the decompressed size, its CRC32 and the number of input
// bytes consumed.
TEST_F(lzdFixture, ZooMethod1)
{
    unsigned char inbuf[OUTPUT_CHUNK];
    unsigned char outbuf[OUTPUT_CHUNK];
    bool          flushed   = false;
    bool          eof       = false;
    size_t        total_out = 0;
    size_t        total_in  = 0;
    size_t        iter      = 0;

    fprintf(stderr, "INPUT FILE SIZE=%lld bytes\n", fileSize);

    void* ctx = CreateLZDContext();
    ASSERT_NE(ctx, nullptr) << "Failed to create LZD context";

    // allocate buffer for full output to CRC at the end
    uint8_t* full_out = (uint8_t*)malloc(EXPECTED_SIZE);
    if(full_out == nullptr)
    {
        DestroyLZDContext(ctx); // do not leak the decoder when the buffer cannot be allocated
        FAIL() << "Failed to allocate output buffer";
    }

    const size_t capacity    = EXPECTED_SIZE;
    size_t       full_offset = 0;
    bool         overflow    = false;

    // Accounts for `count` freshly decoded bytes and stores as many of them as
    // still fit in full_out, so a misbehaving decoder cannot overrun the buffer.
    auto append = [&](size_t count)
    {
        total_out += count;

        const size_t room = capacity - full_offset;
        if(count > room)
        {
            overflow = true;
            count    = room;
        }

        if(count > 0)
        {
            memcpy(full_out + full_offset, outbuf, count);
            full_offset += count;
        }
    };

    while(!eof)
    {
        size_t nread = fread(inbuf, 1, sizeof inbuf, inFile);

        if(nread == 0)
        {
            fprintf(stderr, "[FEED] size=0 flushed=0 (final empty feed)\n");
            LZD_FeedNative(ctx, nullptr, 0);
            flushed = true;
        }
        else
        {
            total_in += nread;
            fprintf(stderr, "[FEED] size=%zu flushed=0 (real data) total_in=%zu\n",
                    nread, total_in);
            LZD_FeedNative(ctx, inbuf, nread);
        }

        size_t produced = 0;
        int    st       = LZD_OK;

        do
        {
            produced = 0;
            st       = LZD_DrainNative(ctx, outbuf, sizeof outbuf, &produced);

            fprintf(stderr, "-- LOOP iter=%zu total_out=%zu --\n", iter++, total_out);
            fprintf(stderr, "[DRAIN] produced=%zu status=%d flushed=%d eof=%d\n",
                    produced, st, flushed ? 1 : 0, eof ? 1 : 0);

            if(produced > 0) append(produced);

            if(st == LZD_DONE)
            {
                fprintf(stderr, ">>> SET eof=1 (DONE from decoder)\n");
                eof = true;
            }
            // Stop at end of stream: anything after the EOF code is padding, not data
        } while(produced > 0 && !eof);

        if(flushed)
        {
            // Input is exhausted: give the decoder a few more chances to emit pending output
            for(int spins = 0; spins < 8 && !eof; spins++)
            {
                size_t more = 0;
                int    st2  = LZD_DrainNative(ctx, outbuf, sizeof outbuf, &more);

                fprintf(stderr, "-- LOOP iter=%zu total_out=%zu --\n", iter++, total_out);
                fprintf(stderr, "[DRAIN] produced=%zu status=%d flushed=1 eof=%d\n",
                        more, st2, eof ? 1 : 0);

                if(more > 0)
                {
                    append(more);
                }
                else if(st2 == LZD_DONE)
                {
                    fprintf(stderr, ">>> SET eof=1 (DONE after flush)\n");
                    eof = true;
                }
                else
                {
                    break;
                }
            }

            if(!eof)
            {
                fprintf(stderr, ">>> SET eof=1 (no more data and already flushed)\n");
                eof = true;
            }
        }
    }

    fprintf(stderr, "\nTOTAL IN=%zu bytes\n", total_in);
    fprintf(stderr, "TOTAL OUT=%zu bytes\n", total_out);

    DestroyLZDContext(ctx);

    // Now verify the decompressed size and CRC
    EXPECT_FALSE(overflow) << "Decoder produced more data than expected";
    EXPECT_EQ(total_out, static_cast<size_t>(EXPECTED_SIZE));

    // Only the bytes actually stored are hashed; total_out may exceed the buffer on overflow
    uint32_t crc = crc32_data(full_out, full_offset);
    free(full_out);

    EXPECT_EQ(crc, static_cast<uint32_t>(EXPECTED_CRC32_VAL));
    EXPECT_EQ(total_in, static_cast<size_t>(fileSize));
}

205
zoo/lzd.c Normal file
View File

@@ -0,0 +1,205 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
// Lempel-Ziv decompression (LZD, zoo method 1) based on the public domain code
// written by Rahul Dhesi for zoo
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include "lzd.h"
#include "../library.h"
/* Resets the dictionary state: 9-bit codes, the first free slot placed at
 * FIRST_FREE and no previous code remembered. The table contents themselves
 * are left untouched. */
static void init_dict(LZDContext *ctx)
{
    ctx->have_old  = 0;
    ctx->free_code = FIRST_FREE;
    ctx->nbits     = 9;
    ctx->max_code  = 1u << ctx->nbits;
}
/* Follows the head[] links from `code` down to a literal and returns that
 * literal (the first byte of the string the code stands for).
 * Returns -1 when a link points outside the table or when more than MAXMAX
 * links have been followed; a negative value read from head[] ends the walk
 * and is returned as is. */
static int firstch(LZDContext *ctx, int code)
{
    for(int hops = 1; code > 255; hops++)
    {
        if((unsigned)code > MAXMAX || hops > (int)MAXMAX) return -1;

        code = ctx->head[code];
    }

    return code;
}
/* Tops up the bit reservoir, least significant bits first, until it holds at
 * least one full code of the current width.
 * Returns 0 on success, or -1 when the fed input runs out first; bits loaded
 * so far stay in the reservoir for the next attempt. */
static int fill_bits(LZDContext *ctx)
{
    for(;;)
    {
        if(ctx->bitcount >= (int)ctx->nbits) return 0;
        if(ctx->in_pos >= ctx->in_len) return -1;

        const uint64_t next_byte = ctx->in_ptr[ctx->in_pos];
        ctx->in_pos++;

        ctx->bitbuf |= next_byte << ctx->bitcount;
        ctx->bitcount += 8;
    }
}
/* Extracts the next code of the current width from the bit reservoir.
 * Returns the code, or -1 when there is not enough input to complete it. */
static int read_code(LZDContext *ctx)
{
    if(fill_bits(ctx) < 0) return -1;

    const unsigned width = ctx->nbits;
    const int      value = (int)(ctx->bitbuf & masks[width]);

    /* Drop the bits that were just consumed */
    ctx->bitbuf >>= width;
    ctx->bitcount -= width;

    return value;
}
/**
 * Initializes a decoder context: clears it, allocates the dictionary tables
 * and the expansion stack, and resets the dictionary and bit reservoir.
 *
 * @param ctx Context to initialize. Any previous contents are overwritten
 *            without being freed.
 * @return LZD_OK on success. LZD_NEED_INPUT when ctx is NULL or an allocation
 *         fails; in that case nothing stays allocated and the table pointers
 *         are NULL, so the context can be safely passed to LZD_Destroy().
 */
LZDStatus LZD_Init(LZDContext *ctx)
{
    if(!ctx) return LZD_NEED_INPUT;

    /* Also zeroes the bit reservoir and the input window */
    memset(ctx, 0, sizeof *ctx);

    /* calloc so entries that the stream has not defined yet hold zeros rather
     * than indeterminate values */
    ctx->head  = calloc(MAXMAX + 1, sizeof *ctx->head);
    ctx->tail  = calloc(MAXMAX + 1, sizeof *ctx->tail);
    ctx->stack = calloc(MAXMAX + 1, sizeof *ctx->stack);

    if(!ctx->head || !ctx->tail || !ctx->stack)
    {
        /* Release whichever allocations succeeded instead of leaking them */
        free(ctx->head);
        free(ctx->tail);
        free(ctx->stack);
        ctx->head  = NULL;
        ctx->tail  = NULL;
        ctx->stack = NULL;

        return LZD_NEED_INPUT;
    }

    /* Literal codes stand for themselves and have no parent */
    for(int i = 0; i < 256; i++)
    {
        ctx->head[i] = -1;
        ctx->tail[i] = (uint8_t)i;
    }

    /* The stack grows downwards from stack_lim; stack_ptr == stack_lim means empty */
    ctx->stack_lim = ctx->stack + (MAXMAX + 1);
    ctx->stack_ptr = ctx->stack_lim;

    init_dict(ctx);

    return LZD_OK;
}
/**
 * Points the decoder at a new chunk of compressed input.
 *
 * Only the pointer is stored, so `in` must remain valid while LZD_Drain()
 * reads from it. Any bytes of a previously fed chunk that were not consumed
 * yet are dropped; bits already loaded into the bit reservoir are kept.
 *
 * @param ctx    Decoder context.
 * @param in     Compressed input bytes.
 * @param in_len Number of bytes available at `in`.
 * @return Always LZD_OK.
 */
LZDStatus LZD_Feed(LZDContext *ctx, const unsigned char *in, size_t in_len)
{
    /* Restart reading at the beginning of the new window */
    ctx->in_pos = 0;
    ctx->in_len = in_len;
    ctx->in_ptr = in;

    return LZD_OK;
}
/**
 * Decodes as much of the fed input as fits in the output buffer.
 *
 * @param ctx          Decoder context.
 * @param out          Buffer receiving decompressed bytes.
 * @param out_len      Capacity of `out` in bytes.
 * @param out_produced Receives the number of bytes written to `out`. It is set
 *                     on every return path.
 * @return LZD_OK when bytes were produced (more may follow), LZD_NEED_INPUT
 *         when nothing could be produced because the fed input is exhausted,
 *         LZD_DONE when the end-of-stream code was read or the stream was
 *         found to be corrupt.
 */
LZDStatus LZD_Drain(LZDContext *ctx, unsigned char *out, size_t out_len, size_t *out_produced)
{
    size_t outpos = 0;

    while(outpos < out_len)
    {
        /* Emit what is left of the previously expanded string first */
        if(ctx->stack_ptr < ctx->stack_lim)
        {
            out[outpos++] = (uint8_t)*ctx->stack_ptr++;
            continue;
        }

        int raw = read_code(ctx);
        if(raw < 0)
        {
            *out_produced = outpos;
            return outpos > 0 ? LZD_OK : LZD_NEED_INPUT;
        }

        unsigned code = (unsigned)raw;

        if(code == CLEAR)
        {
            init_dict(ctx);

            /* The code right after CLEAR is emitted directly and starts a new chain */
            int lit = read_code(ctx);
            if(lit < 0)
            {
                *out_produced = outpos;
                return outpos > 0 ? LZD_OK : LZD_NEED_INPUT;
            }

            ctx->old_code = (unsigned)lit;
            ctx->have_old = 1;
            out[outpos++] = (uint8_t)lit;
            continue;
        }

        if(code == Z_EOF)
        {
            *out_produced = outpos;
            return LZD_DONE;
        }

        unsigned in_code = code;

        if(code >= ctx->free_code)
        {
            /* Code not in the table yet: it stands for the previous string
             * followed by that string's own first byte */
            int fc = ctx->have_old ? firstch(ctx, (int)ctx->old_code) : -1;
            if(fc < 0)
            {
                /* Corrupt stream; still report the bytes already written */
                *out_produced = outpos;
                return LZD_DONE;
            }

            *--ctx->stack_ptr = (char)fc;
            code              = ctx->old_code;
        }

        /* Expand the string back to front onto the stack. Every link must be
         * an entry that has actually been defined, and the stack must have
         * room; otherwise the stream is corrupt (undefined or cyclic chain). */
        int corrupt = 0;
        while(code > 255)
        {
            if(code < FIRST_FREE || code >= ctx->free_code || ctx->stack_ptr == ctx->stack)
            {
                corrupt = 1;
                break;
            }

            *--ctx->stack_ptr = (char)ctx->tail[code];
            code              = (unsigned)ctx->head[code];
        }

        /* One more slot is needed for the first byte of the string */
        if(!corrupt && ctx->stack_ptr == ctx->stack) corrupt = 1;

        if(corrupt)
        {
            /* Discard the partial expansion so a later call does not emit it */
            ctx->stack_ptr = ctx->stack_lim;
            *out_produced  = outpos;
            return LZD_DONE;
        }

        uint8_t first_byte = (uint8_t)code;
        *--ctx->stack_ptr  = (char)first_byte;

        /* New entry: previous string plus the first byte of the current one */
        if(ctx->have_old && ctx->free_code <= MAXMAX)
        {
            ctx->tail[ctx->free_code] = first_byte;
            ctx->head[ctx->free_code] = (int)ctx->old_code;
            ctx->free_code++;

            /* Widen the codes once the current width is used up */
            if(ctx->free_code >= ctx->max_code && ctx->nbits < MAXBITS)
            {
                ctx->nbits++;
                ctx->max_code <<= 1;
            }
        }

        ctx->old_code = in_code;
        ctx->have_old = 1;
    }

    *out_produced = outpos;
    return LZD_OK;
}
/**
 * Releases the tables and stack owned by a context. The context structure
 * itself is not freed. Safe to call with NULL and safe to call more than once
 * on the same context.
 *
 * @param ctx Context initialized with LZD_Init(), or NULL.
 */
AARU_EXPORT void LZD_Destroy(LZDContext *ctx)
{
    if(!ctx) return;

    free(ctx->head);
    free(ctx->tail);
    free(ctx->stack);

    /* Clear the pointers so a repeated call cannot double-free */
    ctx->head      = NULL;
    ctx->tail      = NULL;
    ctx->stack     = NULL;
    ctx->stack_lim = NULL;
    ctx->stack_ptr = NULL;
}
/**
 * Allocates and initializes a decoder context for use through the opaque
 * native API.
 *
 * @return The new context, to be released with DestroyLZDContext(), or NULL
 *         when memory could not be allocated.
 */
AARU_EXPORT void *CreateLZDContext(void)
{
    LZDContext *c = malloc(sizeof *c);
    if(!c) return NULL;

    if(LZD_Init(c) != LZD_OK)
    {
        /* LZD_Init may have allocated some of the tables before failing */
        LZD_Destroy(c);
        free(c);
        return NULL;
    }

    return c;
}
/**
 * Releases a context created by CreateLZDContext(): first the buffers it
 * owns, then the context itself. Passing NULL does nothing.
 */
AARU_EXPORT void DestroyLZDContext(void *ctx)
{
    if(!ctx) return;

    LZD_Destroy(ctx);
    free(ctx);
}
AARU_EXPORT int LZD_FeedNative(void *ctx, const unsigned char *data, size_t length)
{
return (int)LZD_Feed(ctx, data, length);
}
AARU_EXPORT int LZD_DrainNative(void *ctx, unsigned char *outBuf, size_t outBufLen, size_t *produced)
{
return (int)LZD_Drain(ctx, outBuf, outBufLen, produced);
}

86
zoo/lzd.h Normal file
View File

@@ -0,0 +1,86 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* This library is free software; you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of the
* License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
*/
#ifndef LZD_H
#define LZD_H
#include <stddef.h>
#include <stdint.h>
// Widest code, in bits, the decoder will read
#define MAXBITS 13
// Highest code value; the dictionary tables hold MAXMAX + 1 entries
#define MAXMAX ((1U << MAXBITS) - 1) // 8191
#define FIRST_FREE 258 // first free code after CLEAR+EOF
// Reserved code: reset the dictionary
#define CLEAR 256
// Reserved code: end of the compressed stream
#define Z_EOF 257
// Result codes of the LZD API.
typedef enum
{
// LZD_Drain: output was produced and more may follow. Also the success value of LZD_Init and LZD_Feed.
LZD_OK = 0,
// LZD_Drain: nothing was produced because the fed input is exhausted. LZD_Init also returns it on allocation failure.
LZD_NEED_INPUT = 1,
// Not returned by any function in zoo/lzd.c
LZD_NEED_OUTPUT = 2,
// LZD_Drain: the end-of-stream code was read, or decoding stopped on invalid data
LZD_DONE = 3
} LZDStatus;
// Complete state of one streaming LZD decoder.
typedef struct
{
// Dictionary: head[c] is the code of the prefix string of entry c (-1 for the 256 literals)
int * head;
// Dictionary: tail[c] is the last byte of entry c
uint8_t *tail;
// Current code width in bits (9 up to MAXBITS)
unsigned nbits;
// When free_code reaches this value the code width grows by one bit
unsigned max_code;
// Next dictionary slot to be filled
unsigned free_code;
// Code decoded in the previous step; only meaningful when have_old is non-zero
unsigned old_code;
// Non-zero once old_code holds a valid previous code
int have_old;
// Base of the expansion stack (MAXMAX + 1 bytes)
char *stack;
// One past the end of the stack; the stack grows downwards from here
char *stack_lim;
// Current top of the stack; equal to stack_lim when the stack is empty
char *stack_ptr;
// Bit reservoir; input bytes are added above the bits already held
uint64_t bitbuf;
// Number of valid bits in bitbuf
int bitcount;
// Input window set by LZD_Feed; the pointer is borrowed, not copied
const unsigned char *in_ptr;
// Number of bytes available at in_ptr
size_t in_len;
// Index of the next unread byte in in_ptr
size_t in_pos;
} LZDContext;
// Bit masks indexed by code width: masks[n] keeps the low n bits of the bit
// reservoir. Only widths 9..MAXBITS are populated; the lower entries are zero.
// Being a static table in a header, every file that includes it gets its own copy.
static const unsigned masks[MAXBITS + 1] = {
0,
0,
0,
0,
0,
0,
0,
0,
0,
(1u << 9) - 1,
(1u << 10) - 1,
(1u << 11) - 1,
(1u << 12) - 1,
(1u << 13) - 1
};
// Clears the context and allocates its tables and stack.
// Returns LZD_OK on success, LZD_NEED_INPUT when an allocation fails.
LZDStatus LZD_Init(LZDContext *ctx);
// Sets the input window to `in`/`in_len`. The pointer is stored, not copied,
// so the buffer must stay valid while LZD_Drain reads from it. Returns LZD_OK.
LZDStatus LZD_Feed(LZDContext *ctx, const unsigned char *in, size_t in_len);
// Decodes up to `out_len` bytes into `out` and stores the count in `*out_produced`.
// Returns LZD_OK, LZD_NEED_INPUT or LZD_DONE (see LZDStatus).
LZDStatus LZD_Drain(LZDContext *ctx, unsigned char *out, size_t out_len, size_t *out_produced);
// Frees the tables and stack owned by the context; the context itself is not freed.
void LZD_Destroy(LZDContext *ctx);
#endif // LZD_H