Add MD5 implementation and integrate with existing codebase

This commit is contained in:
2025-10-02 22:16:02 +01:00
parent 1c0c9cbf71
commit 8f9abf7288
6 changed files with 400 additions and 2 deletions

View File

@@ -125,7 +125,9 @@ add_library(aaruformat SHARED include/aaruformat/consts.h include/aaruformat/enu
src/write.c src/write.c
include/log.h include/log.h
src/ddt/hash_map.c src/ddt/hash_map.c
include/aaruformat/hash_map.h) include/aaruformat/hash_map.h
src/md5.c
include/md5.h)
include_directories(include include/aaruformat) include_directories(include include/aaruformat)

View File

@@ -19,6 +19,8 @@
#ifndef LIBAARUFORMAT_DECLS_H #ifndef LIBAARUFORMAT_DECLS_H
#define LIBAARUFORMAT_DECLS_H #define LIBAARUFORMAT_DECLS_H
#include "crc64.h"
#include "md5.h"
#include "simd.h" #include "simd.h"
#include "spamsum.h" #include "spamsum.h"
#ifdef __cplusplus #ifdef __cplusplus
@@ -156,6 +158,11 @@ AARU_EXPORT int32_t AARU_CALL aaruf_lzma_encode_buffer(uint8_t *dst_buffer, size
int32_t level, uint32_t dict_size, int32_t lc, int32_t lp, int32_t level, uint32_t dict_size, int32_t lc, int32_t lp,
int32_t pb, int32_t fb, int32_t num_threads); int32_t pb, int32_t fb, int32_t num_threads);
AARU_EXPORT void AARU_CALL aaruf_md5_init(md5_ctx *ctx);
AARU_EXPORT void AARU_CALL aaruf_md5_update(md5_ctx *ctx, const void *data, unsigned long size);
AARU_EXPORT void AARU_CALL aaruf_md5_final(md5_ctx *ctx, unsigned char *result);
AARU_EXPORT void AARU_CALL aaruf_md5_buffer(const void *data, unsigned long size, unsigned char *result);
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ #if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

39
include/md5.h Normal file
View File

@@ -0,0 +1,39 @@
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* See md5.c for more information.
*/
#ifndef LIBAARUFORMAT_MD5_H
#define LIBAARUFORMAT_MD5_H
#include <stdint.h>
typedef struct
{
uint32_t lo, hi;
uint32_t a, b, c, d;
unsigned char buffer[64];
uint32_t block[16];
} md5_ctx;
#endif /* LIBAARUFORMAT_MD5_H */

292
src/md5.c Normal file
View File

@@ -0,0 +1,292 @@
/*
* This is an OpenSSL-compatible implementation of the RSA Data Security, Inc.
* MD5 Message-Digest Algorithm (RFC 1321).
*
* Homepage:
* http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5
*
* Author:
* Alexander Peslyak, better known as Solar Designer <solar at openwall.com>
*
* This software was written by Alexander Peslyak in 2001. No copyright is
* claimed, and the software is hereby placed in the public domain.
* In case this attempt to disclaim copyright and place the software in the
* public domain is deemed null and void, then the software is
* Copyright (c) 2001 Alexander Peslyak and it is hereby released to the
* general public under the following terms:
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted.
*
* There's ABSOLUTELY NO WARRANTY, express or implied.
*
* (This is a heavily cut-down "BSD license".)
*
* This differs from Colin Plumb's older public domain implementation in that
* no exactly 32-bit integer data type is required (any 32-bit or wider
* unsigned integer data type will do), there's no compile-time endianness
* configuration, and the function prototypes match OpenSSL's. No code from
* Colin Plumb's implementation has been reused; this comment merely compares
* the properties of the two independent implementations.
*
* The primary goals of this implementation are portability and ease of use.
* It is meant to be fast, but not as fast as possible. Some known
* optimizations are not included to reduce source code size and avoid
* compile-time configuration.
*/
#include <stdbool.h>
#include <stdio.h>
#include <string.h>
#include "decls.h"
#include "md5.h"
/*
* The basic MD5 functions.
*
* F and G are optimized compared to their RFC 1321 definitions for
* architectures that lack an AND-NOT instruction, just like in Colin Plumb's
* implementation.
*/
#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y))))
#define H(x, y, z) (((x) ^ (y)) ^ (z))
#define H2(x, y, z) ((x) ^ ((y) ^ (z)))
#define I(x, y, z) ((y) ^ ((x) | ~(z)))
/*
* The MD5 transformation for all four rounds.
*/
#define STEP(f, a, b, c, d, x, t, s) \
(a) += f((b), (c), (d)) + (x) + (t); \
(a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \
(a) += (b);
/*
* SET reads 4 input bytes in little-endian byte order and stores them in a
* properly aligned word in host byte order.
*
* The check for little-endian architectures that tolerate unaligned memory
* accesses is just an optimization. Nothing will break if it fails to detect
* a suitable architecture.
*
* Unfortunately, this optimization may be a C strict aliasing rules violation
* if the caller's data buffer has effective type that cannot be aliased by
* uint32_t. In practice, this problem may occur if these MD5 routines are
* inlined into a calling function, or with future and dangerously advanced
* link-time optimizations. For the time being, keeping these MD5 routines in
* their own translation unit avoids the problem.
*/
#if defined(__i386__) || defined(__x86_64__) || defined(__vax__)
#define SET(n) (*(uint32_t *)&ptr[(n) * 4])
#define GET(n) SET(n)
#else
#define SET(n) \
(ctx->block[(n)] = (uint32_t)ptr[(n) * 4] | ((uint32_t)ptr[(n) * 4 + 1] << 8) | \
((uint32_t)ptr[(n) * 4 + 2] << 16) | ((uint32_t)ptr[(n) * 4 + 3] << 24))
#define GET(n) (ctx->block[(n)])
#endif
/*
* This processes one or more 64-byte data blocks, but does NOT update the bit
* counters. There are no alignment requirements.
*/
static const void *body(md5_ctx *ctx, const void *data, unsigned long size)
{
const unsigned char *ptr;
uint32_t a, b, c, d;
uint32_t saved_a, saved_b, saved_c, saved_d;
ptr = (const unsigned char *)data;
a = ctx->a;
b = ctx->b;
c = ctx->c;
d = ctx->d;
do {
saved_a = a;
saved_b = b;
saved_c = c;
saved_d = d;
/* Round 1 */
STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7)
STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12)
STEP(F, c, d, a, b, SET(2), 0x242070db, 17)
STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22)
STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7)
STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12)
STEP(F, c, d, a, b, SET(6), 0xa8304613, 17)
STEP(F, b, c, d, a, SET(7), 0xfd469501, 22)
STEP(F, a, b, c, d, SET(8), 0x698098d8, 7)
STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12)
STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17)
STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22)
STEP(F, a, b, c, d, SET(12), 0x6b901122, 7)
STEP(F, d, a, b, c, SET(13), 0xfd987193, 12)
STEP(F, c, d, a, b, SET(14), 0xa679438e, 17)
STEP(F, b, c, d, a, SET(15), 0x49b40821, 22)
/* Round 2 */
STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5)
STEP(G, d, a, b, c, GET(6), 0xc040b340, 9)
STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14)
STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20)
STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5)
STEP(G, d, a, b, c, GET(10), 0x02441453, 9)
STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14)
STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20)
STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5)
STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9)
STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14)
STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20)
STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5)
STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9)
STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14)
STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20)
/* Round 3 */
STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4)
STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11)
STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16)
STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23)
STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4)
STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11)
STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16)
STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23)
STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4)
STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11)
STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16)
STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23)
STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4)
STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11)
STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16)
STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23)
/* Round 4 */
STEP(I, a, b, c, d, GET(0), 0xf4292244, 6)
STEP(I, d, a, b, c, GET(7), 0x432aff97, 10)
STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15)
STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21)
STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6)
STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10)
STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15)
STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21)
STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6)
STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10)
STEP(I, c, d, a, b, GET(6), 0xa3014314, 15)
STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21)
STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6)
STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10)
STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15)
STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21)
a += saved_a;
b += saved_b;
c += saved_c;
d += saved_d;
ptr += 64;
} while(size -= 64);
ctx->a = a;
ctx->b = b;
ctx->c = c;
ctx->d = d;
return ptr;
}
AARU_EXPORT void AARU_CALL aaruf_md5_init(md5_ctx *ctx)
{
ctx->a = 0x67452301;
ctx->b = 0xefcdab89;
ctx->c = 0x98badcfe;
ctx->d = 0x10325476;
ctx->lo = 0;
ctx->hi = 0;
}
AARU_EXPORT void AARU_CALL aaruf_md5_update(md5_ctx *ctx, const void *data, unsigned long size)
{
const uint32_t saved_lo = ctx->lo;
if((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) ctx->hi++;
ctx->hi += size >> 29;
const unsigned long used = saved_lo & 0x3f;
if(used)
{
unsigned long available = 64 - used;
if(size < available)
{
memcpy(&ctx->buffer[used], data, size);
return;
}
memcpy(&ctx->buffer[used], data, available);
data = (const unsigned char *)data + available;
size -= available;
body(ctx, ctx->buffer, 64);
}
if(size >= 64)
{
data = body(ctx, data, size & ~(unsigned long)0x3f);
size &= 0x3f;
}
memcpy(ctx->buffer, data, size);
}
#define OUT(dst, src) \
(dst)[0] = (unsigned char)(src); \
(dst)[1] = (unsigned char)((src) >> 8); \
(dst)[2] = (unsigned char)((src) >> 16); \
(dst)[3] = (unsigned char)((src) >> 24);
AARU_EXPORT void AARU_CALL aaruf_md5_final(md5_ctx *ctx, unsigned char *result)
{
unsigned long used = ctx->lo & 0x3f;
ctx->buffer[used++] = 0x80;
unsigned long available = 64 - used;
if(available < 8)
{
memset(&ctx->buffer[used], 0, available);
body(ctx, ctx->buffer, 64);
used = 0;
available = 64;
}
memset(&ctx->buffer[used], 0, available - 8);
ctx->lo <<= 3;
OUT(&ctx->buffer[56], ctx->lo)
OUT(&ctx->buffer[60], ctx->hi)
body(ctx, ctx->buffer, 64);
OUT(&result[0], ctx->a)
OUT(&result[4], ctx->b)
OUT(&result[8], ctx->c)
OUT(&result[12], ctx->d)
memset(ctx, 0, sizeof(*ctx));
}
AARU_EXPORT void AARU_CALL aaruf_md5_buffer(const void *data, unsigned long size, unsigned char *result)
{
md5_ctx ctx;
aaruf_md5_init(&ctx);
aaruf_md5_update(&ctx, data, size);
aaruf_md5_final(&ctx, result);
}

View File

@@ -26,7 +26,7 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/data.bin
# 'Google_Tests_run' is the target name # 'Google_Tests_run' is the target name
# 'test1.cpp tests2.cpp' are source files with tests # 'test1.cpp tests2.cpp' are source files with tests
add_executable(tests_run crc64.cpp spamsum.cpp crc32.c crc32.h flac.cpp lzma.cpp sha256.cpp) add_executable(tests_run crc64.cpp spamsum.cpp crc32.c crc32.h flac.cpp lzma.cpp sha256.cpp md5.cpp)
# Link libraries including OpenSSL for SHA256 test # Link libraries including OpenSSL for SHA256 test
target_link_libraries(tests_run gtest gtest_main "aaruformat") target_link_libraries(tests_run gtest gtest_main "aaruformat")

58
tests/md5.cpp Normal file
View File

@@ -0,0 +1,58 @@
/*
* This file is part of the Aaru Data Preservation Suite.
* Copyright (c) 2019-2025 Natalia Portillo.
*
* Standalone MD5 test for the file 'random'.
* Expected MD5: d78f0eec417be386219b21b700044b95
*/
#include <climits>
#include <cstdint>
#include <cstring>
#include <cstdio>
#include <cstdlib>
#include "md5.h"
#include "decls.h"
#include "gtest/gtest.h"
static uint8_t *buffer;
unsigned char expected_md5[16] = {
0xd7, 0x8f, 0x0e, 0xec, 0x41, 0x7b, 0xe3, 0x86,
0x21, 0x9b, 0x21, 0xb7, 0x00, 0x04, 0x4b, 0x95
};
class md5Fixture : public ::testing::Test
{
public:
md5Fixture() = default;
protected:
void SetUp() override
{
char path[PATH_MAX];
char filename[PATH_MAX];
getcwd(path, PATH_MAX);
snprintf(filename, PATH_MAX, "%s/data/random", path);
FILE *file = fopen(filename, "rb");
buffer = static_cast<uint8_t *>(malloc(1048576));
fread(buffer, 1, 1048576, file);
fclose(file);
}
void TearDown() override { free(buffer); }
~md5Fixture() override = default;
};
TEST_F(md5Fixture, MD5OfRandomFile)
{
uint8_t md5_result[16];
aaruf_md5_buffer(buffer, 1048576, md5_result);
EXPECT_EQ(memcmp(md5_result, expected_md5, 16), 0);
}