diff --git a/CMakeLists.txt b/CMakeLists.txt index 1ddfcc8..5b0b06b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -125,7 +125,9 @@ add_library(aaruformat SHARED include/aaruformat/consts.h include/aaruformat/enu src/write.c include/log.h src/ddt/hash_map.c - include/aaruformat/hash_map.h) + include/aaruformat/hash_map.h + src/md5.c + include/md5.h) include_directories(include include/aaruformat) diff --git a/include/aaruformat/decls.h b/include/aaruformat/decls.h index 58790ac..c7bbe0c 100644 --- a/include/aaruformat/decls.h +++ b/include/aaruformat/decls.h @@ -19,6 +19,8 @@ #ifndef LIBAARUFORMAT_DECLS_H #define LIBAARUFORMAT_DECLS_H +#include "crc64.h" +#include "md5.h" #include "simd.h" #include "spamsum.h" #ifdef __cplusplus @@ -156,6 +158,11 @@ AARU_EXPORT int32_t AARU_CALL aaruf_lzma_encode_buffer(uint8_t *dst_buffer, size int32_t level, uint32_t dict_size, int32_t lc, int32_t lp, int32_t pb, int32_t fb, int32_t num_threads); +AARU_EXPORT void AARU_CALL aaruf_md5_init(md5_ctx *ctx); +AARU_EXPORT void AARU_CALL aaruf_md5_update(md5_ctx *ctx, const void *data, unsigned long size); +AARU_EXPORT void AARU_CALL aaruf_md5_final(md5_ctx *ctx, unsigned char *result); +AARU_EXPORT void AARU_CALL aaruf_md5_buffer(const void *data, unsigned long size, unsigned char *result); + #if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) diff --git a/include/md5.h b/include/md5.h new file mode 100644 index 0000000..5d95839 --- /dev/null +++ b/include/md5.h @@ -0,0 +1,39 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * See md5.c for more information. + */ + +#ifndef LIBAARUFORMAT_MD5_H +#define LIBAARUFORMAT_MD5_H + +#include + +typedef struct +{ + uint32_t lo, hi; + uint32_t a, b, c, d; + unsigned char buffer[64]; + uint32_t block[16]; +} md5_ctx; + +#endif /* LIBAARUFORMAT_MD5_H */ \ No newline at end of file diff --git a/src/md5.c b/src/md5.c new file mode 100644 index 0000000..f9b5b6e --- /dev/null +++ b/src/md5.c @@ -0,0 +1,292 @@ +/* + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * (This is a heavily cut-down "BSD license".) + * + * This differs from Colin Plumb's older public domain implementation in that + * no exactly 32-bit integer data type is required (any 32-bit or wider + * unsigned integer data type will do), there's no compile-time endianness + * configuration, and the function prototypes match OpenSSL's. No code from + * Colin Plumb's implementation has been reused; this comment merely compares + * the properties of the two independent implementations. + * + * The primary goals of this implementation are portability and ease of use. + * It is meant to be fast, but not as fast as possible. Some known + * optimizations are not included to reduce source code size and avoid + * compile-time configuration. + */ + +#include +#include +#include + +#include "decls.h" +#include "md5.h" + +/* + * The basic MD5 functions. + * + * F and G are optimized compared to their RFC 1321 definitions for + * architectures that lack an AND-NOT instruction, just like in Colin Plumb's + * implementation. + */ +#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y)))) +#define H(x, y, z) (((x) ^ (y)) ^ (z)) +#define H2(x, y, z) ((x) ^ ((y) ^ (z))) +#define I(x, y, z) ((y) ^ ((x) | ~(z))) + +/* + * The MD5 transformation for all four rounds. + */ +#define STEP(f, a, b, c, d, x, t, s) \ + (a) += f((b), (c), (d)) + (x) + (t); \ + (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \ + (a) += (b); + +/* + * SET reads 4 input bytes in little-endian byte order and stores them in a + * properly aligned word in host byte order. + * + * The check for little-endian architectures that tolerate unaligned memory + * accesses is just an optimization. Nothing will break if it fails to detect + * a suitable architecture. + * + * Unfortunately, this optimization may be a C strict aliasing rules violation + * if the caller's data buffer has effective type that cannot be aliased by + * uint32_t. In practice, this problem may occur if these MD5 routines are + * inlined into a calling function, or with future and dangerously advanced + * link-time optimizations. For the time being, keeping these MD5 routines in + * their own translation unit avoids the problem. + */ +#if defined(__i386__) || defined(__x86_64__) || defined(__vax__) +#define SET(n) (*(uint32_t *)&ptr[(n) * 4]) +#define GET(n) SET(n) +#else +#define SET(n) \ + (ctx->block[(n)] = (uint32_t)ptr[(n) * 4] | ((uint32_t)ptr[(n) * 4 + 1] << 8) | \ + ((uint32_t)ptr[(n) * 4 + 2] << 16) | ((uint32_t)ptr[(n) * 4 + 3] << 24)) +#define GET(n) (ctx->block[(n)]) +#endif + +/* + * This processes one or more 64-byte data blocks, but does NOT update the bit + * counters. There are no alignment requirements. + */ +static const void *body(md5_ctx *ctx, const void *data, unsigned long size) +{ + const unsigned char *ptr; + uint32_t a, b, c, d; + uint32_t saved_a, saved_b, saved_c, saved_d; + + ptr = (const unsigned char *)data; + + a = ctx->a; + b = ctx->b; + c = ctx->c; + d = ctx->d; + + do { + saved_a = a; + saved_b = b; + saved_c = c; + saved_d = d; + + /* Round 1 */ + STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7) + STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12) + STEP(F, c, d, a, b, SET(2), 0x242070db, 17) + STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22) + STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7) + STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12) + STEP(F, c, d, a, b, SET(6), 0xa8304613, 17) + STEP(F, b, c, d, a, SET(7), 0xfd469501, 22) + STEP(F, a, b, c, d, SET(8), 0x698098d8, 7) + STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12) + STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17) + STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22) + STEP(F, a, b, c, d, SET(12), 0x6b901122, 7) + STEP(F, d, a, b, c, SET(13), 0xfd987193, 12) + STEP(F, c, d, a, b, SET(14), 0xa679438e, 17) + STEP(F, b, c, d, a, SET(15), 0x49b40821, 22) + + /* Round 2 */ + STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5) + STEP(G, d, a, b, c, GET(6), 0xc040b340, 9) + STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14) + STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20) + STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5) + STEP(G, d, a, b, c, GET(10), 0x02441453, 9) + STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14) + STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20) + STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5) + STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9) + STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14) + STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20) + STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5) + STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9) + STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14) + STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20) + + /* Round 3 */ + STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4) + STEP(H2, d, a, b, c, GET(8), 0x8771f681, 11) + STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16) + STEP(H2, b, c, d, a, GET(14), 0xfde5380c, 23) + STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4) + STEP(H2, d, a, b, c, GET(4), 0x4bdecfa9, 11) + STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16) + STEP(H2, b, c, d, a, GET(10), 0xbebfbc70, 23) + STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4) + STEP(H2, d, a, b, c, GET(0), 0xeaa127fa, 11) + STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16) + STEP(H2, b, c, d, a, GET(6), 0x04881d05, 23) + STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4) + STEP(H2, d, a, b, c, GET(12), 0xe6db99e5, 11) + STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16) + STEP(H2, b, c, d, a, GET(2), 0xc4ac5665, 23) + + /* Round 4 */ + STEP(I, a, b, c, d, GET(0), 0xf4292244, 6) + STEP(I, d, a, b, c, GET(7), 0x432aff97, 10) + STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15) + STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21) + STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6) + STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10) + STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15) + STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21) + STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6) + STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10) + STEP(I, c, d, a, b, GET(6), 0xa3014314, 15) + STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21) + STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6) + STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10) + STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15) + STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21) + + a += saved_a; + b += saved_b; + c += saved_c; + d += saved_d; + + ptr += 64; + } while(size -= 64); + + ctx->a = a; + ctx->b = b; + ctx->c = c; + ctx->d = d; + + return ptr; +} + +AARU_EXPORT void AARU_CALL aaruf_md5_init(md5_ctx *ctx) +{ + ctx->a = 0x67452301; + ctx->b = 0xefcdab89; + ctx->c = 0x98badcfe; + ctx->d = 0x10325476; + + ctx->lo = 0; + ctx->hi = 0; +} + +AARU_EXPORT void AARU_CALL aaruf_md5_update(md5_ctx *ctx, const void *data, unsigned long size) +{ + + const uint32_t saved_lo = ctx->lo; + if((ctx->lo = (saved_lo + size) & 0x1fffffff) < saved_lo) ctx->hi++; + ctx->hi += size >> 29; + + const unsigned long used = saved_lo & 0x3f; + + if(used) + { + unsigned long available = 64 - used; + + if(size < available) + { + memcpy(&ctx->buffer[used], data, size); + return; + } + + memcpy(&ctx->buffer[used], data, available); + data = (const unsigned char *)data + available; + size -= available; + body(ctx, ctx->buffer, 64); + } + + if(size >= 64) + { + data = body(ctx, data, size & ~(unsigned long)0x3f); + size &= 0x3f; + } + + memcpy(ctx->buffer, data, size); +} + +#define OUT(dst, src) \ + (dst)[0] = (unsigned char)(src); \ + (dst)[1] = (unsigned char)((src) >> 8); \ + (dst)[2] = (unsigned char)((src) >> 16); \ + (dst)[3] = (unsigned char)((src) >> 24); + +AARU_EXPORT void AARU_CALL aaruf_md5_final(md5_ctx *ctx, unsigned char *result) +{ + + unsigned long used = ctx->lo & 0x3f; + + ctx->buffer[used++] = 0x80; + + unsigned long available = 64 - used; + + if(available < 8) + { + memset(&ctx->buffer[used], 0, available); + body(ctx, ctx->buffer, 64); + used = 0; + available = 64; + } + + memset(&ctx->buffer[used], 0, available - 8); + + ctx->lo <<= 3; + OUT(&ctx->buffer[56], ctx->lo) + OUT(&ctx->buffer[60], ctx->hi) + + body(ctx, ctx->buffer, 64); + + OUT(&result[0], ctx->a) + OUT(&result[4], ctx->b) + OUT(&result[8], ctx->c) + OUT(&result[12], ctx->d) + + memset(ctx, 0, sizeof(*ctx)); +} + +AARU_EXPORT void AARU_CALL aaruf_md5_buffer(const void *data, unsigned long size, unsigned char *result) +{ + md5_ctx ctx; + aaruf_md5_init(&ctx); + aaruf_md5_update(&ctx, data, size); + aaruf_md5_final(&ctx, result); +} diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index f835874..291abcc 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -26,7 +26,7 @@ file(COPY ${CMAKE_CURRENT_SOURCE_DIR}/data/data.bin # 'Google_Tests_run' is the target name # 'test1.cpp tests2.cpp' are source files with tests -add_executable(tests_run crc64.cpp spamsum.cpp crc32.c crc32.h flac.cpp lzma.cpp sha256.cpp) +add_executable(tests_run crc64.cpp spamsum.cpp crc32.c crc32.h flac.cpp lzma.cpp sha256.cpp md5.cpp) # Link libraries including OpenSSL for SHA256 test target_link_libraries(tests_run gtest gtest_main "aaruformat") diff --git a/tests/md5.cpp b/tests/md5.cpp new file mode 100644 index 0000000..bdbfeaf --- /dev/null +++ b/tests/md5.cpp @@ -0,0 +1,58 @@ +/* + * This file is part of the Aaru Data Preservation Suite. + * Copyright (c) 2019-2025 Natalia Portillo. + * + * Standalone MD5 test for the file 'random'. + * Expected MD5: d78f0eec417be386219b21b700044b95 + */ + +#include +#include +#include +#include +#include + +#include "md5.h" + +#include "decls.h" +#include "gtest/gtest.h" + +static uint8_t *buffer; + +unsigned char expected_md5[16] = { + 0xd7, 0x8f, 0x0e, 0xec, 0x41, 0x7b, 0xe3, 0x86, + 0x21, 0x9b, 0x21, 0xb7, 0x00, 0x04, 0x4b, 0x95 +}; + +class md5Fixture : public ::testing::Test +{ +public: + md5Fixture() = default; + +protected: + void SetUp() override + { + char path[PATH_MAX]; + char filename[PATH_MAX]; + + getcwd(path, PATH_MAX); + snprintf(filename, PATH_MAX, "%s/data/random", path); + + FILE *file = fopen(filename, "rb"); + buffer = static_cast(malloc(1048576)); + fread(buffer, 1, 1048576, file); + fclose(file); + } + + void TearDown() override { free(buffer); } + ~md5Fixture() override = default; +}; + +TEST_F(md5Fixture, MD5OfRandomFile) +{ + uint8_t md5_result[16]; + + aaruf_md5_buffer(buffer, 1048576, md5_result); + EXPECT_EQ(memcmp(md5_result, expected_md5, 16), 0); +} +