From 9dbca24425c737d05ca738d318c578c838416f74 Mon Sep 17 00:00:00 2001 From: Natalia Portillo Date: Sat, 28 May 2022 12:31:54 +0100 Subject: [PATCH] Implement SIMD helper functions. --- CMakeLists.txt | 3 +- include/aaruformat.h | 1 + include/aaruformat/decls.h | 14 +++ include/aaruformat/simd.h | 127 +++++++++++++++++++++ src/simd.c | 228 +++++++++++++++++++++++++++++++++++++ 5 files changed, 372 insertions(+), 1 deletion(-) create mode 100644 include/aaruformat/simd.h create mode 100644 src/simd.c diff --git a/CMakeLists.txt b/CMakeLists.txt index c375f8e..fdc961b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -6,7 +6,8 @@ add_compile_definitions(__STDC_FORMAT_MACROS=1) add_library(aaruformat SHARED include/aaruformat/consts.h include/aaruformat/enums.h include/aaru.h include/aaruformat.h include/aaruformat/decls.h include/aaruformat/structs.h src/identify.c src/open.c include/aaruformat/context.h - src/close.c include/aaruformat/errors.h src/read.c src/crc64.c src/cst.c src/ecc_cd.c src/helpers.c) + src/close.c include/aaruformat/errors.h src/read.c src/crc64.c src/cst.c src/ecc_cd.c src/helpers.c src/simd.c + include/aaruformat/simd.h) include_directories(include include/aaruformat) diff --git a/include/aaruformat.h b/include/aaruformat.h index 1753ef4..5e2b82b 100644 --- a/include/aaruformat.h +++ b/include/aaruformat.h @@ -42,5 +42,6 @@ #include "aaruformat/enums.h" #include "aaruformat/errors.h" #include "aaruformat/structs.h" +#include "aaruformat/simd.h" #endif // LIBAARUFORMAT_AARUFORMAT_H diff --git a/include/aaruformat/decls.h b/include/aaruformat/decls.h index 4ce0d6f..f5cf5b0 100644 --- a/include/aaruformat/decls.h +++ b/include/aaruformat/decls.h @@ -149,4 +149,18 @@ AARU_LOCAL int32_t AARU_CALL aaruf_get_media_tag_type_for_datatype(int32_t type) AARU_LOCAL int32_t AARU_CALL aaruf_get_xml_mediatype(int32_t type); +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) + +AARU_LOCAL int have_clmul(); +AARU_LOCAL int have_ssse3(); +AARU_LOCAL int have_avx2(); +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) +AARU_LOCAL int have_neon(); +AARU_LOCAL int have_arm_crc32(); +AARU_LOCAL int have_arm_crypto(); +#endif + #endif // LIBAARUFORMAT_DECLS_H diff --git a/include/aaruformat/simd.h b/include/aaruformat/simd.h new file mode 100644 index 0000000..bcb5a7d --- /dev/null +++ b/include/aaruformat/simd.h @@ -0,0 +1,127 @@ +// /*************************************************************************** +// Aaru Data Preservation Suite +// ---------------------------------------------------------------------------- +// +// Filename : simd.h +// Author(s) : Natalia Portillo +// +// Component : libaaruformat. +// +// --[ Description ] ---------------------------------------------------------- +// +// Declares SIMD constants. +// +// --[ License ] -------------------------------------------------------------- +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, see . +// +// ---------------------------------------------------------------------------- +// Copyright © 2011-2022 Natalia Portillo +// ****************************************************************************/ + +#ifndef LIBAARUFORMAT_SIMD_H +#define LIBAARUFORMAT_SIMD_H + +#ifdef _MSC_VER +#define ALIGNED_(n) __declspec(align(n)) +#else +#define ALIGNED_(n) __attribute__((aligned(n))) +#endif + +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) + +#ifdef _MSC_VER +#define AVX2 +#define SSSE3 +#define CLMUL +#else +#define AVX2 __attribute__((target("avx2"))) +#define SSSE3 __attribute__((target("ssse3"))) +#define CLMUL __attribute__((target("pclmul,sse4.1"))) +#endif +#endif + +#if(defined(__arm__) || defined(_M_ARM)) && !defined(_WIN32) +#define HWCAP_NEON (1 << 12) +#define HWCAP2_AES (1 << 0) +#define HWCAP2_CRC32 (1 << 4) +#endif + +#if(defined(__aarch64__) || defined(_M_ARM64)) && !defined(_WIN32) +#define HWCAP_NEON (1 << 1) +#define HWCAP_AES (1 << 3) +#define HWCAP_CRC32 (1 << 7) +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) + +#ifndef __ARM_FEATURE_CRC32 +#define __ARM_FEATURE_CRC32 1 +#endif + +#ifdef _MSC_VER + +#define TARGET_ARMV8_WITH_CRC +#define TARGET_WITH_CRYPTO +#define TARGET_WITH_SIMD + +#else // _MSC_VER + +#if defined(__aarch64__) || defined(_M_ARM64) + +#ifdef __clang__ +#define TARGET_ARMV8_WITH_CRC __attribute__((target("crc"))) +#else +#define TARGET_ARMV8_WITH_CRC __attribute__((target("+crc"))) +#endif + +#ifdef __clang__ +#define TARGET_WITH_CRYPTO __attribute__((target("crypto"))) +#else +#define TARGET_WITH_CRYPTO __attribute__((target("+crypto"))) +#endif + +#define TARGET_WITH_SIMD +#else + +#if __ARM_ARCH >= 8 + +#ifdef __clang__ +#define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc"))) +#else +#define TARGET_ARMV8_WITH_CRC __attribute__((target("arch=armv8-a+crc"))) +#endif + +#endif // __ARM_ARCH >= 8 + +#ifdef __clang__ +#define TARGET_WITH_CRYPTO __attribute__((target("armv8-a,crypto"))) +#else +#define TARGET_WITH_CRYPTO __attribute__((target("fpu=crypto-neon-fp-armv8"))) +#endif + +#ifdef __clang__ +#define TARGET_WITH_SIMD __attribute__((target("neon"))) +#else +#define TARGET_WITH_SIMD __attribute__((target("fpu=neon"))) +#endif + +#endif // __aarch64__ || _M_ARM64 + +#endif // _MSC_VER + +#endif // __aarch64__ || _M_ARM64 || __arm__ || _M_ARM + +#endif // LIBAARUFORMAT_SIMD_H diff --git a/src/simd.c b/src/simd.c new file mode 100644 index 0000000..e9adb06 --- /dev/null +++ b/src/simd.c @@ -0,0 +1,228 @@ +// /*************************************************************************** +// Aaru Data Preservation Suite +// ---------------------------------------------------------------------------- +// +// Filename : simd.c +// Author(s) : Natalia Portillo +// +// Component : libaaruformat. +// +// --[ Description ] ---------------------------------------------------------- +// +// Detects SIMD availability. +// +// --[ License ] -------------------------------------------------------------- +// +// This library is free software; you can redistribute it and/or modify +// it under the terms of the GNU Lesser General Public License as +// published by the Free Software Foundation; either version 2.1 of the +// License, or (at your option) any later version. +// +// This library is distributed in the hope that it will be useful, but +// WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +// Lesser General Public License for more details. +// +// You should have received a copy of the GNU Lesser General Public +// License along with this library; if not, see . +// +// ---------------------------------------------------------------------------- +// Copyright © 2011-2022 Natalia Portillo +// ****************************************************************************/ + +#include +#include + +#include + +#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \ + defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86) + +#ifdef _MSC_VER +#include +#else +/* + * Newer versions of GCC and clang come with cpuid.h + * (ftr GCC 4.7 in Debian Wheezy has this) + */ +#include + +#endif + +static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) +{ +#ifdef _MSC_VER + unsigned int registers[4]; + __cpuid(registers, info); + *eax = registers[0]; + *ebx = registers[1]; + *ecx = registers[2]; + *edx = registers[3]; +#else + /* GCC, clang */ + unsigned int _eax; + unsigned int _ebx; + unsigned int _ecx; + unsigned int _edx; + __cpuid(info, _eax, _ebx, _ecx, _edx); + *eax = _eax; + *ebx = _ebx; + *ecx = _ecx; + *edx = _edx; +#endif +} + +static void cpuidex(int info, int count, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx) +{ +#ifdef _MSC_VER + unsigned int registers[4]; + __cpuidex(registers, info, count); + *eax = registers[0]; + *ebx = registers[1]; + *ecx = registers[2]; + *edx = registers[3]; +#else + /* GCC, clang */ + unsigned int _eax; + unsigned int _ebx; + unsigned int _ecx; + unsigned int _edx; + __cpuid_count(info, count, _eax, _ebx, _ecx, _edx); + *eax = _eax; + *ebx = _ebx; + *ecx = _ecx; + *edx = _edx; +#endif +} + +int have_clmul() +{ + unsigned eax, ebx, ecx, edx; + int has_pclmulqdq; + int has_sse41; + cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx); + + has_pclmulqdq = ecx & 0x2; /* bit 1 */ + has_sse41 = ecx & 0x80000; /* bit 19 */ + + return has_pclmulqdq && has_sse41; +} + +int have_ssse3() +{ + unsigned eax, ebx, ecx, edx; + cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx); + + return ecx & 0x200; +} + +int have_avx2() +{ + unsigned eax, ebx, ecx, edx; + cpuidex(7 /* extended feature bits */, 0, &eax, &ebx, &ecx, &edx); + + return ebx & 0x20; +} +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM) +#if defined(_WIN32) +#include + +#include +#elif defined(__APPLE__) +#include +#else +#include +#endif +#endif + +#if(defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__) +int have_neon_apple() +{ + int value; + size_t len = sizeof(int); + int ret = sysctlbyname("hw.optional.neon", &value, &len, NULL, 0); + + if(ret != 0) return 0; + + return value == 1; +} + +int have_crc32_apple() +{ + int value; + size_t len = sizeof(int); + int ret = sysctlbyname("hw.optional.crc32", &value, &len, NULL, 0); + + if(ret != 0) return 0; + + return value == 1; +} + +int have_crypto_apple() { return 0; } +#endif + +#if defined(__aarch64__) || defined(_M_ARM64) +int have_neon() +{ + return 1; // ARMv8-A made it mandatory +} + +int have_arm_crc32() +{ +#if defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0; +#elif defined(__APPLE__) + return have_crc32_apple(); +#else + return getauxval(AT_HWCAP) & HWCAP_CRC32; +#endif +} + +int have_arm_crypto() +{ +#if defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0; +#elif defined(__APPLE__) + return have_crypto_apple(); +#else + return getauxval(AT_HWCAP) & HWCAP_AES; +#endif +} +#endif + +#if defined(__arm__) || defined(_M_ARM) +int have_neon() +{ +#if defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != 0; +#elif defined(__APPLE__) + return have_neon_apple(); +#else + return getauxval(AT_HWCAP) & HWCAP_NEON; +#endif +} + +int have_arm_crc32() +{ +#if defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0; +#elif defined(__APPLE__) + return have_crc32_apple(); +#else + return getauxval(AT_HWCAP2) & HWCAP2_CRC32; +#endif +} + +int have_arm_crypto() +{ +#if defined(_WIN32) + return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0; +#elif defined(__APPLE__) + return have_crypto_apple(); +#else + return getauxval(AT_HWCAP2) & HWCAP2_AES; +#endif +} +#endif