mirror of
https://github.com/aaru-dps/libaaruformat.git
synced 2025-12-16 19:24:40 +00:00
Implement SIMD helper functions.
This commit is contained in:
@@ -6,7 +6,8 @@ add_compile_definitions(__STDC_FORMAT_MACROS=1)
|
||||
|
||||
add_library(aaruformat SHARED include/aaruformat/consts.h include/aaruformat/enums.h include/aaru.h include/aaruformat.h
|
||||
include/aaruformat/decls.h include/aaruformat/structs.h src/identify.c src/open.c include/aaruformat/context.h
|
||||
src/close.c include/aaruformat/errors.h src/read.c src/crc64.c src/cst.c src/ecc_cd.c src/helpers.c)
|
||||
src/close.c include/aaruformat/errors.h src/read.c src/crc64.c src/cst.c src/ecc_cd.c src/helpers.c src/simd.c
|
||||
include/aaruformat/simd.h)
|
||||
|
||||
include_directories(include include/aaruformat)
|
||||
|
||||
|
||||
@@ -42,5 +42,6 @@
|
||||
#include "aaruformat/enums.h"
|
||||
#include "aaruformat/errors.h"
|
||||
#include "aaruformat/structs.h"
|
||||
#include "aaruformat/simd.h"
|
||||
|
||||
#endif // LIBAARUFORMAT_AARUFORMAT_H
|
||||
|
||||
@@ -149,4 +149,18 @@ AARU_LOCAL int32_t AARU_CALL aaruf_get_media_tag_type_for_datatype(int32_t type)
|
||||
|
||||
AARU_LOCAL int32_t AARU_CALL aaruf_get_xml_mediatype(int32_t type);
|
||||
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||
|
||||
AARU_LOCAL int have_clmul();
|
||||
AARU_LOCAL int have_ssse3();
|
||||
AARU_LOCAL int have_avx2();
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
/*
 * Run-time CPU feature probes for 32-bit and 64-bit ARM builds.
 * Each one returns non-zero when the feature is usable and 0 otherwise.
 * Declared with (void) so they are real prototypes rather than old-style
 * declarations with an unspecified parameter list.
 */
AARU_LOCAL int have_neon(void);       /* NEON / Advanced SIMD */
AARU_LOCAL int have_arm_crc32(void);  /* ARMv8 CRC32 instructions */
AARU_LOCAL int have_arm_crypto(void); /* ARMv8 cryptographic instructions */
#endif
|
||||
|
||||
#endif // LIBAARUFORMAT_DECLS_H
|
||||
|
||||
127
include/aaruformat/simd.h
Normal file
127
include/aaruformat/simd.h
Normal file
@@ -0,0 +1,127 @@
|
||||
// /***************************************************************************
|
||||
// Aaru Data Preservation Suite
|
||||
// ----------------------------------------------------------------------------
|
||||
//
|
||||
// Filename : simd.h
|
||||
// Author(s) : Natalia Portillo <claunia@claunia.com>
|
||||
//
|
||||
// Component : libaaruformat.
|
||||
//
|
||||
// --[ Description ] ----------------------------------------------------------
|
||||
//
|
||||
// Declares SIMD constants.
|
||||
//
|
||||
// --[ License ] --------------------------------------------------------------
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as
|
||||
// published by the Free Software Foundation; either version 2.1 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful, but
|
||||
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright © 2011-2022 Natalia Portillo
|
||||
// ****************************************************************************/
|
||||
|
||||
#ifndef LIBAARUFORMAT_SIMD_H
#define LIBAARUFORMAT_SIMD_H

/* ALIGNED_(n): compiler-specific request to align the declared object on an n-byte boundary. */
#ifdef _MSC_VER
#define ALIGNED_(n) __declspec(align(n))
#else
#define ALIGNED_(n) __attribute__((aligned(n)))
#endif

#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
    defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)

/*
 * Per-function instruction-set selectors for x86.
 * On GCC/clang they expand to a target attribute so a single function can be
 * compiled for the named extension; on MSVC they expand to nothing.
 */
#ifdef _MSC_VER
#define AVX2
#define SSSE3
#define CLMUL
#else
#define AVX2 __attribute__((target("avx2")))
#define SSSE3 __attribute__((target("ssse3")))
#define CLMUL __attribute__((target("pclmul,sse4.1")))
#endif

#endif // x86 / x86-64

/*
 * Hardware-capability bit masks tested against getauxval() results in simd.c.
 * Each definition is guarded so that a system header which already provides
 * the same name takes precedence and no macro redefinition occurs.
 */
#if(defined(__arm__) || defined(_M_ARM)) && !defined(_WIN32)
#ifndef HWCAP_NEON
#define HWCAP_NEON (1 << 12)
#endif
#ifndef HWCAP2_AES
#define HWCAP2_AES (1 << 0)
#endif
#ifndef HWCAP2_CRC32
#define HWCAP2_CRC32 (1 << 4)
#endif
#endif

#if(defined(__aarch64__) || defined(_M_ARM64)) && !defined(_WIN32)
#ifndef HWCAP_NEON
#define HWCAP_NEON (1 << 1)
#endif
#ifndef HWCAP_AES
#define HWCAP_AES (1 << 3)
#endif
#ifndef HWCAP_CRC32
#define HWCAP_CRC32 (1 << 7)
#endif
#endif

#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)

/*
 * NOTE(review): forcing this feature macro on presumably lets the compiler's
 * ARM intrinsics header expose the CRC32 intrinsics even when the translation
 * unit is not built with CRC enabled globally - confirm before changing.
 */
#ifndef __ARM_FEATURE_CRC32
#define __ARM_FEATURE_CRC32 1
#endif

/*
 * Per-function instruction-set selectors for ARM:
 *   TARGET_ARMV8_WITH_CRC - CRC32 instructions
 *   TARGET_WITH_CRYPTO    - cryptographic instructions
 *   TARGET_WITH_SIMD      - NEON
 * GCC and clang spell the target attribute strings differently, hence the
 * __clang__ split. MSVC gets empty definitions.
 */
#ifdef _MSC_VER

#define TARGET_ARMV8_WITH_CRC
#define TARGET_WITH_CRYPTO
#define TARGET_WITH_SIMD

#else // _MSC_VER

#if defined(__aarch64__) || defined(_M_ARM64)

#ifdef __clang__
#define TARGET_ARMV8_WITH_CRC __attribute__((target("crc")))
#define TARGET_WITH_CRYPTO __attribute__((target("crypto")))
#else
#define TARGET_ARMV8_WITH_CRC __attribute__((target("+crc")))
#define TARGET_WITH_CRYPTO __attribute__((target("+crypto")))
#endif

/* No attribute is applied for SIMD on 64-bit ARM. */
#define TARGET_WITH_SIMD

#else // 32-bit ARM

/* TARGET_ARMV8_WITH_CRC is intentionally left undefined when __ARM_ARCH is below 8. */
#if __ARM_ARCH >= 8

#ifdef __clang__
#define TARGET_ARMV8_WITH_CRC __attribute__((target("armv8-a,crc")))
#else
#define TARGET_ARMV8_WITH_CRC __attribute__((target("arch=armv8-a+crc")))
#endif

#endif // __ARM_ARCH >= 8

#ifdef __clang__
#define TARGET_WITH_CRYPTO __attribute__((target("armv8-a,crypto")))
#define TARGET_WITH_SIMD __attribute__((target("neon")))
#else
#define TARGET_WITH_CRYPTO __attribute__((target("fpu=crypto-neon-fp-armv8")))
#define TARGET_WITH_SIMD __attribute__((target("fpu=neon")))
#endif

#endif // __aarch64__ || _M_ARM64

#endif // _MSC_VER

#endif // __aarch64__ || _M_ARM64 || __arm__ || _M_ARM

#endif // LIBAARUFORMAT_SIMD_H
|
||||
228
src/simd.c
Normal file
228
src/simd.c
Normal file
@@ -0,0 +1,228 @@
|
||||
// /***************************************************************************
|
||||
// Aaru Data Preservation Suite
|
||||
// ----------------------------------------------------------------------------
|
||||
//
|
||||
// Filename : simd.c
|
||||
// Author(s) : Natalia Portillo <claunia@claunia.com>
|
||||
//
|
||||
// Component : libaaruformat.
|
||||
//
|
||||
// --[ Description ] ----------------------------------------------------------
|
||||
//
|
||||
// Detects SIMD availability.
|
||||
//
|
||||
// --[ License ] --------------------------------------------------------------
|
||||
//
|
||||
// This library is free software; you can redistribute it and/or modify
|
||||
// it under the terms of the GNU Lesser General Public License as
|
||||
// published by the Free Software Foundation; either version 2.1 of the
|
||||
// License, or (at your option) any later version.
|
||||
//
|
||||
// This library is distributed in the hope that it will be useful, but
|
||||
// WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||||
// Lesser General Public License for more details.
|
||||
//
|
||||
// You should have received a copy of the GNU Lesser General Public
|
||||
// License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
||||
//
|
||||
// ----------------------------------------------------------------------------
|
||||
// Copyright © 2011-2022 Natalia Portillo
|
||||
// ****************************************************************************/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#include <aaruformat.h>
|
||||
|
||||
#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
|
||||
defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
/*
|
||||
* Newer versions of GCC and clang come with cpuid.h
|
||||
* (ftr GCC 4.7 in Debian Wheezy has this)
|
||||
*/
|
||||
#include <cpuid.h>
|
||||
|
||||
#endif
|
||||
|
||||
/**
 * Executes CPUID for the given leaf and hands back the four result registers.
 *
 * @param info CPUID leaf to query.
 * @param eax  Receives the EAX result.
 * @param ebx  Receives the EBX result.
 * @param ecx  Receives the ECX result.
 * @param edx  Receives the EDX result.
 */
static void cpuid(int info, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
{
#ifdef _MSC_VER
    /* The MSVC intrinsic takes an int[4] buffer; convert to unsigned on the way out. */
    int registers[4];

    __cpuid(registers, info);

    *eax = (unsigned)registers[0];
    *ebx = (unsigned)registers[1];
    *ecx = (unsigned)registers[2];
    *edx = (unsigned)registers[3];
#else
    /* GCC, clang: __cpuid from <cpuid.h> is a macro that writes into plain lvalues. */
    unsigned int out_eax;
    unsigned int out_ebx;
    unsigned int out_ecx;
    unsigned int out_edx;

    __cpuid(info, out_eax, out_ebx, out_ecx, out_edx);

    *eax = out_eax;
    *ebx = out_ebx;
    *ecx = out_ecx;
    *edx = out_edx;
#endif
}
|
||||
|
||||
/**
 * Executes CPUID for the given leaf and sub-leaf and hands back the four result registers.
 *
 * @param info  CPUID leaf to query.
 * @param count Sub-leaf index passed alongside the leaf.
 * @param eax   Receives the EAX result.
 * @param ebx   Receives the EBX result.
 * @param ecx   Receives the ECX result.
 * @param edx   Receives the EDX result.
 */
static void cpuidex(int info, int count, unsigned* eax, unsigned* ebx, unsigned* ecx, unsigned* edx)
{
#ifdef _MSC_VER
    /* The MSVC intrinsic takes an int[4] buffer; convert to unsigned on the way out. */
    int registers[4];

    __cpuidex(registers, info, count);

    *eax = (unsigned)registers[0];
    *ebx = (unsigned)registers[1];
    *ecx = (unsigned)registers[2];
    *edx = (unsigned)registers[3];
#else
    /* GCC, clang: __cpuid_count from <cpuid.h> is a macro that writes into plain lvalues. */
    unsigned int out_eax;
    unsigned int out_ebx;
    unsigned int out_ecx;
    unsigned int out_edx;

    __cpuid_count(info, count, out_eax, out_ebx, out_ecx, out_edx);

    *eax = out_eax;
    *ebx = out_ebx;
    *ecx = out_ecx;
    *edx = out_edx;
#endif
}
|
||||
|
||||
/**
 * Reports whether the CPU advertises both PCLMULQDQ and SSE4.1.
 *
 * @return 1 when both CPUID feature bits are set, 0 otherwise.
 */
int have_clmul()
{
    unsigned eax, ebx, ecx, edx;

    cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx);

    const int has_pclmulqdq = (ecx & 0x2) != 0;     /* bit 1 */
    const int has_sse41     = (ecx & 0x80000) != 0; /* bit 19 */

    return has_pclmulqdq && has_sse41;
}
|
||||
|
||||
/**
 * Reports whether the CPU advertises SSSE3.
 *
 * @return 1 when CPUID leaf 1 sets ECX bit 9, 0 otherwise.
 */
int have_ssse3()
{
    unsigned eax, ebx, ecx, edx;

    cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx);

    /* Normalised to 0/1 so the result matches have_clmul(). */
    return (ecx & 0x200) != 0; /* bit 9 */
}
|
||||
|
||||
/* Reads extended control register 0 (XCR0). Only valid once CPUID has reported OSXSAVE. */
static uint64_t read_xcr0(void)
{
#ifdef _MSC_VER
    return _xgetbv(0);
#else
    unsigned int lo;
    unsigned int hi;

    __asm__ __volatile__("xgetbv" : "=a"(lo), "=d"(hi) : "c"(0));

    return ((uint64_t)hi << 32) | lo;
#endif
}

/**
 * Reports whether AVX2 code can safely be executed.
 *
 * The CPU feature bit alone is not sufficient: the operating system must also
 * have enabled saving of the YMM register state, otherwise AVX instructions
 * fault. Leaf 7 is only read after confirming that the CPU implements it.
 *
 * @return 1 when AVX2 is supported by both CPU and OS, 0 otherwise.
 */
int have_avx2()
{
    unsigned eax, ebx, ecx, edx;

    /* Leaf 0 returns the highest supported basic leaf in EAX. */
    cpuid(0, &eax, &ebx, &ecx, &edx);
    if(eax < 7) return 0;

    /* OSXSAVE (bit 27) means XGETBV is usable; AVX (bit 28) is a prerequisite of AVX2. */
    cpuid(1 /* feature bits */, &eax, &ebx, &ecx, &edx);
    if((ecx & 0x18000000) != 0x18000000) return 0;

    /* XCR0 bits 1 and 2: the OS preserves XMM and YMM state across context switches. */
    if((read_xcr0() & 0x6) != 0x6) return 0;

    cpuidex(7 /* extended feature bits */, 0, &eax, &ebx, &ecx, &edx);

    return (ebx & 0x20) != 0; /* bit 5 */
}
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
|
||||
#if defined(_WIN32)
|
||||
#include <windows.h>
|
||||
|
||||
#include <processthreadsapi.h>
|
||||
#elif defined(__APPLE__)
|
||||
#include <sys/sysctl.h>
|
||||
#else
|
||||
#include <sys/auxv.h>
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if(defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__)
|
||||
/**
 * Queries the "hw.optional.neon" sysctl.
 *
 * @return 1 when the query succeeds and reports the value 1, 0 otherwise.
 */
int have_neon_apple()
{
    int    value = 0;
    size_t len   = sizeof(value);

    if(sysctlbyname("hw.optional.neon", &value, &len, NULL, 0) != 0) return 0;

    return value == 1;
}
|
||||
|
||||
/**
 * Queries the "hw.optional.crc32" sysctl.
 *
 * @return 1 when the query succeeds and reports the value 1, 0 otherwise.
 */
int have_crc32_apple()
{
    int    value = 0;
    size_t len   = sizeof(value);

    if(sysctlbyname("hw.optional.crc32", &value, &len, NULL, 0) != 0) return 0;

    return value == 1;
}
|
||||
|
||||
/* No probe is performed for the crypto extension on Apple builds; it is always reported as absent. */
int have_crypto_apple()
{
    return 0;
}
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || defined(_M_ARM64)
|
||||
/**
 * Reports NEON availability on 64-bit ARM.
 *
 * @return Always 1.
 */
int have_neon()
{
    return 1; // ARMv8-A made it mandatory
}
|
||||
|
||||
int have_arm_crc32()
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0;
|
||||
#elif defined(__APPLE__)
|
||||
return have_crc32_apple();
|
||||
#else
|
||||
return getauxval(AT_HWCAP) & HWCAP_CRC32;
|
||||
#endif
|
||||
}
|
||||
|
||||
int have_arm_crypto()
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
|
||||
#elif defined(__APPLE__)
|
||||
return have_crypto_apple();
|
||||
#else
|
||||
return getauxval(AT_HWCAP) & HWCAP_AES;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(__arm__) || defined(_M_ARM)
|
||||
int have_neon()
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != 0;
|
||||
#elif defined(__APPLE__)
|
||||
return have_neon_apple();
|
||||
#else
|
||||
return getauxval(AT_HWCAP) & HWCAP_NEON;
|
||||
#endif
|
||||
}
|
||||
|
||||
int have_arm_crc32()
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0;
|
||||
#elif defined(__APPLE__)
|
||||
return have_crc32_apple();
|
||||
#else
|
||||
return getauxval(AT_HWCAP2) & HWCAP2_CRC32;
|
||||
#endif
|
||||
}
|
||||
|
||||
int have_arm_crypto()
|
||||
{
|
||||
#if defined(_WIN32)
|
||||
return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
|
||||
#elif defined(__APPLE__)
|
||||
return have_crypto_apple();
|
||||
#else
|
||||
return getauxval(AT_HWCAP2) & HWCAP2_AES;
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
Reference in New Issue
Block a user