libaaruformat 1.0
Aaru Data Preservation Suite - Format Library
Loading...
Searching...
No Matches
simd.c
Go to the documentation of this file.
1/*
2 * This file is part of the Aaru Data Preservation Suite.
3 * Copyright (c) 2019-2025 Natalia Portillo.
4 *
5 * This library is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as
7 * published by the Free Software Foundation; either version 2.1 of the
8 * License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful, but
11 * WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
17 */
18
19#include <stddef.h>
20#include <stdint.h>
21
22#include "aaruformat.h"
23#include "log.h"
24
25#if defined(__x86_64__) || defined(__amd64) || defined(_M_AMD64) || defined(_M_X64) || defined(__I386__) || \
26 defined(__i386__) || defined(__THW_INTEL) || defined(_M_IX86)
27
28#ifdef _MSC_VER
29#include <intrin.h>
30#else
31/*
32 * Newer versions of GCC and clang come with cpuid.h
33 * (ftr GCC 4.7 in Debian Wheezy has this)
34 */
35#include <cpuid.h>
36
37#endif
38
/**
 * @brief Executes the CPUID instruction for one leaf and returns the four result registers.
 *
 * The four pointers are outputs only: whatever they point to on entry is never read, so callers
 * may pass the addresses of uninitialized variables.
 *
 * @param info CPUID leaf to query.
 * @param eax  Receives the EAX value produced by CPUID.
 * @param ebx  Receives the EBX value produced by CPUID.
 * @param ecx  Receives the ECX value produced by CPUID.
 * @param edx  Receives the EDX value produced by CPUID.
 */
static void cpuid(int info, unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
{
    /* Only the leaf is logged on entry: the output slots hold no meaningful value yet. */
    TRACE("Entering cpuid(%d)", info);

#ifdef _MSC_VER
    /* The MSVC intrinsic is declared with an int[4] destination. */
    int registers[4] = {0, 0, 0, 0};
    __cpuid(registers, info);
    *eax = (unsigned)registers[0];
    *ebx = (unsigned)registers[1];
    *ecx = (unsigned)registers[2];
    *edx = (unsigned)registers[3];
#else
    /* GCC, clang: __cpuid from <cpuid.h> takes the destination variables by name. */
    unsigned int reg_a = 0;
    unsigned int reg_b = 0;
    unsigned int reg_c = 0;
    unsigned int reg_d = 0;
    __cpuid(info, reg_a, reg_b, reg_c, reg_d);
    *eax = reg_a;
    *ebx = reg_b;
    *ecx = reg_c;
    *edx = reg_d;
#endif

    /* Register values are unsigned, so they are printed with %u. */
    TRACE("Exiting cpuid(%d, %u, %u, %u, %u)", info, *eax, *ebx, *ecx, *edx);
}
76
/**
 * @brief Executes the CPUID instruction for one leaf/sub-leaf pair and returns the four result registers.
 *
 * The requested leaf is first compared against the highest leaf the processor reports for the
 * same range (basic or extended). When the leaf is out of range CPUID is not executed for it and
 * all four outputs are set to 0, so callers testing feature bits see "not supported" instead of
 * data belonging to a different leaf.
 *
 * The four pointers are outputs only: whatever they point to on entry is never read, so callers
 * may pass the addresses of uninitialized variables.
 *
 * @param info  CPUID leaf to query.
 * @param count CPUID sub-leaf to query.
 * @param eax   Receives the EAX value produced by CPUID, or 0 if the leaf is unsupported.
 * @param ebx   Receives the EBX value produced by CPUID, or 0 if the leaf is unsupported.
 * @param ecx   Receives the ECX value produced by CPUID, or 0 if the leaf is unsupported.
 * @param edx   Receives the EDX value produced by CPUID, or 0 if the leaf is unsupported.
 */
static void cpuidex(int info, int count, unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx)
{
    /* Only the inputs are logged on entry: the output slots hold no meaningful value yet. */
    TRACE("Entering cpuidex(%d, %d)", info, count);

    /* 0 for the basic leaf range, 0x80000000 for the extended range (top bit of the leaf). */
    const int      range_base = info & ~0x7FFFFFFF;
    const unsigned leaf       = (unsigned)info;
    unsigned       max_leaf   = 0;

    /* Default answer when the leaf turns out to be unsupported. */
    *eax = 0;
    *ebx = 0;
    *ecx = 0;
    *edx = 0;

#ifdef _MSC_VER
    /* The MSVC intrinsics are declared with an int[4] destination. */
    int registers[4] = {0, 0, 0, 0};
    __cpuid(registers, range_base);
    max_leaf = (unsigned)registers[0];

    if(leaf <= max_leaf)
    {
        __cpuidex(registers, info, count);
        *eax = (unsigned)registers[0];
        *ebx = (unsigned)registers[1];
        *ecx = (unsigned)registers[2];
        *edx = (unsigned)registers[3];
    }
#else
    /* GCC, clang: __get_cpuid_max returns 0 when CPUID itself is unavailable. */
    unsigned int vendor_sig = 0;
    max_leaf                = __get_cpuid_max((unsigned)range_base, &vendor_sig);

    if(max_leaf != 0 && leaf <= max_leaf)
    {
        unsigned int reg_a = 0;
        unsigned int reg_b = 0;
        unsigned int reg_c = 0;
        unsigned int reg_d = 0;
        __cpuid_count(info, count, reg_a, reg_b, reg_c, reg_d);
        *eax = reg_a;
        *ebx = reg_b;
        *ecx = reg_c;
        *edx = reg_d;
    }
#endif

    /* Register values are unsigned, so they are printed with %u. */
    TRACE("Exiting cpuidex(%d, %d, %u, %u, %u, %u)", info, count, *eax, *ebx, *ecx, *edx);
}
113
/**
 * @brief Reports whether carry-less multiplication can be used.
 *
 * Queries CPUID leaf 1 and requires both the PCLMULQDQ flag (ECX bit 1) and the SSE4.1 flag
 * (ECX bit 19) to be set.
 *
 * @return 1 when both flags are set, 0 otherwise.
 */
int have_clmul()
{
    TRACE("Entering have_clmul()");

    unsigned reg_eax, reg_ebx, reg_ecx, reg_edx;
    cpuid(1 /* feature bits */, &reg_eax, &reg_ebx, &reg_ecx, &reg_edx);

    const int pclmulqdq_flag = reg_ecx & (1u << 1);  /* bit 1 */
    const int sse41_flag     = reg_ecx & (1u << 19); /* bit 19 */
    const int supported      = pclmulqdq_flag && sse41_flag;

    TRACE("Exiting have_clmul() = %d", supported);
    return supported;
}
132
/**
 * @brief Reports whether the SSSE3 feature flag is set.
 *
 * Queries CPUID leaf 1 and isolates ECX bit 9.
 *
 * @return Non-zero (the raw masked bit, 0x200) when the flag is set, 0 otherwise.
 */
int have_ssse3()
{
    TRACE("Entering have_ssse3()");

    unsigned reg_eax, reg_ebx, reg_ecx, reg_edx;
    cpuid(1 /* feature bits */, &reg_eax, &reg_ebx, &reg_ecx, &reg_edx);

    /* ECX bit 9, returned unshifted. */
    const unsigned ssse3_flag = reg_ecx & (1u << 9);

    TRACE("Exiting have_ssse3() = %d", ssse3_flag);
    return ssse3_flag;
}
147
/**
 * @brief Reports whether the AVX2 feature flag is set.
 *
 * Queries CPUID leaf 7, sub-leaf 0, and isolates EBX bit 5.
 *
 * @return Non-zero (the raw masked bit, 0x20) when the flag is set, 0 otherwise.
 */
int have_avx2()
{
    TRACE("Entering have_avx2()");

    unsigned reg_eax, reg_ebx, reg_ecx, reg_edx;
    cpuidex(7 /* extended feature bits */, 0, &reg_eax, &reg_ebx, &reg_ecx, &reg_edx);

    /* EBX bit 5, returned unshifted. */
    const unsigned avx2_flag = reg_ebx & (1u << 5);

    TRACE("Exiting have_avx2() = %d", avx2_flag);
    return avx2_flag;
}
162#endif
163
164#if defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)
165#if defined(_WIN32)
166#include <windows.h>
167
168#include <processthreadsapi.h>
169#elif defined(__APPLE__)
170#include <sys/sysctl.h>
171#else
172#include <sys/auxv.h>
173#endif
174#endif
175
176#if (defined(__aarch64__) || defined(_M_ARM64) || defined(__arm__) || defined(_M_ARM)) && defined(__APPLE__)
/**
 * @brief Apple-specific NEON probe.
 *
 * Reads the "hw.optional.neon" sysctl value.
 *
 * @return 1 when the query succeeds and the value is exactly 1, 0 otherwise (including when the
 *         query itself fails).
 */
int have_neon_apple()
{
    TRACE("Entering have_neon_apple()");

    int    flag      = 0;
    size_t flag_size = sizeof(int);

    if(sysctlbyname("hw.optional.neon", &flag, &flag_size, NULL, 0) == 0)
    {
        TRACE("Exiting have_neon_apple() = %d", flag == 1);
        return flag == 1;
    }

    /* The sysctl lookup failed: report the feature as absent. */
    TRACE("Exiting have_neon_apple() = 0");
    return 0;
}
198
/**
 * @brief Apple-specific probe for the ARM CRC32 instructions.
 *
 * Apple publishes this capability under "hw.optional.arm.FEAT_CRC32" and, on older systems,
 * "hw.optional.armv8_crc32". Those keys are tried in that order; the key this function used
 * historically, "hw.optional.crc32", is kept as a last fallback so nothing that worked before
 * stops working.
 *
 * @return 1 as soon as any of the keys can be read and reports exactly 1, 0 otherwise
 *         (including when none of the keys exists).
 */
int have_crc32_apple()
{
    static const char *const sysctl_keys[] = {"hw.optional.arm.FEAT_CRC32", "hw.optional.armv8_crc32",
                                              "hw.optional.crc32"};

    TRACE("Entering have_crc32_apple()");

    for(size_t i = 0; i < sizeof(sysctl_keys) / sizeof(sysctl_keys[0]); i++)
    {
        int    value = 0;
        size_t len   = sizeof(value);

        /* A failing lookup only means this key is unknown here; try the next one. */
        if(sysctlbyname(sysctl_keys[i], &value, &len, NULL, 0) != 0) continue;

        if(value == 1)
        {
            TRACE("Exiting have_crc32_apple() = 1");
            return 1;
        }
    }

    TRACE("Exiting have_crc32_apple() = 0");
    return 0;
}
220
/**
 * @brief Apple-specific probe for the ARM cryptographic extensions.
 *
 * No system query is performed: the extensions are unconditionally reported as unavailable.
 *
 * @return Always 0.
 */
int have_crypto_apple()
{
    const int supported = 0;

    return supported;
}
227#endif
228
229#if defined(__aarch64__) || defined(_M_ARM64)
/**
 * @brief Reports NEON availability on 64-bit ARM builds.
 *
 * @return Always 1.
 */
int have_neon()
{
    /* ARMv8-A made it mandatory */
    const int supported = 1;

    return supported;
}
234
235int have_arm_crc32()
236{
237#if defined(_WIN32)
238 return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0;
239#elif defined(__APPLE__)
240 return have_crc32_apple();
241#else
242 return getauxval(AT_HWCAP) & HWCAP_CRC32;
243#endif
244}
245
246int have_arm_crypto()
247{
248#if defined(_WIN32)
249 return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
250#elif defined(__APPLE__)
251 return have_crypto_apple();
252#else
253 return getauxval(AT_HWCAP) & HWCAP_AES;
254#endif
255}
256#endif
257
258#if defined(__arm__) || defined(_M_ARM)
259int have_neon()
260{
261#if defined(_WIN32)
262 return IsProcessorFeaturePresent(PF_ARM_VFP_32_REGISTERS_AVAILABLE) != 0;
263#elif defined(__APPLE__)
264 return have_neon_apple();
265#else
266 return getauxval(AT_HWCAP) & HWCAP_NEON;
267#endif
268}
269
270int have_arm_crc32()
271{
272#if defined(_WIN32)
273 return IsProcessorFeaturePresent(PF_ARM_V8_CRC32_INSTRUCTIONS_AVAILABLE) != 0;
274#elif defined(__APPLE__)
275 return have_crc32_apple();
276#else
277 return getauxval(AT_HWCAP2) & HWCAP2_CRC32;
278#endif
279}
280
281int have_arm_crypto()
282{
283#if defined(_WIN32)
284 return IsProcessorFeaturePresent(PF_ARM_V8_CRYPTO_INSTRUCTIONS_AVAILABLE) != 0;
285#elif defined(__APPLE__)
286 return have_crypto_apple();
287#else
288 return getauxval(AT_HWCAP2) & HWCAP2_AES;
289#endif
290}
291#endif
#define TRACE(fmt,...)
Definition log.h:25