1 /*
2 Simple DirectMedia Layer
3 Copyright (C) 1997-2021 Sam Lantinga <slouken@libsdl.org>
4
5 This software is provided 'as-is', without any express or implied
6 warranty. In no event will the authors be held liable for any damages
7 arising from the use of this software.
8
9 Permission is granted to anyone to use this software for any purpose,
10 including commercial applications, and to alter it and redistribute it
11 freely, subject to the following restrictions:
12
13 1. The origin of this software must not be misrepresented; you must not
14 claim that you wrote the original software. If you use this software
15 in a product, an acknowledgment in the product documentation would be
16 appreciated but is not required.
17 2. Altered source versions must be plainly marked as such, and must not be
18 misrepresented as being the original software.
19 3. This notice may not be removed or altered from any source distribution.
20 */
21 #ifdef TEST_MAIN
22 #include "SDL_config.h"
23 #else
24 #include "../SDL_internal.h"
25 #endif
26
27 #if defined(__WIN32__) || defined(__WINRT__)
28 #include "../core/windows/SDL_windows.h"
29 #endif
30 #if defined(__OS2__)
31 #undef HAVE_SYSCTLBYNAME
32 #define INCL_DOS
33 #include <os2.h>
34 #ifndef QSV_NUMPROCESSORS
35 #define QSV_NUMPROCESSORS 26
36 #endif
37 #endif
38
39 /* CPU feature detection for SDL */
40
41 #include "SDL_cpuinfo.h"
42 #include "SDL_assert.h"
43
44 #ifdef HAVE_SYSCONF
45 #include <unistd.h>
46 #endif
47 #ifdef HAVE_SYSCTLBYNAME
48 #include <sys/types.h>
49 #include <sys/sysctl.h>
50 #endif
51 #if defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))
52 #include <sys/sysctl.h> /* For AltiVec check */
53 #elif defined(__OpenBSD__) && defined(__powerpc__)
54 #include <sys/param.h>
55 #include <sys/sysctl.h> /* For AltiVec check */
56 #include <machine/cpu.h>
57 #elif defined(__FreeBSD__) && defined(__powerpc__)
58 #include <machine/cpu.h>
59 #include <sys/auxv.h>
60 #elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
61 #include <signal.h>
62 #include <setjmp.h>
63 #endif
64
65 #if defined(__QNXNTO__)
66 #include <sys/syspage.h>
67 #endif
68
69 #if (defined(__LINUX__) || defined(__ANDROID__)) && defined(__arm__)
70 #include <unistd.h>
71 #include <sys/types.h>
72 #include <sys/stat.h>
73 #include <fcntl.h>
74 #include <elf.h>
75
76 /*#include <asm/hwcap.h>*/
77 #ifndef AT_HWCAP
78 #define AT_HWCAP 16
79 #endif
80 #ifndef AT_PLATFORM
81 #define AT_PLATFORM 15
82 #endif
83 #ifndef HWCAP_NEON
84 #define HWCAP_NEON (1 << 12)
85 #endif
86 #endif
87
88 #if defined(__ANDROID__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
89 #include <cpu-features.h>
90 #endif
91
92 #if defined(HAVE_GETAUXVAL) || defined(HAVE_ELF_AUX_INFO)
93 #include <sys/auxv.h>
94 #endif
95
96 #ifdef __RISCOS__
97 #include <kernel.h>
98 #include <swis.h>
99 #endif
100
/* Bit flags, one per detectable CPU capability. SDL_GetCPUFeatures() ORs
   them into the cached SDL_CPUFeatures mask and the SDL_Has*() functions
   test them through CPU_FEATURE_AVAILABLE(). */
#define CPU_HAS_RDTSC   (1 << 0)
#define CPU_HAS_ALTIVEC (1 << 1)
#define CPU_HAS_MMX     (1 << 2)
#define CPU_HAS_3DNOW   (1 << 3)
#define CPU_HAS_SSE     (1 << 4)
#define CPU_HAS_SSE2    (1 << 5)
#define CPU_HAS_SSE3    (1 << 6)
#define CPU_HAS_SSE41   (1 << 7)
#define CPU_HAS_SSE42   (1 << 8)
#define CPU_HAS_AVX     (1 << 9)
#define CPU_HAS_AVX2    (1 << 10)
#define CPU_HAS_NEON    (1 << 11)
#define CPU_HAS_AVX512F (1 << 12)
#define CPU_HAS_ARM_SIMD (1 << 13)
115
#if SDL_ALTIVEC_BLITTERS && HAVE_SETJMP && !__MACOSX__ && !__OpenBSD__ && !__FreeBSD__
/* This is the brute force way of detecting instruction sets...
   the idea is borrowed from the libmpeg2 library - thanks!
 */
/* Jump target armed by CPU_haveAltiVec() via setjmp() before it executes a
   probe instruction. */
static jmp_buf jmpbuf;

/* SIGILL handler installed by CPU_haveAltiVec(): unwinds back to the
   setjmp() point with a non-zero value so the probe is reported as failed.
   The signal number is not used. */
static void
illegal_instruction(int sig)
{
    longjmp(jmpbuf, 1);
}
#endif /* HAVE_SETJMP */
127
/*
 * Report whether the CPUID instruction can be used.
 *
 * The x86 assembly variants all follow the same recipe: read EFLAGS, flip
 * bit 0x200000 (the ID flag), write it back, read it again, and set
 * has_CPUID to 1 only if the bit actually changed.  MSVC x64 builds skip
 * the probe and report 1 unconditionally.
 *
 * Returns 1 when CPUID is available; 0 otherwise, including when
 * SDL_CPUINFO_DISABLED is defined or no branch matches the
 * compiler/architecture.
 */
static int
CPU_haveCPUID(void)
{
    int has_CPUID = 0;

/* *INDENT-OFF* */
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
    /* 32-bit GCC/Clang: result is stored through the "=m" memory operand. */
    __asm__ (
" pushfl # Get original EFLAGS \n"
" popl %%eax \n"
" movl %%eax,%%ecx \n"
" xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
" pushl %%eax # Save new EFLAGS value on stack \n"
" popfl # Replace current EFLAGS value \n"
" pushfl # Get new EFLAGS \n"
" popl %%eax # Store new EFLAGS in EAX \n"
" xorl %%ecx,%%eax # Can not toggle ID bit, \n"
" jz 1f # Processor=80486 \n"
" movl $1,%0 # We have CPUID support \n"
"1: \n"
    : "=m" (has_CPUID)
    :
    : "%eax", "%ecx"
    );
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
/* Technically, if this is being compiled under __x86_64__ then it has
   CPUid by definition. But it's nice to be able to prove it. :) */
    __asm__ (
" pushfq # Get original EFLAGS \n"
" popq %%rax \n"
" movq %%rax,%%rcx \n"
" xorl $0x200000,%%eax # Flip ID bit in EFLAGS \n"
" pushq %%rax # Save new EFLAGS value on stack \n"
" popfq # Replace current EFLAGS value \n"
" pushfq # Get new EFLAGS \n"
" popq %%rax # Store new EFLAGS in EAX \n"
" xorl %%ecx,%%eax # Can not toggle ID bit, \n"
" jz 1f # Processor=80486 \n"
" movl $1,%0 # We have CPUID support \n"
"1: \n"
    : "=m" (has_CPUID)
    :
    : "%rax", "%rcx"
    );
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
    /* 32-bit MSVC / Watcom inline assembler: same probe, writes has_CPUID by name. */
    __asm {
        pushfd ; Get original EFLAGS
        pop eax
        mov ecx, eax
        xor eax, 200000h ; Flip ID bit in EFLAGS
        push eax ; Save new EFLAGS value on stack
        popfd ; Replace current EFLAGS value
        pushfd ; Get new EFLAGS
        pop eax ; Store new EFLAGS in EAX
        xor eax, ecx ; Can not toggle ID bit,
        jz done ; Processor=80486
        mov has_CPUID,1 ; We have CPUID support
done:
    }
#elif defined(_MSC_VER) && defined(_M_X64)
    /* No probe on MSVC x64: CPUID is assumed to exist. */
    has_CPUID = 1;
#elif defined(__sun) && defined(__i386)
    /* Sun compiler, 32-bit.  NOTE(review): the result is stored to the fixed
       frame slot -8(%ebp), presumably where has_CPUID lives -- this depends
       on the compiler's stack layout; confirm before touching the locals. */
    __asm (
" pushfl \n"
" popl %eax \n"
" movl %eax,%ecx \n"
" xorl $0x200000,%eax \n"
" pushl %eax \n"
" popfl \n"
" pushfl \n"
" popl %eax \n"
" xorl %ecx,%eax \n"
" jz 1f \n"
" movl $1,-8(%ebp) \n"
"1: \n"
    );
#elif defined(__sun) && defined(__amd64)
    /* Sun compiler, 64-bit.  NOTE(review): same fixed-slot assumption as the
       32-bit variant, here -8(%rbp). */
    __asm (
" pushfq \n"
" popq %rax \n"
" movq %rax,%rcx \n"
" xorl $0x200000,%eax \n"
" pushq %rax \n"
" popfq \n"
" pushfq \n"
" popq %rax \n"
" xorl %ecx,%eax \n"
" jz 1f \n"
" movl $1,-8(%rbp) \n"
"1: \n"
    );
#endif
#endif
/* *INDENT-ON* */
    return has_CPUID;
}
225
/*
 * cpuid(func, a, b, c, d): execute CPUID for leaf `func` and store the
 * resulting EAX/EBX/ECX/EDX into the lvalues a/b/c/d.  One definition is
 * selected per compiler/architecture; on anything that is not a recognised
 * x86 toolchain the macro just zeroes all four outputs.
 */
#if (defined(__GNUC__) || defined(__llvm__)) && defined(__i386__)
/* 32-bit GCC/Clang: ECX is cleared before CPUID; EBX is saved and restored
   around the instruction and its value is handed back through ESI
   (presumably because EBX may be reserved by the compiler, e.g. for PIC). */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
" pushl %%ebx \n" \
" xorl %%ecx,%%ecx \n" \
" cpuid \n" \
" movl %%ebx, %%esi \n" \
" popl %%ebx \n" : \
    "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(__GNUC__) || defined(__llvm__)) && defined(__x86_64__)
/* 64-bit GCC/Clang: same scheme using RBX/RSI. */
#define cpuid(func, a, b, c, d) \
    __asm__ __volatile__ ( \
" pushq %%rbx \n" \
" xorq %%rcx,%%rcx \n" \
" cpuid \n" \
" movq %%rbx, %%rsi \n" \
" popq %%rbx \n" : \
    "=a" (a), "=S" (b), "=c" (c), "=d" (d) : "a" (func))
#elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
/* 32-bit MSVC / Watcom inline assembler: ECX is cleared, registers are
   copied straight into the named variables. */
#define cpuid(func, a, b, c, d) \
    __asm { \
        __asm mov eax, func \
        __asm xor ecx, ecx \
        __asm cpuid \
        __asm mov a, eax \
        __asm mov b, ebx \
        __asm mov c, ecx \
        __asm mov d, edx \
}
#elif defined(_MSC_VER) && defined(_M_X64)
/* MSVC x64 has no inline assembler; use the __cpuid intrinsic. */
#define cpuid(func, a, b, c, d) \
{ \
    int CPUInfo[4]; \
    __cpuid(CPUInfo, func); \
    a = CPUInfo[0]; \
    b = CPUInfo[1]; \
    c = CPUInfo[2]; \
    d = CPUInfo[3]; \
}
#else
/* No CPUID available: report all-zero registers (the casts only silence
   unused-variable warnings). */
#define cpuid(func, a, b, c, d) \
    do { a = b = c = d = 0; (void) a; (void) b; (void) c; (void) d; } while (0)
#endif
269
270 static int CPU_CPUIDFeatures[4];
271 static int CPU_CPUIDMaxFunction = 0;
272 static SDL_bool CPU_OSSavesYMM = SDL_FALSE;
273 static SDL_bool CPU_OSSavesZMM = SDL_FALSE;
274
275 static void
CPU_calcCPUIDFeatures(void)276 CPU_calcCPUIDFeatures(void)
277 {
278 static SDL_bool checked = SDL_FALSE;
279 if (!checked) {
280 checked = SDL_TRUE;
281 if (CPU_haveCPUID()) {
282 int a, b, c, d;
283 cpuid(0, a, b, c, d);
284 CPU_CPUIDMaxFunction = a;
285 if (CPU_CPUIDMaxFunction >= 1) {
286 cpuid(1, a, b, c, d);
287 CPU_CPUIDFeatures[0] = a;
288 CPU_CPUIDFeatures[1] = b;
289 CPU_CPUIDFeatures[2] = c;
290 CPU_CPUIDFeatures[3] = d;
291
292 /* Check to make sure we can call xgetbv */
293 if (c & 0x08000000) {
294 /* Call xgetbv to see if YMM (etc) register state is saved */
295 #if (defined(__GNUC__) || defined(__llvm__)) && (defined(__i386__) || defined(__x86_64__))
296 __asm__(".byte 0x0f, 0x01, 0xd0" : "=a" (a) : "c" (0) : "%edx");
297 #elif defined(_MSC_VER) && (defined(_M_IX86) || defined(_M_X64)) && (_MSC_FULL_VER >= 160040219) /* VS2010 SP1 */
298 a = (int)_xgetbv(0);
299 #elif (defined(_MSC_VER) && defined(_M_IX86)) || defined(__WATCOMC__)
300 __asm
301 {
302 xor ecx, ecx
303 _asm _emit 0x0f _asm _emit 0x01 _asm _emit 0xd0
304 mov a, eax
305 }
306 #endif
307 CPU_OSSavesYMM = ((a & 6) == 6) ? SDL_TRUE : SDL_FALSE;
308 CPU_OSSavesZMM = (CPU_OSSavesYMM && ((a & 0xe0) == 0xe0)) ? SDL_TRUE : SDL_FALSE;
309 }
310 }
311 }
312 }
313 }
314
/*
 * Detect AltiVec (PowerPC vector unit) support.
 *
 * Returns non-zero when AltiVec is available, 0 otherwise (including when
 * SDL_CPUINFO_DISABLED is defined or no platform branch applies).
 *
 *  - Mac OS X on PowerPC and OpenBSD on PowerPC ask the kernel via sysctl().
 *  - FreeBSD on PowerPC reads AT_HWCAP with elf_aux_info().
 *  - Otherwise, when SDL_ALTIVEC_BLITTERS and HAVE_SETJMP are set, an
 *    AltiVec instruction is executed under a temporary SIGILL handler.
 */
static int
CPU_haveAltiVec(void)
{
    /* presumably volatile so the value stays well-defined across the
       setjmp()/longjmp() used by the probe below */
    volatile int altivec = 0;
#ifndef SDL_CPUINFO_DISABLED
#if (defined(__MACOSX__) && (defined(__ppc__) || defined(__ppc64__))) || (defined(__OpenBSD__) && defined(__powerpc__))
#ifdef __OpenBSD__
    int selectors[2] = { CTL_MACHDEP, CPU_ALTIVEC };
#else
    int selectors[2] = { CTL_HW, HW_VECTORUNIT };
#endif
    int hasVectorUnit = 0;
    size_t length = sizeof(hasVectorUnit);
    int error = sysctl(selectors, 2, &hasVectorUnit, &length, NULL, 0);
    /* On sysctl failure altivec keeps its default of 0. */
    if (0 == error)
        altivec = (hasVectorUnit != 0);
#elif defined(__FreeBSD__) && defined(__powerpc__)
    unsigned long cpufeatures = 0;
    /* The return value is not checked; on failure cpufeatures stays 0. */
    elf_aux_info(AT_HWCAP, &cpufeatures, sizeof(cpufeatures));
    altivec = cpufeatures & PPC_FEATURE_HAS_ALTIVEC;
    return altivec;
#elif SDL_ALTIVEC_BLITTERS && HAVE_SETJMP
    void (*handler) (int sig);
    /* Install illegal_instruction() for SIGILL, remembering the old handler. */
    handler = signal(SIGILL, illegal_instruction);
    if (setjmp(jmpbuf) == 0) {
        /* If this faults, the handler longjmp()s back with a non-zero
           value and altivec is left at 0. */
        asm volatile ("mtspr 256, %0\n\t" "vand %%v0, %%v0, %%v0"::"r" (-1));
        altivec = 1;
    }
    /* Restore the previous SIGILL disposition. */
    signal(SIGILL, handler);
#endif
#endif
    return altivec;
}
348
/*
 * CPU_haveARMSIMD(): report whether the ARMv6 SIMD instructions are usable.
 * Returns non-zero when they are, 0 otherwise.  Exactly one of the
 * definitions below is compiled, chosen by target.
 */
#if (defined(__ARM_ARCH) && (__ARM_ARCH >= 6)) || defined(__aarch64__)
/* Built for ARMv6 or newer (or AArch64): always reported as available. */
static int
CPU_haveARMSIMD(void)
{
    return 1;
}

#elif !defined(__arm__)
/* Not an ARM build at all. */
static int
CPU_haveARMSIMD(void)
{
    return 0;
}

#elif defined(__LINUX__)
/* Linux on older ARM: scan /proc/self/auxv for the AT_PLATFORM entry and
   accept platform strings that start with "v6l" or "v7l".  If the file
   cannot be opened the answer is 0. */
static int
CPU_haveARMSIMD(void)
{
    int arm_simd = 0;
    int fd;

    fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);
    if (fd >= 0) {
        Elf32_auxv_t aux;
        while (read(fd, &aux, sizeof aux) == sizeof aux) {
            if (aux.a_type == AT_PLATFORM) {
                /* a_val is the address of the platform string in this
                   process, which is why it can be dereferenced here. */
                const char *plat = (const char *) aux.a_un.a_val;
                if (plat) {
                    arm_simd = SDL_strncmp(plat, "v6l", 3) == 0 ||
                               SDL_strncmp(plat, "v7l", 3) == 0;
                }
            }
        }
        close(fd);
    }
    return arm_simd;
}

#elif defined(__RISCOS__)
/* RISC OS: query OS_PlatformFeatures.  Any SWI error, or bit 31 clear in
   the first reply, is treated as "not available". */
static int
CPU_haveARMSIMD(void)
{
    _kernel_swi_regs regs;
    regs.r[0] = 0;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
        return 0;

    if (!(regs.r[0] & (1<<31)))
        return 0;

    /* NOTE(review): reason code 34 with r1 = 29 presumably selects the
       ARMv6 SIMD feature query -- confirm against the RISC OS PRMs. */
    regs.r[0] = 34;
    regs.r[1] = 29;
    if (_kernel_swi(OS_PlatformFeatures, &regs, &regs) != NULL)
        return 0;

    return regs.r[0];
}

#else
static int
CPU_haveARMSIMD(void)
{
#warning SDL_HasARMSIMD is not implemented for this ARM platform. Write me.
    return 0;
}
#endif
418
#if defined(__LINUX__) && defined(__arm__) && !defined(HAVE_GETAUXVAL)
/* Walk /proc/self/auxv until the AT_HWCAP record is found and report
   whether its HWCAP_NEON bit is set.  Yields 0 if the file cannot be
   opened or no AT_HWCAP record is present. */
static int
readProcAuxvForNeon(void)
{
    Elf32_auxv_t entry;
    int has_neon = 0;
    const int auxv_fd = open("/proc/self/auxv", O_RDONLY | O_CLOEXEC);

    if (auxv_fd < 0) {
        return 0;
    }

    for (;;) {
        if (read(auxv_fd, &entry, sizeof (entry)) != sizeof (entry)) {
            break;  /* short read or EOF: no more complete records */
        }
        if (entry.a_type == AT_HWCAP) {
            has_neon = ((entry.a_un.a_val & HWCAP_NEON) == HWCAP_NEON);
            break;
        }
    }

    close(auxv_fd);
    return has_neon;
}
#endif
441
/*
 * Report whether ARM NEON is available.  Returns non-zero if so, 0 if not
 * (or if SDL_CPUINFO_DISABLED is defined).  The first matching platform
 * branch below decides how the question is answered.
 */
static int
CPU_haveNEON(void)
{
/* The way you detect NEON is a privileged instruction on ARM, so you have
   to query the OS kernel in a platform-specific way. :/ */
#if defined(SDL_CPUINFO_DISABLED)
    return 0; /* disabled */
#elif (defined(__WINDOWS__) || defined(__WINRT__)) && (defined(_M_ARM) || defined(_M_ARM64))
/* Visual Studio, for ARM, doesn't define __ARM_ARCH. Handle this first. */
/* Seems to have been removed */
# if !defined(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE)
# define PF_ARM_NEON_INSTRUCTIONS_AVAILABLE 19
# endif
/* All WinRT ARM devices are required to support NEON, but just in case. */
    return IsProcessorFeaturePresent(PF_ARM_NEON_INSTRUCTIONS_AVAILABLE) != 0;
#elif (defined(__ARM_ARCH) && (__ARM_ARCH >= 8)) || defined(__aarch64__)
    return 1; /* ARMv8 always has non-optional NEON support. */
#elif __VITA__
    return 1;
#elif defined(__APPLE__) && defined(__ARM_ARCH) && (__ARM_ARCH >= 7)
    /* (note that sysctlbyname("hw.optional.neon") doesn't work!) */
    return 1; /* all Apple ARMv7 chips and later have NEON. */
#elif defined(__APPLE__)
    return 0; /* assume anything else from Apple doesn't have NEON. */
#elif !defined(__arm__)
    return 0; /* not an ARM CPU at all. */
#elif defined(__OpenBSD__)
    return 1; /* OpenBSD only supports ARMv7 CPUs that have NEON. */
#elif defined(HAVE_ELF_AUX_INFO)
    unsigned long hasneon = 0;
    if (elf_aux_info(AT_HWCAP, (void *)&hasneon, (int)sizeof(hasneon)) != 0)
        return 0;
    return ((hasneon & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__QNXNTO__)
    return SYSPAGE_ENTRY(cpuinfo)->flags & ARM_CPU_FLAG_NEON;
#elif (defined(__LINUX__) || defined(__ANDROID__)) && defined(HAVE_GETAUXVAL)
    return ((getauxval(AT_HWCAP) & HWCAP_NEON) == HWCAP_NEON);
#elif defined(__LINUX__)
    return readProcAuxvForNeon();
#elif defined(__ANDROID__)
    /* Use NDK cpufeatures to read either /proc/self/auxv or /proc/cpuinfo */
    {
        AndroidCpuFamily cpu_family = android_getCpuFamily();
        if (cpu_family == ANDROID_CPU_FAMILY_ARM) {
            uint64_t cpu_features = android_getCpuFeatures();
            if ((cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0) {
                return 1;
            }
        }
        return 0;
    }
#elif defined(__RISCOS__)
    /* Use the VFPSupport_Features SWI to access the MVFR registers */
    {
        _kernel_swi_regs regs;
        regs.r[0] = 0;
        if (_kernel_swi(VFPSupport_Features, &regs, &regs) == NULL) {
            if ((regs.r[2] & 0xFFF000) == 0x111000) {
                return 1;
            }
        }
        return 0;
    }
#else
#warning SDL_HasNEON is not implemented for this ARM platform. Write me.
    return 0;
#endif
}
510
511 #if defined(__e2k__)
512 inline int
CPU_have3DNow(void)513 CPU_have3DNow(void)
514 {
515 #if defined(__3dNOW__)
516 return 1;
517 #else
518 return 0;
519 #endif
520 }
521 #else
522 static int
CPU_have3DNow(void)523 CPU_have3DNow(void)
524 {
525 if (CPU_CPUIDMaxFunction > 0) { /* that is, do we have CPUID at all? */
526 int a, b, c, d;
527 cpuid(0x80000000, a, b, c, d);
528 if (a >= 0x80000001) {
529 cpuid(0x80000001, a, b, c, d);
530 return (d & 0x80000000);
531 }
532 }
533 return 0;
534 }
535 #endif
536
/*
 * CPU_haveRDTSC() ... CPU_haveAVX(): per-feature predicates used by
 * SDL_GetCPUFeatures().  Each expands to an expression that is non-zero
 * when the feature is present.
 */
#if defined(__e2k__)
/* e2k: no CPUID, so each answer is fixed at compile time from the
   compiler's target feature macros; RDTSC is always reported absent. */
#define CPU_haveRDTSC() (0)
#if defined(__MMX__)
#define CPU_haveMMX() (1)
#else
#define CPU_haveMMX() (0)
#endif
#if defined(__SSE__)
#define CPU_haveSSE() (1)
#else
#define CPU_haveSSE() (0)
#endif
#if defined(__SSE2__)
#define CPU_haveSSE2() (1)
#else
#define CPU_haveSSE2() (0)
#endif
#if defined(__SSE3__)
#define CPU_haveSSE3() (1)
#else
#define CPU_haveSSE3() (0)
#endif
#if defined(__SSE4_1__)
#define CPU_haveSSE41() (1)
#else
#define CPU_haveSSE41() (0)
#endif
#if defined(__SSE4_2__)
#define CPU_haveSSE42() (1)
#else
#define CPU_haveSSE42() (0)
#endif
#if defined(__AVX__)
#define CPU_haveAVX() (1)
#else
#define CPU_haveAVX() (0)
#endif
#else
/* Everything else: test bits of the cached CPUID leaf 1 registers
   (CPU_CPUIDFeatures[2] is ECX, [3] is EDX, as stored by
   CPU_calcCPUIDFeatures()).  AVX additionally requires that the OS saves
   YMM state. */
#define CPU_haveRDTSC() (CPU_CPUIDFeatures[3] & 0x00000010)
#define CPU_haveMMX() (CPU_CPUIDFeatures[3] & 0x00800000)
#define CPU_haveSSE() (CPU_CPUIDFeatures[3] & 0x02000000)
#define CPU_haveSSE2() (CPU_CPUIDFeatures[3] & 0x04000000)
#define CPU_haveSSE3() (CPU_CPUIDFeatures[2] & 0x00000001)
#define CPU_haveSSE41() (CPU_CPUIDFeatures[2] & 0x00080000)
#define CPU_haveSSE42() (CPU_CPUIDFeatures[2] & 0x00100000)
#define CPU_haveAVX() (CPU_OSSavesYMM && (CPU_CPUIDFeatures[2] & 0x10000000))
#endif
584
585 #if defined(__e2k__)
586 inline int
CPU_haveAVX2(void)587 CPU_haveAVX2(void)
588 {
589 #if defined(__AVX2__)
590 return 1;
591 #else
592 return 0;
593 #endif
594 }
595 #else
596 static int
CPU_haveAVX2(void)597 CPU_haveAVX2(void)
598 {
599 if (CPU_OSSavesYMM && (CPU_CPUIDMaxFunction >= 7)) {
600 int a, b, c, d;
601 (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
602 cpuid(7, a, b, c, d);
603 return (b & 0x00000020);
604 }
605 return 0;
606 }
607 #endif
608
609 #if defined(__e2k__)
610 inline int
CPU_haveAVX512F(void)611 CPU_haveAVX512F(void)
612 {
613 return 0;
614 }
615 #else
616 static int
CPU_haveAVX512F(void)617 CPU_haveAVX512F(void)
618 {
619 if (CPU_OSSavesZMM && (CPU_CPUIDMaxFunction >= 7)) {
620 int a, b, c, d;
621 (void) a; (void) b; (void) c; (void) d; /* compiler warnings... */
622 cpuid(7, a, b, c, d);
623 return (b & 0x00010000);
624 }
625 return 0;
626 }
627 #endif
628
/* Cached processor count; 0 means "not determined yet". */
static int SDL_CPUCount = 0;

/*
 * Return the number of logical CPUs, always at least 1.
 *
 * The value is worked out on the first call and cached.  Each platform
 * source below is only consulted while no positive count has been found.
 */
int
SDL_GetCPUCount(void)
{
    if (SDL_CPUCount != 0) {
        return SDL_CPUCount;   /* answered on an earlier call */
    }

#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_NPROCESSORS_ONLN)
    if (SDL_CPUCount <= 0) {
        SDL_CPUCount = (int)sysconf(_SC_NPROCESSORS_ONLN);
    }
#endif
#ifdef HAVE_SYSCTLBYNAME
    if (SDL_CPUCount <= 0) {
        size_t count_len = sizeof(SDL_CPUCount);
        sysctlbyname("hw.ncpu", &SDL_CPUCount, &count_len, NULL, 0);
    }
#endif
#ifdef __WIN32__
    if (SDL_CPUCount <= 0) {
        SYSTEM_INFO sysinfo;
        GetSystemInfo(&sysinfo);
        SDL_CPUCount = sysinfo.dwNumberOfProcessors;
    }
#endif
#ifdef __OS2__
    if (SDL_CPUCount <= 0) {
        DosQuerySysInfo(QSV_NUMPROCESSORS, QSV_NUMPROCESSORS,
                        &SDL_CPUCount, sizeof(SDL_CPUCount) );
    }
#endif
#endif

    /* There has to be at least 1, right? :) */
    if (SDL_CPUCount <= 0) {
        SDL_CPUCount = 1;
    }
    return SDL_CPUCount;
}
668
669 #if defined(__e2k__)
670 inline const char *
SDL_GetCPUType(void)671 SDL_GetCPUType(void)
672 {
673 static char SDL_CPUType[13];
674
675 SDL_strlcpy(SDL_CPUType, "E2K MACHINE", sizeof(SDL_CPUType));
676
677 return SDL_CPUType;
678 }
679 #else
680 /* Oh, such a sweet sweet trick, just not very useful. :) */
681 static const char *
SDL_GetCPUType(void)682 SDL_GetCPUType(void)
683 {
684 static char SDL_CPUType[13];
685
686 if (!SDL_CPUType[0]) {
687 int i = 0;
688
689 CPU_calcCPUIDFeatures();
690 if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
691 int a, b, c, d;
692 cpuid(0x00000000, a, b, c, d);
693 (void) a;
694 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
695 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
696 SDL_CPUType[i++] = (char)(b & 0xff); b >>= 8;
697 SDL_CPUType[i++] = (char)(b & 0xff);
698
699 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
700 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
701 SDL_CPUType[i++] = (char)(d & 0xff); d >>= 8;
702 SDL_CPUType[i++] = (char)(d & 0xff);
703
704 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
705 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
706 SDL_CPUType[i++] = (char)(c & 0xff); c >>= 8;
707 SDL_CPUType[i++] = (char)(c & 0xff);
708 }
709 if (!SDL_CPUType[0]) {
710 SDL_strlcpy(SDL_CPUType, "Unknown", sizeof(SDL_CPUType));
711 }
712 }
713 return SDL_CPUType;
714 }
715 #endif
716
717
#ifdef TEST_MAIN /* !!! FIXME: only used for test at the moment. */
/*
 * Return the processor brand string as a NUL-terminated string held in a
 * static buffer (do not free it).  Only built for the TEST_MAIN program.
 */
#if defined(__e2k__)
/* e2k: ask the compiler builtin.  `static` rather than bare `inline` so a
   definition always exists for the call in main(). */
static const char *
SDL_GetCPUName(void)
{
    static char SDL_CPUName[48];

    SDL_strlcpy(SDL_CPUName, __builtin_cpu_name(), sizeof(SDL_CPUName));

    return SDL_CPUName;
}
#else
/* Append the 16 bytes held in a, b, c, d (in that order, least significant
   byte of each first) to dst; returns the position just past them. */
static char *
CPU_appendCPUIDRegisters(char *dst, int a, int b, int c, int d)
{
    int regs[4];
    int r, byte;

    regs[0] = a;
    regs[1] = b;
    regs[2] = c;
    regs[3] = d;
    for (r = 0; r < 4; ++r) {
        for (byte = 0; byte < 4; ++byte) {
            *dst++ = (char)((regs[r] >> (8 * byte)) & 0xff);
        }
    }
    return dst;
}

/* Everything else: the brand string is the 48 bytes returned by CPUID
   extended leaves 0x80000002..0x80000004.  Falls back to "Unknown" when
   those leaves are unavailable or the string is empty. */
static const char *
SDL_GetCPUName(void)
{
    /* 48 brand bytes plus one byte that is never written, so the result is
       terminated even if the CPU fills all 48 bytes. */
    static char SDL_CPUName[49];

    if (!SDL_CPUName[0]) {
        int a, b, c, d;
        char *out = SDL_CPUName;

        CPU_calcCPUIDFeatures();
        if (CPU_CPUIDMaxFunction > 0) { /* do we have CPUID at all? */
            cpuid(0x80000000, a, b, c, d);
            /* EAX of leaf 0x80000000 is the highest extended leaf. */
            if (a >= 0x80000004) {
                cpuid(0x80000002, a, b, c, d);
                out = CPU_appendCPUIDRegisters(out, a, b, c, d);
                cpuid(0x80000003, a, b, c, d);
                out = CPU_appendCPUIDRegisters(out, a, b, c, d);
                cpuid(0x80000004, a, b, c, d);
                out = CPU_appendCPUIDRegisters(out, a, b, c, d);
            }
        }
        (void) out;
        if (!SDL_CPUName[0]) {
            SDL_strlcpy(SDL_CPUName, "Unknown", sizeof(SDL_CPUName));
        }
    }
    return SDL_CPUName;
}
#endif
#endif
804
805 int
SDL_GetCPUCacheLineSize(void)806 SDL_GetCPUCacheLineSize(void)
807 {
808 const char *cpuType = SDL_GetCPUType();
809 int a, b, c, d;
810 (void) a; (void) b; (void) c; (void) d;
811 if (SDL_strcmp(cpuType, "GenuineIntel") == 0 || SDL_strcmp(cpuType, "CentaurHauls") == 0 || SDL_strcmp(cpuType, " Shanghai ") == 0) {
812 cpuid(0x00000001, a, b, c, d);
813 return (((b >> 8) & 0xff) * 8);
814 } else if (SDL_strcmp(cpuType, "AuthenticAMD") == 0 || SDL_strcmp(cpuType, "HygonGenuine") == 0) {
815 cpuid(0x80000005, a, b, c, d);
816 return (c & 0xff);
817 } else {
818 /* Just make a guess here... */
819 return SDL_CACHELINE_SIZE;
820 }
821 }
822
823 static Uint32 SDL_CPUFeatures = 0xFFFFFFFF;
824 static Uint32 SDL_SIMDAlignment = 0xFFFFFFFF;
825
826 static Uint32
SDL_GetCPUFeatures(void)827 SDL_GetCPUFeatures(void)
828 {
829 if (SDL_CPUFeatures == 0xFFFFFFFF) {
830 CPU_calcCPUIDFeatures();
831 SDL_CPUFeatures = 0;
832 SDL_SIMDAlignment = sizeof(void *); /* a good safe base value */
833 if (CPU_haveRDTSC()) {
834 SDL_CPUFeatures |= CPU_HAS_RDTSC;
835 }
836 if (CPU_haveAltiVec()) {
837 SDL_CPUFeatures |= CPU_HAS_ALTIVEC;
838 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
839 }
840 if (CPU_haveMMX()) {
841 SDL_CPUFeatures |= CPU_HAS_MMX;
842 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
843 }
844 if (CPU_have3DNow()) {
845 SDL_CPUFeatures |= CPU_HAS_3DNOW;
846 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 8);
847 }
848 if (CPU_haveSSE()) {
849 SDL_CPUFeatures |= CPU_HAS_SSE;
850 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
851 }
852 if (CPU_haveSSE2()) {
853 SDL_CPUFeatures |= CPU_HAS_SSE2;
854 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
855 }
856 if (CPU_haveSSE3()) {
857 SDL_CPUFeatures |= CPU_HAS_SSE3;
858 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
859 }
860 if (CPU_haveSSE41()) {
861 SDL_CPUFeatures |= CPU_HAS_SSE41;
862 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
863 }
864 if (CPU_haveSSE42()) {
865 SDL_CPUFeatures |= CPU_HAS_SSE42;
866 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
867 }
868 if (CPU_haveAVX()) {
869 SDL_CPUFeatures |= CPU_HAS_AVX;
870 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
871 }
872 if (CPU_haveAVX2()) {
873 SDL_CPUFeatures |= CPU_HAS_AVX2;
874 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 32);
875 }
876 if (CPU_haveAVX512F()) {
877 SDL_CPUFeatures |= CPU_HAS_AVX512F;
878 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 64);
879 }
880 if (CPU_haveARMSIMD()) {
881 SDL_CPUFeatures |= CPU_HAS_ARM_SIMD;
882 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
883 }
884 if (CPU_haveNEON()) {
885 SDL_CPUFeatures |= CPU_HAS_NEON;
886 SDL_SIMDAlignment = SDL_max(SDL_SIMDAlignment, 16);
887 }
888 }
889 return SDL_CPUFeatures;
890 }
891
892 #define CPU_FEATURE_AVAILABLE(f) ((SDL_GetCPUFeatures() & f) ? SDL_TRUE : SDL_FALSE)
893
SDL_HasRDTSC(void)894 SDL_bool SDL_HasRDTSC(void)
895 {
896 return CPU_FEATURE_AVAILABLE(CPU_HAS_RDTSC);
897 }
898
899 SDL_bool
SDL_HasAltiVec(void)900 SDL_HasAltiVec(void)
901 {
902 return CPU_FEATURE_AVAILABLE(CPU_HAS_ALTIVEC);
903 }
904
905 SDL_bool
SDL_HasMMX(void)906 SDL_HasMMX(void)
907 {
908 return CPU_FEATURE_AVAILABLE(CPU_HAS_MMX);
909 }
910
911 SDL_bool
SDL_Has3DNow(void)912 SDL_Has3DNow(void)
913 {
914 return CPU_FEATURE_AVAILABLE(CPU_HAS_3DNOW);
915 }
916
917 SDL_bool
SDL_HasSSE(void)918 SDL_HasSSE(void)
919 {
920 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE);
921 }
922
923 SDL_bool
SDL_HasSSE2(void)924 SDL_HasSSE2(void)
925 {
926 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE2);
927 }
928
929 SDL_bool
SDL_HasSSE3(void)930 SDL_HasSSE3(void)
931 {
932 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE3);
933 }
934
935 SDL_bool
SDL_HasSSE41(void)936 SDL_HasSSE41(void)
937 {
938 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE41);
939 }
940
941 SDL_bool
SDL_HasSSE42(void)942 SDL_HasSSE42(void)
943 {
944 return CPU_FEATURE_AVAILABLE(CPU_HAS_SSE42);
945 }
946
947 SDL_bool
SDL_HasAVX(void)948 SDL_HasAVX(void)
949 {
950 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX);
951 }
952
953 SDL_bool
SDL_HasAVX2(void)954 SDL_HasAVX2(void)
955 {
956 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX2);
957 }
958
959 SDL_bool
SDL_HasAVX512F(void)960 SDL_HasAVX512F(void)
961 {
962 return CPU_FEATURE_AVAILABLE(CPU_HAS_AVX512F);
963 }
964
965 SDL_bool
SDL_HasARMSIMD(void)966 SDL_HasARMSIMD(void)
967 {
968 return CPU_FEATURE_AVAILABLE(CPU_HAS_ARM_SIMD);
969 }
970
971 SDL_bool
SDL_HasNEON(void)972 SDL_HasNEON(void)
973 {
974 return CPU_FEATURE_AVAILABLE(CPU_HAS_NEON);
975 }
976
/* Cached amount of system RAM in MiB; 0 means "not determined". */
static int SDL_SystemRAM = 0;

/*
 * Return the amount of physical RAM in MiB, or 0 if it cannot be
 * determined (or SDL_CPUINFO_DISABLED is defined).
 *
 * The value is cached once a non-zero result has been obtained.  Each
 * platform source is only consulted while no positive value is known, and
 * every source converts its result to MiB before storing it.
 */
int
SDL_GetSystemRAM(void)
{
    if (!SDL_SystemRAM) {
#ifndef SDL_CPUINFO_DISABLED
#if defined(HAVE_SYSCONF) && defined(_SC_PHYS_PAGES) && defined(_SC_PAGESIZE)
        if (SDL_SystemRAM <= 0) {
            /* pages * page size, widened to 64 bits before multiplying */
            SDL_SystemRAM = (int)((Sint64)sysconf(_SC_PHYS_PAGES) * sysconf(_SC_PAGESIZE) / (1024*1024));
        }
#endif
#ifdef HAVE_SYSCTLBYNAME
        if (SDL_SystemRAM <= 0) {
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__DragonFly__)
#ifdef HW_REALMEM
            int mib[2] = {CTL_HW, HW_REALMEM};
#else
            /* might only report up to 2 GiB */
            int mib[2] = {CTL_HW, HW_PHYSMEM};
#endif /* HW_REALMEM */
#else
            int mib[2] = {CTL_HW, HW_MEMSIZE};
#endif /* __FreeBSD__ || __FreeBSD_kernel__ */
            Uint64 memsize = 0;
            size_t len = sizeof(memsize);

            if (sysctl(mib, 2, &memsize, &len, NULL, 0) == 0) {
                SDL_SystemRAM = (int)(memsize / (1024*1024));
            }
        }
#endif
#ifdef __WIN32__
        if (SDL_SystemRAM <= 0) {
            MEMORYSTATUSEX stat;
            stat.dwLength = sizeof(stat);
            if (GlobalMemoryStatusEx(&stat)) {
                SDL_SystemRAM = (int)(stat.ullTotalPhys / (1024 * 1024));
            }
        }
#endif
#ifdef __OS2__
        if (SDL_SystemRAM <= 0) {
            Uint32 sysram = 0;
            DosQuerySysInfo(QSV_TOTPHYSMEM, QSV_TOTPHYSMEM, &sysram, 4);
            SDL_SystemRAM = (int) (sysram / 0x100000U);
        }
#endif
#ifdef __RISCOS__
        if (SDL_SystemRAM <= 0) {
            _kernel_swi_regs regs;
            regs.r[0] = 0x108;
            if (_kernel_swi(OS_Memory, &regs, &regs) == NULL) {
                /* r1 * r2 is widened to 64 bits first: the product is a
                   byte count and would overflow int from 2 GiB upwards. */
                SDL_SystemRAM = (int)((Uint64)regs.r[1] * (Uint64)regs.r[2] / (1024 * 1024));
            }
        }
#endif
#ifdef __VITA__
        if (SDL_SystemRAM <= 0) {
            /* Vita has 512MiB on SoC, that's split into 256MiB(+109MiB in extended memory mode) for app
               +26MiB of physically continuous memory, +112MiB of CDRAM(VRAM) + system reserved memory. */
            /* Stored in MiB like every other branch (this used to hold the
               byte count 536870912). */
            SDL_SystemRAM = 512;
        }
#endif
#endif
    }
    return SDL_SystemRAM;
}
1045
1046
1047 size_t
SDL_SIMDGetAlignment(void)1048 SDL_SIMDGetAlignment(void)
1049 {
1050 if (SDL_SIMDAlignment == 0xFFFFFFFF) {
1051 SDL_GetCPUFeatures(); /* make sure this has been calculated */
1052 }
1053 SDL_assert(SDL_SIMDAlignment != 0);
1054 return SDL_SIMDAlignment;
1055 }
1056
1057 void *
SDL_SIMDAlloc(const size_t len)1058 SDL_SIMDAlloc(const size_t len)
1059 {
1060 const size_t alignment = SDL_SIMDGetAlignment();
1061 const size_t padding = alignment - (len % alignment);
1062 const size_t padded = (padding != alignment) ? (len + padding) : len;
1063 Uint8 *retval = NULL;
1064 Uint8 *ptr = (Uint8 *) SDL_malloc(padded + alignment + sizeof (void *));
1065 if (ptr) {
1066 /* store the actual allocated pointer right before our aligned pointer. */
1067 retval = ptr + sizeof (void *);
1068 retval += alignment - (((size_t) retval) % alignment);
1069 *(((void **) retval) - 1) = ptr;
1070 }
1071 return retval;
1072 }
1073
1074 void *
SDL_SIMDRealloc(void * mem,const size_t len)1075 SDL_SIMDRealloc(void *mem, const size_t len)
1076 {
1077 const size_t alignment = SDL_SIMDGetAlignment();
1078 const size_t padding = alignment - (len % alignment);
1079 const size_t padded = (padding != alignment) ? (len + padding) : len;
1080 Uint8 *retval = (Uint8*) mem;
1081 void *oldmem = mem;
1082 size_t memdiff = 0, ptrdiff;
1083 Uint8 *ptr;
1084
1085 if (mem) {
1086 void **realptr = (void **) mem;
1087 realptr--;
1088 mem = *(((void **) mem) - 1);
1089
1090 /* Check the delta between the real pointer and user pointer */
1091 memdiff = ((size_t) oldmem) - ((size_t) mem);
1092 }
1093
1094 ptr = (Uint8 *) SDL_realloc(mem, padded + alignment + sizeof (void *));
1095
1096 if (ptr == NULL) {
1097 return NULL; /* Out of memory, bail! */
1098 }
1099
1100 /* Store the actual allocated pointer right before our aligned pointer. */
1101 retval = ptr + sizeof (void *);
1102 retval += alignment - (((size_t) retval) % alignment);
1103
1104 /* Make sure the delta is the same! */
1105 if (mem) {
1106 ptrdiff = ((size_t) retval) - ((size_t) ptr);
1107 if (memdiff != ptrdiff) { /* Delta has changed, copy to new offset! */
1108 oldmem = (void*) (((uintptr_t) ptr) + memdiff);
1109
1110 /* Even though the data past the old `len` is undefined, this is the
1111 * only length value we have, and it guarantees that we copy all the
1112 * previous memory anyhow.
1113 */
1114 SDL_memmove(retval, oldmem, len);
1115 }
1116 }
1117
1118 /* Actually store the allocated pointer, finally. */
1119 *(((void **) retval) - 1) = ptr;
1120 return retval;
1121 }
1122
/*
 * Release a block obtained from SDL_SIMDAlloc() / SDL_SIMDRealloc().
 * Passing NULL is a no-op.
 */
void
SDL_SIMDFree(void *ptr)
{
    if (ptr) {
        /* The address returned by the underlying allocator is stored in
           the pointer-sized slot just before the aligned block. */
        void **realptr = ((void **) ptr) - 1;
        SDL_free(*realptr);
    }
}
1132
1133
1134 #ifdef TEST_MAIN
1135
1136 #include <stdio.h>
1137
/* Stand-alone test program (TEST_MAIN builds only): prints the result of
   every CPU query in this file to stdout, one per line, and exits with 0. */
int
main()
{
    printf("CPU count: %d\n", SDL_GetCPUCount());
    printf("CPU type: %s\n", SDL_GetCPUType());
    printf("CPU name: %s\n", SDL_GetCPUName());
    printf("CacheLine size: %d\n", SDL_GetCPUCacheLineSize());
    /* The SDL_Has*() results are SDL_bool values printed as integers. */
    printf("RDTSC: %d\n", SDL_HasRDTSC());
    printf("Altivec: %d\n", SDL_HasAltiVec());
    printf("MMX: %d\n", SDL_HasMMX());
    printf("3DNow: %d\n", SDL_Has3DNow());
    printf("SSE: %d\n", SDL_HasSSE());
    printf("SSE2: %d\n", SDL_HasSSE2());
    printf("SSE3: %d\n", SDL_HasSSE3());
    printf("SSE4.1: %d\n", SDL_HasSSE41());
    printf("SSE4.2: %d\n", SDL_HasSSE42());
    printf("AVX: %d\n", SDL_HasAVX());
    printf("AVX2: %d\n", SDL_HasAVX2());
    printf("AVX-512F: %d\n", SDL_HasAVX512F());
    printf("ARM SIMD: %d\n", SDL_HasARMSIMD());
    printf("NEON: %d\n", SDL_HasNEON());
    printf("RAM: %d MB\n", SDL_GetSystemRAM());
    return 0;
}
1162
1163 #endif /* TEST_MAIN */
1164
1165 /* vi: set ts=4 sw=4 expandtab: */
1166