1 /* Copyright  (C) 2010-2016 The RetroArch team
2  *
3  * ---------------------------------------------------------------------------------------
4  * The following license statement only applies to this file (features_cpu.c).
5  * ---------------------------------------------------------------------------------------
6  *
7  * Permission is hereby granted, free of charge,
8  * to any person obtaining a copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation the rights to
10  * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software,
11  * and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED,
16  * INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
18  * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
19  * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
21  */
22 
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
25 
26 #if defined(_WIN32)
27 #include <direct.h>
28 #else
29 #include <unistd.h>
30 #endif
31 
32 #include <compat/strl.h>
33 #include <streams/file_stream.h>
34 #include <libretro.h>
35 #include <features/features_cpu.h>
36 
37 #if defined(_WIN32) && !defined(_XBOX)
38 #include <windows.h>
39 #endif
40 
41 #if defined(_XBOX360)
42 #include <PPCIntrinsics.h>
43 #elif defined(_POSIX_MONOTONIC_CLOCK) || defined(ANDROID) || defined(__QNX__)
44 /* POSIX_MONOTONIC_CLOCK is not being defined in Android headers despite support being present. */
45 #include <time.h>
46 #endif
47 
48 #if defined(__QNX__) && !defined(CLOCK_MONOTONIC)
49 #define CLOCK_MONOTONIC 2
50 #endif
51 
52 #if defined(PSP)
53 #include <sys/time.h>
54 #include <psprtc.h>
55 #endif
56 
57 #if defined(VITA)
58 #include <psp2/kernel/processmgr.h>
59 #include <psp2/rtc.h>
60 #endif
61 
62 #if defined(__PSL1GHT__)
63 #include <sys/time.h>
64 #endif
65 
66 #ifdef GEKKO
67 #include <ogc/lwp_watchdog.h>
68 #endif
69 
70 #if defined(_3DS)
71 #include <3ds/svc.h>
72 #include <3ds/os.h>
73 #endif
74 
75 /* iOS/OSX specific. Lacks clock_gettime(), so implement it. */
76 #ifdef __MACH__
77 #include <sys/time.h>
78 
79 #ifndef CLOCK_MONOTONIC
80 #define CLOCK_MONOTONIC 0
81 #endif
82 
83 #ifndef CLOCK_REALTIME
84 #define CLOCK_REALTIME 0
85 #endif
86 
/* Stand-in for clock_gettime() on Apple SDKs that do not ship it yet
 * (the real function is part of iOS 10 and later).
 *
 * The time is taken from gettimeofday(), so @clk_ik is not consulted.
 *
 * Returns: 0 on success, otherwise the non-zero gettimeofday() result
 * (in which case @t is left untouched). */
static int ra_clock_gettime(int clk_ik, struct timespec *t)
{
   struct timeval wall;
   const int status = gettimeofday(&wall, NULL);

   if (status == 0)
   {
      /* timeval carries microseconds, timespec wants nanoseconds. */
      t->tv_nsec = wall.tv_usec * 1000;
      t->tv_sec  = wall.tv_sec;
   }

   return status;
}
98 #endif
99 
100 #if defined(__MACH__) && __IPHONE_OS_VERSION_MAX_ALLOWED < 100000
101 #else
102 #define ra_clock_gettime clock_gettime
103 #endif
104 
105 
106 #ifdef EMSCRIPTEN
107 #include <emscripten.h>
108 #endif
109 
110 #if defined(BSD) || defined(__APPLE__)
111 #include <sys/sysctl.h>
112 #endif
113 
114 #include <string.h>
115 
116 /**
117  * cpu_features_get_perf_counter:
118  *
119  * Gets performance counter.
120  *
121  * Returns: performance counter.
122  **/
cpu_features_get_perf_counter(void)123 retro_perf_tick_t cpu_features_get_perf_counter(void)
124 {
125    retro_perf_tick_t time_ticks = 0;
126 #if defined(_WIN32)
127    long tv_sec, tv_usec;
128    static const unsigned __int64 epoch = 11644473600000000ULL;
129    FILETIME file_time;
130    SYSTEMTIME system_time;
131    ULARGE_INTEGER ularge;
132 
133    GetSystemTime(&system_time);
134    SystemTimeToFileTime(&system_time, &file_time);
135    ularge.LowPart  = file_time.dwLowDateTime;
136    ularge.HighPart = file_time.dwHighDateTime;
137 
138    tv_sec     = (long)((ularge.QuadPart - epoch) / 10000000L);
139    tv_usec    = (long)(system_time.wMilliseconds * 1000);
140    time_ticks = (1000000 * tv_sec + tv_usec);
141 #elif defined(_POSIX_MONOTONIC_CLOCK) || defined(__QNX__) || defined(ANDROID) || defined(__MACH__) || defined(__PSL1GHT__)
142    struct timespec tv = {0};
143    if (ra_clock_gettime(CLOCK_MONOTONIC, &tv) == 0)
144       time_ticks = (retro_perf_tick_t)tv.tv_sec * 1000000000 +
145          (retro_perf_tick_t)tv.tv_nsec;
146 
147 #elif defined(__GNUC__) && defined(__i386__) || defined(__i486__) || defined(__i686__)
148    __asm__ volatile ("rdtsc" : "=A" (time_ticks));
149 #elif defined(__GNUC__) && defined(__x86_64__)
150    unsigned a, d;
151    __asm__ volatile ("rdtsc" : "=a" (a), "=d" (d));
152    time_ticks = (retro_perf_tick_t)a | ((retro_perf_tick_t)d << 32);
153 #elif defined(__ARM_ARCH_6__)
154    __asm__ volatile( "mrc p15, 0, %0, c9, c13, 0" : "=r"(time_ticks) );
155 #elif defined(_XBOX360) || defined(__powerpc__) || defined(__ppc__) || defined(__POWERPC__)
156    time_ticks = __mftb();
157 #elif defined(GEKKO)
158    time_ticks = gettime();
159 #elif defined(PSP)
160    sceRtcGetCurrentTick((uint64_t*)&time_ticks);
161 #elif defined(VITA)
162    sceRtcGetCurrentTick((SceRtcTick*)&time_ticks);
163 #elif defined(_3DS)
164    time_ticks = svcGetSystemTick();
165 #elif defined(__mips__)
166    struct timeval tv;
167    gettimeofday(&tv,NULL);
168    time_ticks = (1000000 * tv.tv_sec + tv.tv_usec);
169 #endif
170 
171    return time_ticks;
172 }
173 
174 /**
175  * cpu_features_get_time_usec:
176  *
177  * Gets time in microseconds.
178  *
179  * Returns: time in microseconds.
180  **/
cpu_features_get_time_usec(void)181 retro_time_t cpu_features_get_time_usec(void)
182 {
183 #if defined(_WIN32)
184    static LARGE_INTEGER freq;
185    LARGE_INTEGER count;
186 
187    /* Frequency is guaranteed to not change. */
188    if (!freq.QuadPart && !QueryPerformanceFrequency(&freq))
189       return 0;
190 
191    if (!QueryPerformanceCounter(&count))
192       return 0;
193    return count.QuadPart * 1000000 / freq.QuadPart;
194 #elif defined(GEKKO)
195    return ticks_to_microsecs(gettime());
196 #elif defined(_POSIX_MONOTONIC_CLOCK) || defined(__QNX__) || defined(ANDROID) || defined(__MACH__)
197    struct timespec tv = {0};
198    if (ra_clock_gettime(CLOCK_MONOTONIC, &tv) < 0)
199       return 0;
200    return tv.tv_sec * INT64_C(1000000) + (tv.tv_nsec + 500) / 1000;
201 #elif defined(EMSCRIPTEN)
202    return emscripten_get_now() * 1000;
203 #elif defined(__mips__)
204    struct timeval tv;
205    gettimeofday(&tv,NULL);
206    return (1000000 * tv.tv_sec + tv.tv_usec);
207 #elif defined(_3DS)
208    return osGetTime() * 1000;
209 #elif defined(VITA)
210    return sceKernelGetProcessTimeWide();
211 #else
212 #error "Your platform does not have a timer function implemented in cpu_features_get_time_usec(). Cannot continue."
213 #endif
214 }
215 
216 #if defined(__x86_64__) || defined(__i386__) || defined(__i486__) || defined(__i686__)
217 #define CPU_X86
218 #endif
219 
220 #if defined(_MSC_VER) && !defined(_XBOX)
221 #if (_MSC_VER > 1310)
222 #include <intrin.h>
223 #endif
224 #endif
225 
226 #if defined(CPU_X86) && !defined(__MACH__)
/**
 * x86_cpuid:
 * @func  : CPUID leaf to query (loaded into EAX).
 * @flags : receives the resulting EAX, EBX, ECX and EDX values,
 *          in that order.
 *
 * Executes the CPUID instruction for leaf @func. On the GCC path ECX is
 * loaded with 0, so leaves that are indexed by a sub-leaf (such as
 * leaf 7) always report sub-leaf 0 instead of whatever happened to be
 * in the register.
 *
 * With an unknown compiler a message is printed and @flags is zeroed.
 **/
void x86_cpuid(int func, int flags[4])
{
   /* On Android, we compile RetroArch with PIC, and we
    * are not allowed to clobber the ebx register. */
#ifdef __x86_64__
#define REG_b "rbx"
#define REG_S "rsi"
#else
#define REG_b "ebx"
#define REG_S "esi"
#endif

#if defined(__GNUC__)
   /* Park the caller's (e/r)bx in (e/r)si across cpuid, then swap back:
    * afterwards (e/r)si holds cpuid's EBX result and (e/r)bx is restored. */
   __asm__ volatile (
         "mov %%" REG_b ", %%" REG_S "\n"
         "cpuid\n"
         "xchg %%" REG_b ", %%" REG_S "\n"
         : "=a"(flags[0]), "=S"(flags[1]), "=c"(flags[2]), "=d"(flags[3])
         : "a"(func), "c"(0));
#elif defined(_MSC_VER)
   __cpuid(flags, func);
#else
   printf("Unknown compiler. Cannot check CPUID with inline assembly.\n");
   memset(flags, 0, 4 * sizeof(int));
#endif
}
253 
/* Reads the extended control register selected by @idx via XGETBV and
 * returns it as one 64-bit value (EDX in the upper half, EAX in the
 * lower half).
 *
 * Only runs on i686 and above, so callers must check for support
 * before calling this. With an unknown compiler a message is printed
 * and 0 is returned. */
static uint64_t xgetbv_x86(uint32_t idx)
{
#if defined(__GNUC__)
   uint32_t lo, hi;
   uint64_t xcr;

   /* Emit the opcode bytes by hand: older GCC versions (Apple's GCC
    * for example) do not know the xgetbv mnemonic. */
   __asm__ volatile (
         ".byte 0x0f, 0x01, 0xd0\n"
         : "=a"(lo), "=d"(hi)
         : "c"(idx));

   xcr   = hi;
   xcr <<= 32;
   xcr  |= lo;
   return xcr;
#elif _MSC_FULL_VER >= 160040219
   /* The intrinsic is only available from 2010 SP1 onwards. */
   return _xgetbv(idx);
#else
   printf("Unknown compiler. Cannot check xgetbv bits.\n");
   return 0;
#endif
}
275 #endif
276 
277 #if defined(__ARM_NEON__)
/* Switches the floating point unit into RunFast mode by rewriting
 * FPSCR: the current value is masked with 0x04086060 and then OR-ed
 * with 0x03000000. Enables flush-to-zero and some floating point
 * optimizations. */
static void arm_enable_runfast_mode(void)
{
   static const unsigned x = 0x04086060;
   static const unsigned y = 0x03000000;
   int r;
   /* %0 is written by the first instruction, before %1 and %2 have been
    * read, so it must be an earlyclobber ("=&r") operand. Without the
    * '&' the compiler is free to give it the same register as one of
    * the inputs. */
   __asm__ volatile(
         "fmrx %0, fpscr   \n\t" /* r0 = FPSCR */
         "and  %0, %0, %1  \n\t" /* r0 = r0 & 0x04086060 */
         "orr  %0, %0, %2  \n\t" /* r0 = r0 | 0x03000000 */
         "fmxr fpscr, %0   \n\t" /* FPSCR = r0 */
         : "=&r"(r)
         : "r"(x), "r"(y)
        );
}
294 #endif
295 
296 #if defined(__linux__) && !defined(CPU_X86)
check_arm_cpu_feature(const char * feature)297 static unsigned char check_arm_cpu_feature(const char* feature)
298 {
299    char line[1024];
300    unsigned char status = 0;
301    RFILE *fp = filestream_open("/proc/cpuinfo", RFILE_MODE_READ_TEXT, -1);
302 
303    if (!fp)
304       return 0;
305 
306    while (filestream_gets(fp, line, sizeof(line)) != NULL)
307    {
308       if (strncmp(line, "Features\t: ", 11))
309          continue;
310 
311       if (strstr(line + 11, feature) != NULL)
312          status = 1;
313 
314       break;
315    }
316 
317    filestream_close(fp);
318 
319    return status;
320 }
321 
322 #if !defined(_SC_NPROCESSORS_ONLN)
/* Parse a decimal integer starting from 'input', but not going further
 * than 'limit'. Return the value into '*result'.
 *
 * NOTE: Does not skip over leading spaces, or deal with sign characters.
 * NOTE: Values that do not fit into an int saturate at INT_MAX instead
 *       of overflowing (signed overflow is undefined behavior).
 *
 * The function returns NULL in case of error (no digit found at 'input';
 * '*result' is left untouched), or the new position after the decimal
 * number in case of success (which will always be <= 'limit').
 */
static const char *parse_decimal(const char* input,
      const char* limit, int* result)
{
    const char* p = input;
    int       val = 0;

    while (p < limit)
    {
        int d = (*p - '0');
        /* The unsigned compare rejects both d < 0 and d > 9 at once. */
        if ((unsigned)d >= 10U)
            break;
        /* Would val*10 + d exceed INT_MAX? Then clamp. */
        if (val > (INT_MAX - d) / 10)
            val = INT_MAX;
        else
            val = val*10 + d;
        p++;
    }
    if (p == input)
        return NULL;

    *result = val;
    return p;
}
353 
354 /* Parse a textual list of cpus and store the result inside a CpuList object.
355  * Input format is the following:
356  * - comma-separated list of items (no spaces)
357  * - each item is either a single decimal number (cpu index), or a range made
358  *   of two numbers separated by a single dash (-). Ranges are inclusive.
359  *
360  * Examples:   0
361  *             2,4-127,128-143
362  *             0-1
363  */
cpulist_parse(CpuList * list,char ** buf,ssize_t length)364 static void cpulist_parse(CpuList* list, char **buf, ssize_t length)
365 {
366    const char* p   = (const char*)buf;
367    const char* end = p + length;
368 
369    /* NOTE: the input line coming from sysfs typically contains a
370     * trailing newline, so take care of it in the code below
371     */
372    while (p < end && *p != '\n')
373    {
374       int val, start_value, end_value;
375       /* Find the end of current item, and put it into 'q' */
376       const char *q = (const char*)memchr(p, ',', end-p);
377 
378       if (!q)
379          q = end;
380 
381       /* Get first value */
382       p = parse_decimal(p, q, &start_value);
383       if (p == NULL)
384          return;
385 
386       end_value = start_value;
387 
388       /* If we're not at the end of the item, expect a dash and
389        * and integer; extract end value.
390        */
391       if (p < q && *p == '-')
392       {
393          p = parse_decimal(p+1, q, &end_value);
394          if (p == NULL)
395             return;
396       }
397 
398       /* Set bits CPU list bits */
399       for (val = start_value; val <= end_value; val++)
400       {
401          if ((unsigned)val < 32)
402             list->mask |= (uint32_t)(1U << val);
403       }
404 
405       /* Jump to next item */
406       p = q;
407       if (p < end)
408          p++;
409    }
410 }
411 
412 /* Read a CPU list from one sysfs file */
cpulist_read_from(CpuList * list,const char * filename)413 static void cpulist_read_from(CpuList* list, const char* filename)
414 {
415    ssize_t length;
416    char *buf  = NULL;
417 
418    list->mask = 0;
419 
420    if (filestream_read_file(filename, (void**)&buf, &length) != 1)
421       return;
422 
423    cpulist_parse(list, &buf, length);
424    if (buf)
425       free(buf);
426    buf = NULL;
427 }
428 #endif
429 
430 #endif
431 
/**
 * cpu_features_get_core_amount:
 *
 * Gets the amount of available CPU cores.
 *
 * Consoles report a fixed count; other platforms ask the operating
 * system. Whenever a query yields nothing usable, 1 is reported.
 *
 * Returns: amount of CPU cores available (always at least 1 on the
 * queried platforms).
 **/
unsigned cpu_features_get_core_amount(void)
{
#if defined(_WIN32) && !defined(_XBOX)
   /* Win32 */
   SYSTEM_INFO info;
   GetSystemInfo(&info);
   return info.dwNumberOfProcessors;
#elif defined(GEKKO) || defined(PSP)
   return 1;
#elif defined(VITA)
   return 4;
#elif defined(_3DS)
   return 1;
#elif defined(_SC_NPROCESSORS_ONLN)
   /* Linux, most UNIX-likes. */
   long online = sysconf(_SC_NPROCESSORS_ONLN);
   return (online > 0) ? (unsigned)online : 1U;
#elif defined(BSD) || defined(__APPLE__)
   /* BSD: prefer the number of available CPUs and fall back
    * to the total CPU count. */
   int mib[4];
   int count   = 0;
   size_t size = sizeof(count);

   mib[0] = CTL_HW;
   mib[1] = HW_AVAILCPU;
   sysctl(mib, 2, &count, &size, NULL, 0);
   if (count >= 1)
      return count;

   mib[1] = HW_NCPU;
   sysctl(mib, 2, &count, &size, NULL, 0);
   return (count >= 1) ? count : 1;
#elif defined(__linux__)
   CpuList present[1];
   CpuList possible[1];
   int usable;

   cpulist_read_from(present, "/sys/devices/system/cpu/present");
   cpulist_read_from(possible, "/sys/devices/system/cpu/possible");

   /* A core only counts when it shows up in both sets, i.e. when the
    * kernel can actually use it on this device. */
   usable = __builtin_popcount(present->mask & possible->mask);

   return (usable != 0) ? usable : 1;
#elif defined(_XBOX360)
   return 3;
#else
   /* No idea, assume single core. */
   return 1;
#endif
}
502 
503 /* According to http://en.wikipedia.org/wiki/CPUID */
504 #define VENDOR_INTEL_b  0x756e6547
505 #define VENDOR_INTEL_c  0x6c65746e
506 #define VENDOR_INTEL_d  0x49656e69
507 
508 /**
509  * cpu_features_get:
510  *
511  * Gets CPU features..
512  *
513  * Returns: bitmask of all CPU features available.
514  **/
cpu_features_get(void)515 uint64_t cpu_features_get(void)
516 {
517    int flags[4];
518    int vendor_shuffle[3];
519    char vendor[13];
520    size_t len          = 0;
521    uint64_t cpu_flags  = 0;
522    uint64_t cpu        = 0;
523    unsigned max_flag   = 0;
524 #if defined(CPU_X86) && !defined(__MACH__)
525    int vendor_is_intel = 0;
526    const int avx_flags = (1 << 27) | (1 << 28);
527 #endif
528 
529    char buf[sizeof(" MMX MMXEXT SSE SSE2 SSE3 SSSE3 SS4 SSE4.2 AES AVX AVX2 NEON VMX VMX128 VFPU PS")];
530 
531    memset(buf, 0, sizeof(buf));
532 
533    (void)len;
534    (void)cpu_flags;
535    (void)flags;
536    (void)max_flag;
537    (void)vendor;
538    (void)vendor_shuffle;
539 
540 #if defined(__MACH__)
541    len     = sizeof(size_t);
542    if (sysctlbyname("hw.optional.mmx", NULL, &len, NULL, 0) == 0)
543    {
544       cpu |= RETRO_SIMD_MMX;
545       cpu |= RETRO_SIMD_MMXEXT;
546    }
547 
548    len            = sizeof(size_t);
549    if (sysctlbyname("hw.optional.floatingpoint", NULL, &len, NULL, 0) == 0)
550    {
551       cpu |= RETRO_SIMD_CMOV;
552    }
553 
554    len            = sizeof(size_t);
555    if (sysctlbyname("hw.optional.sse", NULL, &len, NULL, 0) == 0)
556       cpu |= RETRO_SIMD_SSE;
557 
558    len            = sizeof(size_t);
559    if (sysctlbyname("hw.optional.sse2", NULL, &len, NULL, 0) == 0)
560       cpu |= RETRO_SIMD_SSE2;
561 
562    len            = sizeof(size_t);
563    if (sysctlbyname("hw.optional.sse3", NULL, &len, NULL, 0) == 0)
564       cpu |= RETRO_SIMD_SSE3;
565 
566    len            = sizeof(size_t);
567    if (sysctlbyname("hw.optional.supplementalsse3", NULL, &len, NULL, 0) == 0)
568       cpu |= RETRO_SIMD_SSSE3;
569 
570    len            = sizeof(size_t);
571    if (sysctlbyname("hw.optional.sse4_1", NULL, &len, NULL, 0) == 0)
572       cpu |= RETRO_SIMD_SSE4;
573 
574    len            = sizeof(size_t);
575    if (sysctlbyname("hw.optional.sse4_2", NULL, &len, NULL, 0) == 0)
576       cpu |= RETRO_SIMD_SSE42;
577 
578    len            = sizeof(size_t);
579    if (sysctlbyname("hw.optional.aes", NULL, &len, NULL, 0) == 0)
580       cpu |= RETRO_SIMD_AES;
581 
582    len            = sizeof(size_t);
583    if (sysctlbyname("hw.optional.avx1_0", NULL, &len, NULL, 0) == 0)
584       cpu |= RETRO_SIMD_AVX;
585 
586    len            = sizeof(size_t);
587    if (sysctlbyname("hw.optional.avx2_0", NULL, &len, NULL, 0) == 0)
588       cpu |= RETRO_SIMD_AVX2;
589 
590    len            = sizeof(size_t);
591    if (sysctlbyname("hw.optional.altivec", NULL, &len, NULL, 0) == 0)
592       cpu |= RETRO_SIMD_VMX;
593 
594    len            = sizeof(size_t);
595    if (sysctlbyname("hw.optional.neon", NULL, &len, NULL, 0) == 0)
596       cpu |= RETRO_SIMD_NEON;
597 
598 #elif defined(CPU_X86)
599    (void)avx_flags;
600 
601    x86_cpuid(0, flags);
602    vendor_shuffle[0] = flags[1];
603    vendor_shuffle[1] = flags[3];
604    vendor_shuffle[2] = flags[2];
605 
606    vendor[0]         = '\0';
607    memcpy(vendor, vendor_shuffle, sizeof(vendor_shuffle));
608 
609    /* printf("[CPUID]: Vendor: %s\n", vendor); */
610 
611    vendor_is_intel = (
612          flags[1] == VENDOR_INTEL_b &&
613          flags[2] == VENDOR_INTEL_c &&
614          flags[3] == VENDOR_INTEL_d);
615 
616    max_flag = flags[0];
617    if (max_flag < 1) /* Does CPUID not support func = 1? (unlikely ...) */
618       return 0;
619 
620    x86_cpuid(1, flags);
621 
622    if (flags[3] & (1 << 15))
623       cpu |= RETRO_SIMD_CMOV;
624 
625    if (flags[3] & (1 << 23))
626       cpu |= RETRO_SIMD_MMX;
627 
628    if (flags[3] & (1 << 25))
629    {
630       /* SSE also implies MMXEXT (according to FFmpeg source). */
631       cpu |= RETRO_SIMD_SSE;
632       cpu |= RETRO_SIMD_MMXEXT;
633    }
634 
635 
636    if (flags[3] & (1 << 26))
637       cpu |= RETRO_SIMD_SSE2;
638 
639    if (flags[2] & (1 << 0))
640       cpu |= RETRO_SIMD_SSE3;
641 
642    if (flags[2] & (1 << 9))
643       cpu |= RETRO_SIMD_SSSE3;
644 
645    if (flags[2] & (1 << 19))
646       cpu |= RETRO_SIMD_SSE4;
647 
648    if (flags[2] & (1 << 20))
649       cpu |= RETRO_SIMD_SSE42;
650 
651    if ((flags[2] & (1 << 23)))
652       cpu |= RETRO_SIMD_POPCNT;
653 
654    if (vendor_is_intel && (flags[2] & (1 << 22)))
655       cpu |= RETRO_SIMD_MOVBE;
656 
657    if (flags[2] & (1 << 25))
658       cpu |= RETRO_SIMD_AES;
659 
660 
661    /* Must only perform xgetbv check if we have
662     * AVX CPU support (guaranteed to have at least i686). */
663    if (((flags[2] & avx_flags) == avx_flags)
664          && ((xgetbv_x86(0) & 0x6) == 0x6))
665       cpu |= RETRO_SIMD_AVX;
666 
667    if (max_flag >= 7)
668    {
669       x86_cpuid(7, flags);
670       if (flags[1] & (1 << 5))
671          cpu |= RETRO_SIMD_AVX2;
672    }
673 
674    x86_cpuid(0x80000000, flags);
675    max_flag = flags[0];
676    if (max_flag >= 0x80000001u)
677    {
678       x86_cpuid(0x80000001, flags);
679       if (flags[3] & (1 << 23))
680          cpu |= RETRO_SIMD_MMX;
681       if (flags[3] & (1 << 22))
682          cpu |= RETRO_SIMD_MMXEXT;
683    }
684 #elif defined(__linux__)
685    if (check_arm_cpu_feature("neon"))
686    {
687       cpu |= RETRO_SIMD_NEON;
688 #ifdef __ARM_NEON__
689       arm_enable_runfast_mode();
690 #endif
691    }
692 
693    if (check_arm_cpu_feature("vfpv3"))
694       cpu |= RETRO_SIMD_VFPV3;
695 
696    if (check_arm_cpu_feature("vfpv4"))
697       cpu |= RETRO_SIMD_VFPV4;
698 
699    if (check_arm_cpu_feature("asimd"))
700    {
701       cpu |= RETRO_SIMD_ASIMD;
702 #ifdef __ARM_NEON__
703       cpu |= RETRO_SIMD_NEON;
704       arm_enable_runfast_mode();
705 #endif
706    }
707 
708 #if 0
709     check_arm_cpu_feature("swp");
710     check_arm_cpu_feature("half");
711     check_arm_cpu_feature("thumb");
712     check_arm_cpu_feature("fastmult");
713     check_arm_cpu_feature("vfp");
714     check_arm_cpu_feature("edsp");
715     check_arm_cpu_feature("thumbee");
716     check_arm_cpu_feature("tls");
717     check_arm_cpu_feature("idiva");
718     check_arm_cpu_feature("idivt");
719 #endif
720 
721 #elif defined(__ARM_NEON__)
722    cpu |= RETRO_SIMD_NEON;
723    arm_enable_runfast_mode();
724 #elif defined(__ALTIVEC__)
725    cpu |= RETRO_SIMD_VMX;
726 #elif defined(XBOX360)
727    cpu |= RETRO_SIMD_VMX128;
728 #elif defined(PSP)
729    cpu |= RETRO_SIMD_VFPU;
730 #elif defined(GEKKO)
731    cpu |= RETRO_SIMD_PS;
732 #endif
733 
734    if (cpu & RETRO_SIMD_MMX)    strlcat(buf, " MMX", sizeof(buf));
735    if (cpu & RETRO_SIMD_MMXEXT) strlcat(buf, " MMXEXT", sizeof(buf));
736    if (cpu & RETRO_SIMD_SSE)    strlcat(buf, " SSE", sizeof(buf));
737    if (cpu & RETRO_SIMD_SSE2)   strlcat(buf, " SSE2", sizeof(buf));
738    if (cpu & RETRO_SIMD_SSE3)   strlcat(buf, " SSE3", sizeof(buf));
739    if (cpu & RETRO_SIMD_SSSE3)  strlcat(buf, " SSSE3", sizeof(buf));
740    if (cpu & RETRO_SIMD_SSE4)   strlcat(buf, " SSE4", sizeof(buf));
741    if (cpu & RETRO_SIMD_SSE42)  strlcat(buf, " SSE4.2", sizeof(buf));
742    if (cpu & RETRO_SIMD_AES)    strlcat(buf, " AES", sizeof(buf));
743    if (cpu & RETRO_SIMD_AVX)    strlcat(buf, " AVX", sizeof(buf));
744    if (cpu & RETRO_SIMD_AVX2)   strlcat(buf, " AVX2", sizeof(buf));
745    if (cpu & RETRO_SIMD_NEON)   strlcat(buf, " NEON", sizeof(buf));
746    if (cpu & RETRO_SIMD_VFPV3)  strlcat(buf, " VFPv3", sizeof(buf));
747    if (cpu & RETRO_SIMD_VFPV4)  strlcat(buf, " VFPv4", sizeof(buf));
748    if (cpu & RETRO_SIMD_VMX)    strlcat(buf, " VMX", sizeof(buf));
749    if (cpu & RETRO_SIMD_VMX128) strlcat(buf, " VMX128", sizeof(buf));
750    if (cpu & RETRO_SIMD_VFPU)   strlcat(buf, " VFPU", sizeof(buf));
751    if (cpu & RETRO_SIMD_PS)     strlcat(buf, " PS", sizeof(buf));
752    if (cpu & RETRO_SIMD_ASIMD)  strlcat(buf, " ASIMD", sizeof(buf));
753 
754    return cpu;
755 }
756