1 // Copyright 2011 Google Inc. All Rights Reserved.
2 //
3 // Use of this source code is governed by a BSD-style license
4 // that can be found in the COPYING file in the root of the source
5 // tree. An additional intellectual property rights grant can be found
6 // in the file PATENTS. All contributing project authors may
7 // be found in the AUTHORS file in the root of the source tree.
8 // -----------------------------------------------------------------------------
9 //
10 // CPU detection
11 //
12 // Author: Christian Duvivier (cduvivier@google.com)
13 
14 #include "src/dsp/dsp.h"
15 
16 #if defined(WEBP_HAVE_NEON_RTCD)
17 #include <stdio.h>
18 #include <string.h>
19 #endif
20 
21 #if defined(WEBP_ANDROID_NEON)
22 #include <cpu-features.h>
23 #endif
24 
25 //------------------------------------------------------------------------------
26 // SSE2 detection.
27 //
28 
29 // apple/darwin gcc-4.0.1 defines __PIC__, but not __pic__ with -fPIC.
30 #if (defined(__pic__) || defined(__PIC__)) && defined(__i386__)
GetCPUInfo(int cpu_info[4],int info_type)31 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
32   __asm__ volatile (
33     "mov %%ebx, %%edi\n"
34     "cpuid\n"
35     "xchg %%edi, %%ebx\n"
36     : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
37     : "a"(info_type), "c"(0));
38 }
39 #elif defined(__x86_64__) && \
40       (defined(__code_model_medium__) || defined(__code_model_large__)) && \
41       defined(__PIC__)
GetCPUInfo(int cpu_info[4],int info_type)42 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
43   __asm__ volatile (
44     "xchg{q}\t{%%rbx}, %q1\n"
45     "cpuid\n"
46     "xchg{q}\t{%%rbx}, %q1\n"
47     : "=a"(cpu_info[0]), "=&r"(cpu_info[1]), "=c"(cpu_info[2]),
48       "=d"(cpu_info[3])
49     : "a"(info_type), "c"(0));
50 }
51 #elif defined(__i386__) || defined(__x86_64__)
GetCPUInfo(int cpu_info[4],int info_type)52 static WEBP_INLINE void GetCPUInfo(int cpu_info[4], int info_type) {
53   __asm__ volatile (
54     "cpuid\n"
55     : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
56     : "a"(info_type), "c"(0));
57 }
58 #elif defined(_MSC_VER) && (defined(_M_X64) || defined(_M_IX86))
59 
60 #if defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 150030729  // >= VS2008 SP1
61 #include <intrin.h>
62 #define GetCPUInfo(info, type) __cpuidex(info, type, 0)  // set ecx=0
63 #define WEBP_HAVE_MSC_CPUID
64 #elif _MSC_VER > 1310
65 #include <intrin.h>
66 #define GetCPUInfo __cpuid
67 #define WEBP_HAVE_MSC_CPUID
68 #endif
69 
70 #endif
71 
72 // NaCl has no support for xgetbv or the raw opcode.
73 #if !defined(__native_client__) && (defined(__i386__) || defined(__x86_64__))
xgetbv(void)74 static WEBP_INLINE uint64_t xgetbv(void) {
75   const uint32_t ecx = 0;
76   uint32_t eax, edx;
77   // Use the raw opcode for xgetbv for compatibility with older toolchains.
78   __asm__ volatile (
79     ".byte 0x0f, 0x01, 0xd0\n"
80     : "=a"(eax), "=d"(edx) : "c" (ecx));
81   return ((uint64_t)edx << 32) | eax;
82 }
83 #elif (defined(_M_X64) || defined(_M_IX86)) && \
84       defined(_MSC_FULL_VER) && _MSC_FULL_VER >= 160040219  // >= VS2010 SP1
85 #include <immintrin.h>
86 #define xgetbv() _xgetbv(0)
87 #elif defined(_MSC_VER) && defined(_M_IX86)
xgetbv(void)88 static WEBP_INLINE uint64_t xgetbv(void) {
89   uint32_t eax_, edx_;
90   __asm {
91     xor ecx, ecx  // ecx = 0
92     // Use the raw opcode for xgetbv for compatibility with older toolchains.
93     __asm _emit 0x0f __asm _emit 0x01 __asm _emit 0xd0
94     mov eax_, eax
95     mov edx_, edx
96   }
97   return ((uint64_t)edx_ << 32) | eax_;
98 }
99 #else
100 #define xgetbv() 0U  // no AVX for older x64 or unrecognized toolchains.
101 #endif
102 
103 #if defined(__i386__) || defined(__x86_64__) || defined(WEBP_HAVE_MSC_CPUID)
104 
105 // helper function for run-time detection of slow SSSE3 platforms
// Returns 1 if 'info' (eax from cpuid leaf 1, as passed by x86CPUInfo())
// identifies a family-6 model known for long latencies on the bsr instruction
// (10/16 cycles rather than 2) and on some SSSE3 instructions such as pshufb;
// returns 0 otherwise. See the Intel 64 and IA-32 Architectures Optimization
// Reference Manual.
static int CheckSlowModel(int info) {
  const uint32_t family = (info >> 8) & 0xf;
  // Bits 16..19 become the high nibble of the model, bits 4..7 the low nibble.
  const uint32_t model = ((info >> 4) & 0xf) | ((info & 0xf0000) >> 12);
  if (family != 0x06) return 0;
  switch (model) {
    case 0x37: case 0x4a: case 0x4d:  // Silvermont Microarchitecture
    case 0x1c: case 0x26: case 0x27:  // Atom Microarchitecture
      return 1;
    default:
      return 0;
  }
}
124 
x86CPUInfo(CPUFeature feature)125 static int x86CPUInfo(CPUFeature feature) {
126   int max_cpuid_value;
127   int cpu_info[4];
128   int is_intel = 0;
129 
130   // get the highest feature value cpuid supports
131   GetCPUInfo(cpu_info, 0);
132   max_cpuid_value = cpu_info[0];
133   if (max_cpuid_value < 1) {
134     return 0;
135   } else {
136     const int VENDOR_ID_INTEL_EBX = 0x756e6547;  // uneG
137     const int VENDOR_ID_INTEL_EDX = 0x49656e69;  // Ieni
138     const int VENDOR_ID_INTEL_ECX = 0x6c65746e;  // letn
139     is_intel = (cpu_info[1] == VENDOR_ID_INTEL_EBX &&
140                 cpu_info[2] == VENDOR_ID_INTEL_ECX &&
141                 cpu_info[3] == VENDOR_ID_INTEL_EDX);    // genuine Intel?
142   }
143 
144   GetCPUInfo(cpu_info, 1);
145   if (feature == kSSE2) {
146     return !!(cpu_info[3] & (1 << 26));
147   }
148   if (feature == kSSE3) {
149     return !!(cpu_info[2] & (1 << 0));
150   }
151   if (feature == kSlowSSSE3) {
152     if (is_intel && (cpu_info[2] & (1 << 9))) {   // SSSE3?
153       return CheckSlowModel(cpu_info[0]);
154     }
155     return 0;
156   }
157 
158   if (feature == kSSE4_1) {
159     return !!(cpu_info[2] & (1 << 19));
160   }
161   if (feature == kAVX) {
162     // bits 27 (OSXSAVE) & 28 (256-bit AVX)
163     if ((cpu_info[2] & 0x18000000) == 0x18000000) {
164       // XMM state and YMM state enabled by the OS.
165       return (xgetbv() & 0x6) == 0x6;
166     }
167   }
168   if (feature == kAVX2) {
169     if (x86CPUInfo(kAVX) && max_cpuid_value >= 7) {
170       GetCPUInfo(cpu_info, 7);
171       return !!(cpu_info[1] & (1 << 5));
172     }
173   }
174   return 0;
175 }
176 VP8CPUInfo VP8GetCPUInfo = x86CPUInfo;
177 #elif defined(WEBP_ANDROID_NEON)  // NB: needs to be before generic NEON test.
AndroidCPUInfo(CPUFeature feature)178 static int AndroidCPUInfo(CPUFeature feature) {
179   const AndroidCpuFamily cpu_family = android_getCpuFamily();
180   const uint64_t cpu_features = android_getCpuFeatures();
181   if (feature == kNEON) {
182     return cpu_family == ANDROID_CPU_FAMILY_ARM &&
183            (cpu_features & ANDROID_CPU_ARM_FEATURE_NEON) != 0;
184   }
185   return 0;
186 }
187 VP8CPUInfo VP8GetCPUInfo = AndroidCPUInfo;
188 #elif defined(EMSCRIPTEN) // also needs to be before generic NEON test
189 // Use compile flags as an indicator of SIMD support instead of a runtime check.
wasmCPUInfo(CPUFeature feature)190 static int wasmCPUInfo(CPUFeature feature) {
191   switch (feature) {
192 #ifdef WEBP_HAVE_SSE2
193     case kSSE2:
194       return 1;
195 #endif
196 #ifdef WEBP_HAVE_SSE41
197     case kSSE3:
198     case kSlowSSSE3:
199     case kSSE4_1:
200       return 1;
201 #endif
202 #ifdef WEBP_HAVE_NEON
203     case kNEON:
204       return 1;
205 #endif
206     default:
207       break;
208   }
209   return 0;
210 }
211 VP8CPUInfo VP8GetCPUInfo = wasmCPUInfo;
212 #elif defined(WEBP_HAVE_NEON)
213 // In most cases this function doesn't check for NEON support (it's assumed by
214 // the configuration), but enables turning off NEON at runtime, for testing
215 // purposes, by setting VP8DecGetCPUInfo = NULL.
armCPUInfo(CPUFeature feature)216 static int armCPUInfo(CPUFeature feature) {
217   if (feature != kNEON) return 0;
218 #if defined(__linux__) && defined(WEBP_HAVE_NEON_RTCD)
219   {
220     int has_neon = 0;
221     char line[200];
222     FILE* const cpuinfo = fopen("/proc/cpuinfo", "r");
223     if (cpuinfo == NULL) return 0;
224     while (fgets(line, sizeof(line), cpuinfo)) {
225       if (!strncmp(line, "Features", 8)) {
226         if (strstr(line, " neon ") != NULL) {
227           has_neon = 1;
228           break;
229         }
230       }
231     }
232     fclose(cpuinfo);
233     return has_neon;
234   }
235 #else
236   return 1;
237 #endif
238 }
239 VP8CPUInfo VP8GetCPUInfo = armCPUInfo;
240 #elif defined(WEBP_USE_MIPS32) || defined(WEBP_USE_MIPS_DSP_R2) || \
241       defined(WEBP_USE_MSA)
mipsCPUInfo(CPUFeature feature)242 static int mipsCPUInfo(CPUFeature feature) {
243   if ((feature == kMIPS32) || (feature == kMIPSdspR2) || (feature == kMSA)) {
244     return 1;
245   } else {
246     return 0;
247   }
248 
249 }
250 VP8CPUInfo VP8GetCPUInfo = mipsCPUInfo;
251 #else
// None of the detection paths above was compiled in: the hook is left NULL.
VP8CPUInfo VP8GetCPUInfo = NULL;
253 #endif
254