1 /* libFLAC - Free Lossless Audio Codec library
2  * Copyright (C) 2001-2009  Josh Coalson
3  * Copyright (C) 2011-2013  Xiph.Org Foundation
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  *
9  * - Redistributions of source code must retain the above copyright
10  * notice, this list of conditions and the following disclaimer.
11  *
12  * - Redistributions in binary form must reproduce the above copyright
13  * notice, this list of conditions and the following disclaimer in the
14  * documentation and/or other materials provided with the distribution.
15  *
16  * - Neither the name of the Xiph.org Foundation nor the names of its
17  * contributors may be used to endorse or promote products derived from
18  * this software without specific prior written permission.
19  *
20  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23  * A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
24  * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
25  * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
26  * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
27  * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
28  * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
29  * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
30  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31  */
32 
33 #ifdef HAVE_CONFIG_H
34 #  include <config.h>
35 #endif
36 
37 #include "private/cpu.h"
38 #include <stdlib.h>
39 #include <stdio.h>
40 
41 #if defined FLAC__CPU_IA32
42 # include <signal.h>
43 #elif defined FLAC__CPU_PPC
44 # if !defined FLAC__NO_ASM
45 #  if defined FLAC__SYS_DARWIN
46 #   include <sys/sysctl.h>
47 #   include <mach/mach.h>
48 #   include <mach/mach_host.h>
49 #   include <mach/host_info.h>
50 #   include <mach/machine.h>
51 #   ifndef CPU_SUBTYPE_POWERPC_970
52 #    define CPU_SUBTYPE_POWERPC_970 ((cpu_subtype_t) 100)
53 #   endif
54 #  else /* FLAC__SYS_DARWIN */
55 
56 #   include <signal.h>
57 #   include <setjmp.h>
58 
59 static sigjmp_buf jmpbuf;
60 static volatile sig_atomic_t canjump = 0;
61 
sigill_handler(int sig)62 static void sigill_handler (int sig)
63 {
64 	if (!canjump) {
65 		signal (sig, SIG_DFL);
66 		raise (sig);
67 	}
68 	canjump = 0;
69 	siglongjmp (jmpbuf, 1);
70 }
71 #  endif /* FLAC__SYS_DARWIN */
72 # endif /* FLAC__NO_ASM */
73 #endif /* FLAC__CPU_PPC */
74 
75 #if defined (__NetBSD__) || defined(__OpenBSD__)
76 #include <sys/param.h>
77 #include <sys/sysctl.h>
78 #include <machine/cpu.h>
79 #endif
80 
81 #if defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__)
82 #include <sys/types.h>
83 #include <sys/sysctl.h>
84 #endif
85 
86 #if defined(__APPLE__)
87 /* how to get sysctlbyname()? */
88 #endif
89 
#ifdef FLAC__CPU_IA32
/* these are flags in EDX of CPUID AX=00000001 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_CMOV = 0x00008000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_MMX = 0x00800000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_FXSR = 0x01000000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE = 0x02000000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE2 = 0x04000000;
#endif
/* these are flags in ECX of CPUID AX=00000001 (also used by the x86-64 code path) */
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE3 = 0x00000001;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSSE3 = 0x00000200;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE41 = 0x00080000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_SSE42 = 0x00100000;
#ifdef FLAC__CPU_IA32
/* these are flags in EDX of CPUID AX=80000001 */
static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW = 0x80000000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW = 0x40000000;
static const unsigned FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX = 0x00400000;
#endif
109 
110 /*
111  * Extra stuff needed for detection of OS support for SSE on IA-32
112  */
113 #if defined(FLAC__CPU_IA32) && !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN) && !defined FLAC__NO_SSE_OS && !defined FLAC__SSE_OS
114 # if defined(__linux__)
/*
 * If the OS doesn't support SSE, we will get here with a SIGILL.  We
 * modify the return address to jump over the offending SSE instruction
 * and also the operation following it that indicates the instruction
 * executed successfully.  In this way we use no global variables and
 * stay thread-safe.
 *
 * 3 + 3 + 6:
 *   3 bytes for "xorps xmm0,xmm0"
 *   3 bytes for estimate of how long the following "inc var" instruction is
 *   6 bytes extra in case our estimate is wrong
 * 12 bytes puts us in the NOP "landing zone"
 */
128 #  undef USE_OBSOLETE_SIGCONTEXT_FLAVOR /* #define this to use the older signal handler method */
129 #  ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR
/*
 * SIGILL handler for the SSE OS-support probe (obsolete sigcontext flavor).
 *
 * signal: signal number (unused).
 * sc:     signal context; its saved EIP is advanced by 3 + 3 + 6 = 12 bytes
 *         so that execution resumes past the probed instruction and the
 *         "inc" that follows it.
 *
 * NOTE(review): `sc` is received by value; this presumably relies on the
 * legacy Linux convention where the argument is the kernel's on-stack
 * context, so the write is seen on return from the handler -- confirm.
 */
static void sigill_handler_sse_os(int signal, struct sigcontext sc)
{
	(void)signal;
	sc.eip += 3 + 3 + 6;
}
135 #  else
136 #   include <sys/ucontext.h>
/*
 * SIGILL handler for the SSE OS-support probe (SA_SIGINFO flavor).
 *
 * signal: signal number (unused).
 * si:     signal info (unused).
 * uc:     pointer to the ucontext_t of the interrupted code; general
 *         register slot 14 (REG_EIP) is advanced by 3 + 3 + 6 = 12 bytes
 *         so that execution resumes past the probed instruction and the
 *         "inc" that follows it.
 */
static void sigill_handler_sse_os(int signal, siginfo_t *si, void *uc)
{
	ucontext_t *context = (ucontext_t*)uc;

	(void)signal;
	(void)si;
	context->uc_mcontext.gregs[14/*REG_EIP*/] += 3 + 3 + 6;
}
142 #  endif
143 # elif defined(_MSC_VER)
144 #  include <windows.h>
145 #  define USE_TRY_CATCH_FLAVOR /* sigill_handler flavor resulted in several crash reports on win32 */
146 #  ifdef USE_TRY_CATCH_FLAVOR
147 #  else
sigill_handler_sse_os(EXCEPTION_POINTERS * ep)148 	LONG WINAPI sigill_handler_sse_os(EXCEPTION_POINTERS *ep)
149 	{
150 		if(ep->ExceptionRecord->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) {
151 			ep->ContextRecord->Eip += 3 + 3 + 6;
152 			return EXCEPTION_CONTINUE_EXECUTION;
153 		}
154 		return EXCEPTION_CONTINUE_SEARCH;
155 	}
156 #  endif
157 # elif defined(_WIN32) && defined(__GNUC__)
158 #  undef USE_FXSR_FLAVOR
159 #  ifdef USE_FXSR_FLAVOR
160   /* not guaranteed to work on some unknown future Intel CPUs */
161 #  else
162   /* exception handler is process-wide; not good for a library */
163 #  include <windows.h>
164 	LONG WINAPI sigill_handler_sse_os(EXCEPTION_POINTERS *ep); /* to suppress GCC warning */
sigill_handler_sse_os(EXCEPTION_POINTERS * ep)165 	LONG WINAPI sigill_handler_sse_os(EXCEPTION_POINTERS *ep)
166 	{
167 		if(ep->ExceptionRecord->ExceptionCode == EXCEPTION_ILLEGAL_INSTRUCTION) {
168 			ep->ContextRecord->Eip += 3 + 3 + 6;
169 			return EXCEPTION_CONTINUE_EXECUTION;
170 		}
171 		return EXCEPTION_CONTINUE_SEARCH;
172 	}
173 #  endif
174 # endif
175 #endif
176 
177 
/*
 * Fill in *info for the CPU family this file was compiled for.
 *
 * info: output structure.  `type` and `use_asm` are written on every code
 *       path.  The per-CPU feature flags (info->ia32.*, info->x86_64.* or
 *       info->ppc.*) are written only when the corresponding assembly /
 *       intrinsics support is compiled in; otherwise they are left untouched
 *       and `use_asm` is false.
 *
 * On IA-32 the SSE-family flags (and fxsr) are additionally cleared when the
 * operating system cannot be shown to support SSE; the method used for that
 * check depends on the platform (see the individual branches below).
 */
void FLAC__cpu_info(FLAC__CPUInfo *info)
{
/*
 * IA32-specific
 */
#ifdef FLAC__CPU_IA32
	info->type = FLAC__CPUINFO_TYPE_IA32;
#if !defined FLAC__NO_ASM && (defined FLAC__HAS_NASM || defined FLAC__HAS_X86INTRIN)
	info->use_asm = true; /* we assume a minimum of 80386 with FLAC__CPU_IA32 */
#ifdef FLAC__HAS_NASM
	info->ia32.cpuid = FLAC__cpu_have_cpuid_asm_ia32()? true : false;
#else
	info->ia32.cpuid = FLAC__cpu_have_cpuid_x86()? true : false;
#endif
	info->ia32.bswap = info->ia32.cpuid; /* CPUID => BSWAP since it came after */
	info->ia32.cmov = false;
	info->ia32.mmx = false;
	info->ia32.fxsr = false;
	info->ia32.sse = false;
	info->ia32.sse2 = false;
	info->ia32.sse3 = false;
	info->ia32.ssse3 = false;
	info->ia32.sse41 = false;
	info->ia32.sse42 = false;
	info->ia32._3dnow = false;
	info->ia32.ext3dnow = false;
	info->ia32.extmmx = false;
	if(info->ia32.cpuid) {
		/* http://www.sandpile.org/x86/cpuid.htm */
		FLAC__uint32 flags_edx, flags_ecx;
#ifdef FLAC__HAS_NASM
		FLAC__cpu_info_asm_ia32(&flags_edx, &flags_ecx);
#else
		FLAC__cpu_info_x86(&flags_edx, &flags_ecx);
#endif
		info->ia32.cmov  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_CMOV )? true : false;
		info->ia32.mmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_MMX  )? true : false;
		info->ia32.fxsr  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_FXSR )? true : false;
		info->ia32.sse   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE  )? true : false;
		info->ia32.sse2  = (flags_edx & FLAC__CPUINFO_IA32_CPUID_SSE2 )? true : false;
		info->ia32.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
		info->ia32.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
		info->ia32.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false;
		info->ia32.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false;

#if defined FLAC__HAS_NASM && defined FLAC__USE_3DNOW
		flags_edx = FLAC__cpu_info_extended_amd_asm_ia32();
		info->ia32._3dnow   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_3DNOW   )? true : false;
		info->ia32.ext3dnow = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXT3DNOW)? true : false;
		info->ia32.extmmx   = (flags_edx & FLAC__CPUINFO_IA32_CPUID_EXTENDED_AMD_EXTMMX  )? true : false;
#else
		info->ia32._3dnow = info->ia32.ext3dnow = info->ia32.extmmx = false;
#endif

#ifdef DEBUG
		fprintf(stderr, "CPU info (IA-32):\n");
		fprintf(stderr, "  CPUID ...... %c\n", info->ia32.cpuid   ? 'Y' : 'n');
		fprintf(stderr, "  BSWAP ...... %c\n", info->ia32.bswap   ? 'Y' : 'n');
		fprintf(stderr, "  CMOV ....... %c\n", info->ia32.cmov    ? 'Y' : 'n');
		fprintf(stderr, "  MMX ........ %c\n", info->ia32.mmx     ? 'Y' : 'n');
		fprintf(stderr, "  FXSR ....... %c\n", info->ia32.fxsr    ? 'Y' : 'n');
		fprintf(stderr, "  SSE ........ %c\n", info->ia32.sse     ? 'Y' : 'n');
		fprintf(stderr, "  SSE2 ....... %c\n", info->ia32.sse2    ? 'Y' : 'n');
		fprintf(stderr, "  SSE3 ....... %c\n", info->ia32.sse3    ? 'Y' : 'n');
		fprintf(stderr, "  SSSE3 ...... %c\n", info->ia32.ssse3   ? 'Y' : 'n');
		fprintf(stderr, "  SSE41 ...... %c\n", info->ia32.sse41   ? 'Y' : 'n');
		fprintf(stderr, "  SSE42 ...... %c\n", info->ia32.sse42   ? 'Y' : 'n');
		fprintf(stderr, "  3DNow! ..... %c\n", info->ia32._3dnow  ? 'Y' : 'n');
		fprintf(stderr, "  3DNow!-ext . %c\n", info->ia32.ext3dnow? 'Y' : 'n');
		fprintf(stderr, "  3DNow!-MMX . %c\n", info->ia32.extmmx  ? 'Y' : 'n');
#endif

		/*
		 * now have to check for OS support of SSE instructions
		 */
		if(info->ia32.sse) {
#if defined FLAC__NO_SSE_OS
			/* assume user knows better than us; turn it off */
			info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
#elif defined FLAC__SSE_OS
			/* assume user knows better than us; leave as detected above */
#elif defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__DragonFly__) || defined(__APPLE__)
			int sse = 0;
			size_t len;
			/* at least one of these must work: */
			len = sizeof(sse); sse = sse || (sysctlbyname("hw.instruction_sse", &sse, &len, NULL, 0) == 0 && sse);
			len = sizeof(sse); sse = sse || (sysctlbyname("hw.optional.sse"   , &sse, &len, NULL, 0) == 0 && sse); /* __APPLE__ ? */
			if(!sse)
				info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
#elif defined(__NetBSD__) || defined (__OpenBSD__)
# if __NetBSD_Version__ >= 105250000 || (defined __OpenBSD__)
			int val = 0, mib[2] = { CTL_MACHDEP, CPU_SSE };
			size_t len = sizeof(val);
			if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
				info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
			else { /* double-check SSE2 */
				mib[1] = CPU_SSE2;
				len = sizeof(val);
				if(sysctl(mib, 2, &val, &len, NULL, 0) < 0 || !val)
					info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
			}
# else
			info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
# endif
#elif defined(__linux__)
			int sse = 0;
			struct sigaction sigill_save;
#ifdef USE_OBSOLETE_SIGCONTEXT_FLAVOR
			if(0 == sigaction(SIGILL, NULL, &sigill_save) && signal(SIGILL, (void (*)(int))sigill_handler_sse_os) != SIG_ERR)
#else
			struct sigaction sigill_sse;
			sigill_sse.sa_sigaction = sigill_handler_sse_os;
			/* use the public POSIX call; __sigemptyset is a libc-internal name that is not available everywhere */
			sigemptyset(&sigill_sse.sa_mask);
			sigill_sse.sa_flags = SA_SIGINFO | SA_RESETHAND; /* SA_RESETHAND just in case our SIGILL return jump breaks, so we don't get stuck in a loop */
			if(0 == sigaction(SIGILL, &sigill_sse, &sigill_save))
#endif
			{
				/* http://www.ibiblio.org/gferg/ldp/GCC-Inline-Assembly-HOWTO.html */
				/* see sigill_handler_sse_os() for an explanation of the following: */
				asm volatile (
					"xorps %%xmm0,%%xmm0\n\t" /* will cause SIGILL if unsupported by OS */
					"incl %0\n\t"             /* SIGILL handler will jump over this */
					/* landing zone */
					"nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */
					"nop\n\t"
					"nop\n\t"
					"nop\n\t"
					"nop\n\t"
					"nop\n\t"
					"nop\n\t" /* SIGILL jump lands here if "inc" is 3 bytes (expected) */
					"nop\n\t"
					"nop"     /* SIGILL jump lands here if "inc" is 1 byte */
					: "=r"(sse)
					: "0"(sse)
				);

				sigaction(SIGILL, &sigill_save, NULL);
			}

			if(!sse)
				info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
#elif defined(_MSC_VER)
# ifdef USE_TRY_CATCH_FLAVOR
			__try {
				__asm {
					xorps xmm0,xmm0
				}
			}
			__except(EXCEPTION_EXECUTE_HANDLER) {
				if (_exception_code() == STATUS_ILLEGAL_INSTRUCTION)
					info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
			}
# else
			int sse = 0;
			/* From MSDN: SetUnhandledExceptionFilter replaces the existing top-level exception filter for all threads in the calling process */
			/* So sigill_handler_sse_os() is process-wide and affects other threads as well (not a good thing for a library in a multi-threaded process) */
			LPTOP_LEVEL_EXCEPTION_FILTER save = SetUnhandledExceptionFilter(sigill_handler_sse_os);
			/* see GCC version above for explanation */
			/*  http://msdn.microsoft.com/en-us/library/4ks26t93.aspx */
			/*  http://www.codeproject.com/Articles/5267/Inline-Assembly-in-GCC-Vs-VC */
			__asm {
				xorps xmm0,xmm0
				inc sse
				nop
				nop
				nop
				nop
				nop
				nop
				nop
				nop
				nop
			}
			SetUnhandledExceptionFilter(save);
			if(!sse)
				info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
# endif
#elif defined(_WIN32) && defined(__GNUC__)
# ifdef USE_FXSR_FLAVOR
			int sse = 0;
			/* Based on the idea described in Agner Fog's manual "Optimizing subroutines in assembly language" */
			/* In theory, not guaranteed to detect lack of OS SSE support on some future Intel CPUs, but in practice works (see the aforementioned manual) */
			if (info->ia32.fxsr) {
				struct {
					FLAC__uint32 buff[128];
				} __attribute__((aligned(16))) fxsr;
				FLAC__uint32 old_val, new_val;

				asm volatile ("fxsave %0"  : "=m" (fxsr) : "m" (fxsr));
				old_val = fxsr.buff[50];
				fxsr.buff[50] ^= 0x0013c0de;                             /* change value in the buffer */
				asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr));  /* try to change SSE register */
				fxsr.buff[50] = old_val;                                 /* restore old value in the buffer */
				asm volatile ("fxsave %0 " : "=m" (fxsr) : "m" (fxsr));  /* old value will be overwritten if SSE register was changed */
				new_val = fxsr.buff[50];                                 /* == old_val if FXRSTOR didn't change SSE register and (old_val ^ 0x0013c0de) otherwise */
				fxsr.buff[50] = old_val;                                 /* again restore old value in the buffer */
				asm volatile ("fxrstor %0" : "=m" (fxsr) : "m" (fxsr));  /* restore old values of registers */

				if ((old_val^new_val) == 0x0013c0de)
					sse = 1;
			}
			if(!sse)
				info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
# else
			int sse = 0;
			LPTOP_LEVEL_EXCEPTION_FILTER save = SetUnhandledExceptionFilter(sigill_handler_sse_os);
			/* see MSVC version above for explanation */
			asm volatile (
				"xorps %%xmm0,%%xmm0\n\t"
				"incl %0\n\t"
				"nop\n\t" /* SIGILL jump lands here if "inc" is 9 bytes */
				"nop\n\t"
				"nop\n\t"
				"nop\n\t"
				"nop\n\t"
				"nop\n\t"
				"nop\n\t"
				"nop\n\t"
				"nop"     /* SIGILL jump lands here if "inc" is 1 byte  */
				: "=r"(sse)
				: "0"(sse)
			);
			SetUnhandledExceptionFilter(save);
			if(!sse)
				info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
# endif
#else
			/* no way to test, disable to be safe */
			info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
#endif
#ifdef DEBUG
			fprintf(stderr, "  SSE OS sup . %c\n", info->ia32.sse     ? 'Y' : 'n');
#endif
		}
		else /* info->ia32.sse == false */
			info->ia32.fxsr = info->ia32.sse = info->ia32.sse2 = info->ia32.sse3 = info->ia32.ssse3 = info->ia32.sse41 = info->ia32.sse42 = false;
	}
#else
	info->use_asm = false;
#endif

/*
 * x86-64-specific
 */
#elif defined FLAC__CPU_X86_64
	info->type = FLAC__CPUINFO_TYPE_X86_64;
#if !defined FLAC__NO_ASM && defined FLAC__HAS_X86INTRIN
	info->use_asm = true;
	{
		/* http://www.sandpile.org/x86/cpuid.htm */
		FLAC__uint32 flags_edx, flags_ecx;
		FLAC__cpu_info_x86(&flags_edx, &flags_ecx);
		info->x86_64.sse3  = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE3 )? true : false;
		info->x86_64.ssse3 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSSE3)? true : false;
		info->x86_64.sse41 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE41)? true : false;
		info->x86_64.sse42 = (flags_ecx & FLAC__CPUINFO_IA32_CPUID_SSE42)? true : false;
	}
#ifdef DEBUG
	fprintf(stderr, "CPU info (x86-64):\n");
	fprintf(stderr, "  SSE3 ....... %c\n", info->x86_64.sse3    ? 'Y' : 'n');
	fprintf(stderr, "  SSSE3 ...... %c\n", info->x86_64.ssse3   ? 'Y' : 'n');
	fprintf(stderr, "  SSE41 ...... %c\n", info->x86_64.sse41   ? 'Y' : 'n');
	fprintf(stderr, "  SSE42 ...... %c\n", info->x86_64.sse42   ? 'Y' : 'n');
#endif

#else
	info->use_asm = false;
#endif

/*
 * PPC-specific
 */
#elif defined FLAC__CPU_PPC
	info->type = FLAC__CPUINFO_TYPE_PPC;
# if !defined FLAC__NO_ASM
	info->use_asm = true;
#if defined(__ALTIVEC__)
#   if defined FLAC__SYS_DARWIN
	{
		int val = 0, mib[2] = { CTL_HW, HW_VECTORUNIT };
		size_t len = sizeof(val);
		info->ppc.altivec = !(sysctl(mib, 2, &val, &len, NULL, 0) || !val);
	}
	{
		host_basic_info_data_t hostInfo;
		mach_msg_type_number_t infoCount;

		infoCount = HOST_BASIC_INFO_COUNT;
		/* only trust hostInfo when the query succeeded; otherwise it is uninitialized */
		if(host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&hostInfo, &infoCount) == KERN_SUCCESS)
			info->ppc.ppc64 = (hostInfo.cpu_type == CPU_TYPE_POWERPC) && (hostInfo.cpu_subtype == CPU_SUBTYPE_POWERPC_970);
		else
			info->ppc.ppc64 = 0;
	}
#   else
	{
		/* no Darwin, do it the brute-force way */
		/* @@@@@@ this is not thread-safe; replace with SSE OS method above or remove */
		void (*prev_sigill)(int);

		info->ppc.altivec = 0;
		info->ppc.ppc64 = 0;

		/* remember the caller's SIGILL disposition so it can be put back afterwards */
		prev_sigill = signal (SIGILL, sigill_handler);
		if (prev_sigill != SIG_ERR) { /* without our handler a failed probe would kill the process */
			canjump = 0;
			if (!sigsetjmp (jmpbuf, 1)) {
				canjump = 1;

				asm volatile (
					"mtspr 256, %0\n\t"
					"vand %%v0, %%v0, %%v0"
					:
					: "r" (-1)
				);

				info->ppc.altivec = 1;
			}
			canjump = 0;
			if (!sigsetjmp (jmpbuf, 1)) {
				int x = 0;
				canjump = 1;

				/* PPC64 hardware implements the cntlzd instruction */
				asm volatile ("cntlzd %0, %1" : "=r" (x) : "r" (x) );

				info->ppc.ppc64 = 1;
			}
			canjump = 0;
			signal (SIGILL, prev_sigill);
		}
	}
#   endif
#  else
	info->ppc.altivec = 0;
	info->ppc.ppc64 = 0;
#  endif
# else
	info->use_asm = false;
# endif

/*
 * unknown CPU
 */
#else
	info->type = FLAC__CPUINFO_TYPE_UNKNOWN;
	info->use_asm = false;
#endif
}
520 
521 #if (defined FLAC__CPU_IA32 || defined FLAC__CPU_X86_64) && defined FLAC__HAS_X86INTRIN
522 
523 #if defined _MSC_VER
524 #include <intrin.h> /* for __cpuid() */
525 #elif defined __GNUC__ && defined HAVE_CPUID_H
526 #include <cpuid.h> /* for __get_cpuid() and __get_cpuid_max() */
527 #endif
528 
FLAC__cpu_have_cpuid_x86(void)529 FLAC__uint32 FLAC__cpu_have_cpuid_x86(void)
530 {
531 #ifdef FLAC__CPU_X86_64
532 	return 1;
533 #else
534 # if defined _MSC_VER || defined __INTEL_COMPILER /* Do they support CPUs w/o CPUID support (or OSes that work on those CPUs)? */
535 	FLAC__uint32 flags1, flags2;
536 	__asm {
537 		pushfd
538 		pushfd
539 		pop		eax
540 		mov		flags1, eax
541 		xor		eax, 0x200000
542 		push	eax
543 		popfd
544 		pushfd
545 		pop		eax
546 		mov		flags2, eax
547 		popfd
548 	}
549 	if (((flags1^flags2) & 0x200000) != 0)
550 		return 1;
551 	else
552 		return 0;
553 # elif defined __GNUC__ && defined HAVE_CPUID_H
554 	if (__get_cpuid_max(0, 0) != 0)
555 		return 1;
556 	else
557 		return 0;
558 # else
559 	return 0;
560 # endif
561 #endif
562 }
563 
/*
 * Read the feature flags reported by CPUID leaf 1.
 *
 * flags_edx: receives the EDX feature word.
 * flags_ecx: receives the ECX feature word.
 *
 * Both outputs are set to 0 when CPUID cannot be queried (the compiler
 * offers no supported interface, or __get_cpuid() reports failure).
 */
void FLAC__cpu_info_x86(FLAC__uint32 *flags_edx, FLAC__uint32 *flags_ecx)
{
#if defined _MSC_VER || defined __INTEL_COMPILER
	int cpuinfo[4];
	__cpuid(cpuinfo, 1);
	*flags_ecx = (FLAC__uint32)cpuinfo[2];
	*flags_edx = (FLAC__uint32)cpuinfo[3];
#elif defined __GNUC__ && defined HAVE_CPUID_H
	/* __get_cpuid() takes unsigned int pointers; go through locals rather than passing FLAC__uint32 pointers through */
	unsigned int reg_eax = 0, reg_ebx = 0, reg_ecx = 0, reg_edx = 0;
	if (0 == __get_cpuid(1, &reg_eax, &reg_ebx, &reg_ecx, &reg_edx)) {
		*flags_ecx = *flags_edx = 0;
	}
	else {
		*flags_ecx = (FLAC__uint32)reg_ecx;
		*flags_edx = (FLAC__uint32)reg_edx;
	}
#else
	*flags_ecx = *flags_edx = 0;
#endif
}
579 
580 #endif /* (FLAC__CPU_IA32 || FLAC__CPU_X86_64) && FLAC__HAS_X86INTRIN */
581