1 /* GEGL - The GEGL Library
2 * Copyright (C) 1995-1997 Peter Mattis and Spencer Kimball
3 *
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 3 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
13 *
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the
16 * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 * Boston, MA 02110-1301, USA.
18 */
19
20 /*
21 * x86 bits Copyright (C) Manish Singh <yosh@gimp.org>
22 */
23
24 /*
25 * PPC CPU acceleration detection was taken from DirectFB but seems to be
26 * originating from mpeg2dec with the following copyright:
27 *
28 * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
29 */
30
31 #include "config.h"
32
33 #include <string.h>
34 #include <signal.h>
35 #include <setjmp.h>
36
37 #include <glib.h>
38
39 #include "gegl-cpuaccel-private.h"
40
41
/* Architecture-specific probe, defined further down in this file for each
 * supported CPU family (x86, PPC); a stub returning GEGL_CPU_ACCEL_NONE is
 * used when no probe is compiled in.
 */
static GeglCpuAccelFlags  cpu_accel (void) G_GNUC_CONST;


/* Global switch read by gegl_cpu_accel_get_support() and set through
 * gegl_cpu_accel_set_use(); acceleration is enabled by default.
 */
static gboolean  use_cpu_accel = TRUE;
46
47
48 /**
49 * gegl_cpu_accel_get_support:
50 *
51 * Query for CPU acceleration support.
52 *
53 * Return value: #GeglCpuAccelFlags as supported by the CPU.
54 *
55 * Since: GEGL 2.4
56 */
57 GeglCpuAccelFlags
gegl_cpu_accel_get_support(void)58 gegl_cpu_accel_get_support (void)
59 {
60 return use_cpu_accel ? cpu_accel () : GEGL_CPU_ACCEL_NONE;
61 }
62
63 /**
64 * gegl_cpu_accel_set_use:
65 * @use: whether to use CPU acceleration features or not
66 *
67 * This function is for internal use only.
68 *
69 * Since: GEGL 2.4
70 */
71 void
gegl_cpu_accel_set_use(gboolean use)72 gegl_cpu_accel_set_use (gboolean use)
73 {
74 use_cpu_accel = use ? TRUE : FALSE;
75 }
76
77
78 #if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)
79
80 #define HAVE_ACCEL 1
81
82
/* CPU vendors, identified by the 12-byte vendor string returned by
 * CPUID leaf 0 in EBX/EDX/ECX (see arch_get_vendor () below).  The
 * vendor selects which feature-probing strategy arch_accel () uses.
 */
typedef enum
{
  ARCH_X86_VENDOR_NONE,       /* CPUID unusable or reported zero leaves */
  ARCH_X86_VENDOR_INTEL,
  ARCH_X86_VENDOR_AMD,
  ARCH_X86_VENDOR_CENTAUR,
  ARCH_X86_VENDOR_CYRIX,
  ARCH_X86_VENDOR_NSC,
  ARCH_X86_VENDOR_TRANSMETA,
  ARCH_X86_VENDOR_NEXGEN,
  ARCH_X86_VENDOR_RISE,
  ARCH_X86_VENDOR_UMC,
  ARCH_X86_VENDOR_SIS,
  ARCH_X86_VENDOR_HYGON,
  ARCH_X86_VENDOR_UNKNOWN = 0xff  /* recognizable CPUID, unrecognized vendor */
} X86Vendor;
99
/* Feature-flag bits as reported by CPUID.  The Intel bits come from
 * leaf 1 EDX, the AMD/Centaur/Cyrix extended bits from leaf 0x80000001
 * EDX (queried in the vendor-specific arch_accel_* functions below).
 */
enum
{
  ARCH_X86_INTEL_FEATURE_MMX      = 1 << 23,  /* leaf 1, EDX bit 23 */
  ARCH_X86_INTEL_FEATURE_XMM      = 1 << 25,  /* SSE:  leaf 1, EDX bit 25 */
  ARCH_X86_INTEL_FEATURE_XMM2     = 1 << 26,  /* SSE2: leaf 1, EDX bit 26 */

  ARCH_X86_AMD_FEATURE_MMXEXT     = 1 << 22,  /* leaf 0x80000001, EDX bit 22 */
  ARCH_X86_AMD_FEATURE_3DNOW      = 1 << 31,  /* leaf 0x80000001, EDX bit 31 */

  ARCH_X86_CENTAUR_FEATURE_MMX    = 1 << 23,
  ARCH_X86_CENTAUR_FEATURE_MMXEXT = 1 << 24,
  ARCH_X86_CENTAUR_FEATURE_3DNOW  = 1 << 31,

  ARCH_X86_CYRIX_FEATURE_MMX      = 1 << 23,
  ARCH_X86_CYRIX_FEATURE_MMXEXT   = 1 << 24
};

/* Bits reported in CPUID leaf 1 ECX */
enum
{
  ARCH_X86_INTEL_FEATURE_PNI      = 1 << 0    /* SSE3 ("Prescott New Instructions") */
};
121
/* Execute the CPUID instruction for leaf `op' and capture the four result
 * registers.  On ia32 position-independent code EBX holds the GOT pointer,
 * so the first variant saves/restores it through ESI instead of letting the
 * compiler allocate EBX directly; x86-64 and non-PIC builds can use the
 * plain form.
 */
#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
#define cpuid(op,eax,ebx,ecx,edx)  \
  __asm__ ("movl %%ebx, %%esi\n\t" \
           "cpuid\n\t"             \
           "xchgl %%ebx,%%esi"     \
           : "=a" (eax),           \
             "=S" (ebx),           \
             "=c" (ecx),           \
             "=d" (edx)            \
           : "0" (op))
#else
#define cpuid(op,eax,ebx,ecx,edx)  \
  __asm__ ("cpuid"                 \
           : "=a" (eax),           \
             "=b" (ebx),           \
             "=c" (ecx),           \
             "=d" (edx)            \
           : "0" (op))
#endif
141
142
/* Identify the CPU vendor from the CPUID leaf-0 vendor string.
 *
 * Returns ARCH_X86_VENDOR_NONE if CPUID is unavailable or reports no
 * leaves, a specific vendor constant on a string match, and
 * ARCH_X86_VENDOR_UNKNOWN otherwise.  On x86-64 only the vendors that
 * ever shipped 64-bit parts are checked.
 */
static X86Vendor
arch_get_vendor (void)
{
  guint32 eax, ebx, ecx, edx;
  guint32 id32[4];              /* 12 string bytes + room for the NUL */
  char *id = (char *) id32;

#ifndef ARCH_X86_64
  /* Only need to check this on ia32: attempt to toggle bit 21 (the ID
   * flag, 0x200000) in EFLAGS.  If the bit sticks, CPUID is supported;
   * if it reads back unchanged, this is a pre-CPUID CPU.  All x86-64
   * CPUs support CPUID, so the check is skipped there.
   */
  __asm__ ("pushfl\n\t"
           "pushfl\n\t"
           "popl %0\n\t"
           "movl %0,%1\n\t"
           "xorl $0x200000,%0\n\t"
           "pushl %0\n\t"
           "popfl\n\t"
           "pushfl\n\t"
           "popl %0\n\t"
           "popfl"
           : "=a" (eax),
             "=c" (ecx)
           :
           : "cc");

  if (eax == ecx)               /* bit 21 could not be toggled -> no CPUID */
    return ARCH_X86_VENDOR_NONE;
#endif

  cpuid (0, eax, ebx, ecx, edx);

  if (eax == 0)                 /* no leaves beyond leaf 0 */
    return ARCH_X86_VENDOR_NONE;

  /* The vendor string is laid out EBX, EDX, ECX (in that order) */
  id32[0] = ebx;
  id32[1] = edx;
  id32[2] = ecx;

  id[12] = '\0';

#ifdef ARCH_X86_64
  if (strcmp (id, "AuthenticAMD") == 0)
    return ARCH_X86_VENDOR_AMD;
  else if (strcmp (id, "HygonGenuine") == 0)
    return ARCH_X86_VENDOR_HYGON;
  else if (strcmp (id, "GenuineIntel") == 0)
    return ARCH_X86_VENDOR_INTEL;
#else
  if (strcmp (id, "GenuineIntel") == 0)
    return ARCH_X86_VENDOR_INTEL;
  else if (strcmp (id, "AuthenticAMD") == 0)
    return ARCH_X86_VENDOR_AMD;
  else if (strcmp (id, "HygonGenuine") == 0)
    return ARCH_X86_VENDOR_HYGON;
  else if (strcmp (id, "CentaurHauls") == 0)
    return ARCH_X86_VENDOR_CENTAUR;
  else if (strcmp (id, "CyrixInstead") == 0)
    return ARCH_X86_VENDOR_CYRIX;
  else if (strcmp (id, "Geode by NSC") == 0)
    return ARCH_X86_VENDOR_NSC;
  else if (strcmp (id, "GenuineTMx86") == 0 ||
           strcmp (id, "TransmetaCPU") == 0)
    return ARCH_X86_VENDOR_TRANSMETA;
  else if (strcmp (id, "NexGenDriven") == 0)
    return ARCH_X86_VENDOR_NEXGEN;
  else if (strcmp (id, "RiseRiseRise") == 0)
    return ARCH_X86_VENDOR_RISE;
  else if (strcmp (id, "UMC UMC UMC ") == 0)
    return ARCH_X86_VENDOR_UMC;
  else if (strcmp (id, "SiS SiS SiS ") == 0)
    return ARCH_X86_VENDOR_SIS;
#endif

  return ARCH_X86_VENDOR_UNKNOWN;
}
217
218 static guint32
arch_accel_intel(void)219 arch_accel_intel (void)
220 {
221 guint32 caps = 0;
222
223 #ifdef USE_MMX
224 {
225 guint32 eax, ebx, ecx, edx;
226
227 cpuid (1, eax, ebx, ecx, edx);
228
229 if ((edx & ARCH_X86_INTEL_FEATURE_MMX) == 0)
230 return 0;
231
232 caps = GEGL_CPU_ACCEL_X86_MMX;
233
234 #ifdef USE_SSE
235 if (edx & ARCH_X86_INTEL_FEATURE_XMM)
236 caps |= GEGL_CPU_ACCEL_X86_SSE | GEGL_CPU_ACCEL_X86_MMXEXT;
237
238 if (edx & ARCH_X86_INTEL_FEATURE_XMM2)
239 caps |= GEGL_CPU_ACCEL_X86_SSE2;
240
241 if (ecx & ARCH_X86_INTEL_FEATURE_PNI)
242 caps |= GEGL_CPU_ACCEL_X86_SSE3;
243 #endif /* USE_SSE */
244 }
245 #endif /* USE_MMX */
246
247 return caps;
248 }
249
250 static guint32
arch_accel_amd(void)251 arch_accel_amd (void)
252 {
253 guint32 caps;
254
255 caps = arch_accel_intel ();
256
257 #ifdef USE_MMX
258 {
259 guint32 eax, ebx, ecx, edx;
260
261 cpuid (0x80000000, eax, ebx, ecx, edx);
262
263 if (eax < 0x80000001)
264 return caps;
265
266 #ifdef USE_SSE
267 cpuid (0x80000001, eax, ebx, ecx, edx);
268
269 if (edx & ARCH_X86_AMD_FEATURE_3DNOW)
270 caps |= GEGL_CPU_ACCEL_X86_3DNOW;
271
272 if (edx & ARCH_X86_AMD_FEATURE_MMXEXT)
273 caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
274 #endif /* USE_SSE */
275 }
276 #endif /* USE_MMX */
277
278 return caps;
279 }
280
281 static guint32
arch_accel_centaur(void)282 arch_accel_centaur (void)
283 {
284 guint32 caps;
285
286 caps = arch_accel_intel ();
287
288 #ifdef USE_MMX
289 {
290 guint32 eax, ebx, ecx, edx;
291
292 cpuid (0x80000000, eax, ebx, ecx, edx);
293
294 if (eax < 0x80000001)
295 return caps;
296
297 cpuid (0x80000001, eax, ebx, ecx, edx);
298
299 if (edx & ARCH_X86_CENTAUR_FEATURE_MMX)
300 caps |= GEGL_CPU_ACCEL_X86_MMX;
301
302 #ifdef USE_SSE
303 if (edx & ARCH_X86_CENTAUR_FEATURE_3DNOW)
304 caps |= GEGL_CPU_ACCEL_X86_3DNOW;
305
306 if (edx & ARCH_X86_CENTAUR_FEATURE_MMXEXT)
307 caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
308 #endif /* USE_SSE */
309 }
310 #endif /* USE_MMX */
311
312 return caps;
313 }
314
315 static guint32
arch_accel_cyrix(void)316 arch_accel_cyrix (void)
317 {
318 guint32 caps;
319
320 caps = arch_accel_intel ();
321
322 #ifdef USE_MMX
323 {
324 guint32 eax, ebx, ecx, edx;
325
326 cpuid (0, eax, ebx, ecx, edx);
327
328 if (eax != 2)
329 return caps;
330
331 cpuid (0x80000001, eax, ebx, ecx, edx);
332
333 if (edx & ARCH_X86_CYRIX_FEATURE_MMX)
334 caps |= GEGL_CPU_ACCEL_X86_MMX;
335
336 #ifdef USE_SSE
337 if (edx & ARCH_X86_CYRIX_FEATURE_MMXEXT)
338 caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
339 #endif /* USE_SSE */
340 }
341 #endif /* USE_MMX */
342
343 return caps;
344 }
345
346 #ifdef USE_SSE
/* Jump target armed by arch_accel_sse_os_support () before executing
 * the SSE probe instruction.
 */
static jmp_buf sigill_return;

/* SIGILL handler for the SSE OS-support probe: unwind back to the
 * setjmp in arch_accel_sse_os_support ().
 *
 * NOTE(review): calling longjmp () out of a signal handler is only
 * well-defined under limited conditions (POSIX recommends
 * siglongjmp); the PPC probe below uses sigsetjmp/siglongjmp —
 * consider aligning this path with it.
 */
static void
sigill_handler (gint n)
{
  longjmp (sigill_return, 1);
}
354
355 static gboolean
arch_accel_sse_os_support(void)356 arch_accel_sse_os_support (void)
357 {
358 if (setjmp (sigill_return))
359 {
360 return FALSE;
361 }
362 else
363 {
364 signal (SIGILL, sigill_handler);
365 __asm__ __volatile__ ("xorps %xmm0, %xmm0");
366 signal (SIGILL, SIG_DFL);
367 }
368
369 return TRUE;
370 }
371 #endif /* USE_SSE */
372
373 static guint32
arch_accel(void)374 arch_accel (void)
375 {
376 guint32 caps;
377 X86Vendor vendor;
378
379 vendor = arch_get_vendor ();
380
381 switch (vendor)
382 {
383 case ARCH_X86_VENDOR_NONE:
384 caps = 0;
385 break;
386
387 case ARCH_X86_VENDOR_AMD:
388 case ARCH_X86_VENDOR_HYGON:
389 caps = arch_accel_amd ();
390 break;
391
392 case ARCH_X86_VENDOR_CENTAUR:
393 caps = arch_accel_centaur ();
394 break;
395
396 case ARCH_X86_VENDOR_CYRIX:
397 case ARCH_X86_VENDOR_NSC:
398 caps = arch_accel_cyrix ();
399 break;
400
401 /* check for what Intel speced, even if UNKNOWN */
402 default:
403 caps = arch_accel_intel ();
404 break;
405 }
406
407 #ifdef USE_SSE
408 if ((caps & GEGL_CPU_ACCEL_X86_SSE) && !arch_accel_sse_os_support ())
409 caps &= ~(GEGL_CPU_ACCEL_X86_SSE | GEGL_CPU_ACCEL_X86_SSE2);
410 #endif
411
412 return caps;
413 }
414
415 #endif /* ARCH_X86 && USE_MMX && __GNUC__ */
416
417
418 #if defined(ARCH_PPC) && defined (USE_ALTIVEC)
419
420 #if defined(HAVE_ALTIVEC_SYSCTL)
421
422 #include <sys/sysctl.h>
423
424 #define HAVE_ACCEL 1
425
/* Query the kernel for an AltiVec vector unit via sysctl
 * (CTL_HW / HW_VECTORUNIT); no probing instruction needed.
 */
static guint32
arch_accel (void)
{
  gint     mib[2]  = { CTL_HW, HW_VECTORUNIT };
  gboolean has_vu  = FALSE;
  gsize    length  = sizeof (has_vu);

  if (sysctl (mib, 2, &has_vu, &length, NULL, 0) != 0)
    return 0;

  return has_vu ? GEGL_CPU_ACCEL_PPC_ALTIVEC : 0;
}
441
442 #elif defined(__GNUC__)
443
444 #define HAVE_ACCEL 1
445
/* Jump buffer and arming flag for the AltiVec SIGILL probe below. */
static sigjmp_buf jmpbuf;
static volatile sig_atomic_t canjump = 0;

/* SIGILL handler for the AltiVec probe.  If the signal arrives before
 * arch_accel () has armed the jump buffer (canjump == 0), restore the
 * default disposition and re-raise so the process dies as it would
 * have without the handler; otherwise disarm and unwind back to the
 * sigsetjmp in arch_accel ().
 */
static void
sigill_handler (gint sig)
{
  if (!canjump)
    {
      signal (sig, SIG_DFL);
      raise (sig);
    }

  canjump = 0;
  siglongjmp (jmpbuf, 1);
}
461
/* Probe for AltiVec by executing vector instructions and trapping the
 * SIGILL a non-AltiVec CPU raises.  Returns
 * GEGL_CPU_ACCEL_PPC_ALTIVEC on success, 0 when the instructions
 * fault.  The statement order (install handler, arm jump buffer, set
 * canjump, execute, restore handler) is load-bearing — see
 * sigill_handler () above.
 */
static guint32
arch_accel (void)
{
  signal (SIGILL, sigill_handler);

  if (sigsetjmp (jmpbuf, 1))
    {
      /* came back via siglongjmp: instruction faulted */
      signal (SIGILL, SIG_DFL);
      return 0;
    }

  canjump = 1;

  /* mtspr 256 writes VRSAVE, vand touches a vector register; either
   * traps without AltiVec
   */
  asm volatile ("mtspr 256, %0\n\t"
                "vand %%v0, %%v0, %%v0"
                :
                : "r" (-1));

  signal (SIGILL, SIG_DFL);

  return GEGL_CPU_ACCEL_PPC_ALTIVEC;
}
484 #endif /* __GNUC__ */
485
486 #endif /* ARCH_PPC && USE_ALTIVEC */
487
488
489 static GeglCpuAccelFlags
cpu_accel(void)490 cpu_accel (void)
491 {
492 #ifdef HAVE_ACCEL
493 static guint32 accel = ~0U;
494
495 if (accel != ~0U)
496 return accel;
497
498 accel = arch_accel ();
499
500 return (GeglCpuAccelFlags) accel;
501
502 #else /* !HAVE_ACCEL */
503 return GEGL_CPU_ACCEL_NONE;
504 #endif
505 }
506