1 /* GEGL - The GEGL Library
2  * Copyright (C) 1995-1997 Peter Mattis and Spencer Kimball
3  *
4  * This library is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU Lesser General Public
6  * License as published by the Free Software Foundation; either
7  * version 3 of the License, or (at your option) any later version.
8  *
9  * This library is distributed in the hope that it will be useful,
10  * but WITHOUT ANY WARRANTY; without even the implied warranty of
11  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12  * Lesser General Public License for more details.
13  *
14  * You should have received a copy of the GNU Lesser General Public
15  * License along with this library; if not, write to the
16  * Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17  * Boston, MA 02110-1301, USA.
18  */
19 
20 /*
21  * x86 bits Copyright (C) Manish Singh <yosh@gimp.org>
22  */
23 
24 /*
25  * PPC CPU acceleration detection was taken from DirectFB but seems to be
26  * originating from mpeg2dec with the following copyright:
27  *
28  * Copyright (C) 1999-2001 Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
29  */
30 
31 #include "config.h"
32 
33 #include <string.h>
34 #include <signal.h>
35 #include <setjmp.h>
36 
37 #include <glib.h>
38 
39 #include "gegl-cpuaccel-private.h"
40 
41 
/* Returns the detected acceleration flags; defined at the end of this
 * file, after the architecture specific probes.
 */
static GeglCpuAccelFlags  cpu_accel (void) G_GNUC_CONST;


/* Switch read by gegl_cpu_accel_get_support() and written by
 * gegl_cpu_accel_set_use().  Acceleration is reported by default.
 */
static gboolean  use_cpu_accel = TRUE;
46 
47 
48 /**
49  * gegl_cpu_accel_get_support:
50  *
51  * Query for CPU acceleration support.
52  *
53  * Return value: #GeglCpuAccelFlags as supported by the CPU.
54  *
55  * Since: GEGL 2.4
56  */
57 GeglCpuAccelFlags
gegl_cpu_accel_get_support(void)58 gegl_cpu_accel_get_support (void)
59 {
60   return use_cpu_accel ? cpu_accel () : GEGL_CPU_ACCEL_NONE;
61 }
62 
63 /**
64  * gegl_cpu_accel_set_use:
65  * @use:  whether to use CPU acceleration features or not
66  *
67  * This function is for internal use only.
68  *
69  * Since: GEGL 2.4
70  */
71 void
gegl_cpu_accel_set_use(gboolean use)72 gegl_cpu_accel_set_use (gboolean use)
73 {
74   use_cpu_accel = use ? TRUE : FALSE;
75 }
76 
77 
78 #if defined(ARCH_X86) && defined(USE_MMX) && defined(__GNUC__)
79 
80 #define HAVE_ACCEL 1
81 
82 
/* CPU vendors told apart by arch_get_vendor().
 *
 * ARCH_X86_VENDOR_NONE means that no vendor string could be read at
 * all; ARCH_X86_VENDOR_UNKNOWN means that a string was read but is not
 * one of the known signatures.
 */
typedef enum
{
  ARCH_X86_VENDOR_NONE      = 0,
  ARCH_X86_VENDOR_INTEL     = 1,
  ARCH_X86_VENDOR_AMD       = 2,
  ARCH_X86_VENDOR_CENTAUR   = 3,
  ARCH_X86_VENDOR_CYRIX     = 4,
  ARCH_X86_VENDOR_NSC       = 5,
  ARCH_X86_VENDOR_TRANSMETA = 6,
  ARCH_X86_VENDOR_NEXGEN    = 7,
  ARCH_X86_VENDOR_RISE      = 8,
  ARCH_X86_VENDOR_UMC       = 9,
  ARCH_X86_VENDOR_SIS       = 10,
  ARCH_X86_VENDOR_HYGON     = 11,
  ARCH_X86_VENDOR_UNKNOWN   = 0xff
} X86Vendor;
99 
/* Feature bits tested against the EDX value returned by cpuid().
 *
 * The INTEL bits are tested on the result of leaf 1; the AMD, CENTAUR
 * and CYRIX bits are tested on the result of leaf 0x80000001.
 *
 * These are unsigned macros rather than enumerators on purpose: bit 31
 * cannot be produced by shifting a signed int (1 << 31 shifts into the
 * sign bit, which is undefined behaviour), and 1U << 31 does not fit
 * the int type of an enumeration constant.
 */
#define ARCH_X86_INTEL_FEATURE_MMX       (1U << 23)
#define ARCH_X86_INTEL_FEATURE_XMM       (1U << 25)
#define ARCH_X86_INTEL_FEATURE_XMM2      (1U << 26)

#define ARCH_X86_AMD_FEATURE_MMXEXT      (1U << 22)
#define ARCH_X86_AMD_FEATURE_3DNOW       (1U << 31)

#define ARCH_X86_CENTAUR_FEATURE_MMX     (1U << 23)
#define ARCH_X86_CENTAUR_FEATURE_MMXEXT  (1U << 24)
#define ARCH_X86_CENTAUR_FEATURE_3DNOW   (1U << 31)

#define ARCH_X86_CYRIX_FEATURE_MMX       (1U << 23)
#define ARCH_X86_CYRIX_FEATURE_MMXEXT    (1U << 24)
116 
/* Feature bit tested against the ECX value returned by cpuid() leaf 1;
 * arch_accel_intel() maps it to GEGL_CPU_ACCEL_X86_SSE3.
 */
enum
{
  ARCH_X86_INTEL_FEATURE_PNI      = 0x00000001
};
121 
/*
 * cpuid(op, eax, ebx, ecx, edx):
 *
 * Executes the CPUID instruction with @op loaded into %eax and stores
 * the resulting %eax, %ebx, %ecx and %edx values in the lvalues passed
 * as @eax, @ebx, @ecx and @edx.
 *
 * The variant used for 32-bit PIC builds never names %ebx as an
 * operand: it copies %ebx to %esi before CPUID and exchanges the two
 * afterwards, so %ebx leaves the asm with its original value and the
 * CPUID result is read from %esi.
 * NOTE(review): presumably needed because %ebx holds the GOT pointer
 * in ia32 PIC code -- confirm against the compiler documentation.
 */
#if !defined(ARCH_X86_64) && (defined(PIC) || defined(__PIC__))
#define cpuid(op,eax,ebx,ecx,edx)  \
  __asm__ ("movl %%ebx, %%esi\n\t" \
           "cpuid\n\t"             \
           "xchgl %%ebx,%%esi"     \
           : "=a" (eax),           \
             "=S" (ebx),           \
             "=c" (ecx),           \
             "=d" (edx)            \
           : "0" (op))
#else
/* Plain variant: all four result registers are direct asm outputs. */
#define cpuid(op,eax,ebx,ecx,edx)  \
  __asm__ ("cpuid"                 \
           : "=a" (eax),           \
             "=b" (ebx),           \
             "=c" (ecx),           \
             "=d" (edx)            \
           : "0" (op))
#endif
141 
142 
143 static X86Vendor
arch_get_vendor(void)144 arch_get_vendor (void)
145 {
146   guint32 eax, ebx, ecx, edx;
147   guint32 id32[4];
148   char *id = (char *) id32;
149 
150 #ifndef ARCH_X86_64
151   /* Only need to check this on ia32 */
152   __asm__ ("pushfl\n\t"
153            "pushfl\n\t"
154            "popl %0\n\t"
155            "movl %0,%1\n\t"
156            "xorl $0x200000,%0\n\t"
157            "pushl %0\n\t"
158            "popfl\n\t"
159            "pushfl\n\t"
160            "popl %0\n\t"
161            "popfl"
162            : "=a" (eax),
163              "=c" (ecx)
164            :
165            : "cc");
166 
167   if (eax == ecx)
168     return ARCH_X86_VENDOR_NONE;
169 #endif
170 
171   cpuid (0, eax, ebx, ecx, edx);
172 
173   if (eax == 0)
174     return ARCH_X86_VENDOR_NONE;
175 
176   id32[0] = ebx;
177   id32[1] = edx;
178   id32[2] = ecx;
179 
180   id[12] = '\0';
181 
182 #ifdef ARCH_X86_64
183   if (strcmp (id, "AuthenticAMD") == 0)
184     return ARCH_X86_VENDOR_AMD;
185   else if (strcmp (id, "HygonGenuine") == 0)
186     return ARCH_X86_VENDOR_HYGON;
187   else if (strcmp (id, "GenuineIntel") == 0)
188     return ARCH_X86_VENDOR_INTEL;
189 #else
190   if (strcmp (id, "GenuineIntel") == 0)
191     return ARCH_X86_VENDOR_INTEL;
192   else if (strcmp (id, "AuthenticAMD") == 0)
193     return ARCH_X86_VENDOR_AMD;
194   else if (strcmp (id, "HygonGenuine") == 0)
195     return ARCH_X86_VENDOR_HYGON;
196   else if (strcmp (id, "CentaurHauls") == 0)
197     return ARCH_X86_VENDOR_CENTAUR;
198   else if (strcmp (id, "CyrixInstead") == 0)
199     return ARCH_X86_VENDOR_CYRIX;
200   else if (strcmp (id, "Geode by NSC") == 0)
201     return ARCH_X86_VENDOR_NSC;
202   else if (strcmp (id, "GenuineTMx86") == 0 ||
203            strcmp (id, "TransmetaCPU") == 0)
204     return ARCH_X86_VENDOR_TRANSMETA;
205   else if (strcmp (id, "NexGenDriven") == 0)
206     return ARCH_X86_VENDOR_NEXGEN;
207   else if (strcmp (id, "RiseRiseRise") == 0)
208     return ARCH_X86_VENDOR_RISE;
209   else if (strcmp (id, "UMC UMC UMC ") == 0)
210     return ARCH_X86_VENDOR_UMC;
211   else if (strcmp (id, "SiS SiS SiS ") == 0)
212     return ARCH_X86_VENDOR_SIS;
213 #endif
214 
215   return ARCH_X86_VENDOR_UNKNOWN;
216 }
217 
218 static guint32
arch_accel_intel(void)219 arch_accel_intel (void)
220 {
221   guint32 caps = 0;
222 
223 #ifdef USE_MMX
224   {
225     guint32 eax, ebx, ecx, edx;
226 
227     cpuid (1, eax, ebx, ecx, edx);
228 
229     if ((edx & ARCH_X86_INTEL_FEATURE_MMX) == 0)
230       return 0;
231 
232     caps = GEGL_CPU_ACCEL_X86_MMX;
233 
234 #ifdef USE_SSE
235     if (edx & ARCH_X86_INTEL_FEATURE_XMM)
236       caps |= GEGL_CPU_ACCEL_X86_SSE | GEGL_CPU_ACCEL_X86_MMXEXT;
237 
238     if (edx & ARCH_X86_INTEL_FEATURE_XMM2)
239       caps |= GEGL_CPU_ACCEL_X86_SSE2;
240 
241     if (ecx & ARCH_X86_INTEL_FEATURE_PNI)
242       caps |= GEGL_CPU_ACCEL_X86_SSE3;
243 #endif /* USE_SSE */
244   }
245 #endif /* USE_MMX */
246 
247   return caps;
248 }
249 
250 static guint32
arch_accel_amd(void)251 arch_accel_amd (void)
252 {
253   guint32 caps;
254 
255   caps = arch_accel_intel ();
256 
257 #ifdef USE_MMX
258   {
259     guint32 eax, ebx, ecx, edx;
260 
261     cpuid (0x80000000, eax, ebx, ecx, edx);
262 
263     if (eax < 0x80000001)
264       return caps;
265 
266 #ifdef USE_SSE
267     cpuid (0x80000001, eax, ebx, ecx, edx);
268 
269     if (edx & ARCH_X86_AMD_FEATURE_3DNOW)
270       caps |= GEGL_CPU_ACCEL_X86_3DNOW;
271 
272     if (edx & ARCH_X86_AMD_FEATURE_MMXEXT)
273       caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
274 #endif /* USE_SSE */
275   }
276 #endif /* USE_MMX */
277 
278   return caps;
279 }
280 
281 static guint32
arch_accel_centaur(void)282 arch_accel_centaur (void)
283 {
284   guint32 caps;
285 
286   caps = arch_accel_intel ();
287 
288 #ifdef USE_MMX
289   {
290     guint32 eax, ebx, ecx, edx;
291 
292     cpuid (0x80000000, eax, ebx, ecx, edx);
293 
294     if (eax < 0x80000001)
295       return caps;
296 
297     cpuid (0x80000001, eax, ebx, ecx, edx);
298 
299     if (edx & ARCH_X86_CENTAUR_FEATURE_MMX)
300       caps |= GEGL_CPU_ACCEL_X86_MMX;
301 
302 #ifdef USE_SSE
303     if (edx & ARCH_X86_CENTAUR_FEATURE_3DNOW)
304       caps |= GEGL_CPU_ACCEL_X86_3DNOW;
305 
306     if (edx & ARCH_X86_CENTAUR_FEATURE_MMXEXT)
307       caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
308 #endif /* USE_SSE */
309   }
310 #endif /* USE_MMX */
311 
312   return caps;
313 }
314 
315 static guint32
arch_accel_cyrix(void)316 arch_accel_cyrix (void)
317 {
318   guint32 caps;
319 
320   caps = arch_accel_intel ();
321 
322 #ifdef USE_MMX
323   {
324     guint32 eax, ebx, ecx, edx;
325 
326     cpuid (0, eax, ebx, ecx, edx);
327 
328     if (eax != 2)
329       return caps;
330 
331     cpuid (0x80000001, eax, ebx, ecx, edx);
332 
333     if (edx & ARCH_X86_CYRIX_FEATURE_MMX)
334       caps |= GEGL_CPU_ACCEL_X86_MMX;
335 
336 #ifdef USE_SSE
337     if (edx & ARCH_X86_CYRIX_FEATURE_MMXEXT)
338       caps |= GEGL_CPU_ACCEL_X86_MMXEXT;
339 #endif /* USE_SSE */
340   }
341 #endif /* USE_MMX */
342 
343   return caps;
344 }
345 
#ifdef USE_SSE
/* Jump target used to leave sigill_handler() during the SSE probe. */
static jmp_buf sigill_return;

/*
 * sigill_handler:
 * @n: the signal number (unused)
 *
 * SIGILL handler installed while probing; jumps back into
 * arch_accel_sse_os_support() so that setjmp() returns 1 there.
 */
static void
sigill_handler (gint n)
{
  longjmp (sigill_return, 1);
}

/*
 * arch_accel_sse_os_support:
 *
 * Checks whether an SSE instruction can actually be executed by running
 * "xorps %xmm0, %xmm0" with a temporary SIGILL handler in place.
 *
 * The SIGILL disposition that was active before the probe is restored
 * on both the success and the failure path, so that neither our handler
 * (which would jump into a dead stack frame) nor a forced SIG_DFL is
 * left behind.
 *
 * NOTE(review): longjmp() out of a signal handler may leave SIGILL
 * blocked on platforms where setjmp() does not save the signal mask;
 * sigsetjmp() is not used here because its availability on every
 * supported platform could not be confirmed.
 *
 * Returns: TRUE if the instruction executed, FALSE if it raised SIGILL
 *          or if the temporary handler could not be installed.
 */
static gboolean
arch_accel_sse_os_support (void)
{
  /* volatile: assigned between setjmp() and a possible longjmp(), and
   * read again afterwards.
   */
  void   (* volatile previous) (int) = SIG_DFL;
  gboolean           supported;

  if (setjmp (sigill_return) == 0)
    {
      previous = signal (SIGILL, sigill_handler);

      /* Without a handler the probe could kill the process; be
       * conservative and report SSE as unusable instead.
       */
      if (previous == SIG_ERR)
        return FALSE;

      __asm__ __volatile__ ("xorps %xmm0, %xmm0");

      supported = TRUE;
    }
  else
    {
      /* Arrived here from sigill_handler(). */
      supported = FALSE;
    }

  signal (SIGILL, previous);

  return supported;
}
#endif /* USE_SSE */
372 
373 static guint32
arch_accel(void)374 arch_accel (void)
375 {
376   guint32 caps;
377   X86Vendor vendor;
378 
379   vendor = arch_get_vendor ();
380 
381   switch (vendor)
382     {
383     case ARCH_X86_VENDOR_NONE:
384       caps = 0;
385       break;
386 
387     case ARCH_X86_VENDOR_AMD:
388     case ARCH_X86_VENDOR_HYGON:
389       caps = arch_accel_amd ();
390       break;
391 
392     case ARCH_X86_VENDOR_CENTAUR:
393       caps = arch_accel_centaur ();
394       break;
395 
396     case ARCH_X86_VENDOR_CYRIX:
397     case ARCH_X86_VENDOR_NSC:
398       caps = arch_accel_cyrix ();
399       break;
400 
401     /* check for what Intel speced, even if UNKNOWN */
402     default:
403       caps = arch_accel_intel ();
404       break;
405     }
406 
407 #ifdef USE_SSE
408   if ((caps & GEGL_CPU_ACCEL_X86_SSE) && !arch_accel_sse_os_support ())
409     caps &= ~(GEGL_CPU_ACCEL_X86_SSE | GEGL_CPU_ACCEL_X86_SSE2);
410 #endif
411 
412   return caps;
413 }
414 
415 #endif /* ARCH_X86 && USE_MMX && __GNUC__ */
416 
417 
418 #if defined(ARCH_PPC) && defined (USE_ALTIVEC)
419 
420 #if defined(HAVE_ALTIVEC_SYSCTL)
421 
422 #include <sys/sysctl.h>
423 
424 #define HAVE_ACCEL 1
425 
426 static guint32
arch_accel(void)427 arch_accel (void)
428 {
429   gint     sels[2] = { CTL_HW, HW_VECTORUNIT };
430   gboolean has_vu  = FALSE;
431   gsize    length  = sizeof(has_vu);
432   gint     err;
433 
434   err = sysctl (sels, 2, &has_vu, &length, NULL, 0);
435 
436   if (err == 0 && has_vu)
437     return GEGL_CPU_ACCEL_PPC_ALTIVEC;
438 
439   return 0;
440 }
441 
442 #elif defined(__GNUC__)
443 
444 #define HAVE_ACCEL 1
445 
446 static          sigjmp_buf   jmpbuf;
447 static volatile sig_atomic_t canjump = 0;
448 
449 static void
sigill_handler(gint sig)450 sigill_handler (gint sig)
451 {
452   if (!canjump)
453     {
454       signal (sig, SIG_DFL);
455       raise (sig);
456     }
457 
458   canjump = 0;
459   siglongjmp (jmpbuf, 1);
460 }
461 
462 static guint32
arch_accel(void)463 arch_accel (void)
464 {
465   signal (SIGILL, sigill_handler);
466 
467   if (sigsetjmp (jmpbuf, 1))
468     {
469       signal (SIGILL, SIG_DFL);
470       return 0;
471     }
472 
473   canjump = 1;
474 
475   asm volatile ("mtspr 256, %0\n\t"
476                 "vand %%v0, %%v0, %%v0"
477                 :
478                 : "r" (-1));
479 
480   signal (SIGILL, SIG_DFL);
481 
482   return GEGL_CPU_ACCEL_PPC_ALTIVEC;
483 }
484 #endif /* __GNUC__ */
485 
486 #endif /* ARCH_PPC && USE_ALTIVEC */
487 
488 
489 static GeglCpuAccelFlags
cpu_accel(void)490 cpu_accel (void)
491 {
492 #ifdef HAVE_ACCEL
493   static guint32 accel = ~0U;
494 
495   if (accel != ~0U)
496     return accel;
497 
498   accel = arch_accel ();
499 
500   return (GeglCpuAccelFlags) accel;
501 
502 #else /* !HAVE_ACCEL */
503   return GEGL_CPU_ACCEL_NONE;
504 #endif
505 }
506