/*
 * accore.c -- core aclib functions
 * Written by Andrew Church <achurch@achurch.org>
 *
 * This file is part of transcode, a video stream processing tool.
 * transcode is free software, distributable under the terms of the GNU
 * General Public License (version 2 or later).  See the file COPYING
 * for details.
 */

#include "ac.h"
#include "ac_internal.h"
#include "imgconvert.h"

#include <stdio.h>
#include <string.h>
#include <strings.h>

/* Forward declaration of the x86 CPU feature probe, which is defined
 * later in this file and only exists on x86 / x86-64 builds. */
#if defined(ARCH_X86) || defined(ARCH_X86_64)
static int cpuinfo_x86(void);
#endif

/*************************************************************************/

/* Library initialization function.  Determines CPU features, then calls
 * all initialization subfunctions with appropriate flags.  Returns 1 on
 * success, 0 on failure.  This function can be called multiple times to
 * change the set of acceleration features to be used.
 *
 * accel is the set of requested acceleration flags; any flag that
 * ac_cpuinfo() does not report is cleared before the subfunctions see it.
 * The subfunctions are called in order (average, imgconvert, memcpy,
 * rescale), and the first one to fail stops the sequence. */

int ac_init(int accel)
{
    /* Never request a feature the CPU does not report. */
    accel &= ac_cpuinfo();
    if (!ac_average_init(accel)
     || !ac_imgconvert_init(accel)
     || !ac_memcpy_init(accel)
     || !ac_rescale_init(accel)
    ) {
        return 0;
    }
    return 1;
}

/*************************************************************************/

/* Returns the set of acceleration features supported by this CPU.  On
 * builds for anything other than x86 / x86-64 no probing is done and the
 * result is always 0 (no acceleration). */

int ac_cpuinfo(void)
{
#if defined(ARCH_X86) || defined(ARCH_X86_64)
    return cpuinfo_x86();
#else
    return 0;
#endif
}

55 /*************************************************************************/
56 
57 /* Returns the endianness of this CPU (AC_BIG_ENDIAN or AC_LITTLE_ENDIAN). */
58 
ac_endian(void)59 int ac_endian(void)
60 {
61     volatile int test;
62 
63     test = 1;
64     if (*((uint8_t *)&test))
65         return AC_LITTLE_ENDIAN;
66     else
67         return AC_BIG_ENDIAN;
68 }
69 
70 /*************************************************************************/
71 
72 /* Utility routine to convert a set of flags to a descriptive string.  The
73  * string is stored in a static buffer overwritten each call.  `filter'
74  * selects whether to filter out flags not supported by the CPU. */
75 
ac_flagstotext(int accel)76 const char *ac_flagstotext(int accel)
77 {
78     static char retbuf[1000];
79     if (!accel)
80         return "none";
81     snprintf(retbuf, sizeof(retbuf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
82              accel & AC_SSE5                  ? " sse5"     : "",
83              accel & AC_SSE4A                 ? " sse4a"    : "",
84              accel & AC_SSE42                 ? " sse42"    : "",
85              accel & AC_SSE41                 ? " sse41"    : "",
86              accel & AC_SSSE3                 ? " ssse3"    : "",
87              accel & AC_SSE3                  ? " sse3"     : "",
88              accel & AC_SSE2                  ? " sse2"     : "",
89              accel & AC_SSE                   ? " sse"      : "",
90              accel & AC_3DNOWEXT              ? " 3dnowext" : "",
91              accel & AC_3DNOW                 ? " 3dnow"    : "",
92              accel & AC_MMXEXT                ? " mmxext"   : "",
93              accel & AC_MMX                   ? " mmx"      : "",
94              accel & AC_CMOVE                 ? " cmove"    : "",
95              accel & (AC_IA32ASM|AC_AMD64ASM) ? " asm"      : "");
96     return *retbuf ? retbuf+1 : retbuf;  /* skip initial space */
97 }
98 
/* Utility routine to parse a comma-separated descriptive string into the
 * corresponding set of flags.  This is the counterpart of
 * ac_flagstotext(), except that names are separated by commas here and by
 * spaces there.
 *
 * text:  list of flag names, compared case-insensitively.  "C" and "none"
 *        are accepted and select no flags.
 * accel: receives the OR of all flags parsed.  It is reset to 0 before
 *        parsing; on failure it holds the flags recognized before the
 *        offending name.
 *
 * Returns 1 on success, 0 on failure (NULL argument, empty name or
 * unknown name).  On builds for anything other than x86 / x86-64 no names
 * are defined: the text is not examined, *accel is set to 0 and 1 is
 * returned. */

#define AC_FLAG_LEN     16

int ac_parseflags(const char *text, int *accel)
{
    int parsed = 1;

    if (!text || !accel)
        return 0;
    *accel = 0;
#if defined(ARCH_X86) || defined(ARCH_X86_64)
    {
        /* Known names, checked in order; the first match wins. */
        static const struct {
            const char *name;
            int flag;
        } known[] = {
            { "C",        0            },  /* dummy for "no accel" */
            { "none",     0            },  /* as printed by ac_flagstotext(0) */
#ifdef ARCH_X86
            { "asm",      AC_IA32ASM   },
#endif
#ifdef ARCH_X86_64
            { "asm",      AC_AMD64ASM  },
#endif
            { "cmove",    AC_CMOVE     },
            { "mmx",      AC_MMX       },
            { "mmxext",   AC_MMXEXT    },
            { "3dnow",    AC_3DNOW     },
            { "3dnowext", AC_3DNOWEXT  },
            { "sse",      AC_SSE       },
            { "sse2",     AC_SSE2      },
            { "sse3",     AC_SSE3      },
            { "ssse3",    AC_SSSE3     },
            { "sse41",    AC_SSE41     },
            { "sse42",    AC_SSE42     },
            { "sse4a",    AC_SSE4A     },
            { "sse5",     AC_SSE5      },
        };
        const size_t nknown = sizeof(known) / sizeof(known[0]);

        /* text is set to NULL once the last name has been handled. */
        while (parsed && text) {
            char buf[AC_FLAG_LEN + 1] = { '\0' };
            const char *comma = strchr(text, ',');
            size_t len = comma ? (size_t)(comma - text) : strlen(text);
            size_t i;

            /* Overlong names are truncated; since every known name is
             * shorter than AC_FLAG_LEN they can never match. */
            if (len > AC_FLAG_LEN)
                len = AC_FLAG_LEN;
            memcpy(buf, text, len);

            for (i = 0; i < nknown; i++) {
                if (strcasecmp(buf, known[i].name) == 0) {
                    *accel |= known[i].flag;
                    break;
                }
            }
            if (i == nknown)
                parsed = 0;

            /* Only step past a comma that actually exists. */
            text = comma ? comma + 1 : NULL;
        }
    }
#endif
    return parsed;
}

#undef AC_FLAG_LEN

/*************************************************************************/
/*************************************************************************/

/* Private functions to return acceleration flags corresponding to available
 * CPU features for various CPUs.  Currently only x86 is supported. */

/*************************************************************************/

#if defined(ARCH_X86) || defined(ARCH_X86_64)

/* Register names and flag push/pop mnemonics for the native word size,
 * for use inside asm() strings that take operands (hence the doubled
 * percent signs). */
#ifdef ARCH_X86_64
# define EAX "%%rax"
# define EBX "%%rbx"
# define ESI "%%rsi"
# define PUSHF "pushfq"
# define POPF "popfq"
#else
# define EAX "%%eax"
# define EBX "%%ebx"
# define ESI "%%esi"
# define PUSHF "pushfl"
# define POPF "popfl"
#endif

/* Macro to execute the CPUID instruction with EAX = func.  Results are
 * placed in ret_a (EAX), ret_b (EBX), ret_c (ECX), and ret_d (EDX), which
 * must be lvalues.  Note that we save and restore EBX (RBX on x86-64)
 * because it is the PIC register: its old value is parked in ESI/RSI
 * during CPUID and swapped back afterwards, which also leaves the EBX
 * result in ESI/RSI where the "=S" constraint picks it up. */
#define CPUID(func,ret_a,ret_b,ret_c,ret_d)                             \
    asm("mov "EBX", "ESI"; cpuid; xchg "EBX", "ESI                      \
        : "=a" (ret_a), "=S" (ret_b), "=c" (ret_c), "=d" (ret_d)        \
        : "a" (func))

/* Various CPUID flags.  The second word of the macro name indicates the
 * function (1: function 1, X1: function 0x80000001) and register (C: ECX,
 * D: EDX) to which the value belongs. */
#define CPUID_1D_CMOVE          (1UL<<15)
#define CPUID_1D_MMX            (1UL<<23)
#define CPUID_1D_SSE            (1UL<<25)
#define CPUID_1D_SSE2           (1UL<<26)
#define CPUID_1C_SSE3           (1UL<< 0)
#define CPUID_1C_SSSE3          (1UL<< 9)
#define CPUID_1C_SSE41          (1UL<<19)
#define CPUID_1C_SSE42          (1UL<<20)
#define CPUID_X1D_AMD_MMXEXT    (1UL<<22)  /* AMD only */
#define CPUID_X1D_AMD_3DNOW     (1UL<<31)  /* AMD only */
#define CPUID_X1D_AMD_3DNOWEXT  (1UL<<30)  /* AMD only */
#define CPUID_X1D_CYRIX_MMXEXT  (1UL<<24)  /* Cyrix only */
#define CPUID_X1C_AMD_SSE4A     (1UL<< 6)  /* AMD only */
#define CPUID_X1C_AMD_SSE5      (1UL<<11)  /* AMD only */

/* Probes the CPU with CPUID and returns the matching set of AC_* flags.
 * Returns 0 if the CPUID instruction is not available.  Otherwise the
 * result always contains AC_AMD64ASM (x86-64 builds) or AC_IA32ASM (other
 * builds), plus one flag per feature bit found.  The extended feature
 * bits are only interpreted for the "AuthenticAMD" and "CyrixInstead"
 * vendor strings. */
static int cpuinfo_x86(void)
{
    uint32_t eax, ebx, ecx, edx;
    uint32_t cpuid_max, cpuid_ext_max;  /* Maximum CPUID function numbers */
    union {
        char string[13];
        struct { uint32_t ebx, edx, ecx; } regs;
    } cpu_vendor;  /* 12-byte CPU vendor string + trailing null */
    uint32_t cpuid_1D, cpuid_1C, cpuid_X1C, cpuid_X1D;
    int accel;

#ifndef ARCH_X86_64
    /* First see if the CPUID instruction is even available.  We try to
     * toggle bit 21 (ID) of the flags register; if the bit changes, then
     * CPUID is available.  This is skipped on x86-64, where CPUID always
     * exists and where pushing onto the stack from inline assembly could
     * overwrite data the compiler keeps in the red zone below the stack
     * pointer. */
    asm(PUSHF"                  \n\
        pop "EAX"               \n\
        mov %%eax, %%edx        \n\
        xor $0x200000, %%eax    \n\
        push "EAX"              \n\
        "POPF"                  \n\
        "PUSHF"                 \n\
        pop "EAX"               \n\
        xor %%edx, %%eax"
        : "=a" (eax) : : "edx");
    if (!eax)
        return 0;
#endif

    /* Determine the maximum function number available, and save the vendor
     * string (returned in EBX, EDX, ECX, in that order) */
    CPUID(0, cpuid_max, ebx, ecx, edx);
    cpu_vendor.regs.ebx = ebx;
    cpu_vendor.regs.ecx = ecx;
    cpu_vendor.regs.edx = edx;
    cpu_vendor.string[12] = 0;

    /* Determine the maximum extended function number.  A CPU without
     * extended functions does not answer this query with a 0x8000xxxx
     * value (it may return the data of an unrelated basic function
     * instead), so any other result is treated as "none available". */
    cpuid_ext_max = 0;
    CPUID(0x80000000, cpuid_ext_max, ebx, ecx, edx);
    if ((cpuid_ext_max & 0xFFFF0000) != 0x80000000)
        cpuid_ext_max = 0;

    /* Read available features */
    cpuid_1D = cpuid_1C = cpuid_X1C = cpuid_X1D = 0;
    if (cpuid_max >= 1)
        CPUID(1, eax, ebx, cpuid_1C, cpuid_1D);
    if (cpuid_ext_max >= 0x80000001)
        CPUID(0x80000001, eax, ebx, cpuid_X1C, cpuid_X1D);

    /* Convert to acceleration flags */
#ifdef ARCH_X86_64
    accel = AC_AMD64ASM;  /* but not IA32! (register size issues) */
#else
    accel = AC_IA32ASM;
#endif
    if (cpuid_1D & CPUID_1D_CMOVE)
        accel |= AC_CMOVE;
    if (cpuid_1D & CPUID_1D_MMX)
        accel |= AC_MMX;
    if (cpuid_1D & CPUID_1D_SSE)
        accel |= AC_SSE;
    if (cpuid_1D & CPUID_1D_SSE2)
        accel |= AC_SSE2;
    if (cpuid_1C & CPUID_1C_SSE3)
        accel |= AC_SSE3;
    if (cpuid_1C & CPUID_1C_SSSE3)
        accel |= AC_SSSE3;
    if (cpuid_1C & CPUID_1C_SSE41)
        accel |= AC_SSE41;
    if (cpuid_1C & CPUID_1C_SSE42)
        accel |= AC_SSE42;
    /* The extended feature bits are vendor-specific */
    if (strcmp(cpu_vendor.string, "AuthenticAMD") == 0) {
        if (cpuid_X1D & CPUID_X1D_AMD_MMXEXT)
            accel |= AC_MMXEXT;
        if (cpuid_X1D & CPUID_X1D_AMD_3DNOW)
            accel |= AC_3DNOW;
        if (cpuid_X1D & CPUID_X1D_AMD_3DNOWEXT)
            accel |= AC_3DNOWEXT;
        if (cpuid_X1C & CPUID_X1C_AMD_SSE4A)
            accel |= AC_SSE4A;
        if (cpuid_X1C & CPUID_X1C_AMD_SSE5)
            accel |= AC_SSE5;
    } else if (strcmp(cpu_vendor.string, "CyrixInstead") == 0) {
        if (cpuid_X1D & CPUID_X1D_CYRIX_MMXEXT)
            accel |= AC_MMXEXT;
    }

    /* And return */
    return accel;
}

#endif  /* ARCH_X86 || ARCH_X86_64 */

/*************************************************************************/

/*
 * Local variables:
 *   c-file-style: "stroustrup"
 *   c-file-offsets: ((case-label . *) (statement-case-intro . *))
 *   indent-tabs-mode: nil
 * End:
 *
 * vim: expandtab shiftwidth=4:
 */
321