1 /*
2 * accore.c -- core aclib functions
3 * Written by Andrew Church <achurch@achurch.org>
4 *
5 * This file is part of transcode, a video stream processing tool.
6 * transcode is free software, distributable under the terms of the GNU
7 * General Public License (version 2 or later). See the file COPYING
8 * for details.
9 */
10
11 #include "ac.h"
12 #include "ac_internal.h"
13 #include "imgconvert.h"
14
15 #include <stdio.h>
16 #include <string.h>
17
18 #if defined(ARCH_X86) || defined(ARCH_X86_64)
19 static int cpuinfo_x86(void);
20 #endif
21
22 /*************************************************************************/
23
/* Library initialization function.  The requested acceleration flags are
 * first restricted to those reported by ac_cpuinfo(); each initialization
 * subfunction is then called in turn with the resulting flags.  Returns 1
 * if every subfunction succeeds, or 0 as soon as one of them fails (the
 * remaining subfunctions are then not called).  This function can be
 * called multiple times to change the set of acceleration features to be
 * used. */

int ac_init(int accel)
{
    const int usable = accel & ac_cpuinfo();

    if (!ac_average_init(usable))
        return 0;
    if (!ac_imgconvert_init(usable))
        return 0;
    if (!ac_memcpy_init(usable))
        return 0;
    if (!ac_rescale_init(usable))
        return 0;
    return 1;
}
41
42 /*************************************************************************/
43
/* Returns the set of acceleration features supported by this CPU.  When
 * the library is not built for x86 or x86-64, no CPU probing is done and
 * the result is always 0. */

int ac_cpuinfo(void)
{
    int features = 0;

#if defined(ARCH_X86) || defined(ARCH_X86_64)
    features = cpuinfo_x86();
#endif
    return features;
}
54
55 /*************************************************************************/
56
57 /* Returns the endianness of this CPU (AC_BIG_ENDIAN or AC_LITTLE_ENDIAN). */
58
ac_endian(void)59 int ac_endian(void)
60 {
61 volatile int test;
62
63 test = 1;
64 if (*((uint8_t *)&test))
65 return AC_LITTLE_ENDIAN;
66 else
67 return AC_BIG_ENDIAN;
68 }
69
70 /*************************************************************************/
71
72 /* Utility routine to convert a set of flags to a descriptive string. The
73 * string is stored in a static buffer overwritten each call. `filter'
74 * selects whether to filter out flags not supported by the CPU. */
75
ac_flagstotext(int accel)76 const char *ac_flagstotext(int accel)
77 {
78 static char retbuf[1000];
79 if (!accel)
80 return "none";
81 snprintf(retbuf, sizeof(retbuf), "%s%s%s%s%s%s%s%s%s%s%s%s%s%s",
82 accel & AC_SSE5 ? " sse5" : "",
83 accel & AC_SSE4A ? " sse4a" : "",
84 accel & AC_SSE42 ? " sse42" : "",
85 accel & AC_SSE41 ? " sse41" : "",
86 accel & AC_SSSE3 ? " ssse3" : "",
87 accel & AC_SSE3 ? " sse3" : "",
88 accel & AC_SSE2 ? " sse2" : "",
89 accel & AC_SSE ? " sse" : "",
90 accel & AC_3DNOWEXT ? " 3dnowext" : "",
91 accel & AC_3DNOW ? " 3dnow" : "",
92 accel & AC_MMXEXT ? " mmxext" : "",
93 accel & AC_MMX ? " mmx" : "",
94 accel & AC_CMOVE ? " cmove" : "",
95 accel & (AC_IA32ASM|AC_AMD64ASM) ? " asm" : "");
96 return *retbuf ? retbuf+1 : retbuf; /* skip initial space */
97 }
98
/* Utility routine to parse a comma-separated list of acceleration flag
 * names into the corresponding set of flags, which is stored in *accel.
 * Names are matched case-insensitively against the table below.  This is
 * roughly the reverse of ac_flagstotext(), except that names are separated
 * by commas rather than spaces, "cmove" is not accepted, and "C" is
 * accepted as a name that selects no acceleration.
 *
 * Returns 1 on success, 0 on failure.  Failure means that `text' or
 * `accel' is NULL (nothing is written), or that a name is empty or not
 * recognized (*accel then holds the flags parsed before the bad name).
 * Names longer than AC_FLAG_LEN characters are truncated before the
 * comparison and therefore never match.
 *
 * When the library is not built for x86 or x86-64, the text is not
 * examined at all: *accel is left unmodified and 1 is returned. */

#define AC_FLAG_LEN 16

int ac_parseflags(const char *text, int *accel)
{
    int parsed = 1;

    if (!text || !accel)
        return 0;
#if defined(ARCH_X86) || defined(ARCH_X86_64)
    {
        /* Recognized names, searched in order (first match wins). */
        const struct {
            const char *name;
            int flag;
        } known[] = {
            { "C",        0           },  /* dummy for "no accel" */
# ifdef ARCH_X86
            { "asm",      AC_IA32ASM  },
# endif
# ifdef ARCH_X86_64
            { "asm",      AC_AMD64ASM },
# endif
            { "mmx",      AC_MMX      },
            { "mmxext",   AC_MMXEXT   },
            { "3dnow",    AC_3DNOW    },
            { "3dnowext", AC_3DNOWEXT },
            { "sse",      AC_SSE      },
            { "sse2",     AC_SSE2     },
            { "sse3",     AC_SSE3     },
            { "ssse3",    AC_SSSE3    },
            { "sse41",    AC_SSE41    },
            { "sse42",    AC_SSE42    },
            { "sse4a",    AC_SSE4A    },
            { "sse5",     AC_SSE5     },
        };
        const size_t nknown = sizeof(known) / sizeof(known[0]);

        *accel = 0;

        /* `text' points at the start of the next name, or is NULL once
         * the last name has been handled. */
        while (parsed && text) {
            char buf[AC_FLAG_LEN + 1] = { '\0' };
            const char *comma = strchr(text, ',');
            size_t len = AC_FLAG_LEN;
            size_t i;

            /* Copy one name into buf.  buf is zero-filled and one byte
             * larger than the copy limit, so it is always terminated. */
            if (comma && (size_t)(comma - text) < len)
                len = (size_t)(comma - text);
            strncpy(buf, text, len);

            for (i = 0; i < nknown; i++) {
                if (strcasecmp(buf, known[i].name) == 0)
                    break;
            }
            if (i < nknown)
                *accel |= known[i].flag;
            else
                parsed = 0;

            /* Only step past the comma if there is one: arithmetic on a
             * null pointer is undefined. */
            text = comma ? comma + 1 : NULL;
        }
    }
#endif
    return parsed;
}

#undef AC_FLAG_LEN
170
171 /*************************************************************************/
172 /*************************************************************************/
173
174 /* Private functions to return acceleration flags corresponding to available
175 * CPU features for various CPUs. Currently only x86 is supported. */
176
177 /*************************************************************************/
178
179 #if defined(ARCH_X86) || defined(ARCH_X86_64)
180
/* Names of the native-width registers and of the flag push/pop
 * instructions, written with doubled percent signs so that they can be
 * pasted into extended asm templates. */
#if defined(ARCH_X86_64)
# define EAX    "%%rax"
# define EBX    "%%rbx"
# define ESI    "%%rsi"
# define PUSHF  "pushfq"
# define POPF   "popfq"
#else
# define EAX    "%%eax"
# define EBX    "%%ebx"
# define ESI    "%%esi"
# define PUSHF  "pushfl"
# define POPF   "popfl"
#endif

/* Macro to execute the CPUID instruction with EAX = func.  Results are
 * placed in ret_a (EAX), ret_b (EBX), ret_c (ECX), and ret_d (EDX), which
 * must be lvalues.  EBX (RBX on x86-64) is the PIC register, so it is
 * copied to ESI before the instruction and exchanged back afterwards;
 * the EBX result is therefore delivered through ESI. */
#define CPUID(func,ret_a,ret_b,ret_c,ret_d)                             \
    asm("mov " EBX ", " ESI "; cpuid; xchg " EBX ", " ESI               \
        : "=a" (ret_a), "=S" (ret_b), "=c" (ret_c), "=d" (ret_d)        \
        : "a" (func))

/* Mask with only bit n set, as an unsigned long. */
#define CPUID_BIT(n)  (1UL << (n))

/* Various CPUID flags.  The second word of the macro name indicates the
 * function (1: function 1, X1: function 0x80000001) and register (C: ECX,
 * D: EDX) to which the value belongs. */
#define CPUID_1D_CMOVE          CPUID_BIT(15)
#define CPUID_1D_MMX            CPUID_BIT(23)
#define CPUID_1D_SSE            CPUID_BIT(25)
#define CPUID_1D_SSE2           CPUID_BIT(26)
#define CPUID_1C_SSE3           CPUID_BIT( 0)
#define CPUID_1C_SSSE3          CPUID_BIT( 9)
#define CPUID_1C_SSE41          CPUID_BIT(19)
#define CPUID_1C_SSE42          CPUID_BIT(20)
#define CPUID_X1D_AMD_MMXEXT    CPUID_BIT(22)  /* AMD only */
#define CPUID_X1D_AMD_3DNOW     CPUID_BIT(31)  /* AMD only */
#define CPUID_X1D_AMD_3DNOWEXT  CPUID_BIT(30)  /* AMD only */
#define CPUID_X1D_CYRIX_MMXEXT  CPUID_BIT(24)  /* Cyrix only */
#define CPUID_X1C_AMD_SSE4A     CPUID_BIT( 6)  /* AMD only */
#define CPUID_X1C_AMD_SSE5      CPUID_BIT(11)  /* AMD only */
221
cpuinfo_x86(void)222 static int cpuinfo_x86(void)
223 {
224 uint32_t eax, ebx, ecx, edx;
225 uint32_t cpuid_max, cpuid_ext_max; /* Maximum CPUID function numbers */
226 union {
227 char string[13];
228 struct { uint32_t ebx, edx, ecx; } regs;
229 } cpu_vendor; /* 12-byte CPU vendor string + trailing null */
230 uint32_t cpuid_1D, cpuid_1C, cpuid_X1C, cpuid_X1D;
231 int accel;
232
233 /* First see if the CPUID instruction is even available. We try to
234 * toggle bit 21 (ID) of the flags register; if the bit changes, then
235 * CPUID is available. */
236 asm(PUSHF" \n\
237 pop "EAX" \n\
238 mov %%eax, %%edx \n\
239 xor $0x200000, %%eax \n\
240 push "EAX" \n\
241 "POPF" \n\
242 "PUSHF" \n\
243 pop "EAX" \n\
244 xor %%edx, %%eax"
245 : "=a" (eax) : : "edx");
246 if (!eax)
247 return 0;
248
249 /* Determine the maximum function number available, and save the vendor
250 * string */
251 CPUID(0, cpuid_max, ebx, ecx, edx);
252 cpu_vendor.regs.ebx = ebx;
253 cpu_vendor.regs.ecx = ecx;
254 cpu_vendor.regs.edx = edx;
255 cpu_vendor.string[12] = 0;
256 cpuid_ext_max = 0; /* FIXME: how do early CPUs respond to 0x80000000? */
257 CPUID(0x80000000, cpuid_ext_max, ebx, ecx, edx);
258
259 /* Read available features */
260 cpuid_1D = cpuid_1C = cpuid_X1C = cpuid_X1D = 0;
261 if (cpuid_max >= 1)
262 CPUID(1, eax, ebx, cpuid_1C, cpuid_1D);
263 if (cpuid_ext_max >= 0x80000001)
264 CPUID(0x80000001, eax, ebx, cpuid_X1C, cpuid_X1D);
265
266 /* Convert to acceleration flags */
267 #ifdef ARCH_X86_64
268 accel = AC_AMD64ASM; /* but not IA32! (register size issues) */
269 #else
270 accel = AC_IA32ASM;
271 #endif
272 if (cpuid_1D & CPUID_1D_CMOVE)
273 accel |= AC_CMOVE;
274 if (cpuid_1D & CPUID_1D_MMX)
275 accel |= AC_MMX;
276 if (cpuid_1D & CPUID_1D_SSE)
277 accel |= AC_SSE;
278 if (cpuid_1D & CPUID_1D_SSE2)
279 accel |= AC_SSE2;
280 if (cpuid_1C & CPUID_1C_SSE3)
281 accel |= AC_SSE3;
282 if (cpuid_1C & CPUID_1C_SSSE3)
283 accel |= AC_SSSE3;
284 if (cpuid_1C & CPUID_1C_SSE41)
285 accel |= AC_SSE41;
286 if (cpuid_1C & CPUID_1C_SSE42)
287 accel |= AC_SSE42;
288 if (strcmp(cpu_vendor.string, "AuthenticAMD") == 0) {
289 if (cpuid_X1D & CPUID_X1D_AMD_MMXEXT)
290 accel |= AC_MMXEXT;
291 if (cpuid_X1D & CPUID_X1D_AMD_3DNOW)
292 accel |= AC_3DNOW;
293 if (cpuid_X1D & CPUID_X1D_AMD_3DNOWEXT)
294 accel |= AC_3DNOWEXT;
295 if (cpuid_X1C & CPUID_X1C_AMD_SSE4A)
296 accel |= AC_SSE4A;
297 if (cpuid_X1C & CPUID_X1C_AMD_SSE5)
298 accel |= AC_SSE5;
299 } else if (strcmp(cpu_vendor.string, "CyrixInstead") == 0) {
300 if (cpuid_X1D & CPUID_X1D_CYRIX_MMXEXT)
301 accel |= AC_MMXEXT;
302 }
303
304 /* And return */
305 return accel;
306 }
307
308 #endif /* ARCH_X86 || ARCH_X86_64 */
309
310 /*************************************************************************/
311
312 /*
313 * Local variables:
314 * c-file-style: "stroustrup"
315 * c-file-offsets: ((case-label . *) (statement-case-intro . *))
316 * indent-tabs-mode: nil
317 * End:
318 *
319 * vim: expandtab shiftwidth=4:
320 */
321