1 /*
2  * x86.cpp
3  *
 *  Created on: 28 Feb 2020
5  *      Author: Vladimir Sadovnikov <lsp.plugin@gmail.com>
6  *
7  * This file is part of tamgamp.lv2 <https://github.com/sadko4u/tamgamp.lv2>.
8  *
9  * This program is free software; you can redistribute it and/or
10  * modify it under the terms of the GNU Lesser General Public
11  * License as published by the Free Software Foundation; either
12  * version 3 of the License, or (at your option) any later version.
13  *
14  * This program is distributed in the hope that it will be useful,
15  * but WITHOUT ANY WARRANTY; without even the implied warranty of
16  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
17  * Lesser General Public License for more details.
18  *
19  * You should have received a copy of the GNU Lesser General Public License
20  * along with this program; if not, write to the Free Software Foundation,
21  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
22  */
23 
24 #include <dsp/dsp.h>
25 
26 #ifdef ARCH_X86
27 
28 #include <string.h>
29 #include <stdlib.h>
30 #include <stdio.h>
31 
32 #include <dsp/arch/x86/features.h>
33 #include <dsp/arch/x86/cpuid.h>
34 #include <dsp/arch/x86/fpu.h>
35 
36 namespace sse
37 {
38     extern void dsp_init(const x86::cpu_features_t *f);
39 }
40 
41 namespace avx
42 {
43     extern void dsp_init(const x86::cpu_features_t *f);
44 }
45 
46 namespace x86
47 {
#pragma pack(push, 1)
    /**
     * Overlay used to turn the three registers returned by CPUID leaf 0 into
     * the 12-character vendor signature. The register order inside 'reg'
     * (EBX, EDX, ECX) is the order in which the signature bytes are laid out.
     */
    union vendor_sig_t
    {
        char    sig[12];        // Signature viewed as raw characters (not NUL-terminated)
        struct
        {
            uint32_t    ebx;
            uint32_t    edx;
            uint32_t    ecx;
        } reg;                  // Signature viewed as CPUID output registers
    };
#pragma pack(pop)
57 
58     typedef struct cpu_vendor_id_t
59     {
60         const char *signature;
61         size_t      vendor_id;
62     } vendors_t;
63 
64 
65     static const cpu_vendor_id_t cpu_vendor_ids[] =
66     {
67         { "AMDisbetter!", CPU_VENDOR_AMD },
68         { "AuthenticAMD", CPU_VENDOR_AMD },
69         { "CentaurHauls", CPU_VENDOR_VIA },
70         { "Geode by NSC", CPU_VENDOR_NSC },
71         { "GenuineIntel", CPU_VENDOR_INTEL },
72         { "GenuineTMx86", CPU_VENDOR_TRANSMETA },
73         { "HygonGenuine", CPU_VENDOR_HYGON },
74         { "TransmetaCPU", CPU_VENDOR_TRANSMETA },
75         { "VIA VIA VIA ", CPU_VENDOR_VIA }
76     };
77 
read_brand_string(cpuid_info_t * info,uint32_t max_ext_cpuid,char * brand)78     void read_brand_string(cpuid_info_t *info, uint32_t max_ext_cpuid, char *brand)
79     {
80         // FUNCTION 0x80000002 - 0x80000004
81         if (max_ext_cpuid < 0x80000004)
82         {
83             strcpy(brand, "Generic " ARCH_STRING " processor");
84             return;
85         }
86 
87         uint32_t *dst = reinterpret_cast<uint32_t *>(brand);
88         for (size_t i=0x80000002; i<=0x80000004; ++i)
89         {
90             cpuid(info, i, 0);
91             *(dst++)    = info->eax;
92             *(dst++)    = info->ebx;
93             *(dst++)    = info->ecx;
94             *(dst++)    = info->edx;
95         }
96         *dst        = 0;
97 
98         // Cut the end of the string if there are spaces
99         char *end   = &brand[3 * 16 - 1];
100         while ((end >= brand) && (((*end) == ' ') || ((*end) == '\0')))
101             *(end--) = '\0';
102         char *start = brand;
103         while ((start < end) && ((*start) == ' '))
104             start++;
105         if (start > brand)
106             memmove(brand, start, end - start + 1);
107     }
108 
do_intel_cpuid(cpu_features_t * f,size_t max_cpuid,size_t max_ext_cpuid)109     void do_intel_cpuid(cpu_features_t *f, size_t max_cpuid, size_t max_ext_cpuid)
110     {
111         cpuid_info_t info;
112         uint64_t xcr0 = 0;
113 
114         // FUNCTION 1
115         if (max_cpuid >= 1)
116         {
117             cpuid(&info, 1, 0);
118 
119             if (info.edx & X86_CPUID1_INTEL_EDX_FPU)
120                 f->features     |= CPU_OPTION_FPU;
121             if (info.edx & X86_CPUID1_INTEL_EDX_CMOV)
122                 f->features     |= CPU_OPTION_CMOV;
123             if (info.edx & X86_CPUID1_INTEL_EDX_MMX)
124                 f->features     |= CPU_OPTION_MMX;
125             if (info.edx & X86_CPUID1_INTEL_EDX_SSE)
126                 f->features     |= CPU_OPTION_SSE;
127             if (info.edx & X86_CPUID1_INTEL_EDX_SSE2)
128                 f->features     |= CPU_OPTION_SSE2;
129 
130             if (info.ecx & X86_CPUID1_INTEL_ECX_SSE3)
131                 f->features     |= CPU_OPTION_SSE3;
132             if (info.ecx & X86_CPUID1_INTEL_ECX_SSSE3)
133                 f->features     |= CPU_OPTION_SSSE3;
134             if (info.ecx & X86_CPUID1_INTEL_ECX_SSE4_1)
135                 f->features     |= CPU_OPTION_SSE4_1;
136             if (info.ecx & X86_CPUID1_INTEL_ECX_SSE4_2)
137                 f->features     |= CPU_OPTION_SSE4_2;
138             if (info.ecx & X86_CPUID1_INTEL_ECX_XSAVE)
139                 f->features     |= CPU_OPTION_FXSAVE;
140             if (info.ecx & X86_CPUID1_INTEL_ECX_OSXSAVE)
141             {
142                 f->features     |= CPU_OPTION_OSXSAVE;
143 
144                 xcr0         = read_xcr(0);
145 
146                 // Additional check for AVX support
147                 if ((xcr0 & XCR_FLAGS_AVX) == XCR_FLAGS_AVX)
148                 {
149                     if (info.ecx & X86_CPUID1_INTEL_ECX_FMA3)
150                         f->features     |= CPU_OPTION_FMA3;
151                     if (info.ecx & X86_CPUID1_INTEL_ECX_AVX)
152                         f->features     |= CPU_OPTION_AVX;
153                 }
154             }
155         }
156 
157         // FUNCTION 7
158         if (max_cpuid >= 7)
159         {
160             cpuid(&info, 7, 0);
161 
162             if (f->features & CPU_OPTION_OSXSAVE)
163             {
164                 if ((xcr0 & XCR_FLAGS_AVX) == XCR_FLAGS_AVX)
165                 {
166                     if (info.ebx & X86_CPUID7_INTEL_EBX_AVX2)
167                         f->features     |= CPU_OPTION_AVX2;
168                 }
169 
170                 if ((xcr0 & XCR_FLAGS_AVX512) == XCR_FLAGS_AVX512)
171                 {
172                     if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512F)
173                         f->features     |= CPU_OPTION_AVX512F;
174                     if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512DQ)
175                         f->features     |= CPU_OPTION_AVX512DQ;
176                     if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512IFMA)
177                         f->features     |= CPU_OPTION_AVX512IFMA;
178                     if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512PF)
179                         f->features     |= CPU_OPTION_AVX512PF;
180                     if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512ER)
181                         f->features     |= CPU_OPTION_AVX512ER;
182                     if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512CD)
183                         f->features     |= CPU_OPTION_AVX512CD;
184                     if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512BW)
185                         f->features     |= CPU_OPTION_AVX512BW;
186                     if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512VL)
187                         f->features     |= CPU_OPTION_AVX512VL;
188 
189                     if (info.ecx & X86_CPUID7_INTEL_ECX_AVX512VBMI)
190                         f->features     |= CPU_OPTION_AVX512VBMI;
191                 }
192             }
193         }
194 
195         read_brand_string(&info, max_ext_cpuid, f->brand);
196     }
197 
do_amd_cpuid(cpu_features_t * f,size_t max_cpuid,size_t max_ext_cpuid)198     void do_amd_cpuid(cpu_features_t *f, size_t max_cpuid, size_t max_ext_cpuid)
199     {
200         cpuid_info_t info;
201         uint64_t xcr0 = 0;
202 
203         // FUNCTION 1
204         if (max_cpuid >= 1)
205         {
206             cpuid(&info, 1, 0);
207 
208             if (info.edx & X86_CPUID1_AMD_EDX_FPU)
209                 f->features     |= CPU_OPTION_FPU;
210             if (info.edx & X86_CPUID1_AMD_EDX_CMOV)
211                 f->features     |= CPU_OPTION_CMOV;
212             if (info.edx & X86_CPUID1_AMD_EDX_MMX)
213                 f->features     |= CPU_OPTION_MMX;
214             if (info.edx & X86_CPUID1_AMD_EDX_SSE)
215                 f->features     |= CPU_OPTION_SSE;
216             if (info.edx & X86_CPUID1_AMD_EDX_SSE2)
217                 f->features     |= CPU_OPTION_SSE2;
218 
219             if (info.ecx & X86_CPUID1_AMD_ECX_SSE3)
220                 f->features     |= CPU_OPTION_SSE3;
221             if (info.ecx & X86_CPUID1_AMD_ECX_SSSE3)
222                 f->features     |= CPU_OPTION_SSSE3;
223             if (info.ecx & X86_CPUID1_AMD_ECX_SSE4_1)
224                 f->features     |= CPU_OPTION_SSE4_1;
225             if (info.ecx & X86_CPUID1_AMD_ECX_SSE4_2)
226                 f->features     |= CPU_OPTION_SSE4_2;
227             if (info.ecx & X86_CPUID1_AMD_ECX_XSAVE)
228                 f->features     |= CPU_OPTION_FXSAVE;
229             if (info.ecx & X86_CPUID1_AMD_ECX_OSXSAVE)
230             {
231                 f->features     |= CPU_OPTION_OSXSAVE;
232 
233                 xcr0 = read_xcr(0);
234 
235                 // Additional check for AVX support
236                 if ((xcr0 & XCR_FLAGS_AVX) == XCR_FLAGS_AVX)
237                 {
238                     if (info.ecx & X86_CPUID1_AMD_ECX_FMA3)
239                         f->features     |= CPU_OPTION_FMA3;
240                     if (info.ecx & X86_CPUID1_AMD_ECX_AVX)
241                         f->features     |= CPU_OPTION_AVX;
242                 }
243             }
244         }
245 
246         // FUNCTION 7
247         if (max_cpuid >= 7)
248         {
249             cpuid(&info, 7, 0);
250 
251             if (info.ebx & X86_CPUID7_AMD_EBX_AVX2)
252                 f->features     |= CPU_OPTION_AVX2;
253         }
254 
255         // FUNCTION 0x80000001
256         if (max_ext_cpuid >= 0x80000001)
257         {
258             cpuid(&info, 0x80000001, 0);
259 
260             if (info.ecx & X86_XCPUID1_AMD_ECX_SSE4A)
261                 f->features     |= CPU_OPTION_SSE4A;
262 
263             if (info.edx & X86_XCPUID1_AMD_EDX_FPU)
264                 f->features     |= CPU_OPTION_FPU;
265             if (info.edx & X86_XCPUID1_AMD_EDX_CMOV)
266                 f->features     |= CPU_OPTION_CMOV;
267             if (info.edx & X86_XCPUID1_AMD_EDX_MMX)
268                 f->features     |= CPU_OPTION_MMX;
269 
270             if (f->features & CPU_OPTION_OSXSAVE)
271             {
272                 if ((xcr0 & XCR_FLAGS_AVX) == XCR_FLAGS_AVX)
273                 {
274                     if (info.ecx & X86_XCPUID1_AMD_ECX_FMA4)
275                         f->features     |= CPU_OPTION_FMA4;
276                 }
277             }
278         }
279 
280         read_brand_string(&info, max_ext_cpuid, f->brand);
281     }
282 
detect_options(cpu_features_t * f)283     void detect_options(cpu_features_t *f)
284     {
285         // Initialize structure
286         f->vendor       = CPU_VENDOR_UNKNOWN;
287         f->family       = 0;
288         f->model        = 0;
289         f->features     = 0;
290 
291         // X86-family code
292         if (!cpuid_supported())
293             return;
294 
295         // Check max CPUID
296         cpuid_info_t info;
297         cpuid(&info, 0, 0);
298 
299         // Detect vendor
300         vendor_sig_t sig;
301         sig.reg.ebx     = info.ebx;
302         sig.reg.ecx     = info.ecx;
303         sig.reg.edx     = info.edx;
304 
305         for (size_t i=0, n=sizeof(cpu_vendor_ids)/sizeof(cpu_vendor_id_t); i<n; ++i)
306         {
307             if (!memcmp(sig.sig, cpu_vendor_ids[i].signature, sizeof(vendor_sig_t)))
308             {
309                 f->vendor   = cpu_vendor_ids[i].vendor_id;
310                 break;
311             }
312         }
313 
314         size_t max_cpuid    = info.eax;
315         if (max_cpuid <= 0)
316             return;
317 
318         // Get model and family
319         cpuid(&info, 1, 0);
320         f->family           = (info.eax >> 8) & 0x0f;
321         f->model            = (info.eax >> 4) & 0x0f;
322 
323         if (f->family == 0x0f)
324             f->family           += (info.eax >> 20) & 0xff;
325         if ((f->family == 0x0f) || (f->family == 0x06))
326             f->model            += (info.eax >> 12) & 0xf0;
327 
328         // Get maximum available extended CPUID
329         cpuid(&info, 0x80000000, 0);
330         size_t max_ext_cpuid = info.eax;
331 
332         switch (f->vendor)
333         {
334             case CPU_VENDOR_INTEL:
335                 do_intel_cpuid(f, max_cpuid, max_ext_cpuid);
336                 break;
337 
338             case CPU_VENDOR_AMD:
339             case CPU_VENDOR_HYGON:
340                 do_amd_cpuid(f, max_cpuid, max_ext_cpuid);
341                 break;
342 
343             default:
344                 break;
345         }
346     }
347 
348     static dsp::start_t     dsp_start       = NULL;
349     static dsp::finish_t    dsp_finish      = NULL;
350 
start(dsp::context_t * ctx)351     static void start(dsp::context_t *ctx)
352     {
353         dsp_start(ctx);
354         uint32_t cr                 = fpu_read_cr();
355         ctx->data[ctx->top++]       = cr;
356 
357         fpu_write_cr(cr);
358     }
359 
finish(dsp::context_t * ctx)360     static void finish(dsp::context_t *ctx)
361     {
362         fpu_write_cr(ctx->data[--ctx->top]);
363         dsp_finish(ctx);
364     }
365 
366     static const char *cpu_vendors[] =
367     {
368         "Unknown",
369         "AMD",
370         "Hygon",
371         "Intel",
372         "NSC",
373         "Transmeta",
374         "VIA"
375     };
376 
377     static const char *cpu_features[] =
378     {
379         "FPU", "CMOV", "MMX", "FXSAVE",
380         "SSE", "SSE2", "SSE3", "SSSE3",
381         "SSE4.1", "SSE4.2", "SSE4A", "XSAVE",
382         "FMA3", "FMA4", "AVX", "AVX2",
383         "AVX512F", "AVX512DQ", "AVX512IFMA", "AVX512PF",
384         "AVX512ER", "AVX512CD", "AVX512BW", "AVX512VL",
385         "AVX512VBMI"
386     };
387 
estimate_features_size(const cpu_features_t * f)388     static size_t estimate_features_size(const cpu_features_t *f)
389     {
390         // Estimate the string length
391         size_t estimate = 1; // End of string character
392         for (size_t x = f->features, i=0; x > 0; i++)
393         {
394             if (x & 1)
395             {
396                 estimate += strlen(cpu_features[i]);
397                 x >>= 1;
398                 if (x)
399                     estimate ++; // Space character
400             }
401             else
402                 x >>= 1;
403         }
404         return estimate;
405     }
406 
build_features_list(char * dst,const cpu_features_t * f)407     static char *build_features_list(char *dst, const cpu_features_t *f)
408     {
409         // Build string
410         char *s = dst;
411 
412         for (size_t x = f->features, i=0; x > 0; i++)
413         {
414             if (x & 1)
415             {
416                 s = stpcpy(s, cpu_features[i]);
417                 x >>= 1;
418                 if (x)
419                     *(s++) = ' ';
420             }
421             else
422                 x >>= 1;
423         }
424         *s = '\0';
425 
426         return s;
427     }
428 
info()429     dsp::info_t *info()
430     {
431         cpu_features_t f;
432         detect_options(&f);
433 
434         char *model     = NULL;
435         int n = asprintf(&model, "vendor=%s, family=0x%x, model=0x%x", cpu_vendors[f.vendor], int(f.family), int(f.model));
436         if ((n < 0) || (model == NULL))
437             return NULL;
438 
439         size_t size     = sizeof(dsp::info_t);
440         size           += strlen(ARCH_STRING) + 1;
441         size           += strlen(f.brand) + 1;
442         size           += strlen(model) + 1;
443         size           += estimate_features_size(&f);
444 
445         dsp::info_t *res = reinterpret_cast<dsp::info_t *>(malloc(size));
446         if (res == NULL)
447         {
448             free(model);
449             return res;
450         }
451 
452         char *text      = reinterpret_cast<char *>(&res[1]);
453         res->arch       = text;
454         text            = stpcpy(text, ARCH_STRING) + 1;
455         res->cpu        = text;
456         text            = stpcpy(text, f.brand) + 1;
457         res->model      = text;
458         text            = stpcpy(text, model) + 1;
459         res->features   = text;
460         build_features_list(text, &f);
461 
462         free(model);
463         return res;
464     }
465 
feature_check(const cpu_features_t * f,feature_t ops)466     bool feature_check(const cpu_features_t *f, feature_t ops)
467     {
468         switch (ops)
469         {
470             case FEAT_FAST_MOVS:
471                 if (f->vendor == CPU_VENDOR_INTEL)
472                 {
473                     if ((f->family == 0x6) && (f->model >= 0x5e)) // Should be some Core i3 microarchitecture...
474                         return true;
475                 }
476                 break;
477             case FEAT_FAST_AVX:
478                 if (f->vendor == CPU_VENDOR_INTEL) // Any Intel CPU is good enough with AVX
479                     return true;
480                 if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON))
481                     return (f->family >= AMD_FAMILY_ZEN); // Only starting with ZEN architecture AMD's implementation of AVX is fast enough
482                 break;
483             case FEAT_FAST_FMA3:
484                 if (f->vendor == CPU_VENDOR_INTEL) // Any Intel CPU is good enough with AVX
485                     return true;
486                 // AMD: maybe once FMA3 will be faster
487                 break;
488             default:
489                 break;
490         }
491 
492         return false;
493     }
494 
495     #define EXPORT2(function, export)           dsp::function = x86::export;
496     #define EXPORT1(function)                   EXPORT2(function, function)
497 
dsp_init()498     void dsp_init()
499     {
500         // Dectect CPU options
501         cpu_features_t f;
502         detect_options(&f);
503 
504         // Save previous entry points
505         dsp_start                   = dsp::start;
506         dsp_finish                  = dsp::finish;
507 
508         // Export functions
509         EXPORT1(start);
510         EXPORT1(finish);
511         EXPORT1(info);
512 
513         // Initialize extensions
514         sse::dsp_init(&f);
515         avx::dsp_init(&f);
516     }
517 
518     #undef EXPORT1
519     #undef EXPORT2
520 }
521 
522 #endif
523