1 /* 2 * x86.cpp 3 * 4 * Created on: 28 февр. 2020 г. 5 * Author: Vladimir Sadovnikov <lsp.plugin@gmail.com> 6 * 7 * This file is part of tamgamp.lv2 <https://github.com/sadko4u/tamgamp.lv2>. 8 * 9 * This program is free software; you can redistribute it and/or 10 * modify it under the terms of the GNU Lesser General Public 11 * License as published by the Free Software Foundation; either 12 * version 3 of the License, or (at your option) any later version. 13 * 14 * This program is distributed in the hope that it will be useful, 15 * but WITHOUT ANY WARRANTY; without even the implied warranty of 16 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 17 * Lesser General Public License for more details. 18 * 19 * You should have received a copy of the GNU Lesser General Public License 20 * along with this program; if not, write to the Free Software Foundation, 21 * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. 22 */ 23 24 #include <dsp/dsp.h> 25 26 #ifdef ARCH_X86 27 28 #include <string.h> 29 #include <stdlib.h> 30 #include <stdio.h> 31 32 #include <dsp/arch/x86/features.h> 33 #include <dsp/arch/x86/cpuid.h> 34 #include <dsp/arch/x86/fpu.h> 35 36 namespace sse 37 { 38 extern void dsp_init(const x86::cpu_features_t *f); 39 } 40 41 namespace avx 42 { 43 extern void dsp_init(const x86::cpu_features_t *f); 44 } 45 46 namespace x86 47 { 48 #pragma pack(push, 1) 49 typedef union vendor_sig_t 50 { 51 char sig[12]; 52 struct { 53 uint32_t ebx, edx, ecx; 54 } reg; 55 } vendor_sig_t; 56 #pragma pack(pop) 57 58 typedef struct cpu_vendor_id_t 59 { 60 const char *signature; 61 size_t vendor_id; 62 } vendors_t; 63 64 65 static const cpu_vendor_id_t cpu_vendor_ids[] = 66 { 67 { "AMDisbetter!", CPU_VENDOR_AMD }, 68 { "AuthenticAMD", CPU_VENDOR_AMD }, 69 { "CentaurHauls", CPU_VENDOR_VIA }, 70 { "Geode by NSC", CPU_VENDOR_NSC }, 71 { "GenuineIntel", CPU_VENDOR_INTEL }, 72 { "GenuineTMx86", CPU_VENDOR_TRANSMETA }, 73 { "HygonGenuine", CPU_VENDOR_HYGON }, 74 { "TransmetaCPU", CPU_VENDOR_TRANSMETA }, 75 { "VIA VIA VIA ", CPU_VENDOR_VIA } 76 }; 77 read_brand_string(cpuid_info_t * info,uint32_t max_ext_cpuid,char * brand)78 void read_brand_string(cpuid_info_t *info, uint32_t max_ext_cpuid, char *brand) 79 { 80 // FUNCTION 0x80000002 - 0x80000004 81 if (max_ext_cpuid < 0x80000004) 82 { 83 strcpy(brand, "Generic " ARCH_STRING " processor"); 84 return; 85 } 86 87 uint32_t *dst = reinterpret_cast<uint32_t *>(brand); 88 for (size_t i=0x80000002; i<=0x80000004; ++i) 89 { 90 cpuid(info, i, 0); 91 *(dst++) = info->eax; 92 *(dst++) = info->ebx; 93 *(dst++) = info->ecx; 94 *(dst++) = info->edx; 95 } 96 *dst = 0; 97 98 // Cut the end of the string if there are spaces 99 char *end = &brand[3 * 16 - 1]; 100 while ((end >= brand) && (((*end) == ' ') || ((*end) == '\0'))) 101 *(end--) = '\0'; 102 char *start = brand; 103 while ((start < end) && ((*start) == ' ')) 104 start++; 105 if (start > brand) 106 memmove(brand, start, end - start + 1); 107 } 108 do_intel_cpuid(cpu_features_t * f,size_t max_cpuid,size_t max_ext_cpuid)109 void do_intel_cpuid(cpu_features_t *f, size_t max_cpuid, size_t max_ext_cpuid) 110 { 111 cpuid_info_t info; 112 uint64_t xcr0 = 0; 113 114 // FUNCTION 1 115 if (max_cpuid >= 1) 116 { 117 cpuid(&info, 1, 0); 118 119 if (info.edx & X86_CPUID1_INTEL_EDX_FPU) 120 f->features |= CPU_OPTION_FPU; 121 if (info.edx & X86_CPUID1_INTEL_EDX_CMOV) 122 f->features |= CPU_OPTION_CMOV; 123 if (info.edx & X86_CPUID1_INTEL_EDX_MMX) 124 f->features |= CPU_OPTION_MMX; 125 if (info.edx & X86_CPUID1_INTEL_EDX_SSE) 126 f->features |= CPU_OPTION_SSE; 127 if (info.edx & X86_CPUID1_INTEL_EDX_SSE2) 128 f->features |= CPU_OPTION_SSE2; 129 130 if (info.ecx & X86_CPUID1_INTEL_ECX_SSE3) 131 f->features |= CPU_OPTION_SSE3; 132 if (info.ecx & X86_CPUID1_INTEL_ECX_SSSE3) 133 f->features |= CPU_OPTION_SSSE3; 134 if (info.ecx & X86_CPUID1_INTEL_ECX_SSE4_1) 135 f->features |= CPU_OPTION_SSE4_1; 136 if (info.ecx & X86_CPUID1_INTEL_ECX_SSE4_2) 137 f->features |= CPU_OPTION_SSE4_2; 138 if (info.ecx & X86_CPUID1_INTEL_ECX_XSAVE) 139 f->features |= CPU_OPTION_FXSAVE; 140 if (info.ecx & X86_CPUID1_INTEL_ECX_OSXSAVE) 141 { 142 f->features |= CPU_OPTION_OSXSAVE; 143 144 xcr0 = read_xcr(0); 145 146 // Additional check for AVX support 147 if ((xcr0 & XCR_FLAGS_AVX) == XCR_FLAGS_AVX) 148 { 149 if (info.ecx & X86_CPUID1_INTEL_ECX_FMA3) 150 f->features |= CPU_OPTION_FMA3; 151 if (info.ecx & X86_CPUID1_INTEL_ECX_AVX) 152 f->features |= CPU_OPTION_AVX; 153 } 154 } 155 } 156 157 // FUNCTION 7 158 if (max_cpuid >= 7) 159 { 160 cpuid(&info, 7, 0); 161 162 if (f->features & CPU_OPTION_OSXSAVE) 163 { 164 if ((xcr0 & XCR_FLAGS_AVX) == XCR_FLAGS_AVX) 165 { 166 if (info.ebx & X86_CPUID7_INTEL_EBX_AVX2) 167 f->features |= CPU_OPTION_AVX2; 168 } 169 170 if ((xcr0 & XCR_FLAGS_AVX512) == XCR_FLAGS_AVX512) 171 { 172 if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512F) 173 f->features |= CPU_OPTION_AVX512F; 174 if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512DQ) 175 f->features |= CPU_OPTION_AVX512DQ; 176 if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512IFMA) 177 f->features |= CPU_OPTION_AVX512IFMA; 178 if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512PF) 179 f->features |= CPU_OPTION_AVX512PF; 180 if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512ER) 181 f->features |= CPU_OPTION_AVX512ER; 182 if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512CD) 183 f->features |= CPU_OPTION_AVX512CD; 184 if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512BW) 185 f->features |= CPU_OPTION_AVX512BW; 186 if (info.ebx & X86_CPUID7_INTEL_EBX_AVX512VL) 187 f->features |= CPU_OPTION_AVX512VL; 188 189 if (info.ecx & X86_CPUID7_INTEL_ECX_AVX512VBMI) 190 f->features |= CPU_OPTION_AVX512VBMI; 191 } 192 } 193 } 194 195 read_brand_string(&info, max_ext_cpuid, f->brand); 196 } 197 do_amd_cpuid(cpu_features_t * f,size_t max_cpuid,size_t max_ext_cpuid)198 void do_amd_cpuid(cpu_features_t *f, size_t max_cpuid, size_t max_ext_cpuid) 199 { 200 cpuid_info_t info; 201 uint64_t xcr0 = 0; 202 203 // FUNCTION 1 204 if (max_cpuid >= 1) 205 { 206 cpuid(&info, 1, 0); 207 208 if (info.edx & X86_CPUID1_AMD_EDX_FPU) 209 f->features |= CPU_OPTION_FPU; 210 if (info.edx & X86_CPUID1_AMD_EDX_CMOV) 211 f->features |= CPU_OPTION_CMOV; 212 if (info.edx & X86_CPUID1_AMD_EDX_MMX) 213 f->features |= CPU_OPTION_MMX; 214 if (info.edx & X86_CPUID1_AMD_EDX_SSE) 215 f->features |= CPU_OPTION_SSE; 216 if (info.edx & X86_CPUID1_AMD_EDX_SSE2) 217 f->features |= CPU_OPTION_SSE2; 218 219 if (info.ecx & X86_CPUID1_AMD_ECX_SSE3) 220 f->features |= CPU_OPTION_SSE3; 221 if (info.ecx & X86_CPUID1_AMD_ECX_SSSE3) 222 f->features |= CPU_OPTION_SSSE3; 223 if (info.ecx & X86_CPUID1_AMD_ECX_SSE4_1) 224 f->features |= CPU_OPTION_SSE4_1; 225 if (info.ecx & X86_CPUID1_AMD_ECX_SSE4_2) 226 f->features |= CPU_OPTION_SSE4_2; 227 if (info.ecx & X86_CPUID1_AMD_ECX_XSAVE) 228 f->features |= CPU_OPTION_FXSAVE; 229 if (info.ecx & X86_CPUID1_AMD_ECX_OSXSAVE) 230 { 231 f->features |= CPU_OPTION_OSXSAVE; 232 233 xcr0 = read_xcr(0); 234 235 // Additional check for AVX support 236 if ((xcr0 & XCR_FLAGS_AVX) == XCR_FLAGS_AVX) 237 { 238 if (info.ecx & X86_CPUID1_AMD_ECX_FMA3) 239 f->features |= CPU_OPTION_FMA3; 240 if (info.ecx & X86_CPUID1_AMD_ECX_AVX) 241 f->features |= CPU_OPTION_AVX; 242 } 243 } 244 } 245 246 // FUNCTION 7 247 if (max_cpuid >= 7) 248 { 249 cpuid(&info, 7, 0); 250 251 if (info.ebx & X86_CPUID7_AMD_EBX_AVX2) 252 f->features |= CPU_OPTION_AVX2; 253 } 254 255 // FUNCTION 0x80000001 256 if (max_ext_cpuid >= 0x80000001) 257 { 258 cpuid(&info, 0x80000001, 0); 259 260 if (info.ecx & X86_XCPUID1_AMD_ECX_SSE4A) 261 f->features |= CPU_OPTION_SSE4A; 262 263 if (info.edx & X86_XCPUID1_AMD_EDX_FPU) 264 f->features |= CPU_OPTION_FPU; 265 if (info.edx & X86_XCPUID1_AMD_EDX_CMOV) 266 f->features |= CPU_OPTION_CMOV; 267 if (info.edx & X86_XCPUID1_AMD_EDX_MMX) 268 f->features |= CPU_OPTION_MMX; 269 270 if (f->features & CPU_OPTION_OSXSAVE) 271 { 272 if ((xcr0 & XCR_FLAGS_AVX) == XCR_FLAGS_AVX) 273 { 274 if (info.ecx & X86_XCPUID1_AMD_ECX_FMA4) 275 f->features |= CPU_OPTION_FMA4; 276 } 277 } 278 } 279 280 read_brand_string(&info, max_ext_cpuid, f->brand); 281 } 282 detect_options(cpu_features_t * f)283 void detect_options(cpu_features_t *f) 284 { 285 // Initialize structure 286 f->vendor = CPU_VENDOR_UNKNOWN; 287 f->family = 0; 288 f->model = 0; 289 f->features = 0; 290 291 // X86-family code 292 if (!cpuid_supported()) 293 return; 294 295 // Check max CPUID 296 cpuid_info_t info; 297 cpuid(&info, 0, 0); 298 299 // Detect vendor 300 vendor_sig_t sig; 301 sig.reg.ebx = info.ebx; 302 sig.reg.ecx = info.ecx; 303 sig.reg.edx = info.edx; 304 305 for (size_t i=0, n=sizeof(cpu_vendor_ids)/sizeof(cpu_vendor_id_t); i<n; ++i) 306 { 307 if (!memcmp(sig.sig, cpu_vendor_ids[i].signature, sizeof(vendor_sig_t))) 308 { 309 f->vendor = cpu_vendor_ids[i].vendor_id; 310 break; 311 } 312 } 313 314 size_t max_cpuid = info.eax; 315 if (max_cpuid <= 0) 316 return; 317 318 // Get model and family 319 cpuid(&info, 1, 0); 320 f->family = (info.eax >> 8) & 0x0f; 321 f->model = (info.eax >> 4) & 0x0f; 322 323 if (f->family == 0x0f) 324 f->family += (info.eax >> 20) & 0xff; 325 if ((f->family == 0x0f) || (f->family == 0x06)) 326 f->model += (info.eax >> 12) & 0xf0; 327 328 // Get maximum available extended CPUID 329 cpuid(&info, 0x80000000, 0); 330 size_t max_ext_cpuid = info.eax; 331 332 switch (f->vendor) 333 { 334 case CPU_VENDOR_INTEL: 335 do_intel_cpuid(f, max_cpuid, max_ext_cpuid); 336 break; 337 338 case CPU_VENDOR_AMD: 339 case CPU_VENDOR_HYGON: 340 do_amd_cpuid(f, max_cpuid, max_ext_cpuid); 341 break; 342 343 default: 344 break; 345 } 346 } 347 348 static dsp::start_t dsp_start = NULL; 349 static dsp::finish_t dsp_finish = NULL; 350 start(dsp::context_t * ctx)351 static void start(dsp::context_t *ctx) 352 { 353 dsp_start(ctx); 354 uint32_t cr = fpu_read_cr(); 355 ctx->data[ctx->top++] = cr; 356 357 fpu_write_cr(cr); 358 } 359 finish(dsp::context_t * ctx)360 static void finish(dsp::context_t *ctx) 361 { 362 fpu_write_cr(ctx->data[--ctx->top]); 363 dsp_finish(ctx); 364 } 365 366 static const char *cpu_vendors[] = 367 { 368 "Unknown", 369 "AMD", 370 "Hygon", 371 "Intel", 372 "NSC", 373 "Transmeta", 374 "VIA" 375 }; 376 377 static const char *cpu_features[] = 378 { 379 "FPU", "CMOV", "MMX", "FXSAVE", 380 "SSE", "SSE2", "SSE3", "SSSE3", 381 "SSE4.1", "SSE4.2", "SSE4A", "XSAVE", 382 "FMA3", "FMA4", "AVX", "AVX2", 383 "AVX512F", "AVX512DQ", "AVX512IFMA", "AVX512PF", 384 "AVX512ER", "AVX512CD", "AVX512BW", "AVX512VL", 385 "AVX512VBMI" 386 }; 387 estimate_features_size(const cpu_features_t * f)388 static size_t estimate_features_size(const cpu_features_t *f) 389 { 390 // Estimate the string length 391 size_t estimate = 1; // End of string character 392 for (size_t x = f->features, i=0; x > 0; i++) 393 { 394 if (x & 1) 395 { 396 estimate += strlen(cpu_features[i]); 397 x >>= 1; 398 if (x) 399 estimate ++; // Space character 400 } 401 else 402 x >>= 1; 403 } 404 return estimate; 405 } 406 build_features_list(char * dst,const cpu_features_t * f)407 static char *build_features_list(char *dst, const cpu_features_t *f) 408 { 409 // Build string 410 char *s = dst; 411 412 for (size_t x = f->features, i=0; x > 0; i++) 413 { 414 if (x & 1) 415 { 416 s = stpcpy(s, cpu_features[i]); 417 x >>= 1; 418 if (x) 419 *(s++) = ' '; 420 } 421 else 422 x >>= 1; 423 } 424 *s = '\0'; 425 426 return s; 427 } 428 info()429 dsp::info_t *info() 430 { 431 cpu_features_t f; 432 detect_options(&f); 433 434 char *model = NULL; 435 int n = asprintf(&model, "vendor=%s, family=0x%x, model=0x%x", cpu_vendors[f.vendor], int(f.family), int(f.model)); 436 if ((n < 0) || (model == NULL)) 437 return NULL; 438 439 size_t size = sizeof(dsp::info_t); 440 size += strlen(ARCH_STRING) + 1; 441 size += strlen(f.brand) + 1; 442 size += strlen(model) + 1; 443 size += estimate_features_size(&f); 444 445 dsp::info_t *res = reinterpret_cast<dsp::info_t *>(malloc(size)); 446 if (res == NULL) 447 { 448 free(model); 449 return res; 450 } 451 452 char *text = reinterpret_cast<char *>(&res[1]); 453 res->arch = text; 454 text = stpcpy(text, ARCH_STRING) + 1; 455 res->cpu = text; 456 text = stpcpy(text, f.brand) + 1; 457 res->model = text; 458 text = stpcpy(text, model) + 1; 459 res->features = text; 460 build_features_list(text, &f); 461 462 free(model); 463 return res; 464 } 465 feature_check(const cpu_features_t * f,feature_t ops)466 bool feature_check(const cpu_features_t *f, feature_t ops) 467 { 468 switch (ops) 469 { 470 case FEAT_FAST_MOVS: 471 if (f->vendor == CPU_VENDOR_INTEL) 472 { 473 if ((f->family == 0x6) && (f->model >= 0x5e)) // Should be some Core i3 microarchitecture... 474 return true; 475 } 476 break; 477 case FEAT_FAST_AVX: 478 if (f->vendor == CPU_VENDOR_INTEL) // Any Intel CPU is good enough with AVX 479 return true; 480 if ((f->vendor == CPU_VENDOR_AMD) || (f->vendor == CPU_VENDOR_HYGON)) 481 return (f->family >= AMD_FAMILY_ZEN); // Only starting with ZEN architecture AMD's implementation of AVX is fast enough 482 break; 483 case FEAT_FAST_FMA3: 484 if (f->vendor == CPU_VENDOR_INTEL) // Any Intel CPU is good enough with AVX 485 return true; 486 // AMD: maybe once FMA3 will be faster 487 break; 488 default: 489 break; 490 } 491 492 return false; 493 } 494 495 #define EXPORT2(function, export) dsp::function = x86::export; 496 #define EXPORT1(function) EXPORT2(function, function) 497 dsp_init()498 void dsp_init() 499 { 500 // Dectect CPU options 501 cpu_features_t f; 502 detect_options(&f); 503 504 // Save previous entry points 505 dsp_start = dsp::start; 506 dsp_finish = dsp::finish; 507 508 // Export functions 509 EXPORT1(start); 510 EXPORT1(finish); 511 EXPORT1(info); 512 513 // Initialize extensions 514 sse::dsp_init(&f); 515 avx::dsp_init(&f); 516 } 517 518 #undef EXPORT1 519 #undef EXPORT2 520 } 521 522 #endif 523