1 /* 2 * SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved. 3 * SPDX-License-Identifier: MIT 4 * 5 * Permission is hereby granted, free of charge, to any person obtaining a 6 * copy of this software and associated documentation files (the "Software"), 7 * to deal in the Software without restriction, including without limitation 8 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 * and/or sell copies of the Software, and to permit persons to whom the 10 * Software is furnished to do so, subject to the following conditions: 11 * 12 * The above copyright notice and this permission notice shall be included in 13 * all copies or substantial portions of the Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 
22 */ 23 24 /******************* CPU Information Gather Routines ***********************\ 25 * * 26 * One time initialization code to setup the Processor type * 27 * * 28 \***************************************************************************/ 29 30 #include "cpuopsys.h" 31 32 #include "Nvcm.h" 33 #include "os/os.h" 34 #include "core/system.h" 35 36 #include "ctrl/ctrl0000/ctrl0000system.h" 37 38 39 #if NVCPU_IS_AARCH64 40 41 #include "cpu_arm_def.h" 42 43 #if defined(__GNUC__) 44 45 #define CP_READ_REGISTER(reg) \ 46 ({ \ 47 NvU32 __res; \ 48 \ 49 asm("mrs %0, " reg "\r\t" \ 50 : "=r" (__res) \ 51 ); \ 52 \ 53 __res; \ 54 }) 55 56 #define CP_WRITE_REGISTER(reg, val) \ 57 ({ \ 58 asm("msr " reg ", %0\r\t" \ 59 : \ 60 : "r" (val) \ 61 ); \ 62 }) 63 64 #endif //end defined(__GNUC__) 65 66 static void DecodeAarch64Cache(OBJSYS *pSys) 67 { 68 NvU32 val, field, numsets, assoc, linesize; 69 70 // Select level 1 data cache 71 CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE); 72 73 // Retrieve data cache information 74 val = CP_READ_CCSIDR_REGISTER(); 75 76 field = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val); 77 numsets = field + 1; 78 field = GET_BITMASK(CCSIDR_CACHE_ASSOCIATIVITY, val); 79 assoc = field + 1; 80 field = GET_BITMASK(CCSIDR_CACHE_LINE_SIZE, val); 81 linesize = 1 << (field + 4); 82 83 pSys->cpuInfo.dataCacheLineSize = linesize; 84 pSys->cpuInfo.l1DataCacheSize = (numsets * assoc * linesize) >> 10; 85 86 // Select level 2 data cache 87 CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE_LEVEL2); 88 89 // Retrieve data cache information 90 val = CP_READ_CCSIDR_REGISTER(); 91 92 field = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val); 93 numsets = field + 1; 94 field = GET_BITMASK(CCSIDR_CACHE_ASSOCIATIVITY, val); 95 assoc = field + 1; 96 field = GET_BITMASK(CCSIDR_CACHE_LINE_SIZE, val); 97 linesize = 1 << (field + 4); 98 99 pSys->cpuInfo.l2DataCacheSize = (numsets * assoc * linesize) >> 10; 100 } 101 102 /* 103 * ID the CPU. 
104 */ 105 void RmInitCpuInfo(void) 106 { 107 #define AARCH64_VENDOR_PART_NUMBER(v, p) \ 108 (((v)<<16)|(p)) 109 #define AARCH64_VENDOR_PART(v, p) \ 110 AARCH64_VENDOR_PART_NUMBER(CP_MIDR_IMPLEMENTER_##v, CP_MIDR_PRIMARY_PART_NUM_##p) 111 112 OBJSYS *pSys = SYS_GET_INSTANCE(); 113 114 if (pSys->cpuInfo.bInitialized) 115 { 116 return; 117 } 118 119 // Init structure to default 120 portMemSet(&pSys->cpuInfo, 0, sizeof(pSys->cpuInfo)); 121 122 // ARM has the equivalent of a fence instruction (DSB) 123 124 // Leave this here for MODS 125 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_UNKNOWN; 126 pSys->cpuInfo.caps = (NV0000_CTRL_SYSTEM_CPU_CAP_SFENCE | 127 NV0000_CTRL_SYSTEM_CPU_CAP_WRITE_COMBINING); 128 129 // Calculate the frequency 130 pSys->cpuInfo.clock = osGetCpuFrequency(); 131 132 // Number of core is available from SCU configuration. 133 pSys->cpuInfo.numPhysicalCpus = osGetCpuCount(); 134 135 // There is no hyper-threading on ARM 136 pSys->cpuInfo.numLogicalCpus = pSys->cpuInfo.numPhysicalCpus; 137 pSys->cpuInfo.maxLogicalCpus = pSys->cpuInfo.numPhysicalCpus; 138 139 // Zero out the vendor-specific family, model & stepping 140 pSys->cpuInfo.family = 0; 141 pSys->cpuInfo.model = 0; 142 pSys->cpuInfo.stepping = 0; 143 144 NvU32 val; 145 NvU32 impl; 146 NvU32 part; 147 148 // Retrieve Main ID register 149 val = CP_READ_MIDR_REGISTER(); 150 151 impl = GET_BITMASK(MIDR_IMPLEMENTER, val); 152 part = GET_BITMASK(MIDR_PRIMARY_PART_NUM, val); 153 154 switch(AARCH64_VENDOR_PART_NUMBER(impl, part)) 155 { 156 case AARCH64_VENDOR_PART(NVIDIA, DENVER_1): 157 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_NV_DENVER_1_0; 158 break; 159 case AARCH64_VENDOR_PART(NVIDIA, DENVER_2): 160 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_NV_DENVER_2_0; 161 break; 162 163 case AARCH64_VENDOR_PART(NVIDIA, CARMEL): 164 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARMV8A_GENERIC; 165 break; 166 /* 167 * Applied Micro is now Ampere computing, and the Ampere eMag 168 * 
vendor/part ids are the same as AMCC XGENE 169 */ 170 case AARCH64_VENDOR_PART(AMCC, XGENE): 171 case AARCH64_VENDOR_PART(ARM, CORTEX_A76): 172 case AARCH64_VENDOR_PART(MARVELL, THUNDER_X2): 173 case AARCH64_VENDOR_PART(HUAWEI, KUNPENG_920): 174 case AARCH64_VENDOR_PART(ARM, BLUEFIELD): 175 // The Neoverse N1 is the same as Gravitron 176 case AARCH64_VENDOR_PART(ARM, GRAVITRON2): 177 case AARCH64_VENDOR_PART(FUJITSU, A64FX): 178 case AARCH64_VENDOR_PART(PHYTIUM, FT2000): 179 case AARCH64_VENDOR_PART(PHYTIUM, S2500): 180 case AARCH64_VENDOR_PART(AMPERE, ALTRA): 181 case AARCH64_VENDOR_PART(MARVELL, OCTEON_CN96XX): 182 case AARCH64_VENDOR_PART(MARVELL, OCTEON_CN98XX): 183 case AARCH64_VENDOR_PART(ARM, CORTEX_A57): 184 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARMV8A_GENERIC; 185 break; 186 default: 187 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARMV8A_GENERIC; 188 portDbgPrintf( 189 "NVRM: CPUID: unknown implementer/part 0x%x/0x%x.\n", impl, part); 190 portDbgPrintf( 191 "The NVIDIA GPU driver for AArch64 has not been qualified on this CPU\n" 192 "and therefore it is not recommended or intended for use in any production\n" 193 "environment.\n"); 194 break; 195 } 196 DecodeAarch64Cache(pSys); 197 198 // Host native page size 199 #ifdef PAGE_SIZE 200 pSys->cpuInfo.hostPageSize = PAGE_SIZE; 201 #else 202 pSys->cpuInfo.hostPageSize = 4096; 203 #endif 204 205 pSys->cpuInfo.bInitialized = NV_TRUE; 206 #undef AARCH64_VENDOR_PART 207 #undef AARCH64_VENDOR_PART_NUMBER 208 } 209 210 #endif // NVCPU_IS_AARCH64 211 212 213 /***************************************************************************/ 214 215 216 #if NVCPU_IS_ARM 217 218 #include "cpu_arm_def.h" 219 220 #if defined(__GNUC__) 221 222 #define CP_READ_REGISTER(reg) \ 223 ({ \ 224 NvU32 __res; \ 225 \ 226 asm("mrc p15, " reg ", %0, c0, c0, 0\r\t" \ 227 : "=r" (__res) \ 228 : \ 229 : "cc"); \ 230 \ 231 __res; \ 232 }) 233 234 #define CP_WRITE_REGISTER(reg, val) \ 235 ({ \ 236 asm("mcr p15, " reg ", %0, c0, 
c0, 0\r\t" \ 237 : \ 238 : "r"(val)); \ 239 }) 240 241 #endif //end defined(__GNUC__) 242 243 /* 244 * Documentation: 245 * 246 * https://developer.arm.com/documentation/ddi0388/f/CIHHDACH 247 */ 248 static void DecodeCortexA9Cache(OBJSYS *pSys) 249 { 250 NvU32 val, field; 251 252 // Select data cache 253 CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE); 254 255 // Retrieve data cache information 256 val = CP_READ_CCSIDR_REGISTER(); 257 258 // L1 Data Cache Size (from KB to KB) 259 field = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val); 260 261 if (field == CP_CCSIDR_CACHE_NUM_SETS_16KB) 262 { 263 pSys->cpuInfo.l1DataCacheSize = 16; 264 } 265 else if (field == CP_CCSIDR_CACHE_NUM_SETS_32KB) 266 { 267 pSys->cpuInfo.l1DataCacheSize = 32; 268 } 269 else if (field == CP_CCSIDR_CACHE_NUM_SETS_64KB) 270 { 271 pSys->cpuInfo.l1DataCacheSize = 64; 272 } 273 else 274 { 275 NV_PRINTF(LEVEL_ERROR, "CPUID: Couldn't find L1DataCacheSize.\n"); 276 } 277 278 // There is only one level of cache in the Cortex-A9 processor 279 pSys->cpuInfo.l2DataCacheSize = 0; 280 281 // Data Cache Line (from W to B) 282 field = GET_BITMASK(CCSIDR_CACHE_LINE_SIZE, val); 283 284 if (field & CP_CCSIDR_CACHE_LINE_SIZE_8W) 285 { 286 pSys->cpuInfo.dataCacheLineSize = 8 * 4; 287 } 288 else 289 { 290 NV_PRINTF(LEVEL_ERROR, "CPUID: Couldn't find DataCacheLineSize.\n"); 291 } 292 } 293 294 static NvU32 DecodeCortexA15CacheSize(NvU32 field) 295 { 296 switch(field) 297 { 298 case CP_CCSIDR_CACHE_NUM_SETS_A15_32KB: 299 return 32; 300 case CP_CCSIDR_CACHE_NUM_SETS_A15_512KB: 301 return 512; 302 case CP_CCSIDR_CACHE_NUM_SETS_A15_1024KB: 303 return 1024; 304 case CP_CCSIDR_CACHE_NUM_SETS_A15_2048KB: 305 return 2048; 306 case CP_CCSIDR_CACHE_NUM_SETS_A15_4096KB: 307 return 4096; 308 default: 309 NV_PRINTF(LEVEL_ERROR, "CPUID: Couldn't find DataCacheSize.\n"); 310 return 0; 311 } 312 } 313 314 static void DecodeCortexA15Cache(OBJSYS *pSys) 315 { 316 NvU32 val, field; 317 318 // Select level 1 data cache 319 
CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE); 320 321 // Retrieve data cache information 322 val = CP_READ_CCSIDR_REGISTER(); 323 324 // L1 Data Cache Size (from KB to KB) 325 field = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val); 326 327 pSys->cpuInfo.l1DataCacheSize = DecodeCortexA15CacheSize(field); 328 329 // Data Cache Line (from W to B) 330 field = GET_BITMASK(CCSIDR_CACHE_LINE_SIZE, val); 331 332 // line size = 2 ** (field + 2) words 333 pSys->cpuInfo.dataCacheLineSize = 4 * (1 << (field + 2)); 334 335 // Select level 2 data cache 336 CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE_LEVEL2); 337 338 // Retrieve data cache information 339 val = CP_READ_CCSIDR_REGISTER(); 340 341 // L2 Data Cache Size (from KB to KB) 342 field = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val); 343 344 pSys->cpuInfo.l2DataCacheSize = DecodeCortexA15CacheSize(field); 345 } 346 347 /* 348 * ID the CPU. 349 */ 350 void RmInitCpuInfo(void) 351 { 352 OBJSYS *pSys = SYS_GET_INSTANCE(); 353 354 if (pSys->cpuInfo.bInitialized) 355 { 356 return; 357 } 358 359 // Init structure to default 360 portMemSet(&pSys->cpuInfo, 0, sizeof(pSys->cpuInfo)); 361 362 // ARM has the equivalent of a fence instruction (DSB) 363 364 // Leave this here for MODS 365 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_UNKNOWN; 366 pSys->cpuInfo.caps = (NV0000_CTRL_SYSTEM_CPU_CAP_SFENCE | 367 NV0000_CTRL_SYSTEM_CPU_CAP_WRITE_COMBINING); 368 369 // Calculate the frequency 370 pSys->cpuInfo.clock = osGetCpuFrequency(); 371 372 // Number of core is available from SCU configuration. 
373 pSys->cpuInfo.numPhysicalCpus = osGetCpuCount(); 374 pSys->cpuInfo.maxLogicalCpus = pSys->cpuInfo.numPhysicalCpus; 375 376 // There is no hyper-threading on ARM 377 pSys->cpuInfo.numLogicalCpus = pSys->cpuInfo.numPhysicalCpus; 378 379 // Zero out the vendor-specific family, model & stepping 380 pSys->cpuInfo.family = 0; 381 pSys->cpuInfo.model = 0; 382 pSys->cpuInfo.stepping = 0; 383 384 NvU32 val; 385 NvU32 field; 386 387 // Retrieve Main ID register 388 val = CP_READ_MIDR_REGISTER(); 389 390 field = GET_BITMASK(MIDR_PRIMARY_PART_NUM, val); 391 392 switch(field) 393 { 394 case CP_MIDR_PRIMARY_PART_NUM_A9: 395 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARM_A9; 396 DecodeCortexA9Cache(pSys); 397 break; 398 case CP_MIDR_PRIMARY_PART_NUM_A15: 399 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARM_A15; 400 DecodeCortexA15Cache(pSys); 401 break; 402 default: 403 // Narrow down to an unknown arm cpu 404 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARM_UNKNOWN; 405 NV_PRINTF(LEVEL_ERROR, "CPUID: unknown part number 0x%x.\n", 406 field); 407 break; 408 } 409 410 // Host native page size 411 #ifdef PAGE_SIZE 412 pSys->cpuInfo.hostPageSize = PAGE_SIZE; 413 #else 414 pSys->cpuInfo.hostPageSize = 4096; 415 #endif 416 417 pSys->cpuInfo.bInitialized = NV_TRUE; 418 } 419 420 #endif // NVCPU_IS_ARM 421 422 423 /***************************************************************************/ 424 425 426 #if NVCPU_IS_PPC64LE 427 428 /* 429 * ID the CPU. 430 */ 431 void RmInitCpuInfo(void) 432 { 433 OBJSYS *pSys = SYS_GET_INSTANCE(); 434 435 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_POWERN; 436 pSys->cpuInfo.caps = 0; 437 438 // Zero out the vendor-specific family, model & stepping 439 pSys->cpuInfo.family = 0; 440 pSys->cpuInfo.model = 0; 441 pSys->cpuInfo.stepping = 0; 442 443 // Calculate the frequency 444 pSys->cpuInfo.clock = osGetCpuFrequency(); 445 446 // Number of CPUs. 447 // Should maybe take into account SMT, etc. 
448 pSys->cpuInfo.numPhysicalCpus = osGetCpuCount(); 449 pSys->cpuInfo.numLogicalCpus = pSys->cpuInfo.numPhysicalCpus; 450 pSys->cpuInfo.maxLogicalCpus = pSys->cpuInfo.numPhysicalCpus; 451 452 // host native page size 453 pSys->cpuInfo.hostPageSize = 64 * 1024; 454 455 return; 456 } 457 458 #endif // NVCPU_IS_PPC64LE 459 460 461 /***************************************************************************/ 462 463 464 #if NVCPU_IS_RISCV64 465 466 /* 467 * ID the CPU. (stub) 468 */ 469 void RmInitCpuInfo( 470 void 471 ) 472 { 473 OBJSYS *pSys = SYS_GET_INSTANCE(); 474 475 // XXX 476 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_UNKNOWN; 477 478 // Zero out the vendor-specific family, model & stepping 479 pSys->cpuInfo.family = 0; 480 pSys->cpuInfo.model = 0; 481 pSys->cpuInfo.stepping = 0; 482 483 // Calculate the frequency 484 pSys->cpuInfo.clock = 1; 485 486 // host native page size 487 pSys->cpuInfo.hostPageSize = 4096; 488 } 489 490 #endif // NVCPU_IS_RISCV64 491 492 493 /***************************************************************************/ 494 495 496 #if NVCPU_IS_X86 || NVCPU_IS_X86_64 497 498 #include "platform/cpu.h" 499 500 #if defined(_M_IX86) || defined(NVCPU_X86) || defined(AMD64) || defined(NVCPU_X86_64) 501 502 // bits returned in EDX register by CPUID instruction with EAX=1 503 #define CPU_STD_TSC NVBIT(4) 504 #define CPU_STD_CMOV NVBIT(15) // Supports conditional move instructions. 505 #define CPU_STD_CLFSH NVBIT(19) // Supports CLFLUSH instruction. 506 #define CPU_STD_MMX NVBIT(23) 507 #define CPU_STD_FXSR NVBIT(24) // Indicates CR4.OSFXSR is available. 
#define CPU_STD_SSE                 NVBIT(25)   // Katmai
#define CPU_STD_SSE2                NVBIT(26)   // Willamette NI

// bits returned in ECX register by CPUID instruction with EAX=1
#define CPU_STD2_SSE3               NVBIT(0)
#define CPU_STD2_SSE41              NVBIT(19)
#define CPU_STD2_SSE42              NVBIT(20)
#define CPU_STD2_OSXSAVE            NVBIT(27)   // Indicates the OS supports XSAVE/XRESTOR
#define CPU_STD2_AVX                NVBIT(28)

// "Extended Feature Flags" - bits returned in EDX register by CPUID
// instruction with EAX=0x80000001
#define CPU_EXT_3DNOW               NVBIT(31)   // 3DNow
#define CPU_EXT_AMD_3DNOW_EXT       NVBIT(30)   // 3DNow, with Extensions (AMD specific)
#define CPU_EXT_AMD_MMX_EXT         NVBIT(22)   // MMX, with Extensions (AMD specific)

// "Structured Extended Feature Identifiers" - bits returned in EBX
// register by CPUID instruction with EAX=7
#define CPU_EXT2_ERMS               NVBIT(9)

/*
 * Identify chip foundry.
 *      IS_INTEL   = "GenuineIntel"
 *      IS_AMD     = "AuthenticAMD"
 *      IS_WINCHIP = "CentaurHauls"
 *      IS_CYRIX   = "CyrixInstead"
 *      IS_TRANSM  = "GenuineTMx86"   // Transmeta
 *
 * 'fndry' is an object with a three-element 32-bit array member StrID that
 * holds the 12-character vendor string.  Each constant below is one
 * 4-character chunk of that string read as a little-endian 32-bit value
 * (e.g. 0x756E6547 is the bytes 'G','e','n','u').
 */
#define IS_INTEL(fndry)     (((fndry).StrID[0]==0x756E6547)&&((fndry).StrID[1]==0x49656E69)&&((fndry).StrID[2]==0x6C65746E))
#define IS_AMD(fndry)       (((fndry).StrID[0]==0x68747541)&&((fndry).StrID[1]==0x69746E65)&&((fndry).StrID[2]==0x444D4163))
#define IS_WINCHIP(fndry)   (((fndry).StrID[0]==0x746E6543)&&((fndry).StrID[1]==0x48727561)&&((fndry).StrID[2]==0x736C7561))
#define IS_CYRIX(fndry)     (((fndry).StrID[0]==0x69727943)&&((fndry).StrID[1]==0x736E4978)&&((fndry).StrID[2]==0x64616574))
#define IS_TRANSM(fndry)    (((fndry).StrID[0]==0x756E6547)&&((fndry).StrID[1]==0x54656E69)&&((fndry).StrID[2]==0x3638784D))

// CPUID Info
// Used internally in this source.
544 545 typedef struct _def_CPUID_info 546 { 547 union 548 { 549 NvU8 String[12]; 550 NvU32 StrID[3]; 551 } Foundry; 552 553 NvU32 StandardFeatures; 554 NvU32 ExtendedFeatures; 555 556 NvU16 Family; 557 NvU16 ExtFamily; 558 NvU16 DisplayedFamily; 559 NvU8 Model; 560 NvU8 ExtModel; 561 NvU8 DisplayedModel; 562 NvU8 Stepping; 563 NvU32 BrandId; 564 } CPUIDINFO, *PCPUIDINFO; 565 566 // Forward refernces. 567 // 568 569 static void getCpuCounts(OBJSYS *pSys, PCPUIDINFO pCpuidInfo); 570 static NvBool getEmbeddedProcessorName(char *pName, NvU32 size); 571 static void cpuidInfoAMD(OBJSYS *pSys, PCPUIDINFO pCpuidInfo); 572 static void cpuidInfoIntel(OBJSYS *pSys, PCPUIDINFO pCpuidInfo); 573 574 #if defined(_M_IX86) || defined(NVCPU_X86) 575 static void cpuidInfoWinChip(OBJSYS *pSys, PCPUIDINFO pCpuidInfo); 576 static void cpuidInfoCyrix(OBJSYS *pSys, PCPUIDINFO pCpuidInfo); 577 static void cpuidInfoTransmeta(OBJSYS *pSys, PCPUIDINFO pCpuidInfo); 578 #endif 579 580 581 /* 582 * ID the CPU. 583 */ 584 585 void RmInitCpuInfo(void) 586 { 587 OBJSYS *pSys = SYS_GET_INSTANCE(); 588 CPUIDINFO cpuinfo; 589 NvU32 eax, ebx, ecx, edx; 590 OBJOS *pOS = SYS_GET_OS(pSys); 591 592 // Do this only once. 593 if (pSys->cpuInfo.bInitialized) 594 return; 595 596 // Initialize the processor structure to default values. 597 // 598 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_UNKNOWN; 599 pSys->cpuInfo.caps = 0; 600 pSys->cpuInfo.clock = 0; 601 pSys->cpuInfo.dataCacheLineSize = 0; 602 pSys->cpuInfo.l1DataCacheSize = 0; 603 pSys->cpuInfo.l2DataCacheSize = 0; 604 pSys->cpuInfo.coresOnDie = 0; 605 pSys->cpuInfo.platformID = 0; 606 portMemSet(pSys->cpuInfo.name, 0, sizeof(pSys->cpuInfo.name)); 607 608 // Init internal structure to default. 609 // 610 portMemSet(&cpuinfo, 0, sizeof(cpuinfo)); 611 612 // Get CPUID stuff for all processors. We will figure out what to do with it later. 
613 614 // if pOS->osNv_cpuid returns 0, then this cpu does not support cpuid instruction 615 // We just worry about this on the first call... 616 if ( ! pOS->osNv_cpuid(pOS, 0, 0, &eax, &cpuinfo.Foundry.StrID[0], 617 &cpuinfo.Foundry.StrID[2], &cpuinfo.Foundry.StrID[1])) 618 goto Exit; 619 620 pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx); 621 cpuinfo.Family = (NvU16)((eax >> 8) & 0x0F); 622 cpuinfo.ExtFamily = (NvU16)((eax >> 20) & 0xFF); 623 if (cpuinfo.Family != 0xF) 624 { 625 cpuinfo.DisplayedFamily = cpuinfo.Family; 626 } 627 else 628 { 629 cpuinfo.DisplayedFamily = cpuinfo.ExtFamily + cpuinfo.Family; 630 } 631 632 cpuinfo.Model = (NvU8)((eax >> 4) & 0x0F); 633 cpuinfo.ExtModel = (NvU8)((eax >> 16) & 0x0F); 634 if (cpuinfo.Family == 6 || cpuinfo.Family == 0xF) 635 { 636 cpuinfo.DisplayedModel = (cpuinfo.ExtModel << 4) + cpuinfo.Model; 637 } 638 else 639 { 640 cpuinfo.DisplayedModel = cpuinfo.Model; 641 } 642 643 cpuinfo.Stepping = (NvU8)(eax & 0x0F); 644 cpuinfo.StandardFeatures = edx; 645 cpuinfo.BrandId = ((ebx & 0xE0) << 3) | (ebx & 0x1F); // 8bit brandID in 12 bit format 646 647 // Decode the standard features. Assume that all CPU vendors use the 648 // standard feature bits to mean the same thing. Non-Intel vendors use 649 // the extended CPUID to provide non-standard freture bits, so this 650 // should be OK. 651 652 if (cpuinfo.StandardFeatures & CPU_STD_MMX) 653 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_MMX; 654 655 if (cpuinfo.StandardFeatures & CPU_STD_CMOV) 656 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_CMOV; 657 658 if (cpuinfo.StandardFeatures & CPU_STD_CLFSH) 659 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_CLFLUSH; 660 661 // Check for Streaming SIMD extensions (Katmai) 662 if (cpuinfo.StandardFeatures & CPU_STD_SSE) 663 { 664 665 // SFENCE is an SSE instruction, but it does not require CR4.OSFXSR. 
666 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SFENCE; 667 668 if (cpuinfo.StandardFeatures & CPU_STD_FXSR) 669 { 670 NvBool check_osfxsr; 671 NvBool check_osxsave; 672 // Before setting the NV0000_CTRL_SYSTEM_CPU_CAP_SSE bit, we'll 673 // also check that CR4.OSFXSR (bit 9) is set, which means the OS 674 // is prepared to switch the additional SSE FP state for us. 675 // CPU_STD_FXSR indicates that CR4.OSFXSR is valid. 676 check_osfxsr = ((cpuinfo.StandardFeatures & CPU_STD_FXSR) != 0) && 677 ((pOS->osNv_rdcr4(pOS) & 0x200) != 0); 678 679 // For NV0000_CTRL_SYSTEM_CPU_CAP_AVX bit, we need: 680 // - CPU_STD2_OSXSAVE - CR4.OSXSAVE is valid 681 // - CR4.OSXSAVE (bit 18) - The OS will the additional FP state 682 // specified by XCR0 683 // - XCR0 - bits 1 and 2 indicate SSE and AVX support respectively 684 check_osxsave = ((ecx & CPU_STD2_OSXSAVE) != 0) && 685 ((pOS->osNv_rdcr4(pOS) & (1<<18)) != 0) && 686 ((pOS->osNv_rdxcr0(pOS) & 0x6) != 0); 687 if(check_osfxsr) 688 { 689 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE; 690 691 // supports SSE2 (Willamette NI) instructions 692 if (cpuinfo.StandardFeatures & CPU_STD_SSE2) 693 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE2; 694 695 // Prescott New Instructions 696 if (ecx & CPU_STD2_SSE3) 697 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE3; 698 699 // Penryn subset of SSE4 700 if (ecx & CPU_STD2_SSE41) 701 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE41; 702 703 // Nehalem subset of SSE4 704 if (ecx & CPU_STD2_SSE42) 705 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE42; 706 } 707 708 // If the OS setup XSAVE / XRESTOR (and set the AVX bit) 709 // enable AVX 710 if (check_osxsave) 711 { 712 if (ecx & CPU_STD2_AVX) 713 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_AVX; 714 } 715 } 716 } 717 718 if (pOS->osNv_cpuid(pOS, 7, 0, &eax, &ebx, &ecx, &edx)) 719 { 720 if (ebx & CPU_EXT2_ERMS) 721 { 722 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_ERMS; 723 } 724 } 725 726 // Calculate 
the frequency 727 if (cpuinfo.StandardFeatures & CPU_STD_TSC) 728 pSys->cpuInfo.clock = osGetCpuFrequency(); 729 730 // Get the extended features (if they exist). 731 if (pOS->osNv_cpuid(pOS, 0x80000000, 0, &eax, &ebx, &ecx, &edx) && eax >= 0x80000001) 732 { 733 if (pOS->osNv_cpuid(pOS, 0x80000001, 0, &eax, &ebx, &ecx, &edx)) 734 { 735 cpuinfo.ExtendedFeatures = edx; 736 // if 8 bit brandId is 0 737 if (!cpuinfo.BrandId) 738 { 739 // Check for 12 bit brand ID 740 cpuinfo.BrandId = (ebx & 0xfff); 741 } 742 } 743 } 744 745 // Get the embedded processor name (if there is one). 746 getEmbeddedProcessorName(pSys->cpuInfo.name, sizeof(pSys->cpuInfo.name)); 747 748 if (IS_INTEL(cpuinfo.Foundry)) 749 cpuidInfoIntel(pSys, &cpuinfo); 750 else if (IS_AMD(cpuinfo.Foundry)) 751 cpuidInfoAMD(pSys, &cpuinfo); 752 #if defined(_M_IX86) || defined(NVCPU_X86) 753 else if (IS_WINCHIP(cpuinfo.Foundry)) 754 cpuidInfoWinChip(pSys, &cpuinfo); 755 else if (IS_CYRIX(cpuinfo.Foundry)) 756 cpuidInfoCyrix(pSys, &cpuinfo); 757 else if (IS_TRANSM(cpuinfo.Foundry)) 758 cpuidInfoTransmeta(pSys, &cpuinfo); 759 #endif 760 else 761 { 762 // We are clueless. If the processor had an embedded name, its already in there. 763 // If not, use the foundary name as the processor name. 764 if (pSys->cpuInfo.name[0] == 0) 765 portMemCopy(pSys->cpuInfo.name, sizeof(cpuinfo.Foundry.String), cpuinfo.Foundry.String, sizeof(cpuinfo.Foundry.String)); 766 } 767 768 // Pick up the vendor-specific family & model 769 pSys->cpuInfo.family = cpuinfo.DisplayedFamily; 770 pSys->cpuInfo.model = cpuinfo.DisplayedModel; 771 772 #if defined(AMD64) || defined(NVCPU_X86_64) 773 // The WinXP AMD-64 does not context switch the x87/MMX/3DNow registers. We have to zap the bits 774 // even though the CPU supports them. 775 // The OS should somehow tell us this, like CR4.OSFXSR above. Need to find a better way... 
776 777 pSys->cpuInfo.caps &= ~(NV0000_CTRL_SYSTEM_CPU_CAP_MMX | 778 NV0000_CTRL_SYSTEM_CPU_CAP_MMX_EXT | 779 NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW | 780 NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW_EXT); 781 #endif 782 783 pSys->cpuInfo.stepping = cpuinfo.Stepping; 784 pSys->cpuInfo.brandId = cpuinfo.BrandId; 785 786 Exit: 787 788 // set physical/logical processor counts 789 getCpuCounts(pSys, &cpuinfo); 790 791 // host page size used when allocated host-page-aligned objects in heap 792 #ifdef PAGE_SIZE 793 pSys->cpuInfo.hostPageSize = PAGE_SIZE; 794 #else 795 pSys->cpuInfo.hostPageSize = 4096; 796 #endif 797 798 pSys->cpuInfo.bInitialized = NV_TRUE; 799 } 800 801 // 802 // This routine determines the number of physical processors enabled 803 // on the system as well as the number of logical processors per 804 // physical processors. Intel's HyperThreading technology can yield 805 // a logical processor count of > 1 per physical processor. 806 // 807 // This code was more or less lifted from some Intel sample code. 808 // 809 810 #define INTEL_HT_BIT 0x10000000 // EDX[28] 811 #define INTEL_CORE_CNT 0xFC000000 // EAX[31:26] 812 #define INTEL_LOGICAL_CNT 0x00FF0000 // EBX[23:16] 813 #define INTEL_LOGICAL_CNT_LEAFB 0x0000FFFF // EBX[15:0] 814 #define AMD_HT_BIT 0x10000000 // EDX[28] 815 #define AMD_LOGICAL_CNT 0x00FF0000 // EBX[23:16] 816 817 static void 818 getCpuCounts(OBJSYS *pSys, PCPUIDINFO pCpuidInfo) 819 { 820 OBJOS *pOS = SYS_GET_OS(pSys); 821 NvU32 numPhysicalCpus, numLogicalCpus, maxLogicalCpus; 822 NvU32 eax = 0; 823 NvU32 ebx = 0; 824 NvU32 ecx = 0; 825 NvU32 edx = 0; 826 827 // 828 // First use OS call to get number of logical CPUs. 829 // 830 numLogicalCpus = osGetCpuCount(); 831 832 // 833 // Assume the number of physical CPUs is the same as the number of logical CPUs. 834 // 835 numPhysicalCpus = numLogicalCpus; 836 maxLogicalCpus = numLogicalCpus; 837 838 // There is no reliable way to tell if hyper-threading is enabled. 
So, if 839 // there is more than 1 logical CPUs AND the CPU is hyperthreading capable, 840 // then assume that HT is enabled. 841 // 842 // This should give the right answer for most cases. Some HT capable dual 843 // CPU systems with HT disabled will be detected as single GPU systems with 844 // HT enabled. While less than ideal, this should be OK, since logical CPUs 845 // is 2 in both cases. 846 // 847 #if defined(_M_IX86) || defined(NVCPU_X86) || defined(NVCPU_X86_64) 848 if (IS_INTEL(pCpuidInfo->Foundry)) 849 { 850 NvBool cpuHasLeafB = NV_FALSE; 851 852 pOS->osNv_cpuid(pOS, 0, 0, &eax, &ebx, &ecx, &edx); 853 if (eax >= 0xB) 854 { 855 pOS->osNv_cpuid(pOS, 0xB, 0, &eax, &ebx, &ecx, &edx); 856 if (ebx != 0) 857 { 858 cpuHasLeafB = NV_TRUE; 859 } 860 } 861 862 pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx); 863 864 if (edx & INTEL_HT_BIT) 865 { 866 NvU32 CpuHT; 867 868 if (cpuHasLeafB) 869 { 870 pOS->osNv_cpuid(pOS, 0xB, 0, &eax, &ebx, &ecx, &edx); 871 CpuHT = (ebx & INTEL_LOGICAL_CNT_LEAFB); 872 pOS->osNv_cpuid(pOS, 0xB, 1, &eax, &ebx, &ecx, &edx); 873 maxLogicalCpus = (ebx & INTEL_LOGICAL_CNT_LEAFB); 874 numPhysicalCpus = maxLogicalCpus/CpuHT; 875 } 876 else 877 { 878 pOS->osNv_cpuid(pOS, 0, 0, &eax, &ebx, &ecx, &edx); 879 if (eax >=4) 880 { 881 pOS->osNv_cpuid(pOS, 4, 0, &eax, &ebx, &ecx, &edx); 882 numPhysicalCpus = ((eax & INTEL_CORE_CNT) >> 26) + 1; 883 pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx); 884 maxLogicalCpus = (ebx & INTEL_LOGICAL_CNT) >> 16; 885 CpuHT = maxLogicalCpus/numPhysicalCpus; 886 } 887 } 888 889 if (numPhysicalCpus > numLogicalCpus) 890 numPhysicalCpus = numLogicalCpus; 891 892 if (numPhysicalCpus < 1) 893 numPhysicalCpus = 1; 894 895 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_HT_CAPABLE; 896 } 897 } 898 else if(IS_AMD(pCpuidInfo->Foundry)) 899 { 900 pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx); 901 if( edx & AMD_HT_BIT ) 902 { 903 maxLogicalCpus = (ebx & AMD_LOGICAL_CNT) >> 16; 904 } 905 } 906 907 
NV_PRINTF(LEVEL_INFO, "RmInitCpuCounts: physical 0x%x logical 0x%x\n", 908 numPhysicalCpus, numLogicalCpus); 909 #endif 910 911 if(maxLogicalCpus < numLogicalCpus) 912 maxLogicalCpus = numLogicalCpus; 913 914 #if NVCPU_IS_FAMILY_X86 915 // bug1974464: Ryzen physical CPU count is getting misreported 916 if (IS_AMD(pCpuidInfo->Foundry) && (pCpuidInfo->DisplayedFamily == 0x17)) 917 { 918 numPhysicalCpus = NV_MAX(maxLogicalCpus/2, 1); 919 } 920 #endif 921 922 pSys->cpuInfo.numPhysicalCpus = numPhysicalCpus; 923 pSys->cpuInfo.numLogicalCpus = numLogicalCpus; 924 pSys->cpuInfo.maxLogicalCpus = maxLogicalCpus; 925 926 return; 927 } 928 929 930 // getEmbeddedProcessorName 931 // 932 // All processors that have extended CPUID info up through 0x80000004 have an embedded name. 933 // 934 static NvBool getEmbeddedProcessorName(char *pName, NvU32 size) 935 { 936 NvU32 op, eax, ebx, ecx, edx; 937 char *p = pName; 938 OBJSYS *pSys = SYS_GET_INSTANCE(); 939 OBJOS *pOS = SYS_GET_OS(pSys); 940 const NvU32 maxSize = 48; // max 48 bytes on x86 CPUs 941 942 NV_ASSERT_OR_RETURN(size >= maxSize, 0); 943 944 pName[size > maxSize ? maxSize : size-1] = 0; // Make sure it has a zero at the end. 945 946 // Is there is a enough data? If not bail. 947 if (pOS->osNv_cpuid(pOS, 0x80000000, 0, &eax, &ebx, &ecx, &edx) == 0 || eax < 0x80000004) 948 return NV_FALSE; 949 950 // Yes, get 48 bytes of CPU name. 951 for (op = 0x80000002; op < 0x80000005; op++, p += 16) 952 pOS->osNv_cpuid(pOS, op, 0, (NvU32 *)&p[0], (NvU32 *)&p[4], (NvU32 *)&p[8], (NvU32 *)&p[12]); 953 954 // Kill leading spaces. (Intel's string is right justified.) 955 if (*pName == ' ') 956 { 957 p = pName; 958 while (*p == ' ') 959 p++; 960 do 961 *(pName++) = *(p++); 962 while (*p); 963 } 964 965 return NV_TRUE; 966 } 967 968 969 // Decode Prescott style cache descriptors. 
970 // 971 static NvBool DecodePrescottCache(OBJSYS *pSys) 972 { 973 NvU32 eax, ebx, ecx, edx; 974 OBJOS *pOS = SYS_GET_OS(pSys); 975 976 // Decode the cache desciptors. 977 978 if (pOS->osNv_cpuid(pOS, 0, 0, &eax, &ebx, &ecx, &edx)) 979 { 980 if (eax >= 4 && eax < 0x80000000) // CPU support new (Prescott) cache descrtiptors? 981 { 982 // From Prescot New Instructions Software Developers Guide 252490-003 983 984 NvU32 uLevel; 985 NvU32 uLineSize; 986 NvU32 uCacheSize; 987 int i; 988 989 // Loop over the cache descriptors by incrementing sub-function. This will never get 990 // get run on pre-Prescott CPUs since they do not support CPUID 4, but limit number of 991 // cache descriptors to 20 just in case, so it does not get in an infinite loop. 992 // 993 for (i = 0; i < 20; i++) 994 { 995 pOS->osNv_cpuid(pOS, 4, i, &eax, &ebx, &ecx, &edx); 996 997 if (i == 0) 998 { 999 pSys->cpuInfo.coresOnDie = (eax >> 26) + 1;// eax[31:26] Processor cores on the chip 1000 } 1001 1002 switch (eax & 0x1f) // Cache type. 1003 { 1004 case 0: // No more cache descriptors. 1005 i = 100; // Break out of loop. 1006 break; 1007 1008 case 1: // Data cache. 1009 case 3: // Unified cache. 1010 uLevel = (eax >> 5) & 0x7; // eax[7:5] Cache level 1011 uLineSize = (ebx & 0xfff) + 1; // ebx[11:0] System Coherency Line Size 1012 1013 uCacheSize = uLineSize // ebx[11:0] System Coherency Line Size 1014 * (((ebx >> 12) & 0x3FF) + 1) // ebx[21:12] Physical line partitions 1015 * (((ebx >> 22) & 0x3FF) + 1) // ebx[21:12] Ways of associativity 1016 * (ecx + 1) // ecx[31:0] Number of sets 1017 / 1024; // Put it in KB. 1018 1019 pSys->cpuInfo.dataCacheLineSize = uLineSize; 1020 1021 if (uLevel == 1) 1022 pSys->cpuInfo.l1DataCacheSize = uCacheSize; 1023 else if (pSys->cpuInfo.l2DataCacheSize < uCacheSize) 1024 pSys->cpuInfo.l2DataCacheSize = uCacheSize; 1025 break; 1026 1027 default: // Instruction of unknown cache type. 1028 break; // Do nothing. 
1029 } 1030 } 1031 1032 return NV_TRUE; 1033 } 1034 } 1035 1036 return NV_FALSE; 1037 } 1038 1039 #if defined(_M_IX86) || defined(NVCPU_X86) 1040 static void DecodeIntelCacheEntry(OBJSYS *pSys, NvU8 cacheEntry) 1041 { 1042 // From Intel's AP-485 (11/03). 1043 // 1044 // 00h Null 1045 // 01h Instruction TLB: 4K-byte Pages, 4-way set associative, 32 entries 1046 // 02h Instruction TLB: 4M-byte Pages, fully associative, 2 entries 1047 // 03h Data TLB: 4K-byte Pages, 4-way set associative, 64 entries 1048 // 04h Data TLB: 4M-byte Pages, 4-way set associative, 8 entries 1049 // 06h 1st-level instruction cache: 8K-bytes, 4-way set associative, 32 byte line size 1050 // 08h 1st-level instruction cache: 16K-bytes, 4-way set associative, 32 byte line size 1051 // 0Ah 1st-level data cache: 8K-bytes, 2-way set associative, 32 byte line size 1052 // 0Ch 1st-level data cache: 16K-bytes, 4-way set associative, 32 byte line size 1053 // 22h 3rd-level cache: 512K-bytes, 4-way set associative, sectored cache, 64-byte line size 1054 // 23h 3rd-level cache: 1M-bytes, 8-way set associative, sectored cache, 64-byte line size 1055 // 25h 3rd-level cache: 2MB, 8-way set associative, sectored cache, 64-byte line size 1056 // 29h 3rd-level cache: 4MB, 8-way set associative, sectored cache, 64-byte line size 1057 // 2Ch 1st-level data cache: 32K-bytes, 8-way set associative, 64-byte line size 1058 // 30h 1st-level instruction cache: 32K-bytes, 8-way set associative, 64-byte line size 1059 // 39h 2nd-level cache: 128K-bytes, 4-way set associative, sectored cache, 64-byte line size 1060 // 3Bh 2nd-level cache: 128KB, 2-way set associative, sectored cache, 64-byte line size 1061 // 3Ch 2nd-level cache: 256K-bytes, 4-way set associative, sectored cache, 64-byte line size 1062 // 40h No 2nd-level cache or, if processor contains a valid 2nd-level cache, no3rd-level cache 1063 // 41h 2nd-level cache: 128K-bytes, 4-way set associative, 32 byte line size 1064 // 42h 2nd-level cache: 256K-bytes, 
4-way set associative, 32 byte line size 1065 // 43h 2nd-level cache: 512K-bytes, 4-way set associative, 32 byte line size 1066 // 44h 2nd-level cache: 1M-bytes, 4-way set associative, 32 byte line size 1067 // 45h 2nd-level cache: 2M-bytes, 4-way set associative, 32 byte line size 1068 // 50h Instruction TLB: 4K, 2M or 4M pages, fully associative, 64 entries 1069 // 51h Instruction TLB: 4K, 2M or 4M pages, fully associative, 128 entries 1070 // 52h Instruction TLB: 4K, 2M or 4M pages, fully associative, 256 entries 1071 // 5Bh Data TLB: 4K or 4M pages, fully associative, 64 entries 1072 // 5Ch Data TLB: 4K or 4M pages, fully associative, 128 entries 1073 // 5Dh Data TLB: 4K or 4M pages, fully associative, 256 entries 1074 // 66h 1st-level data cache: 8K-bytes, 4-way set associative, sectored cache, 64-byte line size 1075 // 67h 1st-level data cache: 16K-bytes, 4-way set associative, sectored cache, 64-byte line size 1076 // 68h 1st-level data cache: 32K-bytes, 4 way set associative, sectored cache, 64-byte line size 1077 // 70h Trace cache: 12K-uops, 8-way set associative 1078 // 71h Trace cache: 16K-uops, 8-way set associative 1079 // 72h Trace cache: 32K-uops, 8-way set associative 1080 // 79h 2nd-level cache: 128K-bytes, 8-way set associative, sectored cache, 64-byte line size 1081 // 7Ah 2nd-level cache: 256K-bytes, 8-way set associative, sectored cache, 64-byte line size 1082 // 7Bh 2nd-level cache: 512K-bytes, 8-way set associative, sectored cache, 64-byte line size 1083 // 7Ch 2nd-level cache: 1M-bytes, 8-way set associative, sectored cache, 64-byte line size 1084 // 82h 2nd-level cache: 256K-bytes, 8-way set associative, 32 byte line size 1085 // 83h 2nd-level cache: 512K-bytes, 8-way set associative, 32 byte line size 1086 // 84h 2nd-level cache: 1M-bytes, 8-way set associative, 32 byte line size 1087 // 85h 2nd-level cache: 2M-bytes, 8-way set associative, 32 byte line size 1088 // 86h 2nd-level cache: 512K-bytes, 4-way set associative, 64 byte line size 
1089 // 87h 2nd-level cache: 1M-bytes, 8-way set associative, 64 byte line size 1090 // B0h Instruction TLB: 4K-byte Pages, 4-way set associative, 128 entries 1091 // B3h Data TLB: 4K-byte Pages, 4-way set associative, 128 entries 1092 // 1093 // From Intel via Michael Diamond (under NDA): 1094 // Fixes bug 75982 - Reporting incorrect cache info on Banias mobile platform. 1095 // 1096 // 7D 2M; 8 way; 64 byte line size; unified on-die 1097 // 78 1M; 8 way; 64 byte line size, unified on-die 1098 // 1099 // Note: Newer GPUs have added an additional cache level. What used to be L2 is 1100 // now L3. Set the L2 cache to the largest L2 or L3 descriptor found. 1101 1102 switch (cacheEntry) 1103 { 1104 case 0x0A: // 1st-level data cache: 8K-bytes, 2-way set associative, 32 byte line size 1105 pSys->cpuInfo.l1DataCacheSize = 8; 1106 pSys->cpuInfo.dataCacheLineSize = 32; 1107 break; 1108 1109 case 0x0C: // 1st-level data cache: 16K-bytes, 4-way set associative, 32 byte line size 1110 pSys->cpuInfo.l1DataCacheSize = 16; 1111 pSys->cpuInfo.dataCacheLineSize = 32; 1112 break; 1113 1114 case 0x66: // 1st-level data cache: 8K-bytes, 4-way set associative, sectored cache, 64-byte line size 1115 pSys->cpuInfo.l1DataCacheSize = 8; 1116 pSys->cpuInfo.dataCacheLineSize = 64; 1117 break; 1118 1119 case 0x67: // 1st-level data cache: 16K-bytes, 4-way set associative, sectored cache, 64-byte line size 1120 pSys->cpuInfo.l1DataCacheSize = 16; 1121 pSys->cpuInfo.dataCacheLineSize = 64; 1122 break; 1123 1124 case 0x2C: // 1st-level data cache: 32K-bytes, 8-way set associative, 64-byte line size 1125 case 0x68: // 1st-level data cache: 32K-bytes, 4 way set associative, sectored cache, 64-byte line size 1126 pSys->cpuInfo.l1DataCacheSize = 32; 1127 pSys->cpuInfo.dataCacheLineSize = 64; 1128 break; 1129 1130 case 0x41: // 2nd-level cache: 128K-bytes, 4-way set associative, 32 byte line size 1131 pSys->cpuInfo.dataCacheLineSize = 32; 1132 if (pSys->cpuInfo.l2DataCacheSize < 128) 1133 
pSys->cpuInfo.l2DataCacheSize = 128; 1134 break; 1135 1136 case 0x39: // 2nd-level cache: 128K-bytes, 4-way set associative, sectored cache, 64-byte line size 1137 case 0x3B: // 2nd-level cache: 128KB, 2-way set associative, sectored cache, 64-byte line size 1138 case 0x79: // 2nd-level cache: 128K-bytes, 8-way set associative, sectored cache, 64-byte line size 1139 pSys->cpuInfo.dataCacheLineSize = 64; 1140 if (pSys->cpuInfo.l2DataCacheSize < 128) 1141 pSys->cpuInfo.l2DataCacheSize = 128; 1142 break; 1143 1144 case 0x42: // 2nd-level cache: 256K-bytes, 4-way set associative, 32 byte line size 1145 case 0x82: // 2nd-level cache: 256K-bytes, 8-way set associative, 32 byte line size 1146 pSys->cpuInfo.dataCacheLineSize = 32; 1147 if (pSys->cpuInfo.l2DataCacheSize < 256) 1148 pSys->cpuInfo.l2DataCacheSize = 256; 1149 break; 1150 1151 case 0x3C: // 2nd-level cache: 256K-bytes, 4-way set associative, sectored cache, 64-byte line size 1152 case 0x7A: // 2nd-level cache: 256K-bytes, 8-way set associative, sectored cache, 64-byte line size 1153 pSys->cpuInfo.dataCacheLineSize = 64; 1154 if (pSys->cpuInfo.l2DataCacheSize < 256) 1155 pSys->cpuInfo.l2DataCacheSize = 256; 1156 break; 1157 1158 case 0x43: // 2nd-level cache: 512K-bytes, 4-way set associative, 32 byte line size 1159 case 0x83: // 2nd-level cache: 512K-bytes, 8-way set associative, 32 byte line size 1160 pSys->cpuInfo.dataCacheLineSize = 32; 1161 if (pSys->cpuInfo.l2DataCacheSize < 512) 1162 pSys->cpuInfo.l2DataCacheSize = 512; 1163 break; 1164 1165 case 0x22: // 3rd-level cache: 512K-bytes, 4-way set associative, sectored cache, 64-byte line size 1166 case 0x7B: // 2nd-level cache: 512K-bytes, 8-way set associative, sectored cache, 64-byte line size 1167 case 0x86: // 2nd-level cache: 512K-bytes, 4-way set associative, 64 byte line size 1168 pSys->cpuInfo.dataCacheLineSize = 64; 1169 if (pSys->cpuInfo.l2DataCacheSize < 512) 1170 pSys->cpuInfo.l2DataCacheSize = 512; 1171 break; 1172 1173 case 0x44: // 2nd-level 
cache: 1M-bytes, 4-way set associative, 32 byte line size 1174 case 0x84: // 2nd-level cache: 1M-bytes, 8-way set associative, 32 byte line size 1175 pSys->cpuInfo.dataCacheLineSize = 32; 1176 if (pSys->cpuInfo.l2DataCacheSize < 1024) 1177 pSys->cpuInfo.l2DataCacheSize = 1024; 1178 break; 1179 1180 case 0x23: // 3rd-level cache: 1M-bytes, 8-way set associative, sectored cache, 64-byte line size 1181 case 0x78: // 1M; 8 way; 64 byte line size, unified on-die 1182 case 0x7C: // 2nd-level cache: 1M-bytes, 8-way set associative, sectored cache, 64-byte line size 1183 case 0x87: // 2nd-level cache: 1M-bytes, 8-way set associative, 64 byte line size 1184 pSys->cpuInfo.dataCacheLineSize = 64; 1185 if (pSys->cpuInfo.l2DataCacheSize < 1024) 1186 pSys->cpuInfo.l2DataCacheSize = 1024; 1187 break; 1188 1189 case 0x45: // 2nd-level cache: 2M-bytes, 4-way set associative, 32 byte line size 1190 case 0x85: // 2nd-level cache: 2M-bytes, 8-way set associative, 32 byte line size 1191 pSys->cpuInfo.dataCacheLineSize = 32; 1192 if (pSys->cpuInfo.l2DataCacheSize < 2048) 1193 pSys->cpuInfo.l2DataCacheSize = 2048; 1194 break; 1195 1196 case 0x25: // 3rd-level cache: 2MB, 8-way set associative, sectored cache, 64-byte line size 1197 case 0x7D: // 2M; 8 way; 64 byte line size; unified on-die 1198 pSys->cpuInfo.dataCacheLineSize = 64; 1199 if (pSys->cpuInfo.l2DataCacheSize < 2048) 1200 pSys->cpuInfo.l2DataCacheSize = 2048; 1201 break; 1202 1203 case 0x29: // 3rd-level cache: 4MB, 8-way set associative, sectored cache, 64-byte line size 1204 pSys->cpuInfo.dataCacheLineSize = 64; 1205 if (pSys->cpuInfo.l2DataCacheSize < 4096) 1206 pSys->cpuInfo.l2DataCacheSize = 4096; 1207 break; 1208 } 1209 } 1210 1211 static void DecodeIntelCacheRegister(OBJSYS *pSys, NvU32 cacheRegister /* punny, huh? */) 1212 { 1213 if ((cacheRegister & NVBIT(31)) == 0) // If bit 31 is set, it is reserved. 
1214 { 1215 DecodeIntelCacheEntry(pSys, (NvU8)(cacheRegister >> 24)); 1216 DecodeIntelCacheEntry(pSys, (NvU8)(cacheRegister >> 16)); 1217 DecodeIntelCacheEntry(pSys, (NvU8)(cacheRegister >> 8)); 1218 DecodeIntelCacheEntry(pSys, (NvU8)cacheRegister); 1219 } 1220 } 1221 #endif 1222 1223 static void cpuidInfoIntel(OBJSYS *pSys, PCPUIDINFO pCpuidInfo) 1224 { 1225 NvU32 eax, ebx, ecx, edx; 1226 OBJOS *pOS = SYS_GET_OS(pSys); 1227 1228 if (pCpuidInfo->Family == 5) 1229 { 1230 if (pCpuidInfo->Model == 4) 1231 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P55; 1232 else 1233 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P5; 1234 } 1235 else if (pCpuidInfo->Family == 6) 1236 { 1237 switch (pCpuidInfo->DisplayedModel) 1238 { 1239 case 1: // Pentium Pro 1240 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P6; 1241 break; 1242 1243 case 3: // Pentium II 1244 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P2; 1245 break; 1246 1247 case 5: // Pentium II, Pentium II Xeon, or Celeron 1248 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P2XC; 1249 break; 1250 1251 case 6: // Pentium II Celeron-A 1252 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_CELA; 1253 break; 1254 1255 case 7: // Pentium III or Pentium III Xeon (Katmai) 1256 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P3; 1257 break; 1258 1259 case 15: // Conroe, Core2 Duo 1260 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_CORE2; 1261 break; 1262 1263 case 22: // Celeron model 16h (65nm) 1264 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_CELN_M16H; 1265 break; 1266 1267 case 23: // Intel Core2 Extreme/Intel Xeon model 17h (45nm) 1268 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_CORE2_EXTRM; 1269 break; 1270 1271 case 28: 1272 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ATOM; 1273 break; 1274 1275 case 8: // Pentium III, Pentium III Xeon, or Celeron (Coppermine, 0.18 micron) 1276 case 10: // Pentium III Xeon (Tualatin, 0.13 micron) 1277 case 11: // Pentium III, or Celeron (Tualatin, 0.13 micron) 
1278 default: // If it is a new family 6, it is a Pentium III of some type. 1279 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P3_INTL2; 1280 break; 1281 } 1282 // Flag processors that may be affected by bug 124888. At this time, 1283 // we believe these are Pentium III and Pentium M processors. The 1284 // model numbers for these processors in Family 6 are: 1285 // 7 - Pentium III or Pentium III Xeon 1286 // 8 - Pentium III, Pentium III Xeon, or Celeron 1287 // 9 - Pentium M 1288 // 10 - Pentium III Xeon 1289 // 11 - Pentium III 1290 // 12 - ??? 1291 // 13 - Pentium M ("Dothan") 1292 // 14 - ??? 1293 // 15 - Core 2 (bug 272047) 1294 if (pCpuidInfo->Model >= 7) 1295 { 1296 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_NEEDS_WAR_124888; 1297 } 1298 } 1299 else if (pCpuidInfo->Family == 0x0F) 1300 { 1301 // Model 0 & 1 == Pentium 4 or Pentium 4 Xeon (Willamette, 423 or 478-pin packages, 0.18 micron) 1302 // Model 2 == Pentium 4 or Pentium 4 Xeon (Northwood, 478-pin package for brookdale, 0.13 micron) 1303 // 1304 // Be careful if you change this. Both D3D and OpenGL are enabling 1305 // performance options based on NV0000_CTRL_SYSTEM_CPU_TYPE_P4. 1306 // 1307 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P4; 1308 1309 // The first P4s (pre-Northwood ones) have a performance problem 1310 // when mixing write combined and cached writes. This is fixed 1311 // with model revision 2. 1312 if ((pCpuidInfo->Model == 0) || (pCpuidInfo->Model == 1)) 1313 { 1314 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_NEEDS_WC_WORKAROUND; 1315 } 1316 } 1317 1318 if (pCpuidInfo->Family == 0xF || (pCpuidInfo->Family == 6 && pCpuidInfo->Model >= 7)) 1319 { 1320 if (pOS->osNv_cpuid(pOS, 0x17, 0, &eax, &ebx, &ecx, &edx)) 1321 pSys->cpuInfo.platformID = (edx >> 18) & 7; // edx[20:18] PlatformID (package type) 1322 } 1323 1324 // Decode the cache desciptors. 
1325 if (!DecodePrescottCache(pSys)) 1326 { 1327 #if defined(_M_IX86) || defined(NVCPU_X86) 1328 1329 // Prescott style cache descriptors are not supported. Fall back to older style. 1330 // 1331 if (pOS->osNv_cpuid(pOS, 0, 0, &eax, &ebx, &ecx, &edx)) 1332 { 1333 if (eax >= 2) // CPU support old cache descrtiptors? 1334 { 1335 pOS->osNv_cpuid(pOS, 2, 0, &eax, &ebx, &ecx, &edx); 1336 1337 if ((eax & 0xff) == 1) // AL contains number of times CPU must be called. This will be 1 forever. 1338 { 1339 DecodeIntelCacheRegister(pSys, eax & 0xffffff00); 1340 DecodeIntelCacheRegister(pSys, ebx); 1341 DecodeIntelCacheRegister(pSys, ecx); 1342 DecodeIntelCacheRegister(pSys, edx); 1343 } 1344 } 1345 } 1346 #endif 1347 } 1348 } 1349 1350 static void cpuidInfoAMD(OBJSYS *pSys, PCPUIDINFO pCpuidInfo) 1351 { 1352 NvU32 eax = 0; 1353 NvU32 ebx = 0; 1354 NvU32 ecx = 0; 1355 NvU32 edx = 0; 1356 1357 OBJOS *pOS = SYS_GET_OS(pSys); 1358 NvU32 largestExtendedFunctionNumberSupported = 0x80000000; 1359 1360 if (pCpuidInfo->Family == 5) // K5, K6, K6-2 with 3DNow, K6-3 1361 { 1362 if (pCpuidInfo->Model < 6) 1363 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K5; 1364 else if (pCpuidInfo->Model < 8) 1365 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K6; 1366 else if (pCpuidInfo->Model == 8) 1367 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K62; 1368 else if (pCpuidInfo->Model == 9) 1369 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K63; 1370 } 1371 else if (pCpuidInfo->Family == 6) // K7 1372 { 1373 // Family 6 is a mixture of Athlon and Duron processors. Just set the 1374 // processor type to Athlon. The processor name will show the branding. 1375 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K7; 1376 } 1377 else if (pCpuidInfo->Family == 15) // K8 1378 { 1379 // If family is 15, we need to use AMD's extended family/model information. 
1380 pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx); 1381 pCpuidInfo->Family = (NvU16)(((eax >> 8) & 0x0F) + ((eax >> 16) & 0xFF0)); // 27:20 concat 11:8 1382 pCpuidInfo->Model = (NvU8) (((eax >> 4) & 0x0F) + ((eax >> 12) & 0xF0)); // 19:16 concat 7:4 1383 1384 // Differentiate K8, K10, K11, RYZEN, etc 1385 switch( pCpuidInfo->Family & 0xFF0) 1386 { 1387 case 0x000: 1388 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K8; 1389 break; 1390 case 0x010: 1391 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K10; 1392 break; 1393 case 0x020: 1394 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K11; 1395 break; 1396 case 0x080: 1397 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_RYZEN; 1398 break; 1399 default: 1400 NV_PRINTF(LEVEL_ERROR, 1401 "Unrecognized AMD processor in cpuidInfoAMD\n"); 1402 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K8; 1403 break; 1404 } 1405 } 1406 1407 if (pCpuidInfo->ExtendedFeatures & CPU_EXT_3DNOW) 1408 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW; // 3DNow 1409 1410 if (pCpuidInfo->ExtendedFeatures & CPU_EXT_AMD_3DNOW_EXT) 1411 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW_EXT; // 3DNow, with Extensions (AMD specific) 1412 1413 if (pCpuidInfo->ExtendedFeatures & CPU_EXT_AMD_MMX_EXT) 1414 { 1415 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_MMX_EXT; // MMX, with Extensions (AMD specific) 1416 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SFENCE; 1417 } 1418 1419 // Get the cache info. 
1420 if (pOS->osNv_cpuid(pOS, 0x80000000, 0, &eax, &ebx, &ecx, &edx)) 1421 { 1422 largestExtendedFunctionNumberSupported = eax; 1423 1424 if (largestExtendedFunctionNumberSupported >= 0x80000006) 1425 { 1426 // L1 cache 1427 if (pOS->osNv_cpuid(pOS, 0x80000005, 0, &eax, &ebx, &ecx, &edx)) 1428 { 1429 pSys->cpuInfo.dataCacheLineSize = ecx & 0xff; 1430 pSys->cpuInfo.l1DataCacheSize = ecx >> 24; 1431 } 1432 1433 // L2 cache 1434 if (pOS->osNv_cpuid(pOS, 0x80000006, 0, &eax, &ebx, &ecx, &edx)) 1435 pSys->cpuInfo.l2DataCacheSize = ecx >> 16; 1436 } 1437 1438 // Get the SEV capability info 1439 if ((largestExtendedFunctionNumberSupported >= 0x8000001f) && 1440 pOS->osNv_cpuid(pOS, 0x8000001f, 0, &eax, &ebx, &ecx, &edx)) 1441 { 1442 // 1443 // EAX[1] stores capability info 1444 // ECX[31:0] stores # of encrypted guests supported simultaneously 1445 // 1446 if (eax & 0x2) 1447 { 1448 pSys->cpuInfo.bSEVCapable = NV_TRUE; 1449 pSys->cpuInfo.maxEncryptedGuests = ecx; 1450 } 1451 } 1452 } 1453 } 1454 1455 1456 #if defined(_M_IX86) || defined(NVCPU_X86) 1457 1458 static void cpuidInfoWinChip(OBJSYS *pSys, PCPUIDINFO pCpuidInfo) 1459 { 1460 if (pCpuidInfo->Family == 5) // Winchip C6, Winchip2 w/ 3DNow 1461 { 1462 if (pCpuidInfo->Model == 4) 1463 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_C6; 1464 if (pCpuidInfo->Model == 8) 1465 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_C62; 1466 } 1467 1468 if (pCpuidInfo->ExtendedFeatures & CPU_EXT_3DNOW) 1469 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW; 1470 } 1471 1472 static void cpuidInfoCyrix(OBJSYS *pSys, PCPUIDINFO pCpuidInfo) 1473 { 1474 if (pCpuidInfo->Family == 4) // MediaGX 1475 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_GX; 1476 if (pCpuidInfo->Family == 5) // Cyrix 6x86 or MediaGX w/ MMX 1477 { 1478 if (pCpuidInfo->Model == 2) 1479 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_M1; 1480 if (pCpuidInfo->Model == 4) 1481 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_MGX; 1482 } 1483 if 
(pCpuidInfo->Family == 6) // Cyrix MII 1484 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_M2; 1485 1486 if (pCpuidInfo->ExtendedFeatures & CPU_EXT_3DNOW) 1487 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW; 1488 } 1489 1490 static void cpuidInfoTransmeta(OBJSYS *pSys, PCPUIDINFO pCpuidInfo) 1491 { 1492 NvU32 eax, ebx, ecx, edx; 1493 OBJOS *pOS = SYS_GET_OS(pSys); 1494 1495 // 1496 // Transmeta allows the OEM to program the foundry, family, model, and stepping. Arrrrgh... 1497 // If this turns out to be a problem, we will need to use one of the extended CPUID calls to 1498 // get the real info. 1499 // 1500 1501 // Docs were not real clear on which family/model. Just assume it's a Crusoe 1502 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_TM_CRUSOE; 1503 1504 // 1505 // Get the cache info. From preliminary TM8000 programming and config guide, 2/19/03 1506 // This appears to match AMD's cache CPUID definitions. 1507 // 1508 if (pOS->osNv_cpuid(pOS, 0x80000000, 0, &eax, &ebx, &ecx, &edx) && eax >= 0x80000006) 1509 { 1510 // L1 Cache 1511 if (pOS->osNv_cpuid(pOS, 0x80000005, 0, &eax, &ebx, &ecx, &edx)) 1512 { 1513 pSys->cpuInfo.dataCacheLineSize = ecx & 0xff; 1514 pSys->cpuInfo.l1DataCacheSize = ecx >> 24; 1515 } 1516 1517 // L2 Cache 1518 if (pOS->osNv_cpuid(pOS, 0x80000006, 0, &eax, &ebx, &ecx, &edx)) 1519 pSys->cpuInfo.l2DataCacheSize = ecx >> 16; 1520 } 1521 } 1522 1523 #endif // defined(_M_IX86) || defined(NVCPU_X86) 1524 1525 #endif // defined(_M_IX86) || defined(NVCPU_X86) || defined(AMD64) || defined(NVCPU_X86_64) 1526 1527 #endif // NVCPU_IS_X86 || NVCPU_IS_X86_64 1528