1 /*
2  * SPDX-FileCopyrightText: Copyright (c) 2014-2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
3  * SPDX-License-Identifier: MIT
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a
6  * copy of this software and associated documentation files (the "Software"),
7  * to deal in the Software without restriction, including without limitation
8  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9  * and/or sell copies of the Software, and to permit persons to whom the
10  * Software is furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
21  * DEALINGS IN THE SOFTWARE.
22  */
23 
24 /******************* CPU Information Gather Routines ***********************\
25 *                                                                           *
26 *   One time initialization code to setup the Processor type                *
27 *                                                                           *
28 \***************************************************************************/
29 
30 #include "cpuopsys.h"
31 
32 #include "Nvcm.h"
33 #include "os/os.h"
34 #include "core/system.h"
35 
36 #include "ctrl/ctrl0000/ctrl0000system.h"
37 
38 
39 #if NVCPU_IS_AARCH64
40 
41 #include "cpu_arm_def.h"
42 
43 #if defined(__GNUC__)
44 
/*
 * Read an AArch64 system register.
 *
 *   reg - string literal naming the register; it is pasted into the text of
 *         the "mrs" instruction.
 *
 * Evaluates (GNU statement expression) to the value read, as an NvU32.
 *
 * The asm is explicitly volatile.  It has an output operand and no inputs,
 * so without the qualifier the compiler may treat two textually identical
 * reads as one, or move a read ahead of a preceding register write.  That
 * matters in this file, where a selector register is written and a
 * dependent register is then read back more than once.
 */
#define CP_READ_REGISTER(reg)                                   \
    ({                                                          \
        NvU32 __res;                                            \
                                                                \
        asm volatile("mrs %0, " reg "\r\t"                      \
            : "=r" (__res)                                      \
           );                                                   \
                                                                \
        __res;                                                  \
    })

/*
 * Write a value to an AArch64 system register.
 *
 *   reg - string literal naming the register; pasted into the "msr" text.
 *   val - value to write (passed in a general purpose register).
 *
 * An asm with no outputs is already implicitly volatile; the qualifier is
 * spelled out so the read and write macros visibly share the same ordering
 * guarantee relative to each other.
 */
#define CP_WRITE_REGISTER(reg, val)                             \
    ({                                                          \
        asm volatile("msr " reg ", %0\r\t"                      \
            :                                                   \
            : "r" (val)                                         \
           );                                                   \
    })
63 
64 #endif //end defined(__GNUC__)
65 
66 static void DecodeAarch64Cache(OBJSYS *pSys)
67 {
68     NvU32 val, field, numsets, assoc, linesize;
69 
70     // Select level 1 data cache
71     CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE);
72 
73     // Retrieve data cache information
74     val = CP_READ_CCSIDR_REGISTER();
75 
76     field    = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val);
77     numsets  = field + 1;
78     field    = GET_BITMASK(CCSIDR_CACHE_ASSOCIATIVITY, val);
79     assoc    = field + 1;
80     field    = GET_BITMASK(CCSIDR_CACHE_LINE_SIZE, val);
81     linesize = 1 << (field + 4);
82 
83     pSys->cpuInfo.dataCacheLineSize = linesize;
84     pSys->cpuInfo.l1DataCacheSize   = (numsets * assoc * linesize) >> 10;
85 
86     // Select level 2 data cache
87     CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE_LEVEL2);
88 
89     // Retrieve data cache information
90     val = CP_READ_CCSIDR_REGISTER();
91 
92     field    = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val);
93     numsets  = field + 1;
94     field    = GET_BITMASK(CCSIDR_CACHE_ASSOCIATIVITY, val);
95     assoc    = field + 1;
96     field    = GET_BITMASK(CCSIDR_CACHE_LINE_SIZE, val);
97     linesize = 1 << (field + 4);
98 
99     pSys->cpuInfo.l2DataCacheSize = (numsets * assoc * linesize) >> 10;
100 }
101 
102 /*
103  * ID the CPU.
104  */
105 void RmInitCpuInfo(void)
106 {
107 #define AARCH64_VENDOR_PART_NUMBER(v, p) \
108         (((v)<<16)|(p))
109 #define AARCH64_VENDOR_PART(v, p) \
110         AARCH64_VENDOR_PART_NUMBER(CP_MIDR_IMPLEMENTER_##v, CP_MIDR_PRIMARY_PART_NUM_##p)
111 
112     OBJSYS *pSys = SYS_GET_INSTANCE();
113 
114     if (pSys->cpuInfo.bInitialized)
115     {
116         return;
117     }
118 
119     // Init structure to default
120     portMemSet(&pSys->cpuInfo, 0, sizeof(pSys->cpuInfo));
121 
122     // ARM has the equivalent of a fence instruction (DSB)
123 
124     // Leave this here for MODS
125     pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_UNKNOWN;
126     pSys->cpuInfo.caps = (NV0000_CTRL_SYSTEM_CPU_CAP_SFENCE |
127                           NV0000_CTRL_SYSTEM_CPU_CAP_WRITE_COMBINING);
128 
129     // Calculate the frequency
130     pSys->cpuInfo.clock = osGetCpuFrequency();
131 
132     // Number of core is available from SCU configuration.
133     pSys->cpuInfo.numPhysicalCpus = osGetCpuCount();
134 
135     // There is no hyper-threading on ARM
136     pSys->cpuInfo.numLogicalCpus = pSys->cpuInfo.numPhysicalCpus;
137     pSys->cpuInfo.maxLogicalCpus = pSys->cpuInfo.numPhysicalCpus;
138 
139     // Zero out the vendor-specific family, model & stepping
140     pSys->cpuInfo.family = 0;
141     pSys->cpuInfo.model  = 0;
142     pSys->cpuInfo.stepping = 0;
143 
144     NvU32 val;
145     NvU32 impl;
146     NvU32 part;
147 
148     // Retrieve Main ID register
149     val = CP_READ_MIDR_REGISTER();
150 
151     impl = GET_BITMASK(MIDR_IMPLEMENTER, val);
152     part = GET_BITMASK(MIDR_PRIMARY_PART_NUM, val);
153 
154     switch(AARCH64_VENDOR_PART_NUMBER(impl, part))
155     {
156         case AARCH64_VENDOR_PART(NVIDIA, DENVER_1):
157             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_NV_DENVER_1_0;
158             break;
159         case AARCH64_VENDOR_PART(NVIDIA, DENVER_2):
160             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_NV_DENVER_2_0;
161             break;
162 
163         case AARCH64_VENDOR_PART(NVIDIA, CARMEL):
164             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARMV8A_GENERIC;
165             break;
166         /*
167          * Applied Micro is now Ampere computing, and the Ampere eMag
168          * vendor/part ids are the same as AMCC XGENE
169          */
170         case AARCH64_VENDOR_PART(AMCC, XGENE):
171         case AARCH64_VENDOR_PART(ARM, CORTEX_A76):
172         case AARCH64_VENDOR_PART(MARVELL, THUNDER_X2):
173         case AARCH64_VENDOR_PART(HUAWEI, KUNPENG_920):
174         case AARCH64_VENDOR_PART(ARM, BLUEFIELD):
175         // The Neoverse N1 is the same as Gravitron
176         case AARCH64_VENDOR_PART(ARM, GRAVITRON2):
177         case AARCH64_VENDOR_PART(FUJITSU, A64FX):
178         case AARCH64_VENDOR_PART(PHYTIUM, FT2000):
179         case AARCH64_VENDOR_PART(PHYTIUM, S2500):
180         case AARCH64_VENDOR_PART(AMPERE, ALTRA):
181         case AARCH64_VENDOR_PART(MARVELL, OCTEON_CN96XX):
182         case AARCH64_VENDOR_PART(MARVELL, OCTEON_CN98XX):
183         case AARCH64_VENDOR_PART(ARM, CORTEX_A57):
184             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARMV8A_GENERIC;
185             break;
186         default:
187             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARMV8A_GENERIC;
188             portDbgPrintf(
189                 "NVRM: CPUID: unknown implementer/part 0x%x/0x%x.\n", impl, part);
190             portDbgPrintf(
191                 "The NVIDIA GPU driver for AArch64 has not been qualified on this CPU\n"
192                 "and therefore it is not recommended or intended for use in any production\n"
193                 "environment.\n");
194             break;
195     }
196     DecodeAarch64Cache(pSys);
197 
198     // Host native page size
199 #ifdef PAGE_SIZE
200     pSys->cpuInfo.hostPageSize = PAGE_SIZE;
201 #else
202     pSys->cpuInfo.hostPageSize = 4096;
203 #endif
204 
205     pSys->cpuInfo.bInitialized = NV_TRUE;
206 #undef AARCH64_VENDOR_PART
207 #undef AARCH64_VENDOR_PART_NUMBER
208 }
209 
210 #endif // NVCPU_IS_AARCH64
211 
212 
213 /***************************************************************************/
214 
215 
216 #if NVCPU_IS_ARM
217 
218 #include "cpu_arm_def.h"
219 
220 #if defined(__GNUC__)
221 
222     #define CP_READ_REGISTER(reg)                                   \
223         ({                                                          \
224             NvU32 __res;                                            \
225                                                                     \
226             asm("mrc p15, " reg ", %0, c0, c0, 0\r\t"               \
227                 : "=r" (__res)                                      \
228                 :                                                   \
229                 : "cc");                                            \
230                                                                     \
231             __res;                                                  \
232         })
233 
234     #define CP_WRITE_REGISTER(reg, val)                             \
235         ({                                                          \
236             asm("mcr p15, " reg ", %0, c0, c0, 0\r\t"               \
237                 :                                                   \
238                 : "r"(val));                                        \
239         })
240 
241 #endif //end defined(__GNUC__)
242 
243 /*
244  * Documentation:
245  *
246  * https://developer.arm.com/documentation/ddi0388/f/CIHHDACH
247  */
248 static void DecodeCortexA9Cache(OBJSYS *pSys)
249 {
250     NvU32 val, field;
251 
252     // Select data cache
253     CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE);
254 
255     // Retrieve data cache information
256     val = CP_READ_CCSIDR_REGISTER();
257 
258     // L1 Data Cache Size (from KB to KB)
259     field = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val);
260 
261     if (field == CP_CCSIDR_CACHE_NUM_SETS_16KB)
262     {
263         pSys->cpuInfo.l1DataCacheSize = 16;
264     }
265     else if (field == CP_CCSIDR_CACHE_NUM_SETS_32KB)
266     {
267         pSys->cpuInfo.l1DataCacheSize = 32;
268     }
269     else if (field == CP_CCSIDR_CACHE_NUM_SETS_64KB)
270     {
271         pSys->cpuInfo.l1DataCacheSize = 64;
272     }
273     else
274     {
275         NV_PRINTF(LEVEL_ERROR, "CPUID: Couldn't find L1DataCacheSize.\n");
276     }
277 
278     // There is only one level of cache in the Cortex-A9 processor
279     pSys->cpuInfo.l2DataCacheSize = 0;
280 
281     // Data Cache Line (from W to B)
282     field = GET_BITMASK(CCSIDR_CACHE_LINE_SIZE, val);
283 
284     if (field & CP_CCSIDR_CACHE_LINE_SIZE_8W)
285     {
286         pSys->cpuInfo.dataCacheLineSize = 8 * 4;
287     }
288     else
289     {
290         NV_PRINTF(LEVEL_ERROR, "CPUID: Couldn't find DataCacheLineSize.\n");
291     }
292 }
293 
294 static NvU32 DecodeCortexA15CacheSize(NvU32 field)
295 {
296     switch(field)
297     {
298         case CP_CCSIDR_CACHE_NUM_SETS_A15_32KB:
299             return 32;
300         case CP_CCSIDR_CACHE_NUM_SETS_A15_512KB:
301             return 512;
302         case CP_CCSIDR_CACHE_NUM_SETS_A15_1024KB:
303             return 1024;
304         case CP_CCSIDR_CACHE_NUM_SETS_A15_2048KB:
305             return 2048;
306         case CP_CCSIDR_CACHE_NUM_SETS_A15_4096KB:
307             return 4096;
308         default:
309             NV_PRINTF(LEVEL_ERROR, "CPUID: Couldn't find DataCacheSize.\n");
310             return 0;
311     }
312 }
313 
314 static void DecodeCortexA15Cache(OBJSYS *pSys)
315 {
316     NvU32 val, field;
317 
318     // Select level 1 data cache
319     CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE);
320 
321     // Retrieve data cache information
322     val = CP_READ_CCSIDR_REGISTER();
323 
324     // L1 Data Cache Size (from KB to KB)
325     field = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val);
326 
327     pSys->cpuInfo.l1DataCacheSize = DecodeCortexA15CacheSize(field);
328 
329     // Data Cache Line (from W to B)
330     field = GET_BITMASK(CCSIDR_CACHE_LINE_SIZE, val);
331 
332     // line size = 2 ** (field + 2) words
333     pSys->cpuInfo.dataCacheLineSize = 4 * (1 << (field + 2));
334 
335     // Select level 2 data cache
336     CP_WRITE_CSSELR_REGISTER(CP_CSSELR_DATA_CACHE_LEVEL2);
337 
338     // Retrieve data cache information
339     val = CP_READ_CCSIDR_REGISTER();
340 
341     // L2 Data Cache Size (from KB to KB)
342     field = GET_BITMASK(CCSIDR_CACHE_NUM_SETS, val);
343 
344     pSys->cpuInfo.l2DataCacheSize = DecodeCortexA15CacheSize(field);
345 }
346 
347 /*
348  * ID the CPU.
349  */
350 void RmInitCpuInfo(void)
351 {
352     OBJSYS *pSys = SYS_GET_INSTANCE();
353 
354     if (pSys->cpuInfo.bInitialized)
355     {
356         return;
357     }
358 
359     // Init structure to default
360     portMemSet(&pSys->cpuInfo, 0, sizeof(pSys->cpuInfo));
361 
362     // ARM has the equivalent of a fence instruction (DSB)
363 
364     // Leave this here for MODS
365     pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_UNKNOWN;
366     pSys->cpuInfo.caps = (NV0000_CTRL_SYSTEM_CPU_CAP_SFENCE |
367                           NV0000_CTRL_SYSTEM_CPU_CAP_WRITE_COMBINING);
368 
369     // Calculate the frequency
370     pSys->cpuInfo.clock = osGetCpuFrequency();
371 
372     // Number of core is available from SCU configuration.
373     pSys->cpuInfo.numPhysicalCpus = osGetCpuCount();
374     pSys->cpuInfo.maxLogicalCpus = pSys->cpuInfo.numPhysicalCpus;
375 
376     // There is no hyper-threading on ARM
377     pSys->cpuInfo.numLogicalCpus = pSys->cpuInfo.numPhysicalCpus;
378 
379     // Zero out the vendor-specific family, model & stepping
380     pSys->cpuInfo.family = 0;
381     pSys->cpuInfo.model  = 0;
382     pSys->cpuInfo.stepping = 0;
383 
384     NvU32 val;
385     NvU32 field;
386 
387     // Retrieve Main ID register
388     val = CP_READ_MIDR_REGISTER();
389 
390     field = GET_BITMASK(MIDR_PRIMARY_PART_NUM, val);
391 
392     switch(field)
393     {
394         case CP_MIDR_PRIMARY_PART_NUM_A9:
395             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARM_A9;
396             DecodeCortexA9Cache(pSys);
397             break;
398         case CP_MIDR_PRIMARY_PART_NUM_A15:
399             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARM_A15;
400             DecodeCortexA15Cache(pSys);
401             break;
402         default:
403             // Narrow down to an unknown arm cpu
404             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ARM_UNKNOWN;
405             NV_PRINTF(LEVEL_ERROR, "CPUID: unknown part number 0x%x.\n",
406                       field);
407             break;
408     }
409 
410     // Host native page size
411 #ifdef PAGE_SIZE
412     pSys->cpuInfo.hostPageSize = PAGE_SIZE;
413 #else
414     pSys->cpuInfo.hostPageSize = 4096;
415 #endif
416 
417     pSys->cpuInfo.bInitialized = NV_TRUE;
418 }
419 
420 #endif // NVCPU_IS_ARM
421 
422 
423 /***************************************************************************/
424 
425 
426 #if NVCPU_IS_PPC64LE
427 
428 /*
429  * ID the CPU.
430  */
431 void RmInitCpuInfo(void)
432 {
433     OBJSYS    *pSys = SYS_GET_INSTANCE();
434 
435     pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_POWERN;
436     pSys->cpuInfo.caps = 0;
437 
438     // Zero out the vendor-specific family, model & stepping
439     pSys->cpuInfo.family = 0;
440     pSys->cpuInfo.model  = 0;
441     pSys->cpuInfo.stepping = 0;
442 
443     // Calculate the frequency
444     pSys->cpuInfo.clock = osGetCpuFrequency();
445 
446     // Number of CPUs.
447     // Should maybe take into account SMT, etc.
448     pSys->cpuInfo.numPhysicalCpus = osGetCpuCount();
449     pSys->cpuInfo.numLogicalCpus = pSys->cpuInfo.numPhysicalCpus;
450     pSys->cpuInfo.maxLogicalCpus = pSys->cpuInfo.numPhysicalCpus;
451 
452     // host native page size
453     pSys->cpuInfo.hostPageSize = 64 * 1024;
454 
455     return;
456 }
457 
458 #endif // NVCPU_IS_PPC64LE
459 
460 
461 /***************************************************************************/
462 
463 
464 #if NVCPU_IS_RISCV64
465 
466 /*
467  * ID the CPU. (stub)
468  */
469 void RmInitCpuInfo(
470     void
471 )
472 {
473     OBJSYS    *pSys = SYS_GET_INSTANCE();
474 
475     // XXX
476     pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_UNKNOWN;
477 
478     // Zero out the vendor-specific family, model & stepping
479     pSys->cpuInfo.family = 0;
480     pSys->cpuInfo.model  = 0;
481     pSys->cpuInfo.stepping = 0;
482 
483     // Calculate the frequency
484     pSys->cpuInfo.clock = 1;
485 
486     // host native page size
487     pSys->cpuInfo.hostPageSize = 4096;
488 }
489 
490 #endif // NVCPU_IS_RISCV64
491 
492 
493 /***************************************************************************/
494 
495 
496 #if NVCPU_IS_X86 || NVCPU_IS_X86_64
497 
498 #include "platform/cpu.h"
499 
500 #if defined(_M_IX86) || defined(NVCPU_X86) || defined(AMD64) || defined(NVCPU_X86_64)
501 
//
// CPUID leaf 1 (EAX=1), EDX feature bits
//
#define CPU_STD_TSC                 NVBIT(4)    // Time stamp counter
#define CPU_STD_CMOV                NVBIT(15)   // Conditional move instructions
#define CPU_STD_CLFSH               NVBIT(19)   // CLFLUSH instruction
#define CPU_STD_MMX                 NVBIT(23)   // MMX
#define CPU_STD_FXSR                NVBIT(24)   // FXSAVE/FXRSTOR; CR4.OSFXSR is available
#define CPU_STD_SSE                 NVBIT(25)   // SSE (Katmai)
#define CPU_STD_SSE2                NVBIT(26)   // SSE2 (Willamette NI)

//
// CPUID leaf 1 (EAX=1), ECX feature bits
//
#define CPU_STD2_SSE3               NVBIT(0)    // SSE3
#define CPU_STD2_SSE41              NVBIT(19)   // SSE4.1
#define CPU_STD2_SSE42              NVBIT(20)   // SSE4.2
#define CPU_STD2_OSXSAVE            NVBIT(27)   // The OS has enabled XSAVE/XRSTOR
#define CPU_STD2_AVX                NVBIT(28)   // AVX

//
// "Extended Feature Flags": CPUID leaf 0x80000001, EDX bits
//
#define CPU_EXT_3DNOW               NVBIT(31)   // 3DNow
#define CPU_EXT_AMD_3DNOW_EXT       NVBIT(30)   // 3DNow with extensions (AMD specific)
#define CPU_EXT_AMD_MMX_EXT         NVBIT(22)   // MMX with extensions (AMD specific)

//
// "Structured Extended Feature Identifiers": CPUID leaf 7 (EAX=7), EBX bits
//
#define CPU_EXT2_ERMS               NVBIT(9)    // Enhanced REP MOVSB/STOSB
527 
/*
 * Identify the chip foundry from the 12-character CPUID vendor string.
 *
 * Each macro takes the Foundry union (not a pointer to it) and compares its
 * three 32-bit words with the vendor string packed four characters per
 * word, first character in the least significant byte:
 *
 *      IS_INTEL   = "GenuineIntel"
 *      IS_AMD     = "AuthenticAMD"
 *      IS_WINCHIP = "CentaurHauls"
 *      IS_CYRIX   = "CyrixInstead"
 *      IS_TRANSM  = "GenuineTMx86"  // Transmeta
 *
 * The argument is evaluated up to three times.
 */
#define IS_INTEL(fndry)                     \
    (((fndry).StrID[0] == 0x756E6547) &&    \
     ((fndry).StrID[1] == 0x49656E69) &&    \
     ((fndry).StrID[2] == 0x6C65746E))

#define IS_AMD(fndry)                       \
    (((fndry).StrID[0] == 0x68747541) &&    \
     ((fndry).StrID[1] == 0x69746E65) &&    \
     ((fndry).StrID[2] == 0x444D4163))

#define IS_WINCHIP(fndry)                   \
    (((fndry).StrID[0] == 0x746E6543) &&    \
     ((fndry).StrID[1] == 0x48727561) &&    \
     ((fndry).StrID[2] == 0x736C7561))

#define IS_CYRIX(fndry)                     \
    (((fndry).StrID[0] == 0x69727943) &&    \
     ((fndry).StrID[1] == 0x736E4978) &&    \
     ((fndry).StrID[2] == 0x64616574))

#define IS_TRANSM(fndry)                    \
    (((fndry).StrID[0] == 0x756E6547) &&    \
     ((fndry).StrID[1] == 0x54656E69) &&    \
     ((fndry).StrID[2] == 0x3638784D))
541 
542 // CPUID Info
543 // Used internally in this source.
544 
545 typedef struct _def_CPUID_info
546 {
547     union
548     {
549         NvU8 String[12];
550         NvU32 StrID[3];
551     } Foundry;
552 
553     NvU32 StandardFeatures;
554     NvU32 ExtendedFeatures;
555 
556     NvU16 Family;
557     NvU16 ExtFamily;
558     NvU16 DisplayedFamily;
559     NvU8 Model;
560     NvU8 ExtModel;
561     NvU8 DisplayedModel;
562     NvU8 Stepping;
563     NvU32 BrandId;
564 } CPUIDINFO, *PCPUIDINFO;
565 
// Forward references.
567 //
568 
569 static void getCpuCounts(OBJSYS *pSys, PCPUIDINFO pCpuidInfo);
570 static NvBool getEmbeddedProcessorName(char *pName, NvU32 size);
571 static void cpuidInfoAMD(OBJSYS *pSys, PCPUIDINFO pCpuidInfo);
572 static void cpuidInfoIntel(OBJSYS *pSys, PCPUIDINFO pCpuidInfo);
573 
574 #if defined(_M_IX86) || defined(NVCPU_X86)
575 static void cpuidInfoWinChip(OBJSYS *pSys, PCPUIDINFO pCpuidInfo);
576 static void cpuidInfoCyrix(OBJSYS *pSys, PCPUIDINFO pCpuidInfo);
577 static void cpuidInfoTransmeta(OBJSYS *pSys, PCPUIDINFO pCpuidInfo);
578 #endif
579 
580 
581 /*
582  * ID the CPU.
583  */
584 
585 void RmInitCpuInfo(void)
586 {
587     OBJSYS    *pSys = SYS_GET_INSTANCE();
588     CPUIDINFO  cpuinfo;
589     NvU32      eax, ebx, ecx, edx;
590     OBJOS     *pOS = SYS_GET_OS(pSys);
591 
592     // Do this only once.
593     if (pSys->cpuInfo.bInitialized)
594         return;
595 
596     // Initialize the processor structure to default values.
597     //
598     pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_UNKNOWN;
599     pSys->cpuInfo.caps = 0;
600     pSys->cpuInfo.clock = 0;
601     pSys->cpuInfo.dataCacheLineSize = 0;
602     pSys->cpuInfo.l1DataCacheSize = 0;
603     pSys->cpuInfo.l2DataCacheSize = 0;
604     pSys->cpuInfo.coresOnDie = 0;
605     pSys->cpuInfo.platformID = 0;
606     portMemSet(pSys->cpuInfo.name, 0, sizeof(pSys->cpuInfo.name));
607 
608     // Init internal structure to default.
609     //
610     portMemSet(&cpuinfo, 0, sizeof(cpuinfo));
611 
612     // Get CPUID stuff for all processors.  We will figure out what to do with it later.
613 
614     // if pOS->osNv_cpuid returns 0, then this cpu does not support cpuid instruction
615     // We just worry about this on the first call...
616     if ( ! pOS->osNv_cpuid(pOS, 0, 0, &eax, &cpuinfo.Foundry.StrID[0],
617             &cpuinfo.Foundry.StrID[2], &cpuinfo.Foundry.StrID[1]))
618         goto Exit;
619 
620     pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx);
621     cpuinfo.Family = (NvU16)((eax >> 8) & 0x0F);
622     cpuinfo.ExtFamily = (NvU16)((eax >> 20) & 0xFF);
623     if (cpuinfo.Family != 0xF)
624     {
625         cpuinfo.DisplayedFamily = cpuinfo.Family;
626     }
627     else
628     {
629         cpuinfo.DisplayedFamily = cpuinfo.ExtFamily + cpuinfo.Family;
630     }
631 
632     cpuinfo.Model = (NvU8)((eax >> 4) & 0x0F);
633     cpuinfo.ExtModel = (NvU8)((eax >> 16) & 0x0F);
634     if (cpuinfo.Family == 6 || cpuinfo.Family == 0xF)
635     {
636         cpuinfo.DisplayedModel = (cpuinfo.ExtModel << 4) + cpuinfo.Model;
637     }
638     else
639     {
640         cpuinfo.DisplayedModel = cpuinfo.Model;
641     }
642 
643     cpuinfo.Stepping = (NvU8)(eax & 0x0F);
644     cpuinfo.StandardFeatures = edx;
645     cpuinfo.BrandId = ((ebx & 0xE0) << 3) | (ebx & 0x1F); // 8bit brandID in 12 bit format
646 
647     // Decode the standard features.  Assume that all CPU vendors use the
648     // standard feature bits to mean the same thing.  Non-Intel vendors use
649     // the extended CPUID to provide non-standard freture bits, so this
650     // should be OK.
651 
652     if (cpuinfo.StandardFeatures & CPU_STD_MMX)
653         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_MMX;
654 
655     if (cpuinfo.StandardFeatures & CPU_STD_CMOV)
656         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_CMOV;
657 
658     if (cpuinfo.StandardFeatures & CPU_STD_CLFSH)
659         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_CLFLUSH;
660 
661     // Check for Streaming SIMD extensions (Katmai)
662     if (cpuinfo.StandardFeatures & CPU_STD_SSE)
663     {
664 
665         // SFENCE is an SSE instruction, but it does not require CR4.OSFXSR.
666         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SFENCE;
667 
668         if (cpuinfo.StandardFeatures & CPU_STD_FXSR)
669         {
670             NvBool check_osfxsr;
671             NvBool check_osxsave;
672             // Before setting the NV0000_CTRL_SYSTEM_CPU_CAP_SSE bit, we'll
673             // also check that CR4.OSFXSR (bit 9) is set, which means the OS
674             // is prepared to switch the additional SSE FP state for us.
675             // CPU_STD_FXSR indicates that CR4.OSFXSR is valid.
676             check_osfxsr = ((cpuinfo.StandardFeatures & CPU_STD_FXSR) != 0) &&
677                            ((pOS->osNv_rdcr4(pOS) & 0x200) != 0);
678 
679             // For NV0000_CTRL_SYSTEM_CPU_CAP_AVX bit, we need:
680             // - CPU_STD2_OSXSAVE - CR4.OSXSAVE is valid
681             // - CR4.OSXSAVE (bit 18) - The OS will the additional FP state
682             //     specified by XCR0
683             // - XCR0 - bits 1 and 2 indicate SSE and AVX support respectively
684             check_osxsave = ((ecx & CPU_STD2_OSXSAVE) != 0) &&
685                             ((pOS->osNv_rdcr4(pOS) & (1<<18)) != 0) &&
686                             ((pOS->osNv_rdxcr0(pOS) & 0x6) != 0);
687             if(check_osfxsr)
688             {
689                 pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE;
690 
691                 // supports SSE2 (Willamette NI) instructions
692                 if (cpuinfo.StandardFeatures & CPU_STD_SSE2)
693                     pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE2;
694 
695                 // Prescott New Instructions
696                 if (ecx & CPU_STD2_SSE3)
697                     pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE3;
698 
699                 // Penryn subset of SSE4
700                 if (ecx & CPU_STD2_SSE41)
701                     pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE41;
702 
703                 // Nehalem subset of SSE4
704                 if (ecx & CPU_STD2_SSE42)
705                     pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SSE42;
706             }
707 
708             // If the OS setup XSAVE / XRESTOR (and set the AVX bit)
709             //   enable AVX
710             if (check_osxsave)
711             {
712                 if (ecx & CPU_STD2_AVX)
713                     pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_AVX;
714             }
715         }
716     }
717 
718     if (pOS->osNv_cpuid(pOS, 7, 0, &eax, &ebx, &ecx, &edx))
719     {
720         if (ebx & CPU_EXT2_ERMS)
721         {
722             pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_ERMS;
723         }
724     }
725 
726     // Calculate the frequency
727     if (cpuinfo.StandardFeatures & CPU_STD_TSC)
728         pSys->cpuInfo.clock = osGetCpuFrequency();
729 
730     // Get the extended features (if they exist).
731     if (pOS->osNv_cpuid(pOS, 0x80000000, 0, &eax, &ebx, &ecx, &edx) && eax >= 0x80000001)
732     {
733         if (pOS->osNv_cpuid(pOS, 0x80000001, 0, &eax, &ebx, &ecx, &edx))
734         {
735             cpuinfo.ExtendedFeatures = edx;
736             // if 8 bit brandId is 0
737             if (!cpuinfo.BrandId)
738             {
739                 // Check for 12 bit brand ID
740                 cpuinfo.BrandId = (ebx & 0xfff);
741             }
742         }
743    }
744 
745     // Get the embedded processor name (if there is one).
746     getEmbeddedProcessorName(pSys->cpuInfo.name, sizeof(pSys->cpuInfo.name));
747 
748     if (IS_INTEL(cpuinfo.Foundry))
749         cpuidInfoIntel(pSys, &cpuinfo);
750     else if (IS_AMD(cpuinfo.Foundry))
751         cpuidInfoAMD(pSys, &cpuinfo);
752 #if defined(_M_IX86) || defined(NVCPU_X86)
753     else if (IS_WINCHIP(cpuinfo.Foundry))
754         cpuidInfoWinChip(pSys, &cpuinfo);
755     else if (IS_CYRIX(cpuinfo.Foundry))
756         cpuidInfoCyrix(pSys, &cpuinfo);
757     else if (IS_TRANSM(cpuinfo.Foundry))
758         cpuidInfoTransmeta(pSys, &cpuinfo);
759 #endif
760     else
761     {
762         // We are clueless.  If the processor had an embedded name, its already in there.
763         // If not, use the foundary name as the processor name.
764         if (pSys->cpuInfo.name[0] == 0)
765             portMemCopy(pSys->cpuInfo.name, sizeof(cpuinfo.Foundry.String), cpuinfo.Foundry.String, sizeof(cpuinfo.Foundry.String));
766     }
767 
768     // Pick up the vendor-specific family & model
769     pSys->cpuInfo.family = cpuinfo.DisplayedFamily;
770     pSys->cpuInfo.model  = cpuinfo.DisplayedModel;
771 
772 #if defined(AMD64) || defined(NVCPU_X86_64)
773     // The WinXP AMD-64 does not context switch the x87/MMX/3DNow registers.  We have to zap the bits
774     // even though the CPU supports them.
775     // The OS should somehow tell us this, like CR4.OSFXSR above.  Need to find a better way...
776 
777     pSys->cpuInfo.caps &= ~(NV0000_CTRL_SYSTEM_CPU_CAP_MMX |
778                             NV0000_CTRL_SYSTEM_CPU_CAP_MMX_EXT |
779                             NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW |
780                             NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW_EXT);
781 #endif
782 
783     pSys->cpuInfo.stepping = cpuinfo.Stepping;
784     pSys->cpuInfo.brandId = cpuinfo.BrandId;
785 
786  Exit:
787 
788     // set physical/logical processor counts
789     getCpuCounts(pSys, &cpuinfo);
790 
791     // host page size used when allocated host-page-aligned objects in heap
792 #ifdef PAGE_SIZE
793     pSys->cpuInfo.hostPageSize = PAGE_SIZE;
794 #else
795     pSys->cpuInfo.hostPageSize = 4096;
796 #endif
797 
798     pSys->cpuInfo.bInitialized = NV_TRUE;
799 }
800 
801 //
802 // This routine determines the number of physical processors enabled
803 // on the system as well as the number of logical processors per
804 // physical processors.  Intel's HyperThreading technology can yield
805 // a logical processor count of > 1 per physical processor.
806 //
807 // This code was more or less lifted from some Intel sample code.
808 //
809 
810 #define INTEL_HT_BIT             0x10000000      // EDX[28]
811 #define INTEL_CORE_CNT           0xFC000000      // EAX[31:26]
812 #define INTEL_LOGICAL_CNT        0x00FF0000      // EBX[23:16]
813 #define INTEL_LOGICAL_CNT_LEAFB  0x0000FFFF      // EBX[15:0]
814 #define AMD_HT_BIT               0x10000000      // EDX[28]
815 #define AMD_LOGICAL_CNT          0x00FF0000      // EBX[23:16]
816 
817 static void
818 getCpuCounts(OBJSYS *pSys, PCPUIDINFO pCpuidInfo)
819 {
820     OBJOS *pOS = SYS_GET_OS(pSys);
821     NvU32  numPhysicalCpus, numLogicalCpus, maxLogicalCpus;
822     NvU32  eax = 0;
823     NvU32  ebx = 0;
824     NvU32  ecx = 0;
825     NvU32  edx = 0;
826 
827     //
828     // First use OS call to get number of logical CPUs.
829     //
830     numLogicalCpus = osGetCpuCount();
831 
832     //
833     // Assume the number of physical CPUs is the same as the number of logical CPUs.
834     //
835     numPhysicalCpus = numLogicalCpus;
836     maxLogicalCpus = numLogicalCpus;
837 
838     // There is no reliable way to tell if hyper-threading is enabled.  So, if
839     // there is more than 1 logical CPUs AND the CPU is hyperthreading capable,
840     // then assume that HT is enabled.
841     //
842     // This should give the right answer for most cases.  Some HT capable dual
843     // CPU systems with HT disabled will be detected as single GPU systems with
844     // HT enabled.  While less than ideal, this should be OK, since logical CPUs
845     // is 2 in both cases.
846     //
847 #if defined(_M_IX86) || defined(NVCPU_X86) || defined(NVCPU_X86_64)
848     if (IS_INTEL(pCpuidInfo->Foundry))
849     {
850         NvBool cpuHasLeafB = NV_FALSE;
851 
852         pOS->osNv_cpuid(pOS, 0, 0, &eax, &ebx, &ecx, &edx);
853         if (eax >= 0xB)
854         {
855             pOS->osNv_cpuid(pOS, 0xB, 0, &eax, &ebx, &ecx, &edx);
856             if (ebx != 0)
857             {
858                 cpuHasLeafB = NV_TRUE;
859             }
860         }
861 
862         pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx);
863 
864         if (edx & INTEL_HT_BIT)
865         {
866             NvU32 CpuHT;
867 
868             if (cpuHasLeafB)
869             {
870                 pOS->osNv_cpuid(pOS, 0xB, 0, &eax, &ebx, &ecx, &edx);
871                 CpuHT = (ebx & INTEL_LOGICAL_CNT_LEAFB);
872                 pOS->osNv_cpuid(pOS, 0xB, 1, &eax, &ebx, &ecx, &edx);
873                 maxLogicalCpus = (ebx & INTEL_LOGICAL_CNT_LEAFB);
874                 numPhysicalCpus = maxLogicalCpus/CpuHT;
875             }
876             else
877             {
878                 pOS->osNv_cpuid(pOS, 0, 0, &eax, &ebx, &ecx, &edx);
879                 if (eax >=4)
880                 {
881                     pOS->osNv_cpuid(pOS, 4, 0, &eax, &ebx, &ecx, &edx);
882                     numPhysicalCpus = ((eax & INTEL_CORE_CNT) >> 26) + 1;
883                     pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx);
884                     maxLogicalCpus = (ebx & INTEL_LOGICAL_CNT) >> 16;
885                     CpuHT = maxLogicalCpus/numPhysicalCpus;
886                 }
887             }
888 
889             if (numPhysicalCpus > numLogicalCpus)
890                 numPhysicalCpus = numLogicalCpus;
891 
892             if (numPhysicalCpus < 1)
893                 numPhysicalCpus = 1;
894 
895             pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_HT_CAPABLE;
896         }
897     }
898     else if(IS_AMD(pCpuidInfo->Foundry))
899     {
900         pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx);
901         if( edx & AMD_HT_BIT )
902         {
903             maxLogicalCpus = (ebx & AMD_LOGICAL_CNT) >> 16;
904         }
905     }
906 
907     NV_PRINTF(LEVEL_INFO, "RmInitCpuCounts: physical 0x%x logical 0x%x\n",
908               numPhysicalCpus, numLogicalCpus);
909 #endif
910 
911     if(maxLogicalCpus < numLogicalCpus)
912         maxLogicalCpus = numLogicalCpus;
913 
914 #if NVCPU_IS_FAMILY_X86
915     // bug1974464: Ryzen physical CPU count is getting misreported
916     if (IS_AMD(pCpuidInfo->Foundry) && (pCpuidInfo->DisplayedFamily == 0x17))
917     {
918         numPhysicalCpus = NV_MAX(maxLogicalCpus/2, 1);
919     }
920 #endif
921 
922     pSys->cpuInfo.numPhysicalCpus = numPhysicalCpus;
923     pSys->cpuInfo.numLogicalCpus = numLogicalCpus;
924     pSys->cpuInfo.maxLogicalCpus = maxLogicalCpus;
925 
926     return;
927 }
928 
929 
930 // getEmbeddedProcessorName
931 //
932 // All processors that have extended CPUID info up through 0x80000004 have an embedded name.
933 //
934 static NvBool getEmbeddedProcessorName(char *pName, NvU32 size)
935 {
936     NvU32       op, eax, ebx, ecx, edx;
937     char       *p       = pName;
938     OBJSYS     *pSys    = SYS_GET_INSTANCE();
939     OBJOS      *pOS     = SYS_GET_OS(pSys);
940     const NvU32 maxSize = 48; // max 48 bytes on x86 CPUs
941 
942     NV_ASSERT_OR_RETURN(size >= maxSize, 0);
943 
944     pName[size > maxSize ? maxSize : size-1] = 0;  // Make sure it has a zero at the end.
945 
946     // Is there is a enough data?  If not bail.
947     if (pOS->osNv_cpuid(pOS, 0x80000000, 0, &eax, &ebx, &ecx, &edx) == 0 || eax < 0x80000004)
948         return NV_FALSE;
949 
950     // Yes, get 48 bytes of CPU name.
951     for (op = 0x80000002; op < 0x80000005; op++, p += 16)
952         pOS->osNv_cpuid(pOS, op, 0, (NvU32 *)&p[0], (NvU32 *)&p[4], (NvU32 *)&p[8], (NvU32 *)&p[12]);
953 
954     // Kill leading spaces. (Intel's string is right justified.)
955     if (*pName == ' ')
956     {
957         p = pName;
958         while (*p == ' ')
959             p++;
960         do
961             *(pName++) = *(p++);
962         while (*p);
963     }
964 
965     return NV_TRUE;
966 }
967 
968 
969 // Decode Prescott style cache descriptors.
970 //
971 static NvBool DecodePrescottCache(OBJSYS *pSys)
972 {
973     NvU32   eax, ebx, ecx, edx;
974     OBJOS  *pOS = SYS_GET_OS(pSys);
975 
976     // Decode the cache desciptors.
977 
978     if (pOS->osNv_cpuid(pOS, 0, 0, &eax, &ebx, &ecx, &edx))
979     {
980         if (eax >= 4 && eax < 0x80000000)     // CPU support new (Prescott) cache descrtiptors?
981         {
982             // From Prescot New Instructions Software Developers Guide 252490-003
983 
984             NvU32 uLevel;
985             NvU32 uLineSize;
986             NvU32 uCacheSize;
987             int i;
988 
989             // Loop over the cache descriptors by incrementing sub-function.  This will never get
990             // get run on pre-Prescott CPUs since they do not support CPUID 4, but limit number of
991             // cache descriptors to 20 just in case, so it does not get in an infinite loop.
992             //
993             for (i = 0; i < 20; i++)
994             {
995                 pOS->osNv_cpuid(pOS, 4, i, &eax, &ebx, &ecx, &edx);
996 
997                 if (i == 0)
998                 {
999                     pSys->cpuInfo.coresOnDie = (eax >> 26) + 1;// eax[31:26] Processor cores on the chip
1000                 }
1001 
1002                 switch (eax & 0x1f)      // Cache type.
1003                 {
1004                     case 0:              // No more cache descriptors.
1005                         i = 100;         // Break out of loop.
1006                         break;
1007 
1008                     case 1:              // Data cache.
1009                     case 3:              // Unified cache.
1010                         uLevel =     (eax >> 5) & 0x7;             // eax[7:5]    Cache level
1011                         uLineSize =  (ebx & 0xfff) + 1;            // ebx[11:0]   System Coherency Line Size
1012 
1013                         uCacheSize = uLineSize                     // ebx[11:0]   System Coherency Line Size
1014                                      * (((ebx >> 12) & 0x3FF) + 1) // ebx[21:12]  Physical line partitions
1015                                      * (((ebx >> 22) & 0x3FF) + 1) // ebx[21:12]  Ways of associativity
1016                                      * (ecx + 1)                   // ecx[31:0]   Number of sets
1017                                      / 1024;                       // Put it in KB.
1018 
1019                         pSys->cpuInfo.dataCacheLineSize = uLineSize;
1020 
1021                         if (uLevel == 1)
1022                             pSys->cpuInfo.l1DataCacheSize = uCacheSize;
1023                         else if (pSys->cpuInfo.l2DataCacheSize < uCacheSize)
1024                             pSys->cpuInfo.l2DataCacheSize = uCacheSize;
1025                         break;
1026 
1027                     default:             // Instruction of unknown cache type.
1028                         break;           // Do nothing.
1029                 }
1030             }
1031 
1032             return NV_TRUE;
1033         }
1034     }
1035 
1036     return NV_FALSE;
1037 }
1038 
1039 #if defined(_M_IX86) || defined(NVCPU_X86)
1040 static void DecodeIntelCacheEntry(OBJSYS *pSys, NvU8 cacheEntry)
1041 {
1042     // From Intel's AP-485 (11/03).
1043     //
1044     // 00h Null
1045     // 01h Instruction TLB: 4K-byte Pages, 4-way set associative, 32 entries
1046     // 02h Instruction TLB: 4M-byte Pages, fully associative, 2 entries
1047     // 03h Data TLB: 4K-byte Pages, 4-way set associative, 64 entries
1048     // 04h Data TLB: 4M-byte Pages, 4-way set associative, 8 entries
1049     // 06h 1st-level instruction cache: 8K-bytes, 4-way set associative, 32 byte line size
1050     // 08h 1st-level instruction cache: 16K-bytes, 4-way set associative, 32 byte line size
1051     // 0Ah 1st-level data cache: 8K-bytes, 2-way set associative, 32 byte line size
1052     // 0Ch 1st-level data cache: 16K-bytes, 4-way set associative, 32 byte line size
1053     // 22h 3rd-level cache: 512K-bytes, 4-way set associative, sectored cache, 64-byte line size
1054     // 23h 3rd-level cache: 1M-bytes, 8-way set associative, sectored cache, 64-byte line size
1055     // 25h 3rd-level cache: 2MB, 8-way set associative, sectored cache, 64-byte line size
1056     // 29h 3rd-level cache: 4MB, 8-way set associative, sectored cache, 64-byte line size
1057     // 2Ch 1st-level data cache: 32K-bytes, 8-way set associative, 64-byte line size
1058     // 30h 1st-level instruction cache: 32K-bytes, 8-way set associative, 64-byte line size
1059     // 39h 2nd-level cache: 128K-bytes, 4-way set associative, sectored cache, 64-byte line size
1060     // 3Bh 2nd-level cache: 128KB, 2-way set associative, sectored cache, 64-byte line size
1061     // 3Ch 2nd-level cache: 256K-bytes, 4-way set associative, sectored cache, 64-byte line size
1062     // 40h No 2nd-level cache or, if processor contains a valid 2nd-level cache, no3rd-level cache
1063     // 41h 2nd-level cache: 128K-bytes, 4-way set associative, 32 byte line size
1064     // 42h 2nd-level cache: 256K-bytes, 4-way set associative, 32 byte line size
1065     // 43h 2nd-level cache: 512K-bytes, 4-way set associative, 32 byte line size
1066     // 44h 2nd-level cache: 1M-bytes, 4-way set associative, 32 byte line size
1067     // 45h 2nd-level cache: 2M-bytes, 4-way set associative, 32 byte line size
1068     // 50h Instruction TLB: 4K, 2M or 4M pages, fully associative, 64 entries
1069     // 51h Instruction TLB: 4K, 2M or 4M pages, fully associative, 128 entries
1070     // 52h Instruction TLB: 4K, 2M or 4M pages, fully associative, 256 entries
1071     // 5Bh Data TLB: 4K or 4M pages, fully associative, 64 entries
1072     // 5Ch Data TLB: 4K or 4M pages, fully associative, 128 entries
1073     // 5Dh Data TLB: 4K or 4M pages, fully associative, 256 entries
1074     // 66h 1st-level data cache: 8K-bytes, 4-way set associative, sectored cache, 64-byte line size
1075     // 67h 1st-level data cache: 16K-bytes, 4-way set associative, sectored cache, 64-byte line size
1076     // 68h 1st-level data cache: 32K-bytes, 4 way set associative, sectored cache, 64-byte line size
1077     // 70h Trace cache: 12K-uops, 8-way set associative
1078     // 71h Trace cache: 16K-uops, 8-way set associative
1079     // 72h Trace cache: 32K-uops, 8-way set associative
1080     // 79h 2nd-level cache: 128K-bytes, 8-way set associative, sectored cache, 64-byte line size
1081     // 7Ah 2nd-level cache: 256K-bytes, 8-way set associative, sectored cache, 64-byte line size
1082     // 7Bh 2nd-level cache: 512K-bytes, 8-way set associative, sectored cache, 64-byte line size
1083     // 7Ch 2nd-level cache: 1M-bytes, 8-way set associative, sectored cache, 64-byte line size
1084     // 82h 2nd-level cache: 256K-bytes, 8-way set associative, 32 byte line size
1085     // 83h 2nd-level cache: 512K-bytes, 8-way set associative, 32 byte line size
1086     // 84h 2nd-level cache: 1M-bytes, 8-way set associative, 32 byte line size
1087     // 85h 2nd-level cache: 2M-bytes, 8-way set associative, 32 byte line size
1088     // 86h 2nd-level cache: 512K-bytes, 4-way set associative, 64 byte line size
1089     // 87h 2nd-level cache: 1M-bytes, 8-way set associative, 64 byte line size
1090     // B0h Instruction TLB: 4K-byte Pages, 4-way set associative, 128 entries
1091     // B3h Data TLB: 4K-byte Pages, 4-way set associative, 128 entries
1092     //
1093     // From Intel via Michael Diamond (under NDA):
1094     // Fixes bug 75982 - Reporting incorrect cache info on Banias mobile platform.
1095     //
1096     // 7D 2M; 8 way; 64 byte line size; unified on-die
1097     // 78 1M; 8 way; 64 byte line size, unified on-die
1098     //
1099     // Note: Newer GPUs have added an additional cache level.  What used to be L2 is
1100     // now L3.  Set the L2 cache to the largest L2 or L3 descriptor found.
1101 
1102     switch (cacheEntry)
1103     {
1104         case 0x0A: // 1st-level data cache: 8K-bytes, 2-way set associative, 32 byte line size
1105             pSys->cpuInfo.l1DataCacheSize = 8;
1106             pSys->cpuInfo.dataCacheLineSize = 32;
1107             break;
1108 
1109         case 0x0C: // 1st-level data cache: 16K-bytes, 4-way set associative, 32 byte line size
1110             pSys->cpuInfo.l1DataCacheSize = 16;
1111             pSys->cpuInfo.dataCacheLineSize = 32;
1112             break;
1113 
1114         case 0x66: // 1st-level data cache: 8K-bytes, 4-way set associative, sectored cache, 64-byte line size
1115             pSys->cpuInfo.l1DataCacheSize = 8;
1116             pSys->cpuInfo.dataCacheLineSize = 64;
1117             break;
1118 
1119         case 0x67: // 1st-level data cache: 16K-bytes, 4-way set associative, sectored cache, 64-byte line size
1120             pSys->cpuInfo.l1DataCacheSize = 16;
1121             pSys->cpuInfo.dataCacheLineSize = 64;
1122             break;
1123 
1124         case 0x2C: // 1st-level data cache: 32K-bytes, 8-way set associative, 64-byte line size
1125         case 0x68: // 1st-level data cache: 32K-bytes, 4 way set associative, sectored cache, 64-byte line size
1126             pSys->cpuInfo.l1DataCacheSize = 32;
1127             pSys->cpuInfo.dataCacheLineSize = 64;
1128             break;
1129 
1130         case 0x41: // 2nd-level cache: 128K-bytes, 4-way set associative, 32 byte line size
1131             pSys->cpuInfo.dataCacheLineSize = 32;
1132             if (pSys->cpuInfo.l2DataCacheSize < 128)
1133                 pSys->cpuInfo.l2DataCacheSize = 128;
1134             break;
1135 
1136         case 0x39: // 2nd-level cache: 128K-bytes, 4-way set associative, sectored cache, 64-byte line size
1137         case 0x3B: // 2nd-level cache: 128KB, 2-way set associative, sectored cache, 64-byte line size
1138         case 0x79: // 2nd-level cache: 128K-bytes, 8-way set associative, sectored cache, 64-byte line size
1139             pSys->cpuInfo.dataCacheLineSize = 64;
1140             if (pSys->cpuInfo.l2DataCacheSize < 128)
1141                 pSys->cpuInfo.l2DataCacheSize = 128;
1142             break;
1143 
1144         case 0x42: // 2nd-level cache: 256K-bytes, 4-way set associative, 32 byte line size
1145         case 0x82: // 2nd-level cache: 256K-bytes, 8-way set associative, 32 byte line size
1146             pSys->cpuInfo.dataCacheLineSize = 32;
1147             if (pSys->cpuInfo.l2DataCacheSize < 256)
1148                 pSys->cpuInfo.l2DataCacheSize = 256;
1149             break;
1150 
1151         case 0x3C: // 2nd-level cache: 256K-bytes, 4-way set associative, sectored cache, 64-byte line size
1152         case 0x7A: // 2nd-level cache: 256K-bytes, 8-way set associative, sectored cache, 64-byte line size
1153             pSys->cpuInfo.dataCacheLineSize = 64;
1154             if (pSys->cpuInfo.l2DataCacheSize < 256)
1155                 pSys->cpuInfo.l2DataCacheSize = 256;
1156             break;
1157 
1158         case 0x43: // 2nd-level cache: 512K-bytes, 4-way set associative, 32 byte line size
1159         case 0x83: // 2nd-level cache: 512K-bytes, 8-way set associative, 32 byte line size
1160             pSys->cpuInfo.dataCacheLineSize = 32;
1161             if (pSys->cpuInfo.l2DataCacheSize < 512)
1162                 pSys->cpuInfo.l2DataCacheSize = 512;
1163             break;
1164 
1165         case 0x22: // 3rd-level cache: 512K-bytes, 4-way set associative, sectored cache, 64-byte line size
1166         case 0x7B: // 2nd-level cache: 512K-bytes, 8-way set associative, sectored cache, 64-byte line size
1167         case 0x86: // 2nd-level cache: 512K-bytes, 4-way set associative, 64 byte line size
1168             pSys->cpuInfo.dataCacheLineSize = 64;
1169             if (pSys->cpuInfo.l2DataCacheSize < 512)
1170                 pSys->cpuInfo.l2DataCacheSize = 512;
1171             break;
1172 
1173         case 0x44: // 2nd-level cache: 1M-bytes, 4-way set associative, 32 byte line size
1174         case 0x84: // 2nd-level cache: 1M-bytes, 8-way set associative, 32 byte line size
1175             pSys->cpuInfo.dataCacheLineSize = 32;
1176             if (pSys->cpuInfo.l2DataCacheSize < 1024)
1177                 pSys->cpuInfo.l2DataCacheSize = 1024;
1178             break;
1179 
1180         case 0x23: // 3rd-level cache: 1M-bytes, 8-way set associative, sectored cache, 64-byte line size
1181         case 0x78: // 1M; 8 way; 64 byte line size, unified on-die
1182         case 0x7C: // 2nd-level cache: 1M-bytes, 8-way set associative, sectored cache, 64-byte line size
1183         case 0x87: // 2nd-level cache: 1M-bytes, 8-way set associative, 64 byte line size
1184             pSys->cpuInfo.dataCacheLineSize = 64;
1185             if (pSys->cpuInfo.l2DataCacheSize < 1024)
1186                 pSys->cpuInfo.l2DataCacheSize = 1024;
1187             break;
1188 
1189         case 0x45: // 2nd-level cache: 2M-bytes, 4-way set associative, 32 byte line size
1190         case 0x85: // 2nd-level cache: 2M-bytes, 8-way set associative, 32 byte line size
1191             pSys->cpuInfo.dataCacheLineSize = 32;
1192             if (pSys->cpuInfo.l2DataCacheSize < 2048)
1193                 pSys->cpuInfo.l2DataCacheSize = 2048;
1194             break;
1195 
1196         case 0x25: // 3rd-level cache: 2MB, 8-way set associative, sectored cache, 64-byte line size
1197         case 0x7D: // 2M; 8 way; 64 byte line size; unified on-die
1198             pSys->cpuInfo.dataCacheLineSize = 64;
1199             if (pSys->cpuInfo.l2DataCacheSize < 2048)
1200                 pSys->cpuInfo.l2DataCacheSize = 2048;
1201             break;
1202 
1203         case 0x29: // 3rd-level cache: 4MB, 8-way set associative, sectored cache, 64-byte line size
1204             pSys->cpuInfo.dataCacheLineSize = 64;
1205             if (pSys->cpuInfo.l2DataCacheSize < 4096)
1206                 pSys->cpuInfo.l2DataCacheSize = 4096;
1207             break;
1208     }
1209 }
1210 
1211 static void DecodeIntelCacheRegister(OBJSYS *pSys, NvU32 cacheRegister /* punny, huh? */)
1212 {
1213     if ((cacheRegister & NVBIT(31)) == 0)  // If bit 31 is set, it is reserved.
1214     {
1215         DecodeIntelCacheEntry(pSys, (NvU8)(cacheRegister >> 24));
1216         DecodeIntelCacheEntry(pSys, (NvU8)(cacheRegister >> 16));
1217         DecodeIntelCacheEntry(pSys, (NvU8)(cacheRegister >> 8));
1218         DecodeIntelCacheEntry(pSys, (NvU8)cacheRegister);
1219     }
1220 }
1221 #endif
1222 
1223 static void cpuidInfoIntel(OBJSYS *pSys, PCPUIDINFO pCpuidInfo)
1224 {
1225     NvU32   eax, ebx, ecx, edx;
1226     OBJOS  *pOS = SYS_GET_OS(pSys);
1227 
1228     if (pCpuidInfo->Family == 5)
1229     {
1230         if (pCpuidInfo->Model == 4)
1231             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P55;
1232         else
1233             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P5;
1234     }
1235     else if (pCpuidInfo->Family == 6)
1236     {
1237         switch (pCpuidInfo->DisplayedModel)
1238         {
1239             case 1:                                    // Pentium Pro
1240                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P6;
1241                 break;
1242 
1243             case 3:                                    // Pentium II
1244                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P2;
1245                 break;
1246 
1247             case 5:                                    // Pentium II, Pentium II Xeon, or Celeron
1248                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P2XC;
1249                 break;
1250 
1251             case 6:                                    // Pentium II Celeron-A
1252                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_CELA;
1253                 break;
1254 
1255             case 7:                                    // Pentium III or Pentium III Xeon (Katmai)
1256                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P3;
1257                 break;
1258 
1259             case 15:                                   // Conroe, Core2 Duo
1260                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_CORE2;
1261                 break;
1262 
1263             case 22:                                   // Celeron model 16h (65nm)
1264                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_CELN_M16H;
1265                 break;
1266 
1267             case 23:                                   // Intel Core2 Extreme/Intel Xeon model 17h (45nm)
1268                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_CORE2_EXTRM;
1269                 break;
1270 
1271             case 28:
1272                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_ATOM;
1273                 break;
1274 
1275             case 8:                                    // Pentium III, Pentium III Xeon, or Celeron (Coppermine, 0.18 micron)
1276             case 10:                                   // Pentium III Xeon (Tualatin, 0.13 micron)
1277             case 11:                                   // Pentium III, or Celeron (Tualatin, 0.13 micron)
1278             default:                                   // If it is a new family 6, it is a Pentium III of some type.
1279                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P3_INTL2;
1280                 break;
1281         }
1282         // Flag processors that may be affected by bug 124888.  At this time,
1283         // we believe these are Pentium III and Pentium M processors.  The
1284         // model numbers for these processors in Family 6 are:
1285         //   7 - Pentium III or Pentium III Xeon
1286         //   8 - Pentium III, Pentium III Xeon, or Celeron
1287         //   9 - Pentium M
1288         //  10 - Pentium III Xeon
1289         //  11 - Pentium III
1290         //  12 - ???
1291         //  13 - Pentium M ("Dothan")
1292         //  14 - ???
1293         //  15 - Core 2 (bug 272047)
1294         if (pCpuidInfo->Model >= 7)
1295         {
1296             pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_NEEDS_WAR_124888;
1297         }
1298     }
1299     else if (pCpuidInfo->Family == 0x0F)
1300     {
1301         // Model 0 & 1 == Pentium 4 or Pentium 4 Xeon (Willamette, 423 or 478-pin packages, 0.18 micron)
1302         // Model 2 == Pentium 4 or Pentium 4 Xeon (Northwood, 478-pin package for brookdale, 0.13 micron)
1303         //
1304         // Be careful if you change this.  Both D3D and OpenGL are enabling
1305         // performance options based on NV0000_CTRL_SYSTEM_CPU_TYPE_P4.
1306         //
1307         pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_P4;
1308 
1309         // The first P4s (pre-Northwood ones) have a performance problem
1310         // when mixing write combined and cached writes. This is fixed
1311         // with model revision 2.
1312         if ((pCpuidInfo->Model == 0) || (pCpuidInfo->Model == 1))
1313         {
1314             pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_NEEDS_WC_WORKAROUND;
1315         }
1316     }
1317 
1318     if (pCpuidInfo->Family == 0xF || (pCpuidInfo->Family == 6 && pCpuidInfo->Model >= 7))
1319     {
1320         if (pOS->osNv_cpuid(pOS, 0x17, 0, &eax, &ebx, &ecx, &edx))
1321             pSys->cpuInfo.platformID = (edx >> 18) & 7;        // edx[20:18]   PlatformID (package type)
1322     }
1323 
1324     // Decode the cache desciptors.
1325     if (!DecodePrescottCache(pSys))
1326     {
1327 #if defined(_M_IX86) || defined(NVCPU_X86)
1328 
1329         // Prescott style cache descriptors are not supported.  Fall back to older style.
1330         //
1331         if (pOS->osNv_cpuid(pOS, 0, 0, &eax, &ebx, &ecx, &edx))
1332         {
1333             if (eax >= 2)                    // CPU support old cache descrtiptors?
1334             {
1335                 pOS->osNv_cpuid(pOS, 2, 0, &eax, &ebx, &ecx, &edx);
1336 
1337                 if ((eax & 0xff) == 1)  // AL contains number of times CPU must be called.  This will be 1 forever.
1338                 {
1339                     DecodeIntelCacheRegister(pSys, eax & 0xffffff00);
1340                     DecodeIntelCacheRegister(pSys, ebx);
1341                     DecodeIntelCacheRegister(pSys, ecx);
1342                     DecodeIntelCacheRegister(pSys, edx);
1343                 }
1344             }
1345         }
1346 #endif
1347     }
1348 }
1349 
1350 static void cpuidInfoAMD(OBJSYS *pSys, PCPUIDINFO pCpuidInfo)
1351 {
1352     NvU32   eax = 0;
1353     NvU32   ebx = 0;
1354     NvU32   ecx = 0;
1355     NvU32   edx = 0;
1356 
1357     OBJOS  *pOS = SYS_GET_OS(pSys);
1358     NvU32 largestExtendedFunctionNumberSupported = 0x80000000;
1359 
1360     if (pCpuidInfo->Family == 5)                // K5, K6, K6-2 with 3DNow, K6-3
1361     {
1362         if (pCpuidInfo->Model < 6)
1363             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K5;
1364         else if (pCpuidInfo->Model < 8)
1365             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K6;
1366         else if (pCpuidInfo->Model == 8)
1367             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K62;
1368         else if (pCpuidInfo->Model == 9)
1369             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K63;
1370     }
1371     else if (pCpuidInfo->Family == 6)           // K7
1372     {
1373         // Family 6 is a mixture of Athlon and Duron processors.  Just set the
1374         // processor type to Athlon.  The processor name will show the branding.
1375         pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K7;
1376     }
1377     else if (pCpuidInfo->Family == 15)          // K8
1378     {
1379         // If family is 15, we need to use AMD's extended family/model information.
1380         pOS->osNv_cpuid(pOS, 1, 0, &eax, &ebx, &ecx, &edx);
1381         pCpuidInfo->Family = (NvU16)(((eax >> 8) & 0x0F) + ((eax >> 16) & 0xFF0));  // 27:20 concat 11:8
1382         pCpuidInfo->Model  = (NvU8) (((eax >> 4) & 0x0F) + ((eax >> 12) & 0xF0));   // 19:16 concat 7:4
1383 
1384         // Differentiate K8, K10, K11, RYZEN, etc
1385         switch( pCpuidInfo->Family & 0xFF0)
1386         {
1387             case 0x000:
1388                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K8;
1389                 break;
1390             case 0x010:
1391                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K10;
1392                 break;
1393             case 0x020:
1394                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K11;
1395                 break;
1396             case 0x080:
1397                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_RYZEN;
1398                 break;
1399             default:
1400                 NV_PRINTF(LEVEL_ERROR,
1401                           "Unrecognized AMD processor in cpuidInfoAMD\n");
1402                 pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_K8;
1403                 break;
1404         }
1405     }
1406 
1407     if (pCpuidInfo->ExtendedFeatures & CPU_EXT_3DNOW)
1408         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW;      // 3DNow
1409 
1410     if (pCpuidInfo->ExtendedFeatures & CPU_EXT_AMD_3DNOW_EXT)
1411         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW_EXT;  // 3DNow, with Extensions (AMD specific)
1412 
1413     if (pCpuidInfo->ExtendedFeatures & CPU_EXT_AMD_MMX_EXT)
1414     {
1415         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_MMX_EXT;    // MMX, with Extensions (AMD specific)
1416         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_SFENCE;
1417     }
1418 
1419     // Get the cache info.
1420     if (pOS->osNv_cpuid(pOS, 0x80000000, 0, &eax, &ebx, &ecx, &edx))
1421     {
1422         largestExtendedFunctionNumberSupported = eax;
1423 
1424         if (largestExtendedFunctionNumberSupported >= 0x80000006)
1425         {
1426             // L1 cache
1427             if (pOS->osNv_cpuid(pOS, 0x80000005, 0, &eax, &ebx, &ecx, &edx))
1428             {
1429                 pSys->cpuInfo.dataCacheLineSize = ecx & 0xff;
1430                 pSys->cpuInfo.l1DataCacheSize = ecx >> 24;
1431             }
1432 
1433             // L2 cache
1434             if (pOS->osNv_cpuid(pOS, 0x80000006, 0, &eax, &ebx, &ecx, &edx))
1435                 pSys->cpuInfo.l2DataCacheSize = ecx >> 16;
1436         }
1437 
1438         // Get the SEV capability info
1439         if ((largestExtendedFunctionNumberSupported >= 0x8000001f) &&
1440             pOS->osNv_cpuid(pOS, 0x8000001f, 0, &eax, &ebx, &ecx, &edx))
1441         {
1442             //
1443             // EAX[1] stores capability info
1444             // ECX[31:0] stores # of encrypted guests supported simultaneously
1445             //
1446             if (eax & 0x2)
1447             {
1448                 pSys->cpuInfo.bSEVCapable = NV_TRUE;
1449                 pSys->cpuInfo.maxEncryptedGuests = ecx;
1450             }
1451         }
1452     }
1453 }
1454 
1455 
1456 #if defined(_M_IX86) || defined(NVCPU_X86)
1457 
1458 static void cpuidInfoWinChip(OBJSYS *pSys, PCPUIDINFO pCpuidInfo)
1459 {
1460     if (pCpuidInfo->Family == 5)                // Winchip C6, Winchip2 w/ 3DNow
1461     {
1462         if (pCpuidInfo->Model == 4)
1463             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_C6;
1464         if (pCpuidInfo->Model == 8)
1465             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_C62;
1466     }
1467 
1468     if (pCpuidInfo->ExtendedFeatures & CPU_EXT_3DNOW)
1469         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW;
1470 }
1471 
1472 static void cpuidInfoCyrix(OBJSYS *pSys, PCPUIDINFO pCpuidInfo)
1473 {
1474     if (pCpuidInfo->Family == 4)                // MediaGX
1475         pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_GX;
1476     if (pCpuidInfo->Family == 5)                // Cyrix 6x86 or MediaGX w/ MMX
1477     {
1478         if (pCpuidInfo->Model == 2)
1479             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_M1;
1480         if (pCpuidInfo->Model == 4)
1481             pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_MGX;
1482     }
1483     if (pCpuidInfo->Family == 6)                // Cyrix MII
1484         pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_M2;
1485 
1486     if (pCpuidInfo->ExtendedFeatures & CPU_EXT_3DNOW)
1487         pSys->cpuInfo.caps |= NV0000_CTRL_SYSTEM_CPU_CAP_3DNOW;
1488 }
1489 
1490 static void cpuidInfoTransmeta(OBJSYS *pSys, PCPUIDINFO pCpuidInfo)
1491 {
1492     NvU32 eax, ebx, ecx, edx;
1493     OBJOS *pOS = SYS_GET_OS(pSys);
1494 
1495     //
1496     // Transmeta allows the OEM to program the foundry, family, model, and stepping.  Arrrrgh...
1497     // If this turns out to be a problem, we will need to use one of the extended CPUID calls to
1498     // get the real info.
1499     //
1500 
1501     // Docs were not real clear on which family/model.  Just assume it's a Crusoe
1502     pSys->cpuInfo.type = NV0000_CTRL_SYSTEM_CPU_TYPE_TM_CRUSOE;
1503 
1504     //
1505     // Get the cache info.  From preliminary TM8000 programming and config guide, 2/19/03
1506     // This appears to match AMD's cache CPUID definitions.
1507     //
1508     if (pOS->osNv_cpuid(pOS, 0x80000000, 0, &eax, &ebx, &ecx, &edx) && eax >= 0x80000006)
1509     {
1510         // L1 Cache
1511         if (pOS->osNv_cpuid(pOS, 0x80000005, 0, &eax, &ebx, &ecx, &edx))
1512         {
1513             pSys->cpuInfo.dataCacheLineSize = ecx & 0xff;
1514             pSys->cpuInfo.l1DataCacheSize = ecx >> 24;
1515         }
1516 
1517         // L2 Cache
1518         if (pOS->osNv_cpuid(pOS, 0x80000006, 0, &eax, &ebx, &ecx, &edx))
1519             pSys->cpuInfo.l2DataCacheSize = ecx >> 16;
1520     }
1521 }
1522 
1523 #endif // defined(_M_IX86) || defined(NVCPU_X86)
1524 
1525 #endif // defined(_M_IX86) || defined(NVCPU_X86) || defined(AMD64) || defined(NVCPU_X86_64)
1526 
1527 #endif // NVCPU_IS_X86 || NVCPU_IS_X86_64
1528