xref: /reactos/ntoskrnl/ke/i386/cpu.c (revision 6a6b383a)
1 /*
2  * PROJECT:         ReactOS Kernel
3  * LICENSE:         GPL - See COPYING in the top level directory
4  * FILE:            ntoskrnl/ke/i386/cpu.c
5  * PURPOSE:         Routines for CPU-level support
6  * PROGRAMMERS:     Alex Ionescu (alex.ionescu@reactos.org)
7  */
8 
9 /* INCLUDES *****************************************************************/
10 
11 #include <ntoskrnl.h>
12 #define NDEBUG
13 #include <debug.h>
14 
15 #include <xmmintrin.h>
16 
17 /* GLOBALS *******************************************************************/
18 
19 /* The TSS to use for Double Fault Traps (INT 0x8) */
20 UCHAR KiDoubleFaultTSS[KTSS_IO_MAPS];
21 
22 /* The TSS to use for NMI Fault Traps (INT 0x2) */
23 UCHAR KiNMITSS[KTSS_IO_MAPS];
24 
25 /* CPU Features and Flags */
26 ULONG KeI386CpuType;
27 ULONG KeI386CpuStep;
28 ULONG KiFastSystemCallDisable = 0;
29 ULONG KeI386NpxPresent = TRUE;
30 ULONG KiMXCsrMask = 0;
31 ULONG MxcsrFeatureMask = 0;
32 ULONG KeI386XMMIPresent = 0;
33 ULONG KeI386FxsrPresent = 0;
34 ULONG KeI386MachineType;
35 ULONG Ke386Pae = FALSE;
36 ULONG Ke386NoExecute = FALSE;
37 ULONG KeLargestCacheLine = 0x40;
38 ULONG KeDcacheFlushCount = 0;
39 ULONG KeIcacheFlushCount = 0;
40 ULONG KiDmaIoCoherency = 0;
41 ULONG KePrefetchNTAGranularity = 32;
42 BOOLEAN KiI386PentiumLockErrataPresent;
43 BOOLEAN KiSMTProcessorsPresent;
44 
45 /* The distance between SYSEXIT and IRETD return modes */
46 UCHAR KiSystemCallExitAdjust;
47 
48 /* The offset that was applied -- either 0 or the value above */
49 UCHAR KiSystemCallExitAdjusted;
50 
51 /* Whether the adjustment was already done once */
52 BOOLEAN KiFastCallCopyDoneOnce;
53 
54 /* Flush data */
55 volatile LONG KiTbFlushTimeStamp;
56 
57 /* CPU Signatures */
58 static const CHAR CmpIntelID[]       = "GenuineIntel";
59 static const CHAR CmpAmdID[]         = "AuthenticAMD";
60 static const CHAR CmpCyrixID[]       = "CyrixInstead";
61 static const CHAR CmpTransmetaID[]   = "GenuineTMx86";
62 static const CHAR CmpCentaurID[]     = "CentaurHauls";
63 static const CHAR CmpRiseID[]        = "RiseRiseRise";
64 
65 typedef union _CPU_SIGNATURE
66 {
67     struct
68     {
69         ULONG Step : 4;
70         ULONG Model : 4;
71         ULONG Family : 4;
72         ULONG Unused : 4;
73         ULONG ExtendedModel : 4;
74         ULONG ExtendedFamily : 8;
75         ULONG Unused2 : 4;
76     };
77     ULONG AsULONG;
78 } CPU_SIGNATURE;
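/*
 * Illustrative decode of a CPU_SIGNATURE value (example value, not taken
 * from this file): CPUID.1.EAX = 0x000206A7 splits into Step = 7,
 * Model = 0xA, Family = 6, ExtendedModel = 2 and ExtendedFamily = 0.
 * Because the family is 6, the effective model is
 * (ExtendedModel << 4) | Model = 0x2A, which is how KiSetProcessorType
 * below combines the fields.
 */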
79 
80 /* FX area alignment size */
81 #define FXSAVE_ALIGN 15
82 
83 /* SUPPORT ROUTINES FOR MSVC COMPATIBILITY ***********************************/
84 
85 /* NSC/Cyrix CPU configuration register index */
86 #define CX86_CCR1 0xc1
87 
88 /* NSC/Cyrix CPU indexed register access macros */
89 static __inline
90 UCHAR
91 getCx86(UCHAR reg)
92 {
93     WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
94     return READ_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23);
95 }
96 
97 static __inline
98 void
99 setCx86(UCHAR reg, UCHAR data)
100 {
101     WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
102     WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23, data);
103 }
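/*
 * For reference: ports 0x22 and 0x23 form the Cyrix/NSC configuration
 * register interface, where 0x22 selects the register index and 0x23
 * reads or writes its data. The index write must precede every data
 * access, which is why both helpers above start with the same
 * WRITE_PORT_UCHAR to port 0x22. A sketch of typical usage, mirroring
 * the CCR1 update done later in KiGetFeatureBits:
 *
 *     UCHAR Ccr1 = getCx86(CX86_CCR1);
 *     setCx86(CX86_CCR1, Ccr1 | 0x10);
 */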
104 
105 /* FUNCTIONS *****************************************************************/
106 
107 CODE_SEG("INIT")
108 ULONG
109 NTAPI
110 KiGetCpuVendor(VOID)
111 {
112     PKPRCB Prcb = KeGetCurrentPrcb();
113     CPU_INFO CpuInfo;
114 
115     /* Get the Vendor ID */
116     KiCpuId(&CpuInfo, 0);
117 
118     /* Copy it to the PRCB and null-terminate it */
119     *(ULONG*)&Prcb->VendorString[0] = CpuInfo.Ebx;
120     *(ULONG*)&Prcb->VendorString[4] = CpuInfo.Edx;
121     *(ULONG*)&Prcb->VendorString[8] = CpuInfo.Ecx;
122     Prcb->VendorString[12] = 0;
123 
124     /* Now check the CPU Type */
125     if (!strcmp(Prcb->VendorString, CmpIntelID))
126     {
127         return CPU_INTEL;
128     }
129     else if (!strcmp(Prcb->VendorString, CmpAmdID))
130     {
131         return CPU_AMD;
132     }
133     else if (!strcmp(Prcb->VendorString, CmpCyrixID))
134     {
135         DPRINT1("Cyrix CPU support not fully tested!\n");
136         return CPU_CYRIX;
137     }
138     else if (!strcmp(Prcb->VendorString, CmpTransmetaID))
139     {
140         DPRINT1("Transmeta CPU support not fully tested!\n");
141         return CPU_TRANSMETA;
142     }
143     else if (!strcmp(Prcb->VendorString, CmpCentaurID))
144     {
145         DPRINT1("Centaur CPU support not fully tested!\n");
146         return CPU_CENTAUR;
147     }
148     else if (!strcmp(Prcb->VendorString, CmpRiseID))
149     {
150         DPRINT1("Rise CPU support not fully tested!\n");
151         return CPU_RISE;
152     }
153 
154     /* Unknown CPU */
155     DPRINT1("%s CPU support not fully tested!\n", Prcb->VendorString);
156     return CPU_UNKNOWN;
157 }
158 
159 CODE_SEG("INIT")
160 VOID
161 NTAPI
162 KiSetProcessorType(VOID)
163 {
164     CPU_INFO CpuInfo;
165     CPU_SIGNATURE CpuSignature;
166     BOOLEAN ExtendModel;
167     ULONG Stepping, Type;
168 
169     /* Do CPUID 1 now */
170     KiCpuId(&CpuInfo, 1);
171 
172     /*
173      * Get the Stepping and Type. The stepping contains both the
174      * Model and the Step, while the Type contains the returned Family.
175      *
176      * For the stepping, we convert this: zzzzzzxy into this: x0y
177      */
178     CpuSignature.AsULONG = CpuInfo.Eax;
179     Stepping = CpuSignature.Model;
180     ExtendModel = (CpuSignature.Family == 15);
181 #if ( (NTDDI_VERSION >= NTDDI_WINXPSP2) && (NTDDI_VERSION < NTDDI_WS03) ) || (NTDDI_VERSION >= NTDDI_WS03SP1)
182     if (CpuSignature.Family == 6)
183     {
184         ULONG Vendor = KiGetCpuVendor();
185         ExtendModel |= (Vendor == CPU_INTEL);
186 #if (NTDDI_VERSION >= NTDDI_WIN8)
187         ExtendModel |= (Vendor == CPU_CENTAUR);
188 #endif
189     }
190 #endif
191     if (ExtendModel)
192     {
193         /* Add ExtendedModel to distinguish from non-extended values. */
194         Stepping |= (CpuSignature.ExtendedModel << 4);
195     }
196     Stepping = (Stepping << 8) | CpuSignature.Step;
197     Type = CpuSignature.Family;
198     if (CpuSignature.Family == 15)
199     {
200         /* Add ExtendedFamily to distinguish from non-extended values.
201          * It must not be larger than 0xF0 to avoid overflow. */
202         Type += min(CpuSignature.ExtendedFamily, 0xF0);
203     }
204 
205     /* Save them in the PRCB */
206     KeGetCurrentPrcb()->CpuID = TRUE;
207     KeGetCurrentPrcb()->CpuType = (UCHAR)Type;
208     KeGetCurrentPrcb()->CpuStep = (USHORT)Stepping;
209 }
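/*
 * Worked example of the stepping conversion above, assuming an Intel
 * signature with Family = 6, Model = 0xA, ExtendedModel = 2 and
 * Step = 7: the model nibbles combine to 0x2A, so CpuStep becomes
 * (0x2A << 8) | 7 = 0x2A07 and CpuType stays 6. Without the extended
 * model (e.g. Family = 6, Model = 0xE, Step = 8) the result would be
 * CpuStep = 0x0E08, matching the "zzzzzzxy -> x0y" comment.
 */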
210 
211 CODE_SEG("INIT")
212 ULONG
213 NTAPI
214 KiGetFeatureBits(VOID)
215 {
216     PKPRCB Prcb = KeGetCurrentPrcb();
217     ULONG Vendor;
218     ULONG FeatureBits = KF_WORKING_PTE;
219     CPU_INFO CpuInfo, DummyCpuInfo;
220     UCHAR Ccr1;
221     BOOLEAN ExtendedCPUID = TRUE;
222     ULONG CpuFeatures = 0;
223 
224     /* Get the Vendor ID */
225     Vendor = KiGetCpuVendor();
226 
227     /* Make sure we got a valid vendor ID at least. */
228     if (!Vendor) return FeatureBits;
229 
230     /* Get the CPUID Info. Features are in Reg[3]. */
231     KiCpuId(&CpuInfo, 1);
232 
233     /* Set the initial APIC ID */
234     Prcb->InitialApicId = (UCHAR)(CpuInfo.Ebx >> 24);
235 
236     switch (Vendor)
237     {
238         /* Intel CPUs */
239         case CPU_INTEL:
240 
241             /* Check if it's a P6 */
242             if (Prcb->CpuType == 6)
243             {
244                 /* Perform the special sequence to get the MicroCode Signature */
245                 __writemsr(0x8B, 0);
246                 KiCpuId(&DummyCpuInfo, 1);
247                 Prcb->UpdateSignature.QuadPart = __readmsr(0x8B);
248             }
249             else if (Prcb->CpuType == 5)
250             {
251                 /* On P5, enable workaround for the LOCK errata. */
252                 KiI386PentiumLockErrataPresent = TRUE;
253             }
254 
255             /* Check for broken P6 with bad SMP PTE implementation */
256             if (((CpuInfo.Eax & 0x0FF0) == 0x0610 && (CpuInfo.Eax & 0x000F) <= 0x9) ||
257                 ((CpuInfo.Eax & 0x0FF0) == 0x0630 && (CpuInfo.Eax & 0x000F) <= 0x4))
258             {
259                 /* Remove support for correct PTE support. */
260                 FeatureBits &= ~KF_WORKING_PTE;
261             }
262 
263             /* Check if the CPU is too old to support SYSENTER */
264             if ((Prcb->CpuType < 6) ||
265                 ((Prcb->CpuType == 6) && (Prcb->CpuStep < 0x0303)))
266             {
267                 /* Disable it */
268                 CpuInfo.Edx &= ~0x800;
269             }
270 
271             break;
272 
273         /* AMD CPUs */
274         case CPU_AMD:
275 
276             /* Check if this is a K5 or K6 (family 5) */
277             if ((CpuInfo.Eax & 0x0F00) == 0x0500)
278             {
279                 /* Get the Model Number */
280                 switch (CpuInfo.Eax & 0x00F0)
281                 {
282                     /* Model 1: K5 - 5k86 (initial models) */
283                     case 0x0010:
284 
285                         /* Steppings above 3 support PGE; earlier ones fall through to disable it */
286                         if ((CpuInfo.Eax & 0x000F) > 0x03) break;
287 
288                     /* Model 0: K5 - SSA5 */
289                     case 0x0000:
290 
291                         /* Model 0 doesn't support PGE at all. */
292                         CpuInfo.Edx &= ~0x2000;
293                         break;
294 
295                     /* Model 8: K6-2 */
296                     case 0x0080:
297 
298                         /* K6-2, Step 8 and over have support for MTRR. */
299                         if ((CpuInfo.Eax & 0x000F) >= 0x8) FeatureBits |= KF_AMDK6MTRR;
300                         break;
301 
302                     /* Model 9: K6-III
303                        Model D: K6-2+, K6-III+ */
304                     case 0x0090:
305                     case 0x00D0:
306 
307                         FeatureBits |= KF_AMDK6MTRR;
308                         break;
309                 }
310             }
311             else if((CpuInfo.Eax & 0x0F00) < 0x0500)
312             {
313                 /* Families below 5 don't support PGE, PSE or CMOV at all */
314                 CpuInfo.Edx &= ~(0x08 | 0x2000 | 0x8000);
315 
316                 /* They also don't support advanced CPUID functions. */
317                 ExtendedCPUID = FALSE;
318             }
319 
320             break;
321 
322         /* Cyrix CPUs */
323         case CPU_CYRIX:
324 
325             /* Workaround the "COMA" bug on 6x family of Cyrix CPUs */
326             if (Prcb->CpuType == 6 &&
327                 Prcb->CpuStep <= 1)
328             {
329                 /* Get CCR1 value */
330                 Ccr1 = getCx86(CX86_CCR1);
331 
332                 /* Enable the NO_LOCK bit */
333                 Ccr1 |= 0x10;
334 
335                 /* Set the new CCR1 value */
336                 setCx86(CX86_CCR1, Ccr1);
337             }
338 
339             break;
340 
341         /* Transmeta CPUs */
342         case CPU_TRANSMETA:
343 
344             /* Enable CMPXCHG8B if the family (>= 5), model and stepping (>= 4.2) support it */
345             if ((CpuInfo.Eax & 0x0FFF) >= 0x0542)
346             {
347                 __writemsr(0x80860004, __readmsr(0x80860004) | 0x0100);
348                 FeatureBits |= KF_CMPXCHG8B;
349             }
350 
351             break;
352 
353         /* Centaur, IDT, Rise and VIA CPUs */
354         case CPU_CENTAUR:
355         case CPU_RISE:
356 
357             /* These CPUs don't report the presence of CMPXCHG8B through CPUID.
358                However, this feature exists and operates properly without any additional steps. */
359             FeatureBits |= KF_CMPXCHG8B;
360 
361             break;
362     }
363 
364     /* Set the current features */
365     CpuFeatures = CpuInfo.Edx;
366 
367     /* Convert all CPUID Feature bits into our format */
368     if (CpuFeatures & X86_FEATURE_VME)     FeatureBits |= KF_V86_VIS | KF_CR4;
369     if (CpuFeatures & X86_FEATURE_PSE)     FeatureBits |= KF_LARGE_PAGE | KF_CR4;
370     if (CpuFeatures & X86_FEATURE_TSC)     FeatureBits |= KF_RDTSC;
371     if (CpuFeatures & X86_FEATURE_CX8)     FeatureBits |= KF_CMPXCHG8B;
372     if (CpuFeatures & X86_FEATURE_SYSCALL) FeatureBits |= KF_FAST_SYSCALL;
373     if (CpuFeatures & X86_FEATURE_MTTR)    FeatureBits |= KF_MTRR;
374     if (CpuFeatures & X86_FEATURE_PGE)     FeatureBits |= KF_GLOBAL_PAGE | KF_CR4;
375     if (CpuFeatures & X86_FEATURE_CMOV)    FeatureBits |= KF_CMOV;
376     if (CpuFeatures & X86_FEATURE_PAT)     FeatureBits |= KF_PAT;
377     if (CpuFeatures & X86_FEATURE_DS)      FeatureBits |= KF_DTS;
378     if (CpuFeatures & X86_FEATURE_MMX)     FeatureBits |= KF_MMX;
379     if (CpuFeatures & X86_FEATURE_FXSR)    FeatureBits |= KF_FXSR;
380     if (CpuFeatures & X86_FEATURE_SSE)     FeatureBits |= KF_XMMI;
381     if (CpuFeatures & X86_FEATURE_SSE2)    FeatureBits |= KF_XMMI64;
382 
383     /* Check if the CPU has hyper-threading */
384     if (CpuFeatures & X86_FEATURE_HT)
385     {
386         /* Set the number of logical CPUs */
387         Prcb->LogicalProcessorsPerPhysicalProcessor = (UCHAR)(CpuInfo.Ebx >> 16);
388         if (Prcb->LogicalProcessorsPerPhysicalProcessor > 1)
389         {
390                             /* More than one logical CPU per package, so SMT is present */
391             KiSMTProcessorsPresent = TRUE;
392         }
393     }
394     else
395     {
396         /* We only have a single CPU */
397         Prcb->LogicalProcessorsPerPhysicalProcessor = 1;
398     }
399 
400     /* Check if CPUID 0x80000000 is supported */
401     if (ExtendedCPUID)
402     {
403         /* Do the call */
404         KiCpuId(&CpuInfo, 0x80000000);
405         if ((CpuInfo.Eax & 0xffffff00) == 0x80000000)
406         {
407             /* Check if CPUID 0x80000001 is supported */
408             if (CpuInfo.Eax >= 0x80000001)
409             {
410                 /* Check which extended features are available. */
411                 KiCpuId(&CpuInfo, 0x80000001);
412 
413                 /* Check if NX-bit is supported */
414                 if (CpuInfo.Edx & X86_FEATURE_NX) FeatureBits |= KF_NX_BIT;
415 
416                 /* Now handle each features for each CPU Vendor */
417                 switch (Vendor)
418                 {
419                     case CPU_AMD:
420                     case CPU_CENTAUR:
421                         if (CpuInfo.Edx & 0x80000000) FeatureBits |= KF_3DNOW;
422                         break;
423                 }
424             }
425         }
426     }
427 
428     /* Return the Feature Bits */
429     return FeatureBits;
430 }
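/*
 * A minimal sketch of how the returned bits are meant to be consumed
 * (illustrative only; the real initialization path stores the result
 * in KeFeatureBits and acts on it elsewhere):
 *
 *     ULONG Bits = KiGetFeatureBits();
 *     if (Bits & KF_FXSR) KeI386FxsrPresent = TRUE;
 *     if (Bits & KF_XMMI) KeI386XMMIPresent = TRUE;
 */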
431 
432 #if DBG
433 CODE_SEG("INIT")
434 VOID
435 KiReportCpuFeatures(VOID)
436 {
437     ULONG CpuFeatures = 0;
438     CPU_INFO CpuInfo;
439 
440     if (KiGetCpuVendor())
441     {
442         KiCpuId(&CpuInfo, 1);
443         CpuFeatures = CpuInfo.Edx;
444     }
445 
446     DPRINT1("Supported CPU features: ");
447 
448 #define print_kf_bit(kf_value) if (KeFeatureBits & kf_value) DbgPrint(#kf_value " ")
449     print_kf_bit(KF_V86_VIS);
450     print_kf_bit(KF_RDTSC);
451     print_kf_bit(KF_CR4);
452     print_kf_bit(KF_CMOV);
453     print_kf_bit(KF_GLOBAL_PAGE);
454     print_kf_bit(KF_LARGE_PAGE);
455     print_kf_bit(KF_MTRR);
456     print_kf_bit(KF_CMPXCHG8B);
457     print_kf_bit(KF_MMX);
458     print_kf_bit(KF_WORKING_PTE);
459     print_kf_bit(KF_PAT);
460     print_kf_bit(KF_FXSR);
461     print_kf_bit(KF_FAST_SYSCALL);
462     print_kf_bit(KF_XMMI);
463     print_kf_bit(KF_3DNOW);
464     print_kf_bit(KF_AMDK6MTRR);
465     print_kf_bit(KF_XMMI64);
466     print_kf_bit(KF_DTS);
467     print_kf_bit(KF_NX_BIT);
468     print_kf_bit(KF_NX_DISABLED);
469     print_kf_bit(KF_NX_ENABLED);
470 #undef print_kf_bit
471 
472 #define print_cf(cpu_flag) if (CpuFeatures & cpu_flag) DbgPrint(#cpu_flag " ")
473     print_cf(X86_FEATURE_PAE);
474     print_cf(X86_FEATURE_APIC);
475     print_cf(X86_FEATURE_HT);
476 #undef print_cf
477 
478     DbgPrint("\n");
479 }
480 #endif // DBG
481 
482 CODE_SEG("INIT")
483 VOID
484 NTAPI
485 KiGetCacheInformation(VOID)
486 {
487     PKIPCR Pcr = (PKIPCR)KeGetPcr();
488     CPU_INFO CpuInfo;
489     ULONG CacheRequests = 0, i;
490     ULONG CurrentRegister;
491     UCHAR RegisterByte, Associativity = 0;
492     ULONG Size, CacheLine = 64, CurrentSize = 0;
493     BOOLEAN FirstPass = TRUE;
494 
495     /* Set default L2 size */
496     Pcr->SecondLevelCacheSize = 0;
497 
498     /* Check the Vendor ID */
499     switch (KiGetCpuVendor())
500     {
501         /* Handle Intel case */
502         case CPU_INTEL:
503 
504             /* Check if we support CPUID 2 */
505             KiCpuId(&CpuInfo, 0);
506             if (CpuInfo.Eax >= 2)
507             {
508                 /* We need to loop for the number of times CPUID will tell us to */
509                 do
510                 {
511                     /* Do the CPUID call */
512                     KiCpuId(&CpuInfo, 2);
513 
514                     /* Check if it was the first call */
515                     if (FirstPass)
516                     {
517                         /*
518                          * The number of times to loop is the first byte. Read
519                          * it and then destroy it so we don't get confused.
520                          */
521                         CacheRequests = CpuInfo.Eax & 0xFF;
522                         CpuInfo.Eax &= 0xFFFFFF00;
523 
524                         /* Don't go over this again */
525                         FirstPass = FALSE;
526                     }
527 
528                     /* Loop all 4 registers */
529                     for (i = 0; i < 4; i++)
530                     {
531                         /* Get the current register */
532                         CurrentRegister = CpuInfo.AsUINT32[i];
533 
534                         /*
535                          * If the upper bit is set, then this register should
536                          * be skipped.
537                          */
538                         if (CurrentRegister & 0x80000000) continue;
539 
540                         /* Keep looping for every byte inside this register */
541                         while (CurrentRegister)
542                         {
543                             /* Read a byte, skip a byte. */
544                             RegisterByte = (UCHAR)(CurrentRegister & 0xFF);
545                             CurrentRegister >>= 8;
546                             if (!RegisterByte) continue;
547 
548                             Size = 0;
549                             switch (RegisterByte)
550                             {
551                                 case 0x06:
552                                 case 0x08:
553                                     KePrefetchNTAGranularity = 32;
554                                     break;
555                                 case 0x09:
556                                     KePrefetchNTAGranularity = 64;
557                                     break;
558                                 case 0x0a:
559                                 case 0x0c:
560                                     KePrefetchNTAGranularity = 32;
561                                     break;
562                                 case 0x0d:
563                                 case 0x0e:
564                                     KePrefetchNTAGranularity = 64;
565                                     break;
566                                 case 0x1d:
567                                     Size = 128 * 1024;
568                                     Associativity = 2;
569                                     break;
570                                 case 0x21:
571                                     Size = 256 * 1024;
572                                     Associativity = 8;
573                                     break;
574                                 case 0x24:
575                                     Size = 1024 * 1024;
576                                     Associativity = 16;
577                                     break;
578                                 case 0x2c:
579                                 case 0x30:
580                                     KePrefetchNTAGranularity = 64;
581                                     break;
582                                 case 0x41:
583                                 case 0x42:
584                                 case 0x43:
585                                 case 0x44:
586                                 case 0x45:
587                                     Size = (1 << (RegisterByte - 0x41)) * 128 * 1024;
588                                     Associativity = 4;
589                                     break;
590                                 case 0x48:
591                                     Size = 3 * 1024 * 1024;
592                                     Associativity = 12;
593                                     break;
594                                 case 0x49:
595                                     Size = 4 * 1024 * 1024;
596                                     Associativity = 16;
597                                     break;
598                                 case 0x4e:
599                                     Size = 6 * 1024 * 1024;
600                                     Associativity = 24;
601                                     break;
602                                 case 0x60:
603                                 case 0x66:
604                                 case 0x67:
605                                 case 0x68:
606                                     KePrefetchNTAGranularity = 64;
607                                     break;
608                                 case 0x78:
609                                     Size = 1024 * 1024;
610                                     Associativity = 4;
611                                     break;
612                                 case 0x79:
613                                 case 0x7a:
614                                 case 0x7b:
615                                 case 0x7c:
616                                 case 0x7d:
617                                     Size = (1 << (RegisterByte - 0x79)) * 128 * 1024;
618                                     Associativity = 8;
619                                     break;
620                                 case 0x7f:
621                                     Size = 512 * 1024;
622                                     Associativity = 2;
623                                     break;
624                                 case 0x80:
625                                     Size = 512 * 1024;
626                                     Associativity = 8;
627                                     break;
628                                 case 0x82:
629                                 case 0x83:
630                                 case 0x84:
631                                 case 0x85:
632                                     Size = (1 << (RegisterByte - 0x82)) * 256 * 1024;
633                                     Associativity = 8;
634                                     break;
635                                 case 0x86:
636                                     Size = 512 * 1024;
637                                     Associativity = 4;
638                                     break;
639                                 case 0x87:
640                                     Size = 1024 * 1024;
641                                     Associativity = 8;
642                                     break;
643                                 case 0xf0:
644                                     KePrefetchNTAGranularity = 64;
645                                     break;
646                                 case 0xf1:
647                                     KePrefetchNTAGranularity = 128;
648                                     break;
649                             }
650                             if (Size && (Size / Associativity) > CurrentSize)
651                             {
652                                 /* Set the L2 Cache Size and Associativity */
653                                 CurrentSize = Size / Associativity;
654                                 Pcr->SecondLevelCacheSize = Size;
655                                 Pcr->SecondLevelCacheAssociativity = Associativity;
656                             }
657                         }
658                     }
659                 } while (--CacheRequests);
660             }
661             break;
662 
663         case CPU_AMD:
664 
665             /* Check if we support CPUID 0x80000005 */
666             KiCpuId(&CpuInfo, 0x80000000);
667             if (CpuInfo.Eax >= 0x80000005)
668             {
669                 /* Get L1 size first */
670                 KiCpuId(&CpuInfo, 0x80000005);
671                 KePrefetchNTAGranularity = CpuInfo.Ecx & 0xFF;
672 
673                 /* Check if we support CPUID 0x80000006 */
674                 KiCpuId(&CpuInfo, 0x80000000);
675                 if (CpuInfo.Eax >= 0x80000006)
676                 {
677                     /* Get 2nd level cache and tlb size */
678                     KiCpuId(&CpuInfo, 0x80000006);
679 
680                     /* Cache line size */
681                     CacheLine = CpuInfo.Ecx & 0xFF;
682 
683                     /* Hardcode associativity */
684                     RegisterByte = (CpuInfo.Ecx >> 12) & 0xFF;
685                     switch (RegisterByte)
686                     {
687                         case 2:
688                             Associativity = 2;
689                             break;
690 
691                         case 4:
692                             Associativity = 4;
693                             break;
694 
695                         case 6:
696                             Associativity = 8;
697                             break;
698 
699                         case 8:
700                         case 15:
701                             Associativity = 16;
702                             break;
703 
704                         default:
705                             Associativity = 1;
706                             break;
707                     }
708 
709                     /* Compute size */
710                     Size = (CpuInfo.Ecx >> 16) << 10;
711 
712                     /* Hack for Model 6, Stepping 0x300 */
713                     if ((KeGetCurrentPrcb()->CpuType == 6) &&
714                         (KeGetCurrentPrcb()->CpuStep == 0x300))
715                     {
716                         /* Stick 64K in there */
717                         Size = 64 * 1024;
718                     }
719 
720                     /* Set the L2 Cache Size and associativity */
721                     Pcr->SecondLevelCacheSize = Size;
722                     Pcr->SecondLevelCacheAssociativity = Associativity;
723                 }
724             }
725             break;
726 
727         case CPU_CYRIX:
728         case CPU_TRANSMETA:
729         case CPU_CENTAUR:
730         case CPU_RISE:
731 
732             /* FIXME */
733             break;
734     }
735 
736     /* Set the cache line */
737     if (CacheLine > KeLargestCacheLine) KeLargestCacheLine = CacheLine;
738     DPRINT1("Prefetch Cache: %lu bytes\tL2 Cache: %lu bytes\tL2 Cache Line: %lu bytes\tL2 Cache Associativity: %lu\n",
739             KePrefetchNTAGranularity,
740             Pcr->SecondLevelCacheSize,
741             KeLargestCacheLine,
742             Pcr->SecondLevelCacheAssociativity);
743 }
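/*
 * Worked example for the AMD leaf 0x80000006 decode above, assuming
 * ECX = 0x02006140: the low byte (0x40) gives a 64-byte cache line,
 * the nibble at bits 15:12 holds associativity code 6, which the
 * switch maps to 8-way, and (ECX >> 16) << 10 yields 0x0200 * 1024
 * bytes = 512 KB, so the PCR would record a 512 KB, 8-way L2 cache.
 */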
744 
745 CODE_SEG("INIT")
746 VOID
747 NTAPI
748 KiSetCR0Bits(VOID)
749 {
750     ULONG Cr0;
751 
752     /* Save current CR0 */
753     Cr0 = __readcr0();
754 
755     /* If this is a 486, enable Write-Protection */
756     if (KeGetCurrentPrcb()->CpuType > 3) Cr0 |= CR0_WP;
757 
758     /* Set new Cr0 */
759     __writecr0(Cr0);
760 }
761 
762 CODE_SEG("INIT")
763 VOID
764 NTAPI
765 KiInitializeTSS2(IN PKTSS Tss,
766                  IN PKGDTENTRY TssEntry OPTIONAL)
767 {
768     PUCHAR p;
769 
770     /* Make sure the GDT Entry is valid */
771     if (TssEntry)
772     {
773         /* Set the Limit */
774         TssEntry->LimitLow = sizeof(KTSS) - 1;
775         TssEntry->HighWord.Bits.LimitHi = 0;
776     }
777 
778     /* Now clear the I/O Map */
779     ASSERT(IOPM_COUNT == 1);
780     RtlFillMemory(Tss->IoMaps[0].IoMap, IOPM_FULL_SIZE, 0xFF);
781 
782     /* Initialize Interrupt Direction Maps */
783     p = (PUCHAR)(Tss->IoMaps[0].DirectionMap);
784     RtlZeroMemory(p, IOPM_DIRECTION_MAP_SIZE);
785 
786     /* Add DPMI support for interrupts */
787     p[0] = 4;
788     p[3] = 0x18;
789     p[4] = 0x18;
790 
791     /* Initialize the default Interrupt Direction Map */
792     p = Tss->IntDirectionMap;
793     RtlZeroMemory(Tss->IntDirectionMap, IOPM_DIRECTION_MAP_SIZE);
794 
795     /* Add DPMI support */
796     p[0] = 4;
797     p[3] = 0x18;
798     p[4] = 0x18;
799 }
800 
801 VOID
802 NTAPI
803 KiInitializeTSS(IN PKTSS Tss)
804 {
805     /* Set an invalid map base */
806     Tss->IoMapBase = KiComputeIopmOffset(IO_ACCESS_MAP_NONE);
807 
808     /* Disable traps during Task Switches */
809     Tss->Flags = 0;
810 
811     /* Set LDT and Ring 0 SS */
812     Tss->LDT = 0;
813     Tss->Ss0 = KGDT_R0_DATA;
814 }
815 
816 CODE_SEG("INIT")
817 VOID
818 FASTCALL
819 Ki386InitializeTss(IN PKTSS Tss,
820                    IN PKIDTENTRY Idt,
821                    IN PKGDTENTRY Gdt)
822 {
823     PKGDTENTRY TssEntry, TaskGateEntry;
824 
825     /* Initialize the boot TSS. */
826     TssEntry = &Gdt[KGDT_TSS / sizeof(KGDTENTRY)];
827     TssEntry->HighWord.Bits.Type = I386_TSS;
828     TssEntry->HighWord.Bits.Pres = 1;
829     TssEntry->HighWord.Bits.Dpl = 0;
830     KiInitializeTSS2(Tss, TssEntry);
831     KiInitializeTSS(Tss);
832 
833     /* Load the task register */
834     Ke386SetTr(KGDT_TSS);
835 
836     /* Setup the Task Gate for Double Fault Traps */
837     TaskGateEntry = (PKGDTENTRY)&Idt[8];
838     TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
839     TaskGateEntry->HighWord.Bits.Pres = 1;
840     TaskGateEntry->HighWord.Bits.Dpl = 0;
841     ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_DF_TSS;
842 
843     /* Initialize the TSS used for handling double faults. */
844     Tss = (PKTSS)KiDoubleFaultTSS;
845     KiInitializeTSS(Tss);
846     Tss->CR3 = __readcr3();
847     Tss->Esp0 = KiDoubleFaultStack;
848     Tss->Esp = KiDoubleFaultStack;
849     Tss->Eip = PtrToUlong(KiTrap08);
850     Tss->Cs = KGDT_R0_CODE;
851     Tss->Fs = KGDT_R0_PCR;
852     Tss->Ss = Ke386GetSs();
853     Tss->Es = KGDT_R3_DATA | RPL_MASK;
854     Tss->Ds = KGDT_R3_DATA | RPL_MASK;
855 
856     /* Setup the Double Trap TSS entry in the GDT */
857     TssEntry = &Gdt[KGDT_DF_TSS / sizeof(KGDTENTRY)];
858     TssEntry->HighWord.Bits.Type = I386_TSS;
859     TssEntry->HighWord.Bits.Pres = 1;
860     TssEntry->HighWord.Bits.Dpl = 0;
861     TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
862     TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
863     TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
864     TssEntry->LimitLow = KTSS_IO_MAPS;
865 
866     /* Now setup the NMI Task Gate */
867     TaskGateEntry = (PKGDTENTRY)&Idt[2];
868     TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
869     TaskGateEntry->HighWord.Bits.Pres = 1;
870     TaskGateEntry->HighWord.Bits.Dpl = 0;
871     ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_NMI_TSS;
872 
873     /* Initialize the actual TSS */
874     Tss = (PKTSS)KiNMITSS;
875     KiInitializeTSS(Tss);
876     Tss->CR3 = __readcr3();
877     Tss->Esp0 = KiDoubleFaultStack;
878     Tss->Esp = KiDoubleFaultStack;
879     Tss->Eip = PtrToUlong(KiTrap02);
880     Tss->Cs = KGDT_R0_CODE;
881     Tss->Fs = KGDT_R0_PCR;
882     Tss->Ss = Ke386GetSs();
883     Tss->Es = KGDT_R3_DATA | RPL_MASK;
884     Tss->Ds = KGDT_R3_DATA | RPL_MASK;
885 
886     /* And its associated TSS Entry */
887     TssEntry = &Gdt[KGDT_NMI_TSS / sizeof(KGDTENTRY)];
888     TssEntry->HighWord.Bits.Type = I386_TSS;
889     TssEntry->HighWord.Bits.Pres = 1;
890     TssEntry->HighWord.Bits.Dpl = 0;
891     TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
892     TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
893     TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
894     TssEntry->LimitLow = KTSS_IO_MAPS;
895 }
896 
897 VOID
898 NTAPI
899 KeFlushCurrentTb(VOID)
900 {
901 
902 #if !defined(_GLOBAL_PAGES_ARE_AWESOME_)
903 
904     /* Flush the TLB by resetting CR3 */
905     __writecr3(__readcr3());
906 
907 #else
908 
909     /* Check if global pages are enabled */
910     if (KeFeatureBits & KF_GLOBAL_PAGE)
911     {
912         ULONG Cr4;
913 
914         /* Disable PGE (Note: may not have been enabled yet) */
915         Cr4 = __readcr4();
916         __writecr4(Cr4 & ~CR4_PGE);
917 
918         /* Flush everything */
919         __writecr3(__readcr3());
920 
921         /* Re-enable PGE */
922         __writecr4(Cr4);
923     }
924     else
925     {
926         /* No global pages, resetting CR3 is enough */
927         __writecr3(__readcr3());
928     }
929 
930 #endif
931 
932 }
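/*
 * Background for the two paths above: writing CR3 flushes all
 * non-global TLB entries, so it is sufficient when global pages are
 * not in use. Once CR4.PGE is set, entries marked global survive a
 * CR3 reload, which is why the second path briefly clears and
 * restores CR4.PGE around the CR3 write to force a full flush.
 */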
933 
934 VOID
935 NTAPI
936 KiRestoreProcessorControlState(PKPROCESSOR_STATE ProcessorState)
937 {
938     PKGDTENTRY TssEntry;
939 
940     //
941     // Restore the CR registers
942     //
943     __writecr0(ProcessorState->SpecialRegisters.Cr0);
944     Ke386SetCr2(ProcessorState->SpecialRegisters.Cr2);
945     __writecr3(ProcessorState->SpecialRegisters.Cr3);
946     if (KeFeatureBits & KF_CR4) __writecr4(ProcessorState->SpecialRegisters.Cr4);
947 
948     //
949     // Restore the DR registers
950     //
951     __writedr(0, ProcessorState->SpecialRegisters.KernelDr0);
952     __writedr(1, ProcessorState->SpecialRegisters.KernelDr1);
953     __writedr(2, ProcessorState->SpecialRegisters.KernelDr2);
954     __writedr(3, ProcessorState->SpecialRegisters.KernelDr3);
955     __writedr(6, ProcessorState->SpecialRegisters.KernelDr6);
956     __writedr(7, ProcessorState->SpecialRegisters.KernelDr7);
957 
958     //
959     // Restore GDT and IDT
960     //
961     Ke386SetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
962     __lidt(&ProcessorState->SpecialRegisters.Idtr.Limit);
963 
964     //
965     // Clear the busy flag so we don't crash if we reload the same selector
966     //
967     TssEntry = (PKGDTENTRY)(ProcessorState->SpecialRegisters.Gdtr.Base +
968                             ProcessorState->SpecialRegisters.Tr);
969     TssEntry->HighWord.Bytes.Flags1 &= ~0x2;
970 
971     //
972     // Restore TSS and LDT
973     //
974     Ke386SetTr(ProcessorState->SpecialRegisters.Tr);
975     Ke386SetLocalDescriptorTable(ProcessorState->SpecialRegisters.Ldtr);
976 }
977 
978 VOID
979 NTAPI
980 KiSaveProcessorControlState(OUT PKPROCESSOR_STATE ProcessorState)
981 {
982     /* Save the CR registers */
983     ProcessorState->SpecialRegisters.Cr0 = __readcr0();
984     ProcessorState->SpecialRegisters.Cr2 = __readcr2();
985     ProcessorState->SpecialRegisters.Cr3 = __readcr3();
986     ProcessorState->SpecialRegisters.Cr4 = (KeFeatureBits & KF_CR4) ?
987                                            __readcr4() : 0;
988 
989     /* Save the DR registers */
990     ProcessorState->SpecialRegisters.KernelDr0 = __readdr(0);
991     ProcessorState->SpecialRegisters.KernelDr1 = __readdr(1);
992     ProcessorState->SpecialRegisters.KernelDr2 = __readdr(2);
993     ProcessorState->SpecialRegisters.KernelDr3 = __readdr(3);
994     ProcessorState->SpecialRegisters.KernelDr6 = __readdr(6);
995     ProcessorState->SpecialRegisters.KernelDr7 = __readdr(7);
996     __writedr(7, 0);
997 
998     /* Save GDT, IDT, LDT and TSS */
999     Ke386GetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
1000     __sidt(&ProcessorState->SpecialRegisters.Idtr.Limit);
1001     ProcessorState->SpecialRegisters.Tr = Ke386GetTr();
1002     Ke386GetLocalDescriptorTable(&ProcessorState->SpecialRegisters.Ldtr);
1003 }
1004 
1005 CODE_SEG("INIT")
1006 VOID
1007 NTAPI
1008 KiInitializeMachineType(VOID)
1009 {
1010     /* Set the Machine Type we got from NTLDR */
1011     KeI386MachineType = KeLoaderBlock->u.I386.MachineType & 0x000FF;
1012 }
1013 
1014 CODE_SEG("INIT")
1015 ULONG_PTR
1016 NTAPI
1017 KiLoadFastSyscallMachineSpecificRegisters(IN ULONG_PTR Context)
1018 {
1019     /* Set the SYSENTER CS and ESP MSRs (0x174, 0x175) */
1020     __writemsr(0x174, KGDT_R0_CODE);
1021     __writemsr(0x175, (ULONG_PTR)KeGetCurrentPrcb()->DpcStack);
1022 
1023     /* Set the SYSENTER EIP MSR (0x176) to the system call entry point */
1024     __writemsr(0x176, (ULONG_PTR)KiFastCallEntry);
1025     return 0;
1026 }
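/*
 * For reference, the three MSRs written above are the SYSENTER MSRs:
 * IA32_SYSENTER_CS (0x174), IA32_SYSENTER_ESP (0x175) and
 * IA32_SYSENTER_EIP (0x176). When user mode executes SYSENTER, the CPU
 * loads CS (and SS) from the 0x174 selector, ESP from 0x175 and EIP
 * from 0x176, landing in KiFastCallEntry on the per-processor DPC
 * stack configured here.
 */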
1027 
1028 CODE_SEG("INIT")
1029 VOID
1030 NTAPI
1031 KiRestoreFastSyscallReturnState(VOID)
1032 {
1033     /* Check if the CPU Supports fast system call */
1034     if (KeFeatureBits & KF_FAST_SYSCALL)
1035     {
1036         /* Check if it has been disabled */
1037         if (KiFastSystemCallDisable)
1038         {
1039             /* Disable fast system call */
1040             KeFeatureBits &= ~KF_FAST_SYSCALL;
1041             KiFastCallExitHandler = KiSystemCallTrapReturn;
1042             DPRINT1("Support for SYSENTER disabled.\n");
1043         }
1044         else
1045         {
1046             /* Do an IPI to enable it */
1047             KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);
1048 
1049             /* It's enabled, so use the proper exit stub */
1050             KiFastCallExitHandler = KiSystemCallSysExitReturn;
1051             DPRINT("Support for SYSENTER detected.\n");
1052         }
1053     }
1054     else
1055     {
1056         /* Use the IRET handler */
1057         KiFastCallExitHandler = KiSystemCallTrapReturn;
1058         DPRINT1("No support for SYSENTER detected.\n");
1059     }
1060 }
1061 
1062 CODE_SEG("INIT")
1063 ULONG_PTR
1064 NTAPI
1065 Ki386EnableDE(IN ULONG_PTR Context)
1066 {
1067     /* Enable DE */
1068     __writecr4(__readcr4() | CR4_DE);
1069     return 0;
1070 }
1071 
1072 CODE_SEG("INIT")
1073 ULONG_PTR
1074 NTAPI
1075 Ki386EnableFxsr(IN ULONG_PTR Context)
1076 {
1077     /* Enable FXSR */
1078     __writecr4(__readcr4() | CR4_FXSR);
1079     return 0;
1080 }
1081 
1082 CODE_SEG("INIT")
1083 ULONG_PTR
1084 NTAPI
1085 Ki386EnableXMMIExceptions(IN ULONG_PTR Context)
1086 {
1087     PKIDTENTRY IdtEntry;
1088 
1089     /* Get the IDT Entry for Interrupt 0x13 */
1090     IdtEntry = &((PKIPCR)KeGetPcr())->IDT[0x13];
1091 
1092     /* Set it up */
1093     IdtEntry->Selector = KGDT_R0_CODE;
1094     IdtEntry->Offset = ((ULONG_PTR)KiTrap13 & 0xFFFF);
1095     IdtEntry->ExtendedOffset = ((ULONG_PTR)KiTrap13 >> 16) & 0xFFFF;
1096     ((PKIDT_ACCESS)&IdtEntry->Access)->Dpl = 0;
1097     ((PKIDT_ACCESS)&IdtEntry->Access)->Present = 1;
1098     ((PKIDT_ACCESS)&IdtEntry->Access)->SegmentType = I386_INTERRUPT_GATE;
1099 
1100     /* Enable XMMI exceptions */
1101     __writecr4(__readcr4() | CR4_XMMEXCPT);
1102     return 0;
1103 }
1104 
1105 CODE_SEG("INIT")
1106 VOID
1107 NTAPI
1108 KiI386PentiumLockErrataFixup(VOID)
1109 {
1110     KDESCRIPTOR IdtDescriptor = {0, 0, 0};
1111     PKIDTENTRY NewIdt, NewIdt2;
1112     PMMPTE PointerPte;
1113 
1114     /* Allocate memory for a new IDT */
1115     NewIdt = ExAllocatePool(NonPagedPool, 2 * PAGE_SIZE);
1116 
1117     /* Put everything after the first 7 entries on a new page */
1118     NewIdt2 = (PVOID)((ULONG_PTR)NewIdt + PAGE_SIZE - (7 * sizeof(KIDTENTRY)));
1119 
1120     /* Disable interrupts */
1121     _disable();
1122 
1123     /* Get the current IDT and copy it */
1124     __sidt(&IdtDescriptor.Limit);
1125     RtlCopyMemory(NewIdt2,
1126                   (PVOID)IdtDescriptor.Base,
1127                   IdtDescriptor.Limit + 1);
1128     IdtDescriptor.Base = (ULONG)NewIdt2;
1129 
1130     /* Set the new IDT */
1131     __lidt(&IdtDescriptor.Limit);
1132     ((PKIPCR)KeGetPcr())->IDT = NewIdt2;
1133 
1134     /* Restore interrupts */
1135     _enable();
1136 
1137     /* Set the first 7 entries as read-only to produce a fault */
1138     PointerPte = MiAddressToPte(NewIdt);
1139     ASSERT(PointerPte->u.Hard.Write == 1);
1140     PointerPte->u.Hard.Write = 0;
1141     KeInvalidateTlbEntry(NewIdt);
1142 }
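/*
 * Context (hedged): KiI386PentiumLockErrataPresent is set for family-5
 * Pentium CPUs in KiGetFeatureBits, and this fixup appears to target
 * the well-known "F0 0F" lock erratum, where a locked CMPXCHG8B with an
 * invalid operand can hang the processor during exception delivery.
 * Per the comments above, the workaround relocates the first IDT
 * entries onto a separate, write-protected page so the condition
 * surfaces as a recoverable fault instead of a hard lockup.
 */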
1143 
1144 BOOLEAN
1145 NTAPI
1146 KeInvalidateAllCaches(VOID)
1147 {
1148     /* Only supported on Pentium Pro and higher */
1149     if (KeI386CpuType < 6) return FALSE;
1150 
1151     /* Invalidate all caches */
1152     __wbinvd();
1153     return TRUE;
1154 }
1155 
1156 VOID
1157 NTAPI
1158 KiSaveProcessorState(IN PKTRAP_FRAME TrapFrame,
1159                      IN PKEXCEPTION_FRAME ExceptionFrame)
1160 {
1161     PKPRCB Prcb = KeGetCurrentPrcb();
1162 
1163     //
1164     // Save full context
1165     //
1166     Prcb->ProcessorState.ContextFrame.ContextFlags = CONTEXT_FULL |
1167                                                      CONTEXT_DEBUG_REGISTERS;
1168     KeTrapFrameToContext(TrapFrame, NULL, &Prcb->ProcessorState.ContextFrame);
1169 
1170     //
1171     // Save control registers
1172     //
1173     KiSaveProcessorControlState(&Prcb->ProcessorState);
1174 }
1175 
1176 CODE_SEG("INIT")
1177 BOOLEAN
1178 NTAPI
1179 KiIsNpxErrataPresent(VOID)
1180 {
1181     static double Value1 = 4195835.0, Value2 = 3145727.0;
1182     INT ErrataPresent;
1183     ULONG Cr0;
1184 
1185     /* Interrupts have to be disabled here. */
1186     ASSERT(!(__readeflags() & EFLAGS_INTERRUPT_MASK));
1187 
1188     /* Read CR0 and remove FPU flags */
1189     Cr0 = __readcr0();
1190     __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1191 
1192     /* Initialize FPU state */
1193     Ke386FnInit();
1194 
1195     /* Multiply the magic values and divide, we should get the result back */
1196 #ifdef __GNUC__
1197     __asm__ __volatile__
1198     (
1199         "fldl %1\n\t"
1200         "fdivl %2\n\t"
1201         "fmull %2\n\t"
1202         "fldl %1\n\t"
1203         "fsubp\n\t"
1204         "fistpl %0\n\t"
1205         : "=m" (ErrataPresent)
1206         : "m" (Value1),
1207           "m" (Value2)
1208     );
1209 #else
1210     __asm
1211     {
1212         fld Value1
1213         fdiv Value2
1214         fmul Value2
1215         fld Value1
1216         fsubp st(1), st(0)
1217         fistp ErrataPresent
1218     };
1219 #endif
1220 
1221     /* Restore CR0 */
1222     __writecr0(Cr0);
1223 
1224     /* Return if there's an errata */
1225     return ErrataPresent != 0;
1226 }
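/*
 * The two magic constants are the well-known Pentium FDIV test values:
 * on an affected FPU, 4195835 / 3145727 comes back slightly wrong, so
 * the difference between the original value and (x / y) * y is not
 * exactly 0. The FISTP at the end stores that difference as an
 * integer, which is therefore non-zero only when the divider erratum
 * is present.
 */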
1227 
1228 VOID
1229 NTAPI
1230 KiFlushNPXState(IN PFLOATING_SAVE_AREA SaveArea)
1231 {
1232     ULONG EFlags, Cr0;
1233     PKTHREAD Thread, NpxThread;
1234     PFX_SAVE_AREA FxSaveArea;
1235 
1236     /* Save volatiles and disable interrupts */
1237     EFlags = __readeflags();
1238     _disable();
1239 
1240     /* Save the PCR and get the current thread */
1241     Thread = KeGetCurrentThread();
1242 
1243     /* Check if we're already loaded */
1244     if (Thread->NpxState != NPX_STATE_LOADED)
1245     {
1246         /* If there's nothing to load, quit */
1247         if (!SaveArea)
1248         {
1249             /* Restore interrupt state and return */
1250             __writeeflags(EFlags);
1251             return;
1252         }
1253 
1254         /* Need FXSR support for this */
1255         ASSERT(KeI386FxsrPresent == TRUE);
1256 
1257         /* Check for sane CR0 */
1258         Cr0 = __readcr0();
1259         if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
1260         {
1261             /* Mask out FPU flags */
1262             __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1263         }
1264 
1265         /* Get the NPX thread and check its FPU state */
1266         NpxThread = KeGetCurrentPrcb()->NpxThread;
1267         if ((NpxThread) && (NpxThread->NpxState == NPX_STATE_LOADED))
1268         {
1269             /* Get the FX frame and store the state there */
1270             FxSaveArea = KiGetThreadNpxArea(NpxThread);
1271             Ke386FxSave(FxSaveArea);
1272 
1273             /* NPX thread has lost its state */
1274             NpxThread->NpxState = NPX_STATE_NOT_LOADED;
1275         }
1276 
1277         /* Now load NPX state from the NPX area */
1278         FxSaveArea = KiGetThreadNpxArea(Thread);
1279         Ke386FxStore(FxSaveArea);
1280     }
1281     else
1282     {
1283         /* Check for sane CR0 */
1284         Cr0 = __readcr0();
1285         if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
1286         {
1287             /* Mask out FPU flags */
1288             __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1289         }
1290 
1291         /* Get FX frame */
1292         FxSaveArea = KiGetThreadNpxArea(Thread);
1293         Thread->NpxState = NPX_STATE_NOT_LOADED;
1294 
1295         /* Save state if supported by CPU */
1296         if (KeI386FxsrPresent) Ke386FxSave(FxSaveArea);
1297     }
1298 
1299     /* Now save the FN state wherever it was requested */
1300     if (SaveArea) Ke386FnSave(SaveArea);
1301 
1302     /* Clear NPX thread */
1303     KeGetCurrentPrcb()->NpxThread = NULL;
1304 
1305     /* Add the CR0 from the NPX frame */
1306     Cr0 |= NPX_STATE_NOT_LOADED;
1307     Cr0 |= FxSaveArea->Cr0NpxState;
1308     __writecr0(Cr0);
1309 
1310     /* Restore interrupt state */
1311     __writeeflags(EFlags);
1312 }
1313 
1314 /* PUBLIC FUNCTIONS **********************************************************/
1315 
1316 /*
1317  * @implemented
1318  */
1319 VOID
1320 NTAPI
1321 KiCoprocessorError(VOID)
1322 {
1323     PFX_SAVE_AREA NpxArea;
1324 
1325     /* Get the FPU area */
1326     NpxArea = KiGetThreadNpxArea(KeGetCurrentThread());
1327 
1328     /* Set CR0_TS */
1329     NpxArea->Cr0NpxState = CR0_TS;
1330     __writecr0(__readcr0() | CR0_TS);
1331 }
1332 
1333 /**
1334  * @brief
1335  * Saves the current floating point unit state
1336  * context of the current calling thread.
1337  *
1338  * @param[out] Save
1339  * The saved floating point context given to the
1340  * Receives the saved floating point context when the
1341  * function completes. The contents of this structure
1342  * are opaque to the calling thread and must be passed
1343  * back to KeRestoreFloatingPointState.
1344  * @return
1345  * Returns STATUS_SUCCESS if the function has
1346  * successfully completed its operations.
1347  * STATUS_INSUFFICIENT_RESOURCES is returned
1348  * if the function couldn't allocate memory
1349  * for FPU state information.
1350  *
1351  * @remarks
1352  * The function saves the FPU state in one of two
1353  * ways. A legacy save (FNSAVE) is performed if the
1354  * system lacks SSE/SSE2 support, otherwise the
1355  * combined FPU, MMX and SSE state is saved in one
1356  * go with FXSAVE.
1357  */
1358 #if defined(__clang__)
1359 __attribute__((__target__("sse")))
1360 #endif
1361 NTSTATUS
1362 NTAPI
1363 KeSaveFloatingPointState(
1364     _Out_ PKFLOATING_SAVE Save)
1365 {
1366     PFLOATING_SAVE_CONTEXT FsContext;
1367     PFX_SAVE_AREA FxSaveAreaFrame;
1368     PKPRCB CurrentPrcb;
1369 
1370     /* Sanity checks */
1371     ASSERT(Save);
1372     ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
1373     ASSERT(KeI386NpxPresent);
1374 
1375     /* Initialize the floating point context */
1376     FsContext = ExAllocatePoolWithTag(NonPagedPool,
1377                                       sizeof(FLOATING_SAVE_CONTEXT),
1378                                       TAG_FLOATING_POINT_CONTEXT);
1379     if (!FsContext)
1380     {
1381         /* Bail out if we failed */
1382         return STATUS_INSUFFICIENT_RESOURCES;
1383     }
1384 
1385     /*
1386      * Allocate the buffer for the FX save area. The
1387      * allocation is oversized by the FXSAVE alignment
1388      * requirement because FXSAVE needs a 16-byte aligned
1389      * pointer, and the pool allocator does not guarantee
1390      * that alignment on its own.
1391      */
1392     FsContext->Buffer = ExAllocatePoolWithTag(NonPagedPool,
1393                                               sizeof(FX_SAVE_AREA) + FXSAVE_ALIGN,
1394                                               TAG_FLOATING_POINT_FX);
1395     if (!FsContext->Buffer)
1396     {
1397         /* Bail out if we failed */
1398         ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
1399         return STATUS_INSUFFICIENT_RESOURCES;
1400     }
1401 
1402     /*
1403      * Align the save area pointer up to a 16-byte boundary.
1404      * ExAllocatePoolWithTag only guarantees 8-byte alignment,
1405      * which is why the allocation above was oversized by the
1406      * FXSAVE alignment requirement. The original, unaligned
1407      * pointer must stay untouched in FsContext->Buffer because
1408      * that exact pointer has to be passed to ExFreePoolWithTag
1409      * when the context is torn down; only the aligned copy in
1410      * PfxSaveArea is used for saving and restoring the FPU
1411      * state.
1412      */
1413     FsContext->PfxSaveArea = ALIGN_UP_POINTER_BY(FsContext->Buffer, 16);
1414 
1415     /* Disable interrupts and get the current processor control region */
1416     _disable();
1417     CurrentPrcb = KeGetCurrentPrcb();
1418 
1419     /* Store the current thread to context */
1420     FsContext->CurrentThread = KeGetCurrentThread();
1421 
1422     /*
1423      * If another thread currently owns the NPX (Numeric
1424      * Processor eXtension) state, save that thread's FPU
1425      * state first and mark it as no longer loaded, so the
1426      * scheduler knows the FPU state now belongs to this thread.
1427      */
1428     if (FsContext->CurrentThread != CurrentPrcb->NpxThread)
1429     {
1430         if ((CurrentPrcb->NpxThread != NULL) &&
1431             (CurrentPrcb->NpxThread->NpxState == NPX_STATE_LOADED))
1432         {
1433             /* Get the FX frame */
1434             FxSaveAreaFrame = KiGetThreadNpxArea(CurrentPrcb->NpxThread);
1435 
1436             /* Save the FPU state */
1437             Ke386SaveFpuState(FxSaveAreaFrame);
1438 
1439             /* NPX thread has lost its state */
1440             CurrentPrcb->NpxThread->NpxState = NPX_STATE_NOT_LOADED;
1441             FxSaveAreaFrame->NpxSavedCpu = 0;
1442         }
1443 
1444         /* The new NPX thread is the current thread */
1445         CurrentPrcb->NpxThread = FsContext->CurrentThread;
1446     }
1447 
1448     /* Perform the save */
1449     Ke386SaveFpuState(FsContext->PfxSaveArea);
1450 
1451     /* Store the NPX IRQL */
1452     FsContext->OldNpxIrql = FsContext->CurrentThread->Header.NpxIrql;
1453 
1454     /* Set the current IRQL to NPX */
1455     FsContext->CurrentThread->Header.NpxIrql = KeGetCurrentIrql();
1456 
1457     /* Initialize the FPU */
1458     Ke386FnInit();
1459 
1460     /* Enable interrupts back */
1461     _enable();
1462 
1463     /* Give the saved FPU context to the caller */
1464     *((PVOID *) Save) = FsContext;
1465     return STATUS_SUCCESS;
1466 }
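/*
 * Typical driver-side usage of this pair of routines (a sketch,
 * assuming code running at IRQL <= DISPATCH_LEVEL):
 *
 *     KFLOATING_SAVE FloatSave;
 *     NTSTATUS Status = KeSaveFloatingPointState(&FloatSave);
 *     if (NT_SUCCESS(Status))
 *     {
 *         // ... use x87 / MMX / SSE instructions here ...
 *         KeRestoreFloatingPointState(&FloatSave);
 *     }
 */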
1467 
1468 /**
1469  * @brief
1470  * Restores the FPU state context that was previously
1471  * saved by a call to KeSaveFloatingPointState. Callers
1472  * are expected to invoke this function once they have
1473  * finished performing their FPU operations, passing in
1474  * the context returned by the save call.
1475  *
1476  * @param[in] Save
1477  * The saved floating point context that is to be given
1478  * to the function to restore the FPU state.
1479  *
1480  * @return
1481  * Returns STATUS_SUCCESS indicating the function
1482  * has fully completed its operations.
1483  */
1484 #if defined(__clang__)
1485 __attribute__((__target__("sse")))
1486 #endif
1487 NTSTATUS
1488 NTAPI
1489 KeRestoreFloatingPointState(
1490     _In_ PKFLOATING_SAVE Save)
1491 {
1492     PFLOATING_SAVE_CONTEXT FsContext;
1493 
1494     /* Sanity checks */
1495     ASSERT(Save);
1496     ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
1497     ASSERT(KeI386NpxPresent);
1498 
1499     /* Cache the saved FS context */
1500     FsContext = *((PVOID *) Save);
1501 
1502     /*
1503      * Before restoring the saved FPU state, validate
1504      * that the context is being restored under the same
1505      * conditions it was saved under. First, check that
1506      * the caller is the same thread that saved the
1507      * state; restoring another thread's context would
1508      * corrupt the FPU ownership tracking.
1509      */
1510     if (FsContext->CurrentThread != KeGetCurrentThread())
1511     {
1512         /*
1513          * This isn't the thread that saved the
1514          * FPU state context, crash the system!
1515          */
1516         KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
1517                      0x2,
1518                      (ULONG_PTR)FsContext->CurrentThread,
1519                      (ULONG_PTR)KeGetCurrentThread(),
1520                      0);
1521     }
1522 
1523     /* Are we under the same NPX interrupt level? */
1524     if (FsContext->CurrentThread->Header.NpxIrql != KeGetCurrentIrql())
1525     {
1526         /* The interrupt level has changed, crash the system! */
1527         KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
1528                      0x1,
1529                      (ULONG_PTR)FsContext->CurrentThread->Header.NpxIrql,
1530                      (ULONG_PTR)KeGetCurrentIrql(),
1531                      0);
1532     }
1533 
1534     /* Disable interrupts */
1535     _disable();
1536 
1537     /*
1538      * The saved FPU state context is valid, so it can
1539      * be restored now. First, clear any pending FPU
1540      * exceptions.
1541      */
1542     Ke386ClearFpExceptions();
1543 
1544     /* Restore the state */
1545     Ke386RestoreFpuState(FsContext->PfxSaveArea);
1546 
1547     /* Give the saved NPX IRQL back to the NPX thread */
1548     FsContext->CurrentThread->Header.NpxIrql = FsContext->OldNpxIrql;
1549 
1550     /* Enable interrupts back */
1551     _enable();
1552 
1553     /* We're done, free the allocated area and context */
1554     ExFreePoolWithTag(FsContext->Buffer, TAG_FLOATING_POINT_FX);
1555     ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
1556 
1557     return STATUS_SUCCESS;
1558 }
1559 
1560 /*
1561  * @implemented
1562  */
1563 ULONG
1564 NTAPI
1565 KeGetRecommendedSharedDataAlignment(VOID)
1566 {
1567     /* Return the global variable */
1568     return KeLargestCacheLine;
1569 }
1570 
1571 VOID
1572 NTAPI
1573 KiFlushTargetEntireTb(IN PKIPI_CONTEXT PacketContext,
1574                       IN PVOID Ignored1,
1575                       IN PVOID Ignored2,
1576                       IN PVOID Ignored3)
1577 {
1578     /* Signal this packet as done */
1579     KiIpiSignalPacketDone(PacketContext);
1580 
1581     /* Flush the TB for the Current CPU */
1582     KeFlushCurrentTb();
1583 }
1584 
1585 /*
1586  * @implemented
1587  */
1588 VOID
1589 NTAPI
1590 KeFlushEntireTb(IN BOOLEAN Invalid,
1591                 IN BOOLEAN AllProcessors)
1592 {
1593     KIRQL OldIrql;
1594 #ifdef CONFIG_SMP
1595     KAFFINITY TargetAffinity;
1596     PKPRCB Prcb = KeGetCurrentPrcb();
1597 #endif
1598 
1599     /* Raise the IRQL for the TB Flush */
1600     OldIrql = KeRaiseIrqlToSynchLevel();
1601 
1602 #ifdef CONFIG_SMP
1603     /* FIXME: Use KiTbFlushTimeStamp to synchronize TB flush */
1604 
1605     /* Get the current processor affinity, and exclude ourselves */
1606     TargetAffinity = KeActiveProcessors;
1607     TargetAffinity &= ~Prcb->SetMember;
1608 
1609     /* Make sure this is MP */
1610     if (TargetAffinity)
1611     {
1612         /* Send an IPI TB flush to the other processors */
1613         KiIpiSendPacket(TargetAffinity,
1614                         KiFlushTargetEntireTb,
1615                         NULL,
1616                         0,
1617                         NULL);
1618     }
1619 #endif
1620 
1621     /* Flush the TB for the Current CPU, and update the flush stamp */
1622     KeFlushCurrentTb();
1623 
1624 #ifdef CONFIG_SMP
1625     /* If this is MP, wait for the other processors to finish */
1626     if (TargetAffinity)
1627     {
1628         /* Sanity check */
1629         ASSERT(Prcb == KeGetCurrentPrcb());
1630 
1631         /* FIXME: TODO */
1632         ASSERTMSG("Not yet implemented\n", FALSE);
1633     }
1634 #endif
1635 
1636     /* Update the flush stamp and return to original IRQL */
1637     InterlockedExchangeAdd(&KiTbFlushTimeStamp, 1);
1638     KeLowerIrql(OldIrql);
1639 }
1640 
1641 /*
1642  * @implemented
1643  */
1644 VOID
1645 NTAPI
1646 KeSetDmaIoCoherency(IN ULONG Coherency)
1647 {
1648     /* Save the coherency globally */
1649     KiDmaIoCoherency = Coherency;
1650 }
1651 
1652 /*
1653  * @implemented
1654  */
1655 KAFFINITY
1656 NTAPI
1657 KeQueryActiveProcessors(VOID)
1658 {
1659     PAGED_CODE();
1660 
1661     /* Simply return the number of active processors */
1662     return KeActiveProcessors;
1663 }
1664 
1665 /*
1666  * @implemented
1667  */
1668 VOID
1669 __cdecl
1670 KeSaveStateForHibernate(IN PKPROCESSOR_STATE State)
1671 {
1672     /* Capture the context */
1673     RtlCaptureContext(&State->ContextFrame);
1674 
1675     /* Capture the control state */
1676     KiSaveProcessorControlState(State);
1677 }
1678