xref: /reactos/ntoskrnl/ke/i386/cpu.c (revision 9164e9f8)
1 /*
2  * PROJECT:         ReactOS Kernel
3  * LICENSE:         GPL - See COPYING in the top level directory
4  * FILE:            ntoskrnl/ke/i386/cpu.c
5  * PURPOSE:         Routines for CPU-level support
6  * PROGRAMMERS:     Alex Ionescu (alex.ionescu@reactos.org)
7  */
8 
9 /* INCLUDES *****************************************************************/
10 
11 #include <ntoskrnl.h>
12 #define NDEBUG
13 #include <debug.h>
14 
15 #include <xmmintrin.h>
16 
17 /* GLOBALS *******************************************************************/
18 
19 /* The TSS to use for Double Fault Traps (INT 0x8) */
20 UCHAR KiDoubleFaultTSS[KTSS_IO_MAPS];
21 
22 /* The TSS to use for NMI Fault Traps (INT 0x2) */
23 UCHAR KiNMITSS[KTSS_IO_MAPS];
24 
25 /* CPU Features and Flags */
26 ULONG KeI386CpuType;
27 ULONG KeI386CpuStep;
28 ULONG KiFastSystemCallDisable = 0;
29 ULONG KeI386NpxPresent = TRUE;
30 ULONG KiMXCsrMask = 0;
31 ULONG MxcsrFeatureMask = 0;
32 ULONG KeI386XMMIPresent = 0;
33 ULONG KeI386FxsrPresent = 0;
34 ULONG KeI386MachineType;
35 ULONG Ke386Pae = FALSE;
36 ULONG Ke386NoExecute = FALSE;
37 ULONG KeLargestCacheLine = 0x40;
38 ULONG KeDcacheFlushCount = 0;
39 ULONG KeIcacheFlushCount = 0;
40 ULONG KiDmaIoCoherency = 0;
41 ULONG KePrefetchNTAGranularity = 32;
42 BOOLEAN KiI386PentiumLockErrataPresent;
43 BOOLEAN KiSMTProcessorsPresent;
44 
45 /* The distance between SYSEXIT and IRETD return modes */
46 UCHAR KiSystemCallExitAdjust;
47 
48 /* The offset that was applied -- either 0 or the value above */
49 UCHAR KiSystemCallExitAdjusted;
50 
51 /* Whether the adjustment was already done once */
52 BOOLEAN KiFastCallCopyDoneOnce;
53 
54 /* Flush data */
55 volatile LONG KiTbFlushTimeStamp;
56 
57 /* CPU Signatures */
58 static const CHAR CmpIntelID[]       = "GenuineIntel";
59 static const CHAR CmpAmdID[]         = "AuthenticAMD";
60 static const CHAR CmpCyrixID[]       = "CyrixInstead";
61 static const CHAR CmpTransmetaID[]   = "GenuineTMx86";
62 static const CHAR CmpCentaurID[]     = "CentaurHauls";
63 static const CHAR CmpRiseID[]        = "RiseRiseRise";
64 
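/*
 * CPUID leaf 1 EAX processor signature. Per the usual CPUID conventions,
 * ExtendedFamily is only added to Family when Family == 0xF, and
 * ExtendedModel only extends Model for the family values handled in
 * KiSetProcessorType below.
 */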
65 typedef union _CPU_SIGNATURE
66 {
67     struct
68     {
69         ULONG Step : 4;
70         ULONG Model : 4;
71         ULONG Family : 4;
72         ULONG Unused : 4;
73         ULONG ExtendedModel : 4;
74         ULONG ExtendedFamily : 8;
75         ULONG Unused2 : 4;
76     };
77     ULONG AsULONG;
78 } CPU_SIGNATURE;
79 
80 /* Extra bytes allocated so the FX save area can be 16-byte aligned */
81 #define FXSAVE_ALIGN 15
82 
83 /* SUPPORT ROUTINES FOR MSVC COMPATIBILITY ***********************************/
84 
85 /* NSC/Cyrix CPU configuration register index */
86 #define CX86_CCR1 0xc1
87 
88 /* NSC/Cyrix CPU indexed register access macros */
89 static __inline
90 UCHAR
91 getCx86(UCHAR reg)
92 {
93     WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
94     return READ_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23);
95 }
96 
97 static __inline
98 void
99 setCx86(UCHAR reg, UCHAR data)
100 {
101     WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
102     WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23, data);
103 }
104 
105 /* FUNCTIONS *****************************************************************/
106 
107 CODE_SEG("INIT")
108 ULONG
109 NTAPI
110 KiGetCpuVendor(VOID)
111 {
112     PKPRCB Prcb = KeGetCurrentPrcb();
113     CPU_INFO CpuInfo;
114 
115     /* Get the Vendor ID */
116     KiCpuId(&CpuInfo, 0);
117 
118     /* Copy it to the PRCB and null-terminate it */
119     *(ULONG*)&Prcb->VendorString[0] = CpuInfo.Ebx;
120     *(ULONG*)&Prcb->VendorString[4] = CpuInfo.Edx;
121     *(ULONG*)&Prcb->VendorString[8] = CpuInfo.Ecx;
122     Prcb->VendorString[12] = 0;
123 
124     /* Now check the CPU Type */
125     if (!strcmp(Prcb->VendorString, CmpIntelID))
126     {
127         return CPU_INTEL;
128     }
129     else if (!strcmp(Prcb->VendorString, CmpAmdID))
130     {
131         return CPU_AMD;
132     }
133     else if (!strcmp(Prcb->VendorString, CmpCyrixID))
134     {
135         DPRINT1("Cyrix CPU support not fully tested!\n");
136         return CPU_CYRIX;
137     }
138     else if (!strcmp(Prcb->VendorString, CmpTransmetaID))
139     {
140         DPRINT1("Transmeta CPU support not fully tested!\n");
141         return CPU_TRANSMETA;
142     }
143     else if (!strcmp(Prcb->VendorString, CmpCentaurID))
144     {
145         DPRINT1("Centaur CPU support not fully tested!\n");
146         return CPU_CENTAUR;
147     }
148     else if (!strcmp(Prcb->VendorString, CmpRiseID))
149     {
150         DPRINT1("Rise CPU support not fully tested!\n");
151         return CPU_RISE;
152     }
153 
154     /* Unknown CPU */
155     DPRINT1("%s CPU support not fully tested!\n", Prcb->VendorString);
156     return CPU_UNKNOWN;
157 }
158 
159 CODE_SEG("INIT")
160 VOID
161 NTAPI
162 KiSetProcessorType(VOID)
163 {
164     CPU_INFO CpuInfo;
165     CPU_SIGNATURE CpuSignature;
166     BOOLEAN ExtendModel;
167     ULONG Stepping, Type;
168 
169     /* Do CPUID 1 now */
170     KiCpuId(&CpuInfo, 1);
171 
172     /*
173      * Get the Stepping and Type. The stepping contains both the
174      * Model and the Step, while the Type contains the returned Family.
175      *
176      * For the stepping, we convert this: zzzzzzxy into this: x0y
177      */
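    /*
     * For example, a (hypothetical) raw signature of 0x000306A9 -- Family 6,
     * ExtendedModel 3, Model 0xA, Step 9 -- yields CpuType 6 and
     * CpuStep 0x3A09 on an Intel processor.
     */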
178     CpuSignature.AsULONG = CpuInfo.Eax;
179     Stepping = CpuSignature.Model;
180     ExtendModel = (CpuSignature.Family == 15);
181 #if ( (NTDDI_VERSION >= NTDDI_WINXPSP2) && (NTDDI_VERSION < NTDDI_WS03) ) || (NTDDI_VERSION >= NTDDI_WS03SP1)
182     if (CpuSignature.Family == 6)
183     {
184         ULONG Vendor = KiGetCpuVendor();
185         ExtendModel |= (Vendor == CPU_INTEL);
186 #if (NTDDI_VERSION >= NTDDI_WIN8)
187         ExtendModel |= (Vendor == CPU_CENTAUR);
188 #endif
189     }
190 #endif
191     if (ExtendModel)
192     {
193         /* Add ExtendedModel to distinguish from non-extended values. */
194         Stepping |= (CpuSignature.ExtendedModel << 4);
195     }
196     Stepping = (Stepping << 8) | CpuSignature.Step;
197     Type = CpuSignature.Family;
198     if (CpuSignature.Family == 15)
199     {
200         /* Add ExtendedFamily to distinguish from non-extended values.
201          * It must not be larger than 0xF0 to avoid overflow. */
202         Type += min(CpuSignature.ExtendedFamily, 0xF0);
203     }
204 
205     /* Save them in the PRCB */
206     KeGetCurrentPrcb()->CpuID = TRUE;
207     KeGetCurrentPrcb()->CpuType = (UCHAR)Type;
208     KeGetCurrentPrcb()->CpuStep = (USHORT)Stepping;
209 }
210 
211 CODE_SEG("INIT")
212 ULONG64
213 NTAPI
214 KiGetFeatureBits(VOID)
215 {
216     PKPRCB Prcb = KeGetCurrentPrcb();
217     ULONG Vendor;
218     ULONG64 FeatureBits = KF_WORKING_PTE;
219     CPU_INFO CpuInfo, DummyCpuInfo;
220     UCHAR Ccr1;
221     BOOLEAN ExtendedCPUID = TRUE;
222     ULONG CpuFeatures = 0;
223 
224     /* Get the Vendor ID */
225     Vendor = KiGetCpuVendor();
226 
227     /* Make sure we got a valid vendor ID at least. */
228     if (!Vendor) return FeatureBits;
229 
230     /* Get the CPUID info. Feature flags are returned in EDX and ECX. */
231     KiCpuId(&CpuInfo, 1);
232 
233     /* Set the initial APIC ID (CPUID leaf 1, EBX bits 31:24) */
234     Prcb->InitialApicId = (UCHAR)(CpuInfo.Ebx >> 24);
235 
236     switch (Vendor)
237     {
238         /* Intel CPUs */
239         case CPU_INTEL:
240 
241             /* Check if it's a P6 */
242             if (Prcb->CpuType == 6)
243             {
244                 /* Perform the special sequence to get the MicroCode Signature */
245                 __writemsr(0x8B, 0);
246                 KiCpuId(&DummyCpuInfo, 1);
247                 Prcb->UpdateSignature.QuadPart = __readmsr(0x8B);
248             }
249             else if (Prcb->CpuType == 5)
250             {
251                 /* On P5, enable workaround for the LOCK errata. */
252                 KiI386PentiumLockErrataPresent = TRUE;
253             }
254 
255             /* Check for broken P6 with bad SMP PTE implementation */
256             if (((CpuInfo.Eax & 0x0FF0) == 0x0610 && (CpuInfo.Eax & 0x000F) <= 0x9) ||
257                 ((CpuInfo.Eax & 0x0FF0) == 0x0630 && (CpuInfo.Eax & 0x000F) <= 0x4))
258             {
259                 /* Clear the working-PTE feature bit */
260                 FeatureBits &= ~KF_WORKING_PTE;
261             }
262 
263             /* Check if the CPU is too old to support SYSENTER */
264             if ((Prcb->CpuType < 6) ||
265                 ((Prcb->CpuType == 6) && (Prcb->CpuStep < 0x0303)))
266             {
267                 /* Clear the SEP (SYSENTER/SYSEXIT) feature bit */
268                 CpuInfo.Edx &= ~0x800;
269             }
270 
271             break;
272 
273         /* AMD CPUs */
274         case CPU_AMD:
275 
276             /* Check if this is a K5 or K6 (Family 5) */
277             if ((CpuInfo.Eax & 0x0F00) == 0x0500)
278             {
279                 /* Get the Model Number */
280                 switch (CpuInfo.Eax & 0x00F0)
281                 {
282                     /* Model 1: K5 - 5k86 (initial models) */
283                     case 0x0010:
284 
285                         /* Steps above 3 support PGE; earlier steps fall through and have it disabled */
286                         if ((CpuInfo.Eax & 0x000F) > 0x03) break;
287 
288                     /* Model 0: K5 - SSA5 */
289                     case 0x0000:
290 
291                         /* Model 0 doesn't support PGE at all. */
292                         CpuInfo.Edx &= ~0x2000;
293                         break;
294 
295                     /* Model 8: K6-2 */
296                     case 0x0080:
297 
298                         /* K6-2, Step 8 and over have support for MTRR. */
299                         if ((CpuInfo.Eax & 0x000F) >= 0x8) FeatureBits |= KF_AMDK6MTRR;
300                         break;
301 
302                     /* Model 9: K6-III
303                        Model D: K6-2+, K6-III+ */
304                     case 0x0090:
305                     case 0x00D0:
306 
307                         FeatureBits |= KF_AMDK6MTRR;
308                         break;
309                 }
310             }
311             else if ((CpuInfo.Eax & 0x0F00) < 0x0500)
312             {
313                 /* Families below 5 don't support PGE, PSE or CMOV at all */
314                 CpuInfo.Edx &= ~(0x08 | 0x2000 | 0x8000);
315 
316                 /* They also don't support advanced CPUID functions. */
317                 ExtendedCPUID = FALSE;
318             }
319 
320             break;
321 
322         /* Cyrix CPUs */
323         case CPU_CYRIX:
324 
325             /* Workaround the "COMA" bug on 6x family of Cyrix CPUs */
326             if (Prcb->CpuType == 6 &&
327                 Prcb->CpuStep <= 1)
328             {
329                 /* Get CCR1 value */
330                 Ccr1 = getCx86(CX86_CCR1);
331 
332                 /* Enable the NO_LOCK bit */
333                 Ccr1 |= 0x10;
334 
335                 /* Set the new CCR1 value */
336                 setCx86(CX86_CCR1, Ccr1);
337             }
338 
339             break;
340 
341         /* Transmeta CPUs */
342         case CPU_TRANSMETA:
343 
344             /* Enable CMPXCHG8B if the family (>= 5) and the model/stepping (>= 4.2) support it */
345             if ((CpuInfo.Eax & 0x0FFF) >= 0x0542)
346             {
347                 __writemsr(0x80860004, __readmsr(0x80860004) | 0x0100);
348                 FeatureBits |= KF_CMPXCHG8B;
349             }
350 
351             break;
352 
353         /* Centaur, IDT, Rise and VIA CPUs */
354         case CPU_CENTAUR:
355         case CPU_RISE:
356 
357             /* These CPUs don't report the presence of CMPXCHG8B through CPUID.
358                However, this feature exists and operates properly without any additional steps. */
359             FeatureBits |= KF_CMPXCHG8B;
360 
361             break;
362     }
363 
364     /* Get some features from ECX */
365     if (CpuInfo.Ecx & X86_FEATURE_SSE3) FeatureBits |= KF_SSE3;
366     if (CpuInfo.Ecx & X86_FEATURE_SSSE3) FeatureBits |= KF_SSSE3;
367     if (CpuInfo.Ecx & X86_FEATURE_SSE4_1) FeatureBits |= KF_SSE4_1;
368     if (CpuInfo.Ecx & X86_FEATURE_SSE4_2) FeatureBits |= KF_SSE4_2;
369     if (CpuInfo.Ecx & X86_FEATURE_XSAVE) FeatureBits |= KF_XSTATE;
370     if (CpuInfo.Ecx & X86_FEATURE_RDRAND) FeatureBits |= KF_RDRAND;
371 
372     /* Set the current features */
373     CpuFeatures = CpuInfo.Edx;
374 
375     /* Convert all CPUID Feature bits into our format */
376     if (CpuFeatures & X86_FEATURE_VME)     FeatureBits |= KF_V86_VIS | KF_CR4;
377     if (CpuFeatures & X86_FEATURE_PSE)     FeatureBits |= KF_LARGE_PAGE | KF_CR4;
378     if (CpuFeatures & X86_FEATURE_TSC)     FeatureBits |= KF_RDTSC;
379     if (CpuFeatures & X86_FEATURE_CX8)     FeatureBits |= KF_CMPXCHG8B;
380     if (CpuFeatures & X86_FEATURE_SYSCALL) FeatureBits |= KF_FAST_SYSCALL;
381     if (CpuFeatures & X86_FEATURE_MTTR)    FeatureBits |= KF_MTRR;
382     if (CpuFeatures & X86_FEATURE_PGE)     FeatureBits |= KF_GLOBAL_PAGE | KF_CR4;
383     if (CpuFeatures & X86_FEATURE_CMOV)    FeatureBits |= KF_CMOV;
384     if (CpuFeatures & X86_FEATURE_PAT)     FeatureBits |= KF_PAT;
385     if (CpuFeatures & X86_FEATURE_DS)      FeatureBits |= KF_DTS;
386     if (CpuFeatures & X86_FEATURE_MMX)     FeatureBits |= KF_MMX;
387     if (CpuFeatures & X86_FEATURE_FXSR)    FeatureBits |= KF_FXSR;
388     if (CpuFeatures & X86_FEATURE_SSE)     FeatureBits |= KF_XMMI;
389     if (CpuFeatures & X86_FEATURE_SSE2)    FeatureBits |= KF_XMMI64;
390 
391     /* Check if the CPU has hyper-threading */
392     if (CpuFeatures & X86_FEATURE_HT)
393     {
394         /* Set the number of logical CPUs */
395         Prcb->LogicalProcessorsPerPhysicalProcessor = (UCHAR)(CpuInfo.Ebx >> 16);
396         if (Prcb->LogicalProcessorsPerPhysicalProcessor > 1)
397         {
398             /* More than one logical processor is reported, so SMT is present */
399             KiSMTProcessorsPresent = TRUE;
400         }
401     }
402     else
403     {
404         /* Only one logical processor per physical processor */
405         Prcb->LogicalProcessorsPerPhysicalProcessor = 1;
406     }
407 
408     /* Check if CPUID 0x80000000 is supported */
409     if (ExtendedCPUID)
410     {
411         /* Do the call */
412         KiCpuId(&CpuInfo, 0x80000000);
413         if ((CpuInfo.Eax & 0xffffff00) == 0x80000000)
414         {
415             /* Check if CPUID 0x80000001 is supported */
416             if (CpuInfo.Eax >= 0x80000001)
417             {
418                 /* Check which extended features are available. */
419                 KiCpuId(&CpuInfo, 0x80000001);
420 
421                 /* Check if NX-bit is supported */
422                 if (CpuInfo.Edx & X86_FEATURE_NX) FeatureBits |= KF_NX_BIT;
423 
424                 /* Now handle vendor-specific extended features */
425                 switch (Vendor)
426                 {
427                     case CPU_AMD:
428                     case CPU_CENTAUR:
429                         if (CpuInfo.Edx & 0x80000000) FeatureBits |= KF_3DNOW;
430                         break;
431                 }
432             }
433         }
434     }
435 
436     /* Return the Feature Bits */
437     return FeatureBits;
438 }
439 
440 #if DBG
441 CODE_SEG("INIT")
442 VOID
443 KiReportCpuFeatures(VOID)
444 {
445     ULONG CpuFeatures = 0;
446     CPU_INFO CpuInfo;
447 
448     if (KiGetCpuVendor())
449     {
450         KiCpuId(&CpuInfo, 1);
451         CpuFeatures = CpuInfo.Edx;
452     }
453 
454     DPRINT1("Supported CPU features: ");
455 
456 #define print_kf_bit(kf_value) if (KeFeatureBits & kf_value) DbgPrint(#kf_value " ")
457     print_kf_bit(KF_V86_VIS);
458     print_kf_bit(KF_RDTSC);
459     print_kf_bit(KF_CR4);
460     print_kf_bit(KF_CMOV);
461     print_kf_bit(KF_GLOBAL_PAGE);
462     print_kf_bit(KF_LARGE_PAGE);
463     print_kf_bit(KF_MTRR);
464     print_kf_bit(KF_CMPXCHG8B);
465     print_kf_bit(KF_MMX);
466     print_kf_bit(KF_WORKING_PTE);
467     print_kf_bit(KF_PAT);
468     print_kf_bit(KF_FXSR);
469     print_kf_bit(KF_FAST_SYSCALL);
470     print_kf_bit(KF_XMMI);
471     print_kf_bit(KF_3DNOW);
472     print_kf_bit(KF_AMDK6MTRR);
473     print_kf_bit(KF_XMMI64);
474     print_kf_bit(KF_DTS);
475     print_kf_bit(KF_NX_BIT);
476     print_kf_bit(KF_NX_DISABLED);
477     print_kf_bit(KF_NX_ENABLED);
478 #undef print_kf_bit
479 
480 #define print_cf(cpu_flag) if (CpuFeatures & cpu_flag) DbgPrint(#cpu_flag " ")
481     print_cf(X86_FEATURE_PAE);
482     print_cf(X86_FEATURE_APIC);
483     print_cf(X86_FEATURE_HT);
484 #undef print_cf
485 
486     DbgPrint("\n");
487 }
488 #endif // DBG
489 
490 CODE_SEG("INIT")
491 VOID
492 NTAPI
493 KiGetCacheInformation(VOID)
494 {
495     PKIPCR Pcr = (PKIPCR)KeGetPcr();
496     CPU_INFO CpuInfo;
497     ULONG CacheRequests = 0, i;
498     ULONG CurrentRegister;
499     UCHAR RegisterByte, Associativity = 0;
500     ULONG Size, CacheLine = 64, CurrentSize = 0;
501     BOOLEAN FirstPass = TRUE;
502 
503     /* Set default L2 size */
504     Pcr->SecondLevelCacheSize = 0;
505 
506     /* Check the Vendor ID */
507     switch (KiGetCpuVendor())
508     {
509         /* Handle Intel case */
510         case CPU_INTEL:
511 
512             /* Check if we support CPUID 2 */
513             KiCpuId(&CpuInfo, 0);
514             if (CpuInfo.Eax >= 2)
515             {
516                 /* We need to loop for the number of times CPUID will tell us to */
517                 do
518                 {
519                     /* Do the CPUID call */
520                     KiCpuId(&CpuInfo, 2);
521 
522                     /* Check if it was the first call */
523                     if (FirstPass)
524                     {
525                         /*
526                          * The number of times to loop is in the first byte. Read
527                          * it, then clear it so it is not mistaken for a descriptor.
528                          */
529                         CacheRequests = CpuInfo.Eax & 0xFF;
530                         CpuInfo.Eax &= 0xFFFFFF00;
531 
532                         /* Don't go over this again */
533                         FirstPass = FALSE;
534                     }
535 
536                     /* Loop all 4 registers */
537                     for (i = 0; i < 4; i++)
538                     {
539                         /* Get the current register */
540                         CurrentRegister = CpuInfo.AsUINT32[i];
541 
542                         /*
543                          * If the upper bit is set, then this register should
544                          * be skipped.
545                          */
546                         if (CurrentRegister & 0x80000000) continue;
547 
548                         /* Keep looping for every byte inside this register */
549                         while (CurrentRegister)
550                         {
551                             /* Read a byte, skip a byte. */
552                             RegisterByte = (UCHAR)(CurrentRegister & 0xFF);
553                             CurrentRegister >>= 8;
554                             if (!RegisterByte) continue;
555 
556                             Size = 0;
557                             switch (RegisterByte)
558                             {
559                                 case 0x06:
560                                 case 0x08:
561                                     KePrefetchNTAGranularity = 32;
562                                     break;
563                                 case 0x09:
564                                     KePrefetchNTAGranularity = 64;
565                                     break;
566                                 case 0x0a:
567                                 case 0x0c:
568                                     KePrefetchNTAGranularity = 32;
569                                     break;
570                                 case 0x0d:
571                                 case 0x0e:
572                                     KePrefetchNTAGranularity = 64;
573                                     break;
574                                 case 0x1d:
575                                     Size = 128 * 1024;
576                                     Associativity = 2;
577                                     break;
578                                 case 0x21:
579                                     Size = 256 * 1024;
580                                     Associativity = 8;
581                                     break;
582                                 case 0x24:
583                                     Size = 1024 * 1024;
584                                     Associativity = 16;
585                                     break;
586                                 case 0x2c:
587                                 case 0x30:
588                                     KePrefetchNTAGranularity = 64;
589                                     break;
590                                 case 0x41:
591                                 case 0x42:
592                                 case 0x43:
593                                 case 0x44:
594                                 case 0x45:
595                                     Size = (1 << (RegisterByte - 0x41)) * 128 * 1024;
596                                     Associativity = 4;
597                                     break;
598                                 case 0x48:
599                                     Size = 3 * 1024 * 1024;
600                                     Associativity = 12;
601                                     break;
602                                 case 0x49:
603                                     Size = 4 * 1024 * 1024;
604                                     Associativity = 16;
605                                     break;
606                                 case 0x4e:
607                                     Size = 6 * 1024 * 1024;
608                                     Associativity = 24;
609                                     break;
610                                 case 0x60:
611                                 case 0x66:
612                                 case 0x67:
613                                 case 0x68:
614                                     KePrefetchNTAGranularity = 64;
615                                     break;
616                                 case 0x78:
617                                     Size = 1024 * 1024;
618                                     Associativity = 4;
619                                     break;
620                                 case 0x79:
621                                 case 0x7a:
622                                 case 0x7b:
623                                 case 0x7c:
624                                 case 0x7d:
625                                     Size = (1 << (RegisterByte - 0x79)) * 128 * 1024;
626                                     Associativity = 8;
627                                     break;
628                                 case 0x7f:
629                                     Size = 512 * 1024;
630                                     Associativity = 2;
631                                     break;
632                                 case 0x80:
633                                     Size = 512 * 1024;
634                                     Associativity = 8;
635                                     break;
636                                 case 0x82:
637                                 case 0x83:
638                                 case 0x84:
639                                 case 0x85:
640                                     Size = (1 << (RegisterByte - 0x82)) * 256 * 1024;
641                                     Associativity = 8;
642                                     break;
643                                 case 0x86:
644                                     Size = 512 * 1024;
645                                     Associativity = 4;
646                                     break;
647                                 case 0x87:
648                                     Size = 1024 * 1024;
649                                     Associativity = 8;
650                                     break;
651                                 case 0xf0:
652                                     KePrefetchNTAGranularity = 64;
653                                     break;
654                                 case 0xf1:
655                                     KePrefetchNTAGranularity = 128;
656                                     break;
657                             }
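                            /* For example, descriptor 0x43 (a 512 KB, 4-way L2)
                               yields Size / Associativity = 128 KB below */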
658                             if (Size && (Size / Associativity) > CurrentSize)
659                             {
660                                 /* Set the L2 Cache Size and Associativity */
661                                 CurrentSize = Size / Associativity;
662                                 Pcr->SecondLevelCacheSize = Size;
663                                 Pcr->SecondLevelCacheAssociativity = Associativity;
664                             }
665                         }
666                     }
667                 } while (--CacheRequests);
668             }
669             break;
670 
671         case CPU_AMD:
672 
673             /* Check if we support CPUID 0x80000005 */
674             KiCpuId(&CpuInfo, 0x80000000);
675             if (CpuInfo.Eax >= 0x80000005)
676             {
677                 /* Get L1 size first */
678                 KiCpuId(&CpuInfo, 0x80000005);
679                 KePrefetchNTAGranularity = CpuInfo.Ecx & 0xFF;
680 
681                 /* Check if we support CPUID 0x80000006 */
682                 KiCpuId(&CpuInfo, 0x80000000);
683                 if (CpuInfo.Eax >= 0x80000006)
684                 {
685                     /* Get 2nd level cache and tlb size */
686                     KiCpuId(&CpuInfo, 0x80000006);
687 
688                     /* Cache line size */
689                     CacheLine = CpuInfo.Ecx & 0xFF;
690 
691                     /* Translate the encoded L2 associativity */
692                     RegisterByte = (CpuInfo.Ecx >> 12) & 0xFF;
693                     switch (RegisterByte)
694                     {
695                         case 2:
696                             Associativity = 2;
697                             break;
698 
699                         case 4:
700                             Associativity = 4;
701                             break;
702 
703                         case 6:
704                             Associativity = 8;
705                             break;
706 
707                         case 8:
708                         case 15:
709                             Associativity = 16;
710                             break;
711 
712                         default:
713                             Associativity = 1;
714                             break;
715                     }
716 
717                     /* The L2 size is reported in KB in the upper word; convert to bytes */
718                     Size = (CpuInfo.Ecx >> 16) << 10;
719 
720                     /* Hack for Model 6, Stepping 300 */
721                     if ((KeGetCurrentPrcb()->CpuType == 6) &&
722                         (KeGetCurrentPrcb()->CpuStep == 0x300))
723                     {
724                         /* Stick 64K in there */
725                         Size = 64 * 1024;
726                     }
727 
728                     /* Set the L2 Cache Size and associativity */
729                     Pcr->SecondLevelCacheSize = Size;
730                     Pcr->SecondLevelCacheAssociativity = Associativity;
731                 }
732             }
733             break;
734 
735         case CPU_CYRIX:
736         case CPU_TRANSMETA:
737         case CPU_CENTAUR:
738         case CPU_RISE:
739 
740             /* FIXME */
741             break;
742     }
743 
744     /* Set the cache line */
745     if (CacheLine > KeLargestCacheLine) KeLargestCacheLine = CacheLine;
746     DPRINT1("Prefetch Cache: %lu bytes\tL2 Cache: %lu bytes\tL2 Cache Line: %lu bytes\tL2 Cache Associativity: %lu\n",
747             KePrefetchNTAGranularity,
748             Pcr->SecondLevelCacheSize,
749             KeLargestCacheLine,
750             Pcr->SecondLevelCacheAssociativity);
751 }
752 
753 CODE_SEG("INIT")
754 VOID
755 NTAPI
756 KiSetCR0Bits(VOID)
757 {
758     ULONG Cr0;
759 
760     /* Save current CR0 */
761     Cr0 = __readcr0();
762 
763     /* On a 486 or later, enable Write-Protection */
764     if (KeGetCurrentPrcb()->CpuType > 3) Cr0 |= CR0_WP;
765 
766     /* Set new Cr0 */
767     __writecr0(Cr0);
768 }
769 
770 CODE_SEG("INIT")
771 VOID
772 NTAPI
773 KiInitializeTSS2(IN PKTSS Tss,
774                  IN PKGDTENTRY TssEntry OPTIONAL)
775 {
776     PUCHAR p;
777 
778     /* Make sure the GDT Entry is valid */
779     if (TssEntry)
780     {
781         /* Set the Limit */
782         TssEntry->LimitLow = sizeof(KTSS) - 1;
783         TssEntry->HighWord.Bits.LimitHi = 0;
784     }
785 
786     /* Fill the I/O Map with 1s; a set bit denies access to that port */
787     ASSERT(IOPM_COUNT == 1);
788     RtlFillMemory(Tss->IoMaps[0].IoMap, IOPM_FULL_SIZE, 0xFF);
789 
790     /* Initialize Interrupt Direction Maps */
791     p = (PUCHAR)(Tss->IoMaps[0].DirectionMap);
792     RtlZeroMemory(p, IOPM_DIRECTION_MAP_SIZE);
793 
794     /* Add DPMI support for interrupts */
795     p[0] = 4;
796     p[3] = 0x18;
797     p[4] = 0x18;
798 
799     /* Initialize the default Interrupt Direction Map */
800     p = Tss->IntDirectionMap;
801     RtlZeroMemory(Tss->IntDirectionMap, IOPM_DIRECTION_MAP_SIZE);
802 
803     /* Add DPMI support */
804     p[0] = 4;
805     p[3] = 0x18;
806     p[4] = 0x18;
807 }
808 
809 VOID
810 NTAPI
811 KiInitializeTSS(IN PKTSS Tss)
812 {
813     /* Set an invalid map base */
814     Tss->IoMapBase = KiComputeIopmOffset(IO_ACCESS_MAP_NONE);
815 
816     /* Disable traps during Task Switches */
817     Tss->Flags = 0;
818 
819     /* Set LDT and Ring 0 SS */
820     Tss->LDT = 0;
821     Tss->Ss0 = KGDT_R0_DATA;
822 }
823 
824 CODE_SEG("INIT")
825 VOID
826 FASTCALL
827 Ki386InitializeTss(IN PKTSS Tss,
828                    IN PKIDTENTRY Idt,
829                    IN PKGDTENTRY Gdt)
830 {
831     PKGDTENTRY TssEntry, TaskGateEntry;
832 
833     /* Initialize the boot TSS. */
834     TssEntry = &Gdt[KGDT_TSS / sizeof(KGDTENTRY)];
835     TssEntry->HighWord.Bits.Type = I386_TSS;
836     TssEntry->HighWord.Bits.Pres = 1;
837     TssEntry->HighWord.Bits.Dpl = 0;
838     KiInitializeTSS2(Tss, TssEntry);
839     KiInitializeTSS(Tss);
840 
841     /* Load the task register */
842     Ke386SetTr(KGDT_TSS);
843 
844     /* Setup the Task Gate for Double Fault Traps */
845     TaskGateEntry = (PKGDTENTRY)&Idt[8];
846     TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
847     TaskGateEntry->HighWord.Bits.Pres = 1;
848     TaskGateEntry->HighWord.Bits.Dpl = 0;
849     ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_DF_TSS;
850 
851     /* Initialize the TSS used for handling double faults. */
852     Tss = (PKTSS)KiDoubleFaultTSS;
853     KiInitializeTSS(Tss);
854     Tss->CR3 = __readcr3();
855     Tss->Esp0 = KiDoubleFaultStack;
856     Tss->Esp = KiDoubleFaultStack;
857     Tss->Eip = PtrToUlong(KiTrap08);
858     Tss->Cs = KGDT_R0_CODE;
859     Tss->Fs = KGDT_R0_PCR;
860     Tss->Ss = Ke386GetSs();
861     Tss->Es = KGDT_R3_DATA | RPL_MASK;
862     Tss->Ds = KGDT_R3_DATA | RPL_MASK;
863 
864     /* Setup the Double Trap TSS entry in the GDT */
865     TssEntry = &Gdt[KGDT_DF_TSS / sizeof(KGDTENTRY)];
866     TssEntry->HighWord.Bits.Type = I386_TSS;
867     TssEntry->HighWord.Bits.Pres = 1;
868     TssEntry->HighWord.Bits.Dpl = 0;
869     TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
870     TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
871     TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
872     TssEntry->LimitLow = KTSS_IO_MAPS;
873 
874     /* Now setup the NMI Task Gate */
875     TaskGateEntry = (PKGDTENTRY)&Idt[2];
876     TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
877     TaskGateEntry->HighWord.Bits.Pres = 1;
878     TaskGateEntry->HighWord.Bits.Dpl = 0;
879     ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_NMI_TSS;
880 
881     /* Initialize the actual TSS */
882     Tss = (PKTSS)KiNMITSS;
883     KiInitializeTSS(Tss);
884     Tss->CR3 = __readcr3();
885     Tss->Esp0 = KiDoubleFaultStack;
886     Tss->Esp = KiDoubleFaultStack;
887     Tss->Eip = PtrToUlong(KiTrap02);
888     Tss->Cs = KGDT_R0_CODE;
889     Tss->Fs = KGDT_R0_PCR;
890     Tss->Ss = Ke386GetSs();
891     Tss->Es = KGDT_R3_DATA | RPL_MASK;
892     Tss->Ds = KGDT_R3_DATA | RPL_MASK;
893 
894     /* And its associated TSS Entry */
895     TssEntry = &Gdt[KGDT_NMI_TSS / sizeof(KGDTENTRY)];
896     TssEntry->HighWord.Bits.Type = I386_TSS;
897     TssEntry->HighWord.Bits.Pres = 1;
898     TssEntry->HighWord.Bits.Dpl = 0;
899     TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
900     TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
901     TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
902     TssEntry->LimitLow = KTSS_IO_MAPS;
903 }
904 
905 VOID
906 NTAPI
907 KeFlushCurrentTb(VOID)
908 {
909 
910 #if !defined(_GLOBAL_PAGES_ARE_AWESOME_)
911 
912     /* Flush the TLB by resetting CR3 */
913     __writecr3(__readcr3());
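    /* NOTE: reloading CR3 does not invalidate global-page TLB entries, which is
       why the path below toggles CR4.PGE when global pages are in use */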
914 
915 #else
916 
917     /* Check if global pages are enabled */
918     if (KeFeatureBits & KF_GLOBAL_PAGE)
919     {
920         ULONG Cr4;
921 
922         /* Disable PGE (Note: may not have been enabled yet) */
923         Cr4 = __readcr4();
924         __writecr4(Cr4 & ~CR4_PGE);
925 
926         /* Flush everything */
927         __writecr3(__readcr3());
928 
929         /* Re-enable PGE */
930         __writecr4(Cr4);
931     }
932     else
933     {
934         /* No global pages, resetting CR3 is enough */
935         __writecr3(__readcr3());
936     }
937 
938 #endif
939 
940 }
941 
942 VOID
943 NTAPI
944 KiRestoreProcessorControlState(PKPROCESSOR_STATE ProcessorState)
945 {
946     PKGDTENTRY TssEntry;
947 
948     //
949     // Restore the CR registers
950     //
951     __writecr0(ProcessorState->SpecialRegisters.Cr0);
952     Ke386SetCr2(ProcessorState->SpecialRegisters.Cr2);
953     __writecr3(ProcessorState->SpecialRegisters.Cr3);
954     if (KeFeatureBits & KF_CR4) __writecr4(ProcessorState->SpecialRegisters.Cr4);
955 
956     //
957     // Restore the DR registers
958     //
959     __writedr(0, ProcessorState->SpecialRegisters.KernelDr0);
960     __writedr(1, ProcessorState->SpecialRegisters.KernelDr1);
961     __writedr(2, ProcessorState->SpecialRegisters.KernelDr2);
962     __writedr(3, ProcessorState->SpecialRegisters.KernelDr3);
963     __writedr(6, ProcessorState->SpecialRegisters.KernelDr6);
964     __writedr(7, ProcessorState->SpecialRegisters.KernelDr7);
965 
966     //
967     // Restore GDT and IDT
968     //
969     Ke386SetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
970     __lidt(&ProcessorState->SpecialRegisters.Idtr.Limit);
971 
972     //
973     // Clear the busy flag so we don't crash if we reload the same selector
974     //
975     TssEntry = (PKGDTENTRY)(ProcessorState->SpecialRegisters.Gdtr.Base +
976                             ProcessorState->SpecialRegisters.Tr);
977     TssEntry->HighWord.Bytes.Flags1 &= ~0x2;
978 
979     //
980     // Restore TSS and LDT
981     //
982     Ke386SetTr(ProcessorState->SpecialRegisters.Tr);
983     Ke386SetLocalDescriptorTable(ProcessorState->SpecialRegisters.Ldtr);
984 }
985 
986 VOID
987 NTAPI
988 KiSaveProcessorControlState(OUT PKPROCESSOR_STATE ProcessorState)
989 {
990     /* Save the CR registers */
991     ProcessorState->SpecialRegisters.Cr0 = __readcr0();
992     ProcessorState->SpecialRegisters.Cr2 = __readcr2();
993     ProcessorState->SpecialRegisters.Cr3 = __readcr3();
994     ProcessorState->SpecialRegisters.Cr4 = (KeFeatureBits & KF_CR4) ?
995                                            __readcr4() : 0;
996 
997     /* Save the DR registers */
998     ProcessorState->SpecialRegisters.KernelDr0 = __readdr(0);
999     ProcessorState->SpecialRegisters.KernelDr1 = __readdr(1);
1000     ProcessorState->SpecialRegisters.KernelDr2 = __readdr(2);
1001     ProcessorState->SpecialRegisters.KernelDr3 = __readdr(3);
1002     ProcessorState->SpecialRegisters.KernelDr6 = __readdr(6);
1003     ProcessorState->SpecialRegisters.KernelDr7 = __readdr(7);
1004     __writedr(7, 0);
1005 
1006     /* Save GDT, IDT, LDT and TSS */
1007     Ke386GetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
1008     __sidt(&ProcessorState->SpecialRegisters.Idtr.Limit);
1009     ProcessorState->SpecialRegisters.Tr = Ke386GetTr();
1010     Ke386GetLocalDescriptorTable(&ProcessorState->SpecialRegisters.Ldtr);
1011 }
1012 
1013 CODE_SEG("INIT")
1014 VOID
1015 NTAPI
1016 KiInitializeMachineType(VOID)
1017 {
1018     /* Set the Machine Type we got from NTLDR */
1019     KeI386MachineType = KeLoaderBlock->u.I386.MachineType & 0x000FF;
1020 }
1021 
1022 CODE_SEG("INIT")
1023 ULONG_PTR
1024 NTAPI
1025 KiLoadFastSyscallMachineSpecificRegisters(IN ULONG_PTR Context)
1026 {
1027     /* Set SYSENTER_CS and SYSENTER_ESP */
1028     __writemsr(0x174, KGDT_R0_CODE);
1029     __writemsr(0x175, (ULONG_PTR)KeGetCurrentPrcb()->DpcStack);
1030 
1031     /* Set SYSENTER_EIP to the system call entry point */
1032     __writemsr(0x176, (ULONG_PTR)KiFastCallEntry);
1033     return 0;
1034 }
1035 
1036 CODE_SEG("INIT")
1037 VOID
1038 NTAPI
1039 KiRestoreFastSyscallReturnState(VOID)
1040 {
1041     /* Check if the CPU supports fast system call */
1042     if (KeFeatureBits & KF_FAST_SYSCALL)
1043     {
1044         /* Check if it has been disabled */
1045         if (KiFastSystemCallDisable)
1046         {
1047             /* Disable fast system call */
1048             KeFeatureBits &= ~KF_FAST_SYSCALL;
1049             KiFastCallExitHandler = KiSystemCallTrapReturn;
1050             DPRINT1("Support for SYSENTER disabled.\n");
1051         }
1052         else
1053         {
1054             /* Do an IPI to enable it */
1055             KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);
1056 
1057             /* It's enabled, so use the proper exit stub */
1058             KiFastCallExitHandler = KiSystemCallSysExitReturn;
1059             DPRINT("Support for SYSENTER detected.\n");
1060         }
1061     }
1062     else
1063     {
1064         /* Use the IRET handler */
1065         KiFastCallExitHandler = KiSystemCallTrapReturn;
1066         DPRINT1("No support for SYSENTER detected.\n");
1067     }
1068 }
1069 
1070 CODE_SEG("INIT")
1071 ULONG_PTR
1072 NTAPI
1073 Ki386EnableDE(IN ULONG_PTR Context)
1074 {
1075     /* Enable DE */
1076     __writecr4(__readcr4() | CR4_DE);
1077     return 0;
1078 }
1079 
1080 CODE_SEG("INIT")
1081 ULONG_PTR
1082 NTAPI
1083 Ki386EnableFxsr(IN ULONG_PTR Context)
1084 {
1085     /* Enable FXSR */
1086     __writecr4(__readcr4() | CR4_FXSR);
1087     return 0;
1088 }
1089 
1090 CODE_SEG("INIT")
1091 ULONG_PTR
1092 NTAPI
1093 Ki386EnableXMMIExceptions(IN ULONG_PTR Context)
1094 {
1095     PKIDTENTRY IdtEntry;
1096 
1097     /* Get the IDT Entry for Interrupt 0x13 */
1098     IdtEntry = &((PKIPCR)KeGetPcr())->IDT[0x13];
1099 
1100     /* Set it up */
1101     IdtEntry->Selector = KGDT_R0_CODE;
1102     IdtEntry->Offset = ((ULONG_PTR)KiTrap13 & 0xFFFF);
1103     IdtEntry->ExtendedOffset = ((ULONG_PTR)KiTrap13 >> 16) & 0xFFFF;
1104     ((PKIDT_ACCESS)&IdtEntry->Access)->Dpl = 0;
1105     ((PKIDT_ACCESS)&IdtEntry->Access)->Present = 1;
1106     ((PKIDT_ACCESS)&IdtEntry->Access)->SegmentType = I386_INTERRUPT_GATE;
1107 
1108     /* Enable XMMI exceptions */
1109     __writecr4(__readcr4() | CR4_XMMEXCPT);
1110     return 0;
1111 }
1112 
1113 CODE_SEG("INIT")
1114 VOID
1115 NTAPI
1116 KiI386PentiumLockErrataFixup(VOID)
1117 {
1118     KDESCRIPTOR IdtDescriptor = {0, 0, 0};
1119     PKIDTENTRY NewIdt, NewIdt2;
1120     PMMPTE PointerPte;
1121 
1122     /* Allocate memory for a new IDT */
1123     NewIdt = ExAllocatePool(NonPagedPool, 2 * PAGE_SIZE);
1124 
1125     /* Put everything after the first 7 entries on a new page */
1126     NewIdt2 = (PVOID)((ULONG_PTR)NewIdt + PAGE_SIZE - (7 * sizeof(KIDTENTRY)));
1127 
1128     /* Disable interrupts */
1129     _disable();
1130 
1131     /* Get the current IDT and copy it */
1132     __sidt(&IdtDescriptor.Limit);
1133     RtlCopyMemory(NewIdt2,
1134                   (PVOID)IdtDescriptor.Base,
1135                   IdtDescriptor.Limit + 1);
1136     IdtDescriptor.Base = (ULONG)NewIdt2;
1137 
1138     /* Set the new IDT */
1139     __lidt(&IdtDescriptor.Limit);
1140     ((PKIPCR)KeGetPcr())->IDT = NewIdt2;
1141 
1142     /* Restore interrupts */
1143     _enable();
1144 
1145     /* Set the first 7 entries as read-only to produce a fault */
1146     PointerPte = MiAddressToPte(NewIdt);
1147     ASSERT(PointerPte->u.Hard.Write == 1);
1148     PointerPte->u.Hard.Write = 0;
1149     KeInvalidateTlbEntry(NewIdt);
1150 }
1151 
1152 BOOLEAN
1153 NTAPI
1154 KeInvalidateAllCaches(VOID)
1155 {
1156     /* Only supported on Pentium Pro and higher */
1157     if (KeI386CpuType < 6) return FALSE;
1158 
1159     /* Invalidate all caches */
1160     __wbinvd();
1161     return TRUE;
1162 }
1163 
1164 VOID
1165 NTAPI
1166 KiSaveProcessorState(IN PKTRAP_FRAME TrapFrame,
1167                      IN PKEXCEPTION_FRAME ExceptionFrame)
1168 {
1169     PKPRCB Prcb = KeGetCurrentPrcb();
1170 
1171     //
1172     // Save full context
1173     //
1174     Prcb->ProcessorState.ContextFrame.ContextFlags = CONTEXT_FULL |
1175                                                      CONTEXT_DEBUG_REGISTERS;
1176     KeTrapFrameToContext(TrapFrame, NULL, &Prcb->ProcessorState.ContextFrame);
1177 
1178     //
1179     // Save control registers
1180     //
1181     KiSaveProcessorControlState(&Prcb->ProcessorState);
1182 }
1183 
1184 CODE_SEG("INIT")
1185 BOOLEAN
1186 NTAPI
1187 KiIsNpxErrataPresent(VOID)
1188 {
1189     static double Value1 = 4195835.0, Value2 = 3145727.0;
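    /* These are the well-known operands for detecting the Pentium FDIV erratum:
       dividing and multiplying back should reproduce Value1 exactly */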
1190     INT ErrataPresent;
1191     ULONG Cr0;
1192 
1193     /* Interrupts have to be disabled here. */
1194     ASSERT(!(__readeflags() & EFLAGS_INTERRUPT_MASK));
1195 
1196     /* Read CR0 and remove FPU flags */
1197     Cr0 = __readcr0();
1198     __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1199 
1200     /* Initialize FPU state */
1201     Ke386FnInit();
1202 
1203     /* Divide and multiply back by the magic value; subtracting the original should leave zero unless the FDIV erratum is present */
1204 #ifdef __GNUC__
1205     __asm__ __volatile__
1206     (
1207         "fldl %1\n\t"
1208         "fdivl %2\n\t"
1209         "fmull %2\n\t"
1210         "fldl %1\n\t"
1211         "fsubp\n\t"
1212         "fistpl %0\n\t"
1213         : "=m" (ErrataPresent)
1214         : "m" (Value1),
1215           "m" (Value2)
1216     );
1217 #else
1218     __asm
1219     {
1220         fld Value1
1221         fdiv Value2
1222         fmul Value2
1223         fld Value1
1224         fsubp st(1), st(0)
1225         fistp ErrataPresent
1226     };
1227 #endif
1228 
1229     /* Restore CR0 */
1230     __writecr0(Cr0);
1231 
1232     /* Return if there's an errata */
1233     return ErrataPresent != 0;
1234 }
1235 
1236 VOID
1237 NTAPI
1238 KiFlushNPXState(IN PFLOATING_SAVE_AREA SaveArea)
1239 {
1240     ULONG EFlags, Cr0;
1241     PKTHREAD Thread, NpxThread;
1242     PFX_SAVE_AREA FxSaveArea;
1243 
1244     /* Save the interrupt flag state and disable interrupts */
1245     EFlags = __readeflags();
1246     _disable();
1247 
1248     /* Get the current thread */
1249     Thread = KeGetCurrentThread();
1250 
1251     /* Check if we're already loaded */
1252     if (Thread->NpxState != NPX_STATE_LOADED)
1253     {
1254         /* If there's nothing to load, quit */
1255         if (!SaveArea)
1256         {
1257             /* Restore interrupt state and return */
1258             __writeeflags(EFlags);
1259             return;
1260         }
1261 
1262         /* Need FXSR support for this */
1263         ASSERT(KeI386FxsrPresent == TRUE);
1264 
1265         /* Check for sane CR0 */
1266         Cr0 = __readcr0();
1267         if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
1268         {
1269             /* Mask out FPU flags */
1270             __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1271         }
1272 
1273         /* Get the NPX thread and check its FPU state */
1274         NpxThread = KeGetCurrentPrcb()->NpxThread;
1275         if ((NpxThread) && (NpxThread->NpxState == NPX_STATE_LOADED))
1276         {
1277             /* Get the FX frame and store the state there */
1278             FxSaveArea = KiGetThreadNpxArea(NpxThread);
1279             Ke386FxSave(FxSaveArea);
1280 
1281             /* NPX thread has lost its state */
1282             NpxThread->NpxState = NPX_STATE_NOT_LOADED;
1283         }
1284 
1285         /* Now load NPX state from the NPX area */
1286         FxSaveArea = KiGetThreadNpxArea(Thread);
1287         Ke386FxStore(FxSaveArea);
1288     }
1289     else
1290     {
1291         /* Check for sane CR0 */
1292         Cr0 = __readcr0();
1293         if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
1294         {
1295             /* Mask out FPU flags */
1296             __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1297         }
1298 
1299         /* Get FX frame */
1300         FxSaveArea = KiGetThreadNpxArea(Thread);
1301         Thread->NpxState = NPX_STATE_NOT_LOADED;
1302 
1303         /* Save state if supported by CPU */
1304         if (KeI386FxsrPresent) Ke386FxSave(FxSaveArea);
1305     }
1306 
1307     /* Now save the FN state wherever it was requested */
1308     if (SaveArea) Ke386FnSave(SaveArea);
1309 
1310     /* Clear NPX thread */
1311     KeGetCurrentPrcb()->NpxThread = NULL;
1312 
1313     /* Add the CR0 from the NPX frame */
1314     Cr0 |= NPX_STATE_NOT_LOADED;
1315     Cr0 |= FxSaveArea->Cr0NpxState;
1316     __writecr0(Cr0);
1317 
1318     /* Restore interrupt state */
1319     __writeeflags(EFlags);
1320 }
1321 
1322 /* PUBLIC FUNCTIONS **********************************************************/
1323 
1324 /*
1325  * @implemented
1326  */
1327 VOID
1328 NTAPI
1329 KiCoprocessorError(VOID)
1330 {
1331     PFX_SAVE_AREA NpxArea;
1332 
1333     /* Get the FPU area */
1334     NpxArea = KiGetThreadNpxArea(KeGetCurrentThread());
1335 
1336     /* Set CR0_TS */
1337     NpxArea->Cr0NpxState = CR0_TS;
1338     __writecr0(__readcr0() | CR0_TS);
1339 }
1340 
1341 /**
1342  * @brief
1343  * Saves the floating point unit (FPU) state
1344  * context of the calling thread.
1345  *
1346  * @param[out] Save
1347  * Receives the saved floating point context at
1348  * the end of the function's operations. The
1349  * contents of the structure are opaque to the
1350  * calling thread.
1351  *
1352  * @return
1353  * Returns STATUS_SUCCESS if the function has
1354  * successfully completed its operations.
1355  * STATUS_INSUFFICIENT_RESOURCES is returned
1356  * if the function couldn't allocate memory
1357  * for FPU state information.
1358  *
1359  * @remarks
1360  * The function saves the FPU state in one of two
1361  * ways: a plain FPU save (FNSAVE) is performed if
1362  * the system doesn't support SSE/SSE2; otherwise
1363  * the combined FPU, MMX and SSE state is saved
1364  * with FXSAVE.
1365  */
1366 #if defined(__clang__)
1367 __attribute__((__target__("sse")))
1368 #endif
1369 NTSTATUS
1370 NTAPI
1371 KeSaveFloatingPointState(
1372     _Out_ PKFLOATING_SAVE Save)
1373 {
1374     PFLOATING_SAVE_CONTEXT FsContext;
1375     PFX_SAVE_AREA FxSaveAreaFrame;
1376     PKPRCB CurrentPrcb;
1377 
1378     /* Sanity checks */
1379     ASSERT(Save);
1380     ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
1381     ASSERT(KeI386NpxPresent);
1382 
1383     /* Initialize the floating point context */
1384     FsContext = ExAllocatePoolWithTag(NonPagedPool,
1385                                       sizeof(FLOATING_SAVE_CONTEXT),
1386                                       TAG_FLOATING_POINT_CONTEXT);
1387     if (!FsContext)
1388     {
1389         /* Bail out if we failed */
1390         return STATUS_INSUFFICIENT_RESOURCES;
1391     }
1392 
1393     /*
1394      * Allocate the buffer from pool. The size of the
1395      * allocation is the FX save area plus the extra
1396      * bytes needed for alignment, because FXSAVE
1397      * requires a 16-byte aligned pointer in order to
1398      * save the FPU state.
1399      */
1400     FsContext->Buffer = ExAllocatePoolWithTag(NonPagedPool,
1401                                               sizeof(FX_SAVE_AREA) + FXSAVE_ALIGN,
1402                                               TAG_FLOATING_POINT_FX);
1403     if (!FsContext->Buffer)
1404     {
1405         /* Bail out if we failed */
1406         ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
1407         return STATUS_INSUFFICIENT_RESOURCES;
1408     }
1409 
1410     /*
1411      * Keep the original allocation in Buffer and store a
1412      * 16-byte aligned copy of the pointer in PfxSaveArea.
1413      * This is needed because ExAllocatePoolWithTag only
1414      * guarantees 8-byte alignment, which is why the
1415      * allocation above includes extra room for alignment.
1416      * The original pointer must be kept unmodified, since
1417      * it is the one that has to be passed to
1418      * ExFreePoolWithTag later; only the aligned copy is
1419      * used to save the FPU state.
1420      */
1421     FsContext->PfxSaveArea = ALIGN_UP_POINTER_BY(FsContext->Buffer, 16);
1422 
1423     /* Disable interrupts and get the current processor control region */
1424     _disable();
1425     CurrentPrcb = KeGetCurrentPrcb();
1426 
1427     /* Store the current thread to context */
1428     FsContext->CurrentThread = KeGetCurrentThread();
1429 
1430     /*
1431      * If another thread currently owns the NPX (Numeric
1432      * Processor eXtension) state, save that thread's FPU
1433      * state first, then make the current thread the owner,
1434      * so the scheduler knows the FPU state belongs to it.
1435      */
1436     if (FsContext->CurrentThread != CurrentPrcb->NpxThread)
1437     {
1438         if ((CurrentPrcb->NpxThread != NULL) &&
1439             (CurrentPrcb->NpxThread->NpxState == NPX_STATE_LOADED))
1440         {
1441             /* Get the FX frame */
1442             FxSaveAreaFrame = KiGetThreadNpxArea(CurrentPrcb->NpxThread);
1443 
1444             /* Save the FPU state */
1445             Ke386SaveFpuState(FxSaveAreaFrame);
1446 
1447             /* NPX thread has lost its state */
1448             CurrentPrcb->NpxThread->NpxState = NPX_STATE_NOT_LOADED;
1449             FxSaveAreaFrame->NpxSavedCpu = 0;
1450         }
1451 
1452         /* The new NPX thread is the current thread */
1453         CurrentPrcb->NpxThread = FsContext->CurrentThread;
1454     }
1455 
1456     /* Perform the save */
1457     Ke386SaveFpuState(FsContext->PfxSaveArea);
1458 
1459     /* Store the NPX IRQL */
1460     FsContext->OldNpxIrql = FsContext->CurrentThread->Header.NpxIrql;
1461 
1462     /* Set the current IRQL to NPX */
1463     FsContext->CurrentThread->Header.NpxIrql = KeGetCurrentIrql();
1464 
1465     /* Initialize the FPU */
1466     Ke386FnInit();
1467 
1468     /* Enable interrupts back */
1469     _enable();
1470 
1471     /* Give the saved FPU context to the caller */
1472     *((PVOID *) Save) = FsContext;
1473     return STATUS_SUCCESS;
1474 }
1475 
1476 /**
1477  * @brief
1478  * Restores the FPU state context that was previously
1479  * saved by a call to KeSaveFloatingPointState.
1480  * Callers are expected to restore the floating point
1481  * state by calling this function when they've finished
1482  * doing FPU operations.
1483  *
1484  * @param[in] Save
1485  * The saved floating point context that is to be given
1486  * to the function to restore the FPU state.
1487  *
1488  * @return
1489  * Returns STATUS_SUCCESS indicating the function
1490  * has fully completed its operations.
1491  */
1492 #if defined(__clang__)
1493 __attribute__((__target__("sse")))
1494 #endif
1495 NTSTATUS
1496 NTAPI
1497 KeRestoreFloatingPointState(
1498     _In_ PKFLOATING_SAVE Save)
1499 {
1500     PFLOATING_SAVE_CONTEXT FsContext;
1501 
1502     /* Sanity checks */
1503     ASSERT(Save);
1504     ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
1505     ASSERT(KeI386NpxPresent);
1506 
1507     /* Cache the saved FS context */
1508     FsContext = *((PVOID *) Save);
1509 
1510     /*
1511      * Before restoring the saved FPU state we
1512      * validate the context. First, check that the
1513      * restore is being performed by the same
1514      * thread that saved the state; otherwise the
1515      * context cannot legitimately belong to the
1516      * caller.
1517      */
1518     if (FsContext->CurrentThread != KeGetCurrentThread())
1519     {
1520         /*
1521          * This isn't the thread that saved the
1522          * FPU state context, crash the system!
1523          */
1524         KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
1525                      0x2,
1526                      (ULONG_PTR)FsContext->CurrentThread,
1527                      (ULONG_PTR)KeGetCurrentThread(),
1528                      0);
1529     }
1530 
1531     /* Are we under the same NPX interrupt level? */
1532     if (FsContext->CurrentThread->Header.NpxIrql != KeGetCurrentIrql())
1533     {
1534         /* The interrupt level has changed, crash the system! */
1535         KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
1536                      0x1,
1537                      (ULONG_PTR)FsContext->CurrentThread->Header.NpxIrql,
1538                      (ULONG_PTR)KeGetCurrentIrql(),
1539                      0);
1540     }
1541 
1542     /* Disable interrupts */
1543     _disable();
1544 
1545     /*
1546      * The saved FPU state context is valid, so
1547      * restore it. First, clear any pending FPU
1548      * exceptions.
1549      */
1550     Ke386ClearFpExceptions();
1551 
1552     /* Restore the state */
1553     Ke386RestoreFpuState(FsContext->PfxSaveArea);
1554 
1555     /* Give the saved NPX IRQL back to the NPX thread */
1556     FsContext->CurrentThread->Header.NpxIrql = FsContext->OldNpxIrql;
1557 
1558     /* Enable interrupts back */
1559     _enable();
1560 
1561     /* We're done, free the allocated area and context */
1562     ExFreePoolWithTag(FsContext->Buffer, TAG_FLOATING_POINT_FX);
1563     ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
1564 
1565     return STATUS_SUCCESS;
1566 }
1567 
1568 /*
1569  * @implemented
1570  */
1571 ULONG
1572 NTAPI
1573 KeGetRecommendedSharedDataAlignment(VOID)
1574 {
1575     /* Return the global variable */
1576     return KeLargestCacheLine;
1577 }
1578 
1579 VOID
1580 NTAPI
1581 KiFlushTargetEntireTb(IN PKIPI_CONTEXT PacketContext,
1582                       IN PVOID Ignored1,
1583                       IN PVOID Ignored2,
1584                       IN PVOID Ignored3)
1585 {
1586     /* Signal this packet as done */
1587     KiIpiSignalPacketDone(PacketContext);
1588 
1589     /* Flush the TB for the Current CPU */
1590     KeFlushCurrentTb();
1591 }
1592 
1593 /*
1594  * @implemented
1595  */
1596 VOID
1597 NTAPI
1598 KeFlushEntireTb(IN BOOLEAN Invalid,
1599                 IN BOOLEAN AllProcessors)
1600 {
1601     KIRQL OldIrql;
1602 #ifdef CONFIG_SMP
1603     KAFFINITY TargetAffinity;
1604     PKPRCB Prcb = KeGetCurrentPrcb();
1605 #endif
1606 
1607     /* Raise the IRQL for the TB Flush */
1608     OldIrql = KeRaiseIrqlToSynchLevel();
1609 
1610 #ifdef CONFIG_SMP
1611     /* FIXME: Use KiTbFlushTimeStamp to synchronize TB flush */
1612 
1613     /* Get the current processor affinity, and exclude ourselves */
1614     TargetAffinity = KeActiveProcessors;
1615     TargetAffinity &= ~Prcb->SetMember;
1616 
1617     /* Make sure this is MP */
1618     if (TargetAffinity)
1619     {
1620         /* Send an IPI TB flush to the other processors */
1621         KiIpiSendPacket(TargetAffinity,
1622                         KiFlushTargetEntireTb,
1623                         NULL,
1624                         0,
1625                         NULL);
1626     }
1627 #endif
1628 
1629     /* Flush the TB for the Current CPU, and update the flush stamp */
1630     KeFlushCurrentTb();
1631 
1632 #ifdef CONFIG_SMP
1633     /* If this is MP, wait for the other processors to finish */
1634     if (TargetAffinity)
1635     {
1636         /* Sanity check */
1637         ASSERT(Prcb == KeGetCurrentPrcb());
1638 
1639         /* FIXME: TODO */
1640         ASSERTMSG("Not yet implemented\n", FALSE);
1641     }
1642 #endif
1643 
1644     /* Update the flush stamp and return to original IRQL */
1645     InterlockedExchangeAdd(&KiTbFlushTimeStamp, 1);
1646     KeLowerIrql(OldIrql);
1647 }
1648 
1649 /*
1650  * @implemented
1651  */
1652 VOID
1653 NTAPI
1654 KeSetDmaIoCoherency(IN ULONG Coherency)
1655 {
1656     /* Save the coherency globally */
1657     KiDmaIoCoherency = Coherency;
1658 }
1659 
1660 /*
1661  * @implemented
1662  */
1663 VOID
1664 __cdecl
1665 KeSaveStateForHibernate(IN PKPROCESSOR_STATE State)
1666 {
1667     /* Capture the context */
1668     RtlCaptureContext(&State->ContextFrame);
1669 
1670     /* Capture the control state */
1671     KiSaveProcessorControlState(State);
1672 }
1673