1 /*
2  * PROJECT:         ReactOS Kernel
3  * LICENSE:         GPL - See COPYING in the top level directory
4  * FILE:            ntoskrnl/ke/i386/cpu.c
5  * PURPOSE:         Routines for CPU-level support
6  * PROGRAMMERS:     Alex Ionescu (alex.ionescu@reactos.org)
7  */
8 
9 /* INCLUDES *****************************************************************/
10 
11 #include <ntoskrnl.h>
12 #define NDEBUG
13 #include <debug.h>
14 
15 #include <xmmintrin.h>
16 
17 /* GLOBALS *******************************************************************/
18 
/* The TSS to use for Double Fault Traps (INT 0x8) */
20 UCHAR KiDoubleFaultTSS[KTSS_IO_MAPS];
21 
22 /* The TSS to use for NMI Fault Traps (INT 0x2) */
23 UCHAR KiNMITSS[KTSS_IO_MAPS];
24 
25 /* CPU Features and Flags */
26 ULONG KeI386CpuType;
27 ULONG KeI386CpuStep;
28 ULONG KiFastSystemCallDisable = 0;
29 ULONG KeI386NpxPresent = TRUE;
30 ULONG KiMXCsrMask = 0;
31 ULONG MxcsrFeatureMask = 0;
32 ULONG KeI386XMMIPresent = 0;
33 ULONG KeI386FxsrPresent = 0;
34 ULONG KeI386MachineType;
35 ULONG Ke386Pae = FALSE;
36 ULONG Ke386NoExecute = FALSE;
37 ULONG KeLargestCacheLine = 0x40;
38 ULONG KeDcacheFlushCount = 0;
39 ULONG KeIcacheFlushCount = 0;
40 ULONG KiDmaIoCoherency = 0;
41 ULONG KePrefetchNTAGranularity = 32;
42 BOOLEAN KiI386PentiumLockErrataPresent;
43 BOOLEAN KiSMTProcessorsPresent;
44 
45 /* The distance between SYSEXIT and IRETD return modes */
46 UCHAR KiSystemCallExitAdjust;
47 
48 /* The offset that was applied -- either 0 or the value above */
49 UCHAR KiSystemCallExitAdjusted;
50 
51 /* Whether the adjustment was already done once */
52 BOOLEAN KiFastCallCopyDoneOnce;
53 
54 /* Flush data */
55 volatile LONG KiTbFlushTimeStamp;
56 
57 /* CPU Signatures */
58 static const CHAR CmpIntelID[]       = "GenuineIntel";
59 static const CHAR CmpAmdID[]         = "AuthenticAMD";
60 static const CHAR CmpCyrixID[]       = "CyrixInstead";
61 static const CHAR CmpTransmetaID[]   = "GenuineTMx86";
62 static const CHAR CmpCentaurID[]     = "CentaurHauls";
63 static const CHAR CmpRiseID[]        = "RiseRiseRise";
64 
65 typedef union _CPU_SIGNATURE
66 {
67     struct
68     {
69         ULONG Step : 4;
70         ULONG Model : 4;
71         ULONG Family : 4;
72         ULONG Unused : 4;
73         ULONG ExtendedModel : 4;
74         ULONG ExtendedFamily : 8;
75         ULONG Unused2 : 4;
76     };
77     ULONG AsULONG;
78 } CPU_SIGNATURE;
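
/*
 * Example: a CPUID.1 EAX value of 0x000306A9 decodes to Family 6, Model 0xA,
 * ExtendedModel 3 (effective model 0x3A) and Step 9.
 */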
79 
80 /* FX area alignment size */
81 #define FXSAVE_ALIGN 15
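/* (FXSAVE requires a 16-byte aligned buffer; 15 bytes of slack are enough to
    align a pool allocation that is only guaranteed to be 8-byte aligned) */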
82 
83 /* SUPPORT ROUTINES FOR MSVC COMPATIBILITY ***********************************/
84 
85 /* NSC/Cyrix CPU configuration register index */
86 #define CX86_CCR1 0xc1
87 
/* NSC/Cyrix CPU indexed register access functions (index port 0x22, data port 0x23) */
89 static __inline
90 UCHAR
91 getCx86(UCHAR reg)
92 {
93     WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
94     return READ_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23);
95 }
96 
97 static __inline
98 void
99 setCx86(UCHAR reg, UCHAR data)
100 {
101     WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
102     WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23, data);
103 }
104 
105 /* FUNCTIONS *****************************************************************/
106 
107 CODE_SEG("INIT")
108 ULONG
109 NTAPI
110 KiGetCpuVendor(VOID)
111 {
112     PKPRCB Prcb = KeGetCurrentPrcb();
113     CPU_INFO CpuInfo;
114 
115     /* Get the Vendor ID */
116     KiCpuId(&CpuInfo, 0);
117 
118     /* Copy it to the PRCB and null-terminate it */
119     *(ULONG*)&Prcb->VendorString[0] = CpuInfo.Ebx;
120     *(ULONG*)&Prcb->VendorString[4] = CpuInfo.Edx;
121     *(ULONG*)&Prcb->VendorString[8] = CpuInfo.Ecx;
122     Prcb->VendorString[12] = 0;
123 
124     /* Now check the CPU Type */
125     if (!strcmp(Prcb->VendorString, CmpIntelID))
126     {
127         return CPU_INTEL;
128     }
129     else if (!strcmp(Prcb->VendorString, CmpAmdID))
130     {
131         return CPU_AMD;
132     }
133     else if (!strcmp(Prcb->VendorString, CmpCyrixID))
134     {
135         DPRINT1("Cyrix CPU support not fully tested!\n");
136         return CPU_CYRIX;
137     }
138     else if (!strcmp(Prcb->VendorString, CmpTransmetaID))
139     {
140         DPRINT1("Transmeta CPU support not fully tested!\n");
141         return CPU_TRANSMETA;
142     }
143     else if (!strcmp(Prcb->VendorString, CmpCentaurID))
144     {
145         DPRINT1("Centaur CPU support not fully tested!\n");
146         return CPU_CENTAUR;
147     }
148     else if (!strcmp(Prcb->VendorString, CmpRiseID))
149     {
150         DPRINT1("Rise CPU support not fully tested!\n");
151         return CPU_RISE;
152     }
153 
154     /* Unknown CPU */
155     DPRINT1("%s CPU support not fully tested!\n", Prcb->VendorString);
156     return CPU_UNKNOWN;
157 }
158 
159 CODE_SEG("INIT")
160 VOID
161 NTAPI
162 KiSetProcessorType(VOID)
163 {
164     CPU_INFO CpuInfo;
165     CPU_SIGNATURE CpuSignature;
166     BOOLEAN ExtendModel;
167     ULONG Stepping, Type;
168 
169     /* Do CPUID 1 now */
170     KiCpuId(&CpuInfo, 1);
171 
172     /*
173      * Get the Stepping and Type. The stepping contains both the
174      * Model and the Step, while the Type contains the returned Family.
175      *
176      * For the stepping, we convert this: zzzzzzxy into this: x0y
177      */
178     CpuSignature.AsULONG = CpuInfo.Eax;
179     Stepping = CpuSignature.Model;
180     ExtendModel = (CpuSignature.Family == 15);
181 #if ( (NTDDI_VERSION >= NTDDI_WINXPSP2) && (NTDDI_VERSION < NTDDI_WS03) ) || (NTDDI_VERSION >= NTDDI_WS03SP1)
182     if (CpuSignature.Family == 6)
183     {
184         ULONG Vendor = KiGetCpuVendor();
185         ExtendModel |= (Vendor == CPU_INTEL);
186 #if (NTDDI_VERSION >= NTDDI_WIN8)
187         ExtendModel |= (Vendor == CPU_CENTAUR);
188 #endif
189     }
190 #endif
191     if (ExtendModel)
192     {
193         /* Add ExtendedModel to distinguish from non-extended values. */
194         Stepping |= (CpuSignature.ExtendedModel << 4);
195     }
196     Stepping = (Stepping << 8) | CpuSignature.Step;
197     Type = CpuSignature.Family;
198     if (CpuSignature.Family == 15)
199     {
200         /* Add ExtendedFamily to distinguish from non-extended values.
201          * It must not be larger than 0xF0 to avoid overflow. */
202         Type += min(CpuSignature.ExtendedFamily, 0xF0);
203     }
204 
205     /* Save them in the PRCB */
206     KeGetCurrentPrcb()->CpuID = TRUE;
207     KeGetCurrentPrcb()->CpuType = (UCHAR)Type;
208     KeGetCurrentPrcb()->CpuStep = (USHORT)Stepping;
209 }
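
/*
 * For example, an Intel CPU reporting a CPUID.1 EAX signature of 0x000006FB
 * (Family 6, Model 0xF, Step 0xB) is recorded as CpuType = 6 and
 * CpuStep = 0x0F0B by the conversion above.
 */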
210 
211 CODE_SEG("INIT")
212 ULONG
213 NTAPI
214 KiGetFeatureBits(VOID)
215 {
216     PKPRCB Prcb = KeGetCurrentPrcb();
217     ULONG Vendor;
218     ULONG FeatureBits = KF_WORKING_PTE;
219     CPU_INFO CpuInfo, DummyCpuInfo;
220     UCHAR Ccr1;
221     BOOLEAN ExtendedCPUID = TRUE;
222     ULONG CpuFeatures = 0;
223 
224     /* Get the Vendor ID */
225     Vendor = KiGetCpuVendor();
226 
227     /* Make sure we got a valid vendor ID at least. */
228     if (!Vendor) return FeatureBits;
229 
230     /* Get the CPUID Info. Features are in Reg[3]. */
231     KiCpuId(&CpuInfo, 1);
232 
233     /* Set the initial APIC ID */
234     Prcb->InitialApicId = (UCHAR)(CpuInfo.Ebx >> 24);
235 
236     switch (Vendor)
237     {
238         /* Intel CPUs */
239         case CPU_INTEL:
240 
241             /* Check if it's a P6 */
242             if (Prcb->CpuType == 6)
243             {
244                 /* Perform the special sequence to get the MicroCode Signature */
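                /* (MSR 0x8B is IA32_BIOS_SIGN_ID: write zero, execute CPUID
                    function 1, then read the signature from the MSR) */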
245                 __writemsr(0x8B, 0);
246                 KiCpuId(&DummyCpuInfo, 1);
247                 Prcb->UpdateSignature.QuadPart = __readmsr(0x8B);
248             }
249             else if (Prcb->CpuType == 5)
250             {
251                 /* On P5, enable workaround for the LOCK errata. */
252                 KiI386PentiumLockErrataPresent = TRUE;
253             }
254 
255             /* Check for broken P6 with bad SMP PTE implementation */
256             if (((CpuInfo.Eax & 0x0FF0) == 0x0610 && (CpuInfo.Eax & 0x000F) <= 0x9) ||
257                 ((CpuInfo.Eax & 0x0FF0) == 0x0630 && (CpuInfo.Eax & 0x000F) <= 0x4))
258             {
259                 /* Remove support for correct PTE support. */
260                 FeatureBits &= ~KF_WORKING_PTE;
261             }
262 
263             /* Check if the CPU is too old to support SYSENTER */
264             if ((Prcb->CpuType < 6) ||
265                 ((Prcb->CpuType == 6) && (Prcb->CpuStep < 0x0303)))
266             {
267                 /* Disable it */
268                 CpuInfo.Edx &= ~0x800;
269             }
270 
271             break;
272 
273         /* AMD CPUs */
274         case CPU_AMD:
275 
276             /* Check if this is a K5 or K6. (family 5) */
277             if ((CpuInfo.Eax & 0x0F00) == 0x0500)
278             {
279                 /* Get the Model Number */
280                 switch (CpuInfo.Eax & 0x00F0)
281                 {
282                     /* Model 1: K5 - 5k86 (initial models) */
283                     case 0x0010:
284 
                        /* Steps above 3 support PGE; earlier steps fall through and get it disabled */
286                         if ((CpuInfo.Eax & 0x000F) > 0x03) break;
287 
288                     /* Model 0: K5 - SSA5 */
289                     case 0x0000:
290 
291                         /* Model 0 doesn't support PGE at all. */
292                         CpuInfo.Edx &= ~0x2000;
293                         break;
294 
295                     /* Model 8: K6-2 */
296                     case 0x0080:
297 
298                         /* K6-2, Step 8 and over have support for MTRR. */
299                         if ((CpuInfo.Eax & 0x000F) >= 0x8) FeatureBits |= KF_AMDK6MTRR;
300                         break;
301 
302                     /* Model 9: K6-III
303                        Model D: K6-2+, K6-III+ */
304                     case 0x0090:
305                     case 0x00D0:
306 
307                         FeatureBits |= KF_AMDK6MTRR;
308                         break;
309                 }
310             }
            else if ((CpuInfo.Eax & 0x0F00) < 0x0500)
312             {
313                 /* Families below 5 don't support PGE, PSE or CMOV at all */
314                 CpuInfo.Edx &= ~(0x08 | 0x2000 | 0x8000);
315 
316                 /* They also don't support advanced CPUID functions. */
317                 ExtendedCPUID = FALSE;
318             }
319 
320             break;
321 
322         /* Cyrix CPUs */
323         case CPU_CYRIX:
324 
            /* Work around the "COMA" bug on the Cyrix 6x86 family */
326             if (Prcb->CpuType == 6 &&
327                 Prcb->CpuStep <= 1)
328             {
329                 /* Get CCR1 value */
330                 Ccr1 = getCx86(CX86_CCR1);
331 
332                 /* Enable the NO_LOCK bit */
333                 Ccr1 |= 0x10;
334 
335                 /* Set the new CCR1 value */
336                 setCx86(CX86_CCR1, Ccr1);
337             }
338 
339             break;
340 
341         /* Transmeta CPUs */
342         case CPU_TRANSMETA:
343 
            /* Enable CMPXCHG8B if family >= 5 and model/stepping >= 4.2 support it */
345             if ((CpuInfo.Eax & 0x0FFF) >= 0x0542)
346             {
347                 __writemsr(0x80860004, __readmsr(0x80860004) | 0x0100);
348                 FeatureBits |= KF_CMPXCHG8B;
349             }
350 
351             break;
352 
353         /* Centaur, IDT, Rise and VIA CPUs */
354         case CPU_CENTAUR:
355         case CPU_RISE:
356 
357             /* These CPUs don't report the presence of CMPXCHG8B through CPUID.
358                However, this feature exists and operates properly without any additional steps. */
359             FeatureBits |= KF_CMPXCHG8B;
360 
361             break;
362     }
363 
364     /* Set the current features */
365     CpuFeatures = CpuInfo.Edx;
366 
367     /* Convert all CPUID Feature bits into our format */
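    /* (CPUID.1 EDX bits decoded here: 1 = VME, 3 = PSE, 4 = TSC, 8 = CX8,
        11 = SEP, 12 = MTRR, 13 = PGE, 15 = CMOV, 16 = PAT, 21 = DS,
        23 = MMX, 24 = FXSR, 25 = SSE, 26 = SSE2) */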
368     if (CpuFeatures & 0x00000002) FeatureBits |= KF_V86_VIS | KF_CR4;
369     if (CpuFeatures & 0x00000008) FeatureBits |= KF_LARGE_PAGE | KF_CR4;
370     if (CpuFeatures & 0x00000010) FeatureBits |= KF_RDTSC;
371     if (CpuFeatures & 0x00000100) FeatureBits |= KF_CMPXCHG8B;
372     if (CpuFeatures & 0x00000800) FeatureBits |= KF_FAST_SYSCALL;
373     if (CpuFeatures & 0x00001000) FeatureBits |= KF_MTRR;
374     if (CpuFeatures & 0x00002000) FeatureBits |= KF_GLOBAL_PAGE | KF_CR4;
375     if (CpuFeatures & 0x00008000) FeatureBits |= KF_CMOV;
376     if (CpuFeatures & 0x00010000) FeatureBits |= KF_PAT;
377     if (CpuFeatures & 0x00200000) FeatureBits |= KF_DTS;
378     if (CpuFeatures & 0x00800000) FeatureBits |= KF_MMX;
379     if (CpuFeatures & 0x01000000) FeatureBits |= KF_FXSR;
380     if (CpuFeatures & 0x02000000) FeatureBits |= KF_XMMI;
381     if (CpuFeatures & 0x04000000) FeatureBits |= KF_XMMI64;
382 
383     if (CpuFeatures & 0x00000040)
384     {
        DPRINT1("PAE is supported\n");
386     }
387 
388     /* Check if the CPU has hyper-threading */
389     if (CpuFeatures & 0x10000000)
390     {
391         /* Set the number of logical CPUs */
392         Prcb->LogicalProcessorsPerPhysicalProcessor = (UCHAR)(CpuInfo.Ebx >> 16);
393         if (Prcb->LogicalProcessorsPerPhysicalProcessor > 1)
394         {
            /* More than one logical processor per physical processor (SMT) */
396             KiSMTProcessorsPresent = TRUE;
397         }
398     }
399     else
400     {
401         /* We only have a single CPU */
402         Prcb->LogicalProcessorsPerPhysicalProcessor = 1;
403     }
404 
405     /* Check if CPUID 0x80000000 is supported */
406     if (ExtendedCPUID)
407     {
408         /* Do the call */
409         KiCpuId(&CpuInfo, 0x80000000);
410         if ((CpuInfo.Eax & 0xffffff00) == 0x80000000)
411         {
412             /* Check if CPUID 0x80000001 is supported */
413             if (CpuInfo.Eax >= 0x80000001)
414             {
415                 /* Check which extended features are available. */
416                 KiCpuId(&CpuInfo, 0x80000001);
417 
418                 /* Check if NX-bit is supported */
419                 if (CpuInfo.Edx & 0x00100000) FeatureBits |= KF_NX_BIT;
420 
                /* Now handle vendor-specific extended features */
422                 switch (Vendor)
423                 {
424                     case CPU_AMD:
425                     case CPU_CENTAUR:
426                         if (CpuInfo.Edx & 0x80000000) FeatureBits |= KF_3DNOW;
427                         break;
428                 }
429             }
430         }
431     }
432 
433 #define print_supported(kf_value) ((FeatureBits & kf_value) ? #kf_value : "")
434     DPRINT1("Supported CPU features : %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s %s\n",
435     print_supported(KF_V86_VIS),
436     print_supported(KF_RDTSC),
437     print_supported(KF_CR4),
438     print_supported(KF_CMOV),
439     print_supported(KF_GLOBAL_PAGE),
440     print_supported(KF_LARGE_PAGE),
441     print_supported(KF_MTRR),
442     print_supported(KF_CMPXCHG8B),
443     print_supported(KF_MMX),
444     print_supported(KF_WORKING_PTE),
445     print_supported(KF_PAT),
446     print_supported(KF_FXSR),
447     print_supported(KF_FAST_SYSCALL),
448     print_supported(KF_XMMI),
449     print_supported(KF_3DNOW),
450     print_supported(KF_AMDK6MTRR),
451     print_supported(KF_XMMI64),
452     print_supported(KF_DTS),
453     print_supported(KF_NX_BIT),
454     print_supported(KF_NX_DISABLED),
455     print_supported(KF_NX_ENABLED));
456 #undef print_supported
457 
458     /* Return the Feature Bits */
459     return FeatureBits;
460 }
461 
462 CODE_SEG("INIT")
463 VOID
464 NTAPI
465 KiGetCacheInformation(VOID)
466 {
467     PKIPCR Pcr = (PKIPCR)KeGetPcr();
468     CPU_INFO CpuInfo;
469     ULONG CacheRequests = 0, i;
470     ULONG CurrentRegister;
471     UCHAR RegisterByte, Associativity = 0;
472     ULONG Size, CacheLine = 64, CurrentSize = 0;
473     BOOLEAN FirstPass = TRUE;
474 
475     /* Set default L2 size */
476     Pcr->SecondLevelCacheSize = 0;
477 
478     /* Check the Vendor ID */
479     switch (KiGetCpuVendor())
480     {
481         /* Handle Intel case */
482         case CPU_INTEL:
483 
            /* Check if we support CPUID 2 */
485             KiCpuId(&CpuInfo, 0);
486             if (CpuInfo.Eax >= 2)
487             {
488                 /* We need to loop for the number of times CPUID will tell us to */
489                 do
490                 {
491                     /* Do the CPUID call */
492                     KiCpuId(&CpuInfo, 2);
493 
494                     /* Check if it was the first call */
495                     if (FirstPass)
496                     {
497                         /*
498                          * The number of times to loop is the first byte. Read
499                          * it and then destroy it so we don't get confused.
500                          */
501                         CacheRequests = CpuInfo.Eax & 0xFF;
502                         CpuInfo.Eax &= 0xFFFFFF00;
503 
504                         /* Don't go over this again */
505                         FirstPass = FALSE;
506                     }
507 
508                     /* Loop all 4 registers */
509                     for (i = 0; i < 4; i++)
510                     {
511                         /* Get the current register */
512                         CurrentRegister = CpuInfo.AsUINT32[i];
513 
514                         /*
515                          * If the upper bit is set, then this register should
516                          * be skipped.
517                          */
518                         if (CurrentRegister & 0x80000000) continue;
519 
520                         /* Keep looping for every byte inside this register */
521                         while (CurrentRegister)
522                         {
                            /* Read the next descriptor byte; skip empty (zero) ones */
524                             RegisterByte = (UCHAR)(CurrentRegister & 0xFF);
525                             CurrentRegister >>= 8;
526                             if (!RegisterByte) continue;
527 
528                             Size = 0;
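                            /* (RegisterByte is an Intel CPUID leaf 2 cache/TLB
                                descriptor; only the descriptors relevant to the
                                L2 cache and prefetch granularity are decoded below) */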
529                             switch (RegisterByte)
530                             {
531                                 case 0x06:
532                                 case 0x08:
533                                     KePrefetchNTAGranularity = 32;
534                                     break;
535                                 case 0x09:
536                                     KePrefetchNTAGranularity = 64;
537                                     break;
538                                 case 0x0a:
539                                 case 0x0c:
540                                     KePrefetchNTAGranularity = 32;
541                                     break;
542                                 case 0x0d:
543                                 case 0x0e:
544                                     KePrefetchNTAGranularity = 64;
545                                     break;
546                                 case 0x1d:
547                                     Size = 128 * 1024;
548                                     Associativity = 2;
549                                     break;
550                                 case 0x21:
551                                     Size = 256 * 1024;
552                                     Associativity = 8;
553                                     break;
554                                 case 0x24:
555                                     Size = 1024 * 1024;
556                                     Associativity = 16;
557                                     break;
558                                 case 0x2c:
559                                 case 0x30:
560                                     KePrefetchNTAGranularity = 64;
561                                     break;
562                                 case 0x41:
563                                 case 0x42:
564                                 case 0x43:
565                                 case 0x44:
566                                 case 0x45:
567                                     Size = (1 << (RegisterByte - 0x41)) * 128 * 1024;
568                                     Associativity = 4;
569                                     break;
570                                 case 0x48:
571                                     Size = 3 * 1024 * 1024;
572                                     Associativity = 12;
573                                     break;
574                                 case 0x49:
575                                     Size = 4 * 1024 * 1024;
576                                     Associativity = 16;
577                                     break;
578                                 case 0x4e:
579                                     Size = 6 * 1024 * 1024;
580                                     Associativity = 24;
581                                     break;
582                                 case 0x60:
583                                 case 0x66:
584                                 case 0x67:
585                                 case 0x68:
586                                     KePrefetchNTAGranularity = 64;
587                                     break;
588                                 case 0x78:
589                                     Size = 1024 * 1024;
590                                     Associativity = 4;
591                                     break;
592                                 case 0x79:
593                                 case 0x7a:
594                                 case 0x7b:
595                                 case 0x7c:
596                                 case 0x7d:
597                                     Size = (1 << (RegisterByte - 0x79)) * 128 * 1024;
598                                     Associativity = 8;
599                                     break;
600                                 case 0x7f:
601                                     Size = 512 * 1024;
602                                     Associativity = 2;
603                                     break;
604                                 case 0x80:
605                                     Size = 512 * 1024;
606                                     Associativity = 8;
607                                     break;
608                                 case 0x82:
609                                 case 0x83:
610                                 case 0x84:
611                                 case 0x85:
612                                     Size = (1 << (RegisterByte - 0x82)) * 256 * 1024;
613                                     Associativity = 8;
614                                     break;
615                                 case 0x86:
616                                     Size = 512 * 1024;
617                                     Associativity = 4;
618                                     break;
619                                 case 0x87:
620                                     Size = 1024 * 1024;
621                                     Associativity = 8;
622                                     break;
623                                 case 0xf0:
624                                     KePrefetchNTAGranularity = 64;
625                                     break;
626                                 case 0xf1:
627                                     KePrefetchNTAGranularity = 128;
628                                     break;
629                             }
630                             if (Size && (Size / Associativity) > CurrentSize)
631                             {
632                                 /* Set the L2 Cache Size and Associativity */
633                                 CurrentSize = Size / Associativity;
634                                 Pcr->SecondLevelCacheSize = Size;
635                                 Pcr->SecondLevelCacheAssociativity = Associativity;
636                             }
637                         }
638                     }
639                 } while (--CacheRequests);
640             }
641             break;
642 
643         case CPU_AMD:
644 
645             /* Check if we support CPUID 0x80000005 */
646             KiCpuId(&CpuInfo, 0x80000000);
647             if (CpuInfo.Eax >= 0x80000005)
648             {
649                 /* Get L1 size first */
650                 KiCpuId(&CpuInfo, 0x80000005);
651                 KePrefetchNTAGranularity = CpuInfo.Ecx & 0xFF;
652 
653                 /* Check if we support CPUID 0x80000006 */
654                 KiCpuId(&CpuInfo, 0x80000000);
655                 if (CpuInfo.Eax >= 0x80000006)
656                 {
657                     /* Get 2nd level cache and tlb size */
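                    /* (CPUID 0x80000006 ECX layout: [31:16] L2 size in KB,
                        [15:12] associativity code, [7:0] line size in bytes) */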
658                     KiCpuId(&CpuInfo, 0x80000006);
659 
660                     /* Cache line size */
661                     CacheLine = CpuInfo.Ecx & 0xFF;
662 
                    /* Decode the associativity field */
664                     RegisterByte = (CpuInfo.Ecx >> 12) & 0xFF;
665                     switch (RegisterByte)
666                     {
667                         case 2:
668                             Associativity = 2;
669                             break;
670 
671                         case 4:
672                             Associativity = 4;
673                             break;
674 
675                         case 6:
676                             Associativity = 8;
677                             break;
678 
679                         case 8:
680                         case 15:
681                             Associativity = 16;
682                             break;
683 
684                         default:
685                             Associativity = 1;
686                             break;
687                     }
688 
689                     /* Compute size */
690                     Size = (CpuInfo.Ecx >> 16) << 10;
691 
                    /* Hack for Model 6, Stepping 0x300 */
693                     if ((KeGetCurrentPrcb()->CpuType == 6) &&
694                         (KeGetCurrentPrcb()->CpuStep == 0x300))
695                     {
696                         /* Stick 64K in there */
697                         Size = 64 * 1024;
698                     }
699 
700                     /* Set the L2 Cache Size and associativity */
701                     Pcr->SecondLevelCacheSize = Size;
702                     Pcr->SecondLevelCacheAssociativity = Associativity;
703                 }
704             }
705             break;
706 
707         case CPU_CYRIX:
708         case CPU_TRANSMETA:
709         case CPU_CENTAUR:
710         case CPU_RISE:
711 
712             /* FIXME */
713             break;
714     }
715 
716     /* Set the cache line */
717     if (CacheLine > KeLargestCacheLine) KeLargestCacheLine = CacheLine;
718     DPRINT1("Prefetch Cache: %lu bytes\tL2 Cache: %lu bytes\tL2 Cache Line: %lu bytes\tL2 Cache Associativity: %lu\n",
719             KePrefetchNTAGranularity,
720             Pcr->SecondLevelCacheSize,
721             KeLargestCacheLine,
722             Pcr->SecondLevelCacheAssociativity);
723 }
724 
725 CODE_SEG("INIT")
726 VOID
727 NTAPI
728 KiSetCR0Bits(VOID)
729 {
730     ULONG Cr0;
731 
732     /* Save current CR0 */
733     Cr0 = __readcr0();
734 
    /* If this is a 486 or later, enable Write-Protection */
736     if (KeGetCurrentPrcb()->CpuType > 3) Cr0 |= CR0_WP;
737 
738     /* Set new Cr0 */
739     __writecr0(Cr0);
740 }
741 
742 CODE_SEG("INIT")
743 VOID
744 NTAPI
745 KiInitializeTSS2(IN PKTSS Tss,
746                  IN PKGDTENTRY TssEntry OPTIONAL)
747 {
748     PUCHAR p;
749 
750     /* Make sure the GDT Entry is valid */
751     if (TssEntry)
752     {
753         /* Set the Limit */
754         TssEntry->LimitLow = sizeof(KTSS) - 1;
755         TssEntry->HighWord.Bits.LimitHi = 0;
756     }
757 
758     /* Now clear the I/O Map */
759     ASSERT(IOPM_COUNT == 1);
760     RtlFillMemory(Tss->IoMaps[0].IoMap, IOPM_FULL_SIZE, 0xFF);
761 
762     /* Initialize Interrupt Direction Maps */
763     p = (PUCHAR)(Tss->IoMaps[0].DirectionMap);
764     RtlZeroMemory(p, IOPM_DIRECTION_MAP_SIZE);
765 
766     /* Add DPMI support for interrupts */
767     p[0] = 4;
768     p[3] = 0x18;
769     p[4] = 0x18;
770 
771     /* Initialize the default Interrupt Direction Map */
772     p = Tss->IntDirectionMap;
773     RtlZeroMemory(Tss->IntDirectionMap, IOPM_DIRECTION_MAP_SIZE);
774 
775     /* Add DPMI support */
776     p[0] = 4;
777     p[3] = 0x18;
778     p[4] = 0x18;
779 }
780 
781 VOID
782 NTAPI
783 KiInitializeTSS(IN PKTSS Tss)
784 {
785     /* Set an invalid map base */
786     Tss->IoMapBase = KiComputeIopmOffset(IO_ACCESS_MAP_NONE);
787 
788     /* Disable traps during Task Switches */
789     Tss->Flags = 0;
790 
791     /* Set LDT and Ring 0 SS */
792     Tss->LDT = 0;
793     Tss->Ss0 = KGDT_R0_DATA;
794 }
795 
796 CODE_SEG("INIT")
797 VOID
798 FASTCALL
799 Ki386InitializeTss(IN PKTSS Tss,
800                    IN PKIDTENTRY Idt,
801                    IN PKGDTENTRY Gdt)
802 {
803     PKGDTENTRY TssEntry, TaskGateEntry;
804 
805     /* Initialize the boot TSS. */
806     TssEntry = &Gdt[KGDT_TSS / sizeof(KGDTENTRY)];
807     TssEntry->HighWord.Bits.Type = I386_TSS;
808     TssEntry->HighWord.Bits.Pres = 1;
809     TssEntry->HighWord.Bits.Dpl = 0;
810     KiInitializeTSS2(Tss, TssEntry);
811     KiInitializeTSS(Tss);
812 
813     /* Load the task register */
814     Ke386SetTr(KGDT_TSS);
815 
816     /* Setup the Task Gate for Double Fault Traps */
817     TaskGateEntry = (PKGDTENTRY)&Idt[8];
818     TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
819     TaskGateEntry->HighWord.Bits.Pres = 1;
820     TaskGateEntry->HighWord.Bits.Dpl = 0;
821     ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_DF_TSS;
822 
823     /* Initialize the TSS used for handling double faults. */
824     Tss = (PKTSS)KiDoubleFaultTSS;
825     KiInitializeTSS(Tss);
826     Tss->CR3 = __readcr3();
827     Tss->Esp0 = KiDoubleFaultStack;
828     Tss->Esp = KiDoubleFaultStack;
829     Tss->Eip = PtrToUlong(KiTrap08);
830     Tss->Cs = KGDT_R0_CODE;
831     Tss->Fs = KGDT_R0_PCR;
832     Tss->Ss = Ke386GetSs();
833     Tss->Es = KGDT_R3_DATA | RPL_MASK;
834     Tss->Ds = KGDT_R3_DATA | RPL_MASK;
835 
836     /* Setup the Double Trap TSS entry in the GDT */
837     TssEntry = &Gdt[KGDT_DF_TSS / sizeof(KGDTENTRY)];
838     TssEntry->HighWord.Bits.Type = I386_TSS;
839     TssEntry->HighWord.Bits.Pres = 1;
840     TssEntry->HighWord.Bits.Dpl = 0;
841     TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
842     TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
843     TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
844     TssEntry->LimitLow = KTSS_IO_MAPS;
845 
846     /* Now setup the NMI Task Gate */
847     TaskGateEntry = (PKGDTENTRY)&Idt[2];
848     TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
849     TaskGateEntry->HighWord.Bits.Pres = 1;
850     TaskGateEntry->HighWord.Bits.Dpl = 0;
851     ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_NMI_TSS;
852 
853     /* Initialize the actual TSS */
854     Tss = (PKTSS)KiNMITSS;
855     KiInitializeTSS(Tss);
856     Tss->CR3 = __readcr3();
857     Tss->Esp0 = KiDoubleFaultStack;
858     Tss->Esp = KiDoubleFaultStack;
859     Tss->Eip = PtrToUlong(KiTrap02);
860     Tss->Cs = KGDT_R0_CODE;
861     Tss->Fs = KGDT_R0_PCR;
862     Tss->Ss = Ke386GetSs();
863     Tss->Es = KGDT_R3_DATA | RPL_MASK;
864     Tss->Ds = KGDT_R3_DATA | RPL_MASK;
865 
866     /* And its associated TSS Entry */
867     TssEntry = &Gdt[KGDT_NMI_TSS / sizeof(KGDTENTRY)];
868     TssEntry->HighWord.Bits.Type = I386_TSS;
869     TssEntry->HighWord.Bits.Pres = 1;
870     TssEntry->HighWord.Bits.Dpl = 0;
871     TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
872     TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
873     TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
874     TssEntry->LimitLow = KTSS_IO_MAPS;
875 }
876 
877 VOID
878 NTAPI
879 KeFlushCurrentTb(VOID)
880 {
881 
882 #if !defined(_GLOBAL_PAGES_ARE_AWESOME_)
883 
884     /* Flush the TLB by resetting CR3 */
885     __writecr3(__readcr3());
886 
887 #else
888 
889     /* Check if global pages are enabled */
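    /* (A plain CR3 reload does not evict TLB entries marked global, so CR4.PGE
        has to be toggled to flush those as well) */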
890     if (KeFeatureBits & KF_GLOBAL_PAGE)
891     {
892         ULONG Cr4;
893 
894         /* Disable PGE (Note: may not have been enabled yet) */
895         Cr4 = __readcr4();
896         __writecr4(Cr4 & ~CR4_PGE);
897 
898         /* Flush everything */
899         __writecr3(__readcr3());
900 
901         /* Re-enable PGE */
902         __writecr4(Cr4);
903     }
904     else
905     {
906         /* No global pages, resetting CR3 is enough */
907         __writecr3(__readcr3());
908     }
909 
910 #endif
911 
912 }
913 
914 VOID
915 NTAPI
916 KiRestoreProcessorControlState(PKPROCESSOR_STATE ProcessorState)
917 {
918     PKGDTENTRY TssEntry;
919 
920     //
921     // Restore the CR registers
922     //
923     __writecr0(ProcessorState->SpecialRegisters.Cr0);
924     Ke386SetCr2(ProcessorState->SpecialRegisters.Cr2);
925     __writecr3(ProcessorState->SpecialRegisters.Cr3);
926     if (KeFeatureBits & KF_CR4) __writecr4(ProcessorState->SpecialRegisters.Cr4);
927 
928     //
929     // Restore the DR registers
930     //
931     __writedr(0, ProcessorState->SpecialRegisters.KernelDr0);
932     __writedr(1, ProcessorState->SpecialRegisters.KernelDr1);
933     __writedr(2, ProcessorState->SpecialRegisters.KernelDr2);
934     __writedr(3, ProcessorState->SpecialRegisters.KernelDr3);
935     __writedr(6, ProcessorState->SpecialRegisters.KernelDr6);
936     __writedr(7, ProcessorState->SpecialRegisters.KernelDr7);
937 
938     //
939     // Restore GDT and IDT
940     //
941     Ke386SetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
942     __lidt(&ProcessorState->SpecialRegisters.Idtr.Limit);
943 
944     //
945     // Clear the busy flag so we don't crash if we reload the same selector
946     //
947     TssEntry = (PKGDTENTRY)(ProcessorState->SpecialRegisters.Gdtr.Base +
948                             ProcessorState->SpecialRegisters.Tr);
949     TssEntry->HighWord.Bytes.Flags1 &= ~0x2;
950 
951     //
952     // Restore TSS and LDT
953     //
954     Ke386SetTr(ProcessorState->SpecialRegisters.Tr);
955     Ke386SetLocalDescriptorTable(ProcessorState->SpecialRegisters.Ldtr);
956 }
957 
958 VOID
959 NTAPI
960 KiSaveProcessorControlState(OUT PKPROCESSOR_STATE ProcessorState)
961 {
962     /* Save the CR registers */
963     ProcessorState->SpecialRegisters.Cr0 = __readcr0();
964     ProcessorState->SpecialRegisters.Cr2 = __readcr2();
965     ProcessorState->SpecialRegisters.Cr3 = __readcr3();
966     ProcessorState->SpecialRegisters.Cr4 = (KeFeatureBits & KF_CR4) ?
967                                            __readcr4() : 0;
968 
969     /* Save the DR registers */
970     ProcessorState->SpecialRegisters.KernelDr0 = __readdr(0);
971     ProcessorState->SpecialRegisters.KernelDr1 = __readdr(1);
972     ProcessorState->SpecialRegisters.KernelDr2 = __readdr(2);
973     ProcessorState->SpecialRegisters.KernelDr3 = __readdr(3);
974     ProcessorState->SpecialRegisters.KernelDr6 = __readdr(6);
975     ProcessorState->SpecialRegisters.KernelDr7 = __readdr(7);
976     __writedr(7, 0);
977 
978     /* Save GDT, IDT, LDT and TSS */
979     Ke386GetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
980     __sidt(&ProcessorState->SpecialRegisters.Idtr.Limit);
981     ProcessorState->SpecialRegisters.Tr = Ke386GetTr();
982     Ke386GetLocalDescriptorTable(&ProcessorState->SpecialRegisters.Ldtr);
983 }
984 
985 CODE_SEG("INIT")
986 VOID
987 NTAPI
988 KiInitializeMachineType(VOID)
989 {
990     /* Set the Machine Type we got from NTLDR */
991     KeI386MachineType = KeLoaderBlock->u.I386.MachineType & 0x000FF;
992 }
993 
994 CODE_SEG("INIT")
995 ULONG_PTR
996 NTAPI
997 KiLoadFastSyscallMachineSpecificRegisters(IN ULONG_PTR Context)
998 {
    /* Set the SYSENTER CS and ESP (IA32_SYSENTER_CS / IA32_SYSENTER_ESP) */
1000     __writemsr(0x174, KGDT_R0_CODE);
1001     __writemsr(0x175, (ULONG_PTR)KeGetCurrentPrcb()->DpcStack);
1002 
    /* Set the SYSENTER entry point (IA32_SYSENTER_EIP) */
1004     __writemsr(0x176, (ULONG_PTR)KiFastCallEntry);
1005     return 0;
1006 }
1007 
1008 CODE_SEG("INIT")
1009 VOID
1010 NTAPI
1011 KiRestoreFastSyscallReturnState(VOID)
1012 {
1013     /* Check if the CPU Supports fast system call */
1014     if (KeFeatureBits & KF_FAST_SYSCALL)
1015     {
1016         /* Check if it has been disabled */
1017         if (KiFastSystemCallDisable)
1018         {
1019             /* Disable fast system call */
1020             KeFeatureBits &= ~KF_FAST_SYSCALL;
1021             KiFastCallExitHandler = KiSystemCallTrapReturn;
1022             DPRINT1("Support for SYSENTER disabled.\n");
1023         }
1024         else
1025         {
1026             /* Do an IPI to enable it */
1027             KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);
1028 
1029             /* It's enabled, so use the proper exit stub */
1030             KiFastCallExitHandler = KiSystemCallSysExitReturn;
1031             DPRINT("Support for SYSENTER detected.\n");
1032         }
1033     }
1034     else
1035     {
1036         /* Use the IRET handler */
1037         KiFastCallExitHandler = KiSystemCallTrapReturn;
1038         DPRINT1("No support for SYSENTER detected.\n");
1039     }
1040 }
1041 
1042 CODE_SEG("INIT")
1043 ULONG_PTR
1044 NTAPI
1045 Ki386EnableDE(IN ULONG_PTR Context)
1046 {
1047     /* Enable DE */
1048     __writecr4(__readcr4() | CR4_DE);
1049     return 0;
1050 }
1051 
1052 CODE_SEG("INIT")
1053 ULONG_PTR
1054 NTAPI
1055 Ki386EnableFxsr(IN ULONG_PTR Context)
1056 {
1057     /* Enable FXSR */
1058     __writecr4(__readcr4() | CR4_FXSR);
1059     return 0;
1060 }
1061 
1062 CODE_SEG("INIT")
1063 ULONG_PTR
1064 NTAPI
1065 Ki386EnableXMMIExceptions(IN ULONG_PTR Context)
1066 {
1067     PKIDTENTRY IdtEntry;
1068 
1069     /* Get the IDT Entry for Interrupt 0x13 */
1070     IdtEntry = &((PKIPCR)KeGetPcr())->IDT[0x13];
1071 
1072     /* Set it up */
1073     IdtEntry->Selector = KGDT_R0_CODE;
1074     IdtEntry->Offset = ((ULONG_PTR)KiTrap13 & 0xFFFF);
1075     IdtEntry->ExtendedOffset = ((ULONG_PTR)KiTrap13 >> 16) & 0xFFFF;
1076     ((PKIDT_ACCESS)&IdtEntry->Access)->Dpl = 0;
1077     ((PKIDT_ACCESS)&IdtEntry->Access)->Present = 1;
1078     ((PKIDT_ACCESS)&IdtEntry->Access)->SegmentType = I386_INTERRUPT_GATE;
1079 
1080     /* Enable XMMI exceptions */
1081     __writecr4(__readcr4() | CR4_XMMEXCPT);
1082     return 0;
1083 }
1084 
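/*
 * Workaround for the Pentium LOCK ("F00F") errata: the IDT is copied so that
 * the descriptors for the first seven vectors end up on a write-protected
 * page, producing a catchable fault instead of a processor hang when the
 * invalid LOCK CMPXCHG8B encoding is executed.
 */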
1085 CODE_SEG("INIT")
1086 VOID
1087 NTAPI
1088 KiI386PentiumLockErrataFixup(VOID)
1089 {
1090     KDESCRIPTOR IdtDescriptor = {0, 0, 0};
1091     PKIDTENTRY NewIdt, NewIdt2;
1092     PMMPTE PointerPte;
1093 
1094     /* Allocate memory for a new IDT */
1095     NewIdt = ExAllocatePool(NonPagedPool, 2 * PAGE_SIZE);
1096 
1097     /* Put everything after the first 7 entries on a new page */
1098     NewIdt2 = (PVOID)((ULONG_PTR)NewIdt + PAGE_SIZE - (7 * sizeof(KIDTENTRY)));
1099 
1100     /* Disable interrupts */
1101     _disable();
1102 
1103     /* Get the current IDT and copy it */
1104     __sidt(&IdtDescriptor.Limit);
1105     RtlCopyMemory(NewIdt2,
1106                   (PVOID)IdtDescriptor.Base,
1107                   IdtDescriptor.Limit + 1);
1108     IdtDescriptor.Base = (ULONG)NewIdt2;
1109 
1110     /* Set the new IDT */
1111     __lidt(&IdtDescriptor.Limit);
1112     ((PKIPCR)KeGetPcr())->IDT = NewIdt2;
1113 
1114     /* Restore interrupts */
1115     _enable();
1116 
1117     /* Set the first 7 entries as read-only to produce a fault */
1118     PointerPte = MiAddressToPte(NewIdt);
1119     ASSERT(PointerPte->u.Hard.Write == 1);
1120     PointerPte->u.Hard.Write = 0;
1121     KeInvalidateTlbEntry(NewIdt);
1122 }
1123 
1124 BOOLEAN
1125 NTAPI
1126 KeInvalidateAllCaches(VOID)
1127 {
1128     /* Only supported on Pentium Pro and higher */
1129     if (KeI386CpuType < 6) return FALSE;
1130 
1131     /* Invalidate all caches */
1132     __wbinvd();
1133     return TRUE;
1134 }
1135 
1136 VOID
1137 NTAPI
1138 KiSaveProcessorState(IN PKTRAP_FRAME TrapFrame,
1139                      IN PKEXCEPTION_FRAME ExceptionFrame)
1140 {
1141     PKPRCB Prcb = KeGetCurrentPrcb();
1142 
1143     //
1144     // Save full context
1145     //
1146     Prcb->ProcessorState.ContextFrame.ContextFlags = CONTEXT_FULL |
1147                                                      CONTEXT_DEBUG_REGISTERS;
1148     KeTrapFrameToContext(TrapFrame, NULL, &Prcb->ProcessorState.ContextFrame);
1149 
1150     //
1151     // Save control registers
1152     //
1153     KiSaveProcessorControlState(&Prcb->ProcessorState);
1154 }
1155 
1156 CODE_SEG("INIT")
1157 BOOLEAN
1158 NTAPI
1159 KiIsNpxErrataPresent(VOID)
1160 {
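    /* These are the classic Pentium FDIV-bug test values: on an affected FPU,
       (4195835 / 3145727) * 3145727 does not give back 4195835 */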
1161     static double Value1 = 4195835.0, Value2 = 3145727.0;
1162     INT ErrataPresent;
1163     ULONG Cr0;
1164 
1165     /* Interrupts have to be disabled here. */
1166     ASSERT(!(__readeflags() & EFLAGS_INTERRUPT_MASK));
1167 
1168     /* Read CR0 and remove FPU flags */
1169     Cr0 = __readcr0();
1170     __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1171 
1172     /* Initialize FPU state */
1173     Ke386FnInit();
1174 
    /* Divide and multiply back the magic values; on a correct FPU the difference computed below is zero */
1176 #ifdef __GNUC__
1177     __asm__ __volatile__
1178     (
1179         "fldl %1\n\t"
1180         "fdivl %2\n\t"
1181         "fmull %2\n\t"
1182         "fldl %1\n\t"
1183         "fsubp\n\t"
1184         "fistpl %0\n\t"
1185         : "=m" (ErrataPresent)
1186         : "m" (Value1),
1187           "m" (Value2)
1188     );
1189 #else
1190     __asm
1191     {
1192         fld Value1
1193         fdiv Value2
1194         fmul Value2
1195         fld Value1
1196         fsubp st(1), st(0)
1197         fistp ErrataPresent
1198     };
1199 #endif
1200 
1201     /* Restore CR0 */
1202     __writecr0(Cr0);
1203 
1204     /* Return if there's an errata */
1205     return ErrataPresent != 0;
1206 }
1207 
1208 VOID
1209 NTAPI
1210 KiFlushNPXState(IN PFLOATING_SAVE_AREA SaveArea)
1211 {
1212     ULONG EFlags, Cr0;
1213     PKTHREAD Thread, NpxThread;
1214     PFX_SAVE_AREA FxSaveArea;
1215 
    /* Save EFLAGS and disable interrupts */
1217     EFlags = __readeflags();
1218     _disable();
1219 
    /* Get the current thread */
1221     Thread = KeGetCurrentThread();
1222 
1223     /* Check if we're already loaded */
1224     if (Thread->NpxState != NPX_STATE_LOADED)
1225     {
1226         /* If there's nothing to load, quit */
1227         if (!SaveArea)
1228         {
1229             /* Restore interrupt state and return */
1230             __writeeflags(EFlags);
1231             return;
1232         }
1233 
1234         /* Need FXSR support for this */
1235         ASSERT(KeI386FxsrPresent == TRUE);
1236 
1237         /* Check for sane CR0 */
1238         Cr0 = __readcr0();
1239         if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
1240         {
1241             /* Mask out FPU flags */
1242             __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1243         }
1244 
1245         /* Get the NPX thread and check its FPU state */
1246         NpxThread = KeGetCurrentPrcb()->NpxThread;
1247         if ((NpxThread) && (NpxThread->NpxState == NPX_STATE_LOADED))
1248         {
1249             /* Get the FX frame and store the state there */
1250             FxSaveArea = KiGetThreadNpxArea(NpxThread);
1251             Ke386FxSave(FxSaveArea);
1252 
1253             /* NPX thread has lost its state */
1254             NpxThread->NpxState = NPX_STATE_NOT_LOADED;
1255         }
1256 
1257         /* Now load NPX state from the NPX area */
1258         FxSaveArea = KiGetThreadNpxArea(Thread);
1259         Ke386FxStore(FxSaveArea);
1260     }
1261     else
1262     {
1263         /* Check for sane CR0 */
1264         Cr0 = __readcr0();
1265         if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
1266         {
1267             /* Mask out FPU flags */
1268             __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
1269         }
1270 
1271         /* Get FX frame */
1272         FxSaveArea = KiGetThreadNpxArea(Thread);
1273         Thread->NpxState = NPX_STATE_NOT_LOADED;
1274 
1275         /* Save state if supported by CPU */
1276         if (KeI386FxsrPresent) Ke386FxSave(FxSaveArea);
1277     }
1278 
1279     /* Now save the FN state wherever it was requested */
1280     if (SaveArea) Ke386FnSave(SaveArea);
1281 
1282     /* Clear NPX thread */
1283     KeGetCurrentPrcb()->NpxThread = NULL;
1284 
1285     /* Add the CR0 from the NPX frame */
1286     Cr0 |= NPX_STATE_NOT_LOADED;
1287     Cr0 |= FxSaveArea->Cr0NpxState;
1288     __writecr0(Cr0);
1289 
1290     /* Restore interrupt state */
1291     __writeeflags(EFlags);
1292 }
1293 
1294 /* PUBLIC FUNCTIONS **********************************************************/
1295 
1296 /*
1297  * @implemented
1298  */
1299 VOID
1300 NTAPI
1301 KiCoprocessorError(VOID)
1302 {
1303     PFX_SAVE_AREA NpxArea;
1304 
1305     /* Get the FPU area */
1306     NpxArea = KiGetThreadNpxArea(KeGetCurrentThread());
1307 
1308     /* Set CR0_TS */
1309     NpxArea->Cr0NpxState = CR0_TS;
1310     __writecr0(__readcr0() | CR0_TS);
1311 }
1312 
1313 /**
1314  * @brief
 * Saves the floating point unit state of the calling thread.
 *
 * @param[out] Save
 * Receives an opaque context describing the saved floating
 * point state; its contents are not meant to be interpreted
 * by the calling thread.
 *
 * @return
 * Returns STATUS_SUCCESS if the state was saved successfully.
 * STATUS_INSUFFICIENT_RESOURCES is returned if memory for the
 * FPU state information could not be allocated.
 *
 * @remarks
 * The state is saved in one of two ways: a plain FPU save
 * (FNSAVE) if the system does not support SSE/SSE2, otherwise
 * a combined save of the FPU, MMX and SSE state (FXSAVE).
1337  */
1338 #if defined(__clang__)
1339 __attribute__((__target__("sse")))
1340 #endif
1341 NTSTATUS
1342 NTAPI
1343 KeSaveFloatingPointState(
1344     _Out_ PKFLOATING_SAVE Save)
1345 {
1346     PFLOATING_SAVE_CONTEXT FsContext;
1347     PFX_SAVE_AREA FxSaveAreaFrame;
1348     PKPRCB CurrentPrcb;
1349 
1350     /* Sanity checks */
1351     ASSERT(Save);
1352     ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
1353     ASSERT(KeI386NpxPresent);
1354 
1355     /* Initialize the floating point context */
1356     FsContext = ExAllocatePoolWithTag(NonPagedPool,
1357                                       sizeof(FLOATING_SAVE_CONTEXT),
1358                                       TAG_FLOATING_POINT_CONTEXT);
1359     if (!FsContext)
1360     {
1361         /* Bail out if we failed */
1362         return STATUS_INSUFFICIENT_RESOURCES;
1363     }
1364 
1365     /*
     * Allocate the state buffer from pool. The allocation size is
     * the FX area plus the alignment slack required by FXSAVE,
     * which needs a 16-byte aligned pointer to save the FPU state.
1371      */
1372     FsContext->Buffer = ExAllocatePoolWithTag(NonPagedPool,
1373                                               sizeof(FX_SAVE_AREA) + FXSAVE_ALIGN,
1374                                               TAG_FLOATING_POINT_FX);
1375     if (!FsContext->Buffer)
1376     {
1377         /* Bail out if we failed */
1378         ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
1379         return STATUS_INSUFFICIENT_RESOURCES;
1380     }
1381 
1382     /*
     * Cache the raw allocation and align the save area pointer to a
     * 16-byte boundary. The pool allocator only guarantees an 8-byte
     * aligned pointer, so the extra slack allocated above is used to
     * round the pointer up. The original, unaligned pointer is kept
     * in Buffer so it can be passed back unchanged when the pool
     * allocation is freed later.
1392      */
1393     FsContext->PfxSaveArea = ALIGN_UP_POINTER_BY(FsContext->Buffer, 16);
1394 
1395     /* Disable interrupts and get the current processor control region */
1396     _disable();
1397     CurrentPrcb = KeGetCurrentPrcb();
1398 
1399     /* Store the current thread to context */
1400     FsContext->CurrentThread = KeGetCurrentThread();
1401 
1402     /*
     * If another thread currently owns the NPX (Numeric Processor
     * eXtension) state, save that state back into its own save area
     * first, then make the current thread the NPX owner so the
     * scheduler knows the FPU state belongs to this thread.
1407      */
1408     if (FsContext->CurrentThread != CurrentPrcb->NpxThread)
1409     {
1410         if ((CurrentPrcb->NpxThread != NULL) &&
1411             (CurrentPrcb->NpxThread->NpxState == NPX_STATE_LOADED))
1412         {
1413             /* Get the FX frame */
1414             FxSaveAreaFrame = KiGetThreadNpxArea(CurrentPrcb->NpxThread);
1415 
1416             /* Save the FPU state */
1417             Ke386SaveFpuState(FxSaveAreaFrame);
1418 
1419             /* NPX thread has lost its state */
1420             CurrentPrcb->NpxThread->NpxState = NPX_STATE_NOT_LOADED;
1421             FxSaveAreaFrame->NpxSavedCpu = 0;
1422         }
1423 
1424         /* The new NPX thread is the current thread */
1425         CurrentPrcb->NpxThread = FsContext->CurrentThread;
1426     }
1427 
1428     /* Perform the save */
1429     Ke386SaveFpuState(FsContext->PfxSaveArea);
1430 
1431     /* Store the NPX IRQL */
1432     FsContext->OldNpxIrql = FsContext->CurrentThread->Header.NpxIrql;
1433 
1434     /* Set the current IRQL to NPX */
1435     FsContext->CurrentThread->Header.NpxIrql = KeGetCurrentIrql();
1436 
1437     /* Initialize the FPU */
1438     Ke386FnInit();
1439 
1440     /* Enable interrupts back */
1441     _enable();
1442 
1443     /* Give the saved FPU context to the caller */
1444     *((PVOID *) Save) = FsContext;
1445     return STATUS_SUCCESS;
1446 }
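
/*
 * Typical usage of the save/restore pair (illustrative sketch only; the
 * variable names below are examples and not part of this file):
 *
 *     KFLOATING_SAVE FloatSave;
 *     NTSTATUS Status = KeSaveFloatingPointState(&FloatSave);
 *     if (NT_SUCCESS(Status))
 *     {
 *         // ... perform FPU/MMX/SSE work at IRQL <= DISPATCH_LEVEL ...
 *         KeRestoreFloatingPointState(&FloatSave);
 *     }
 */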
1447 
1448 /**
1449  * @brief
 * Restores the FPU state context previously saved by a call
 * to KeSaveFloatingPointState. Callers are expected to invoke
 * this function once they have finished their FPU operations.
1455  *
1456  * @param[in] Save
1457  * The saved floating point context that is to be given
1458  * to the function to restore the FPU state.
1459  *
1460  * @return
1461  * Returns STATUS_SUCCESS indicating the function
1462  * has fully completed its operations.
1463  */
1464 #if defined(__clang__)
1465 __attribute__((__target__("sse")))
1466 #endif
1467 NTSTATUS
1468 NTAPI
1469 KeRestoreFloatingPointState(
1470     _In_ PKFLOATING_SAVE Save)
1471 {
1472     PFLOATING_SAVE_CONTEXT FsContext;
1473 
1474     /* Sanity checks */
1475     ASSERT(Save);
1476     ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
1477     ASSERT(KeI386NpxPresent);
1478 
1479     /* Cache the saved FS context */
1480     FsContext = *((PVOID *) Save);
1481 
1482     /*
     * Before restoring the saved FPU state we perform some
     * validation checks to make sure the state context was
     * saved properly. First, check that we are being called
     * from the same thread that saved the state.
1489      */
1490     if (FsContext->CurrentThread != KeGetCurrentThread())
1491     {
1492         /*
1493          * This isn't the thread that saved the
1494          * FPU state context, crash the system!
1495          */
1496         KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
1497                      0x2,
1498                      (ULONG_PTR)FsContext->CurrentThread,
1499                      (ULONG_PTR)KeGetCurrentThread(),
1500                      0);
1501     }
1502 
1503     /* Are we under the same NPX interrupt level? */
1504     if (FsContext->CurrentThread->Header.NpxIrql != KeGetCurrentIrql())
1505     {
1506         /* The interrupt level has changed, crash the system! */
1507         KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
1508                      0x1,
1509                      (ULONG_PTR)FsContext->CurrentThread->Header.NpxIrql,
1510                      (ULONG_PTR)KeGetCurrentIrql(),
1511                      0);
1512     }
1513 
1514     /* Disable interrupts */
1515     _disable();
1516 
1517     /*
     * The saved FPU state context is valid, so restore it.
     * First, clear any pending FPU exceptions.
1521      */
1522     Ke386ClearFpExceptions();
1523 
1524     /* Restore the state */
1525     Ke386RestoreFpuState(FsContext->PfxSaveArea);
1526 
1527     /* Give the saved NPX IRQL back to the NPX thread */
1528     FsContext->CurrentThread->Header.NpxIrql = FsContext->OldNpxIrql;
1529 
1530     /* Enable interrupts back */
1531     _enable();
1532 
1533     /* We're done, free the allocated area and context */
1534     ExFreePoolWithTag(FsContext->Buffer, TAG_FLOATING_POINT_FX);
1535     ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
1536 
1537     return STATUS_SUCCESS;
1538 }
1539 
1540 /*
1541  * @implemented
1542  */
1543 ULONG
1544 NTAPI
1545 KeGetRecommendedSharedDataAlignment(VOID)
1546 {
1547     /* Return the global variable */
1548     return KeLargestCacheLine;
1549 }
1550 
1551 VOID
1552 NTAPI
1553 KiFlushTargetEntireTb(IN PKIPI_CONTEXT PacketContext,
1554                       IN PVOID Ignored1,
1555                       IN PVOID Ignored2,
1556                       IN PVOID Ignored3)
1557 {
1558     /* Signal this packet as done */
1559     KiIpiSignalPacketDone(PacketContext);
1560 
1561     /* Flush the TB for the Current CPU */
1562     KeFlushCurrentTb();
1563 }
1564 
1565 /*
1566  * @implemented
1567  */
1568 VOID
1569 NTAPI
1570 KeFlushEntireTb(IN BOOLEAN Invalid,
1571                 IN BOOLEAN AllProcessors)
1572 {
1573     KIRQL OldIrql;
1574 #ifdef CONFIG_SMP
1575     KAFFINITY TargetAffinity;
1576     PKPRCB Prcb = KeGetCurrentPrcb();
1577 #endif
1578 
1579     /* Raise the IRQL for the TB Flush */
1580     OldIrql = KeRaiseIrqlToSynchLevel();
1581 
1582 #ifdef CONFIG_SMP
1583     /* FIXME: Use KiTbFlushTimeStamp to synchronize TB flush */
1584 
1585     /* Get the current processor affinity, and exclude ourselves */
1586     TargetAffinity = KeActiveProcessors;
1587     TargetAffinity &= ~Prcb->SetMember;
1588 
1589     /* Make sure this is MP */
1590     if (TargetAffinity)
1591     {
1592         /* Send an IPI TB flush to the other processors */
1593         KiIpiSendPacket(TargetAffinity,
1594                         KiFlushTargetEntireTb,
1595                         NULL,
1596                         0,
1597                         NULL);
1598     }
1599 #endif
1600 
1601     /* Flush the TB for the Current CPU, and update the flush stamp */
1602     KeFlushCurrentTb();
1603 
1604 #ifdef CONFIG_SMP
1605     /* If this is MP, wait for the other processors to finish */
1606     if (TargetAffinity)
1607     {
1608         /* Sanity check */
1609         ASSERT(Prcb == KeGetCurrentPrcb());
1610 
1611         /* FIXME: TODO */
1612         ASSERTMSG("Not yet implemented\n", FALSE);
1613     }
1614 #endif
1615 
1616     /* Update the flush stamp and return to original IRQL */
1617     InterlockedExchangeAdd(&KiTbFlushTimeStamp, 1);
1618     KeLowerIrql(OldIrql);
1619 }
1620 
1621 /*
1622  * @implemented
1623  */
1624 VOID
1625 NTAPI
1626 KeSetDmaIoCoherency(IN ULONG Coherency)
1627 {
1628     /* Save the coherency globally */
1629     KiDmaIoCoherency = Coherency;
1630 }
1631 
1632 /*
1633  * @implemented
1634  */
1635 KAFFINITY
1636 NTAPI
1637 KeQueryActiveProcessors(VOID)
1638 {
1639     PAGED_CODE();
1640 
1641     /* Simply return the number of active processors */
1642     return KeActiveProcessors;
1643 }
1644 
1645 /*
1646  * @implemented
1647  */
1648 VOID
1649 __cdecl
1650 KeSaveStateForHibernate(IN PKPROCESSOR_STATE State)
1651 {
1652     /* Capture the context */
1653     RtlCaptureContext(&State->ContextFrame);
1654 
1655     /* Capture the control state */
1656     KiSaveProcessorControlState(State);
1657 }
1658