/*
 * PROJECT:         ReactOS Kernel
 * LICENSE:         GPL - See COPYING in the top level directory
 * FILE:            ntoskrnl/ke/i386/cpu.c
 * PURPOSE:         Routines for CPU-level support
 * PROGRAMMERS:     Alex Ionescu (alex.ionescu@reactos.org)
 */

/* INCLUDES *****************************************************************/

#include <ntoskrnl.h>
#define NDEBUG
#include <debug.h>

#include <xmmintrin.h>

/* GLOBALS *******************************************************************/

/* The TSS to use for Double Fault Traps (INT 0x8) */
UCHAR KiDoubleFaultTSS[KTSS_IO_MAPS];

/* The TSS to use for NMI Fault Traps (INT 0x2) */
UCHAR KiNMITSS[KTSS_IO_MAPS];

/* CPU Features and Flags */
ULONG KeI386CpuType;
ULONG KeI386CpuStep;
ULONG KiFastSystemCallDisable = 0;
ULONG KeI386NpxPresent = TRUE;
ULONG KiMXCsrMask = 0;
ULONG MxcsrFeatureMask = 0;
ULONG KeI386XMMIPresent = 0;
ULONG KeI386FxsrPresent = 0;
ULONG KeI386MachineType;
ULONG Ke386Pae = FALSE;
ULONG Ke386NoExecute = FALSE;
ULONG KeLargestCacheLine = 0x40;
ULONG KeDcacheFlushCount = 0;
ULONG KeIcacheFlushCount = 0;
ULONG KiDmaIoCoherency = 0;
ULONG KePrefetchNTAGranularity = 32;
BOOLEAN KiI386PentiumLockErrataPresent;
BOOLEAN KiSMTProcessorsPresent;

/* The distance between SYSEXIT and IRETD return modes */
UCHAR KiSystemCallExitAdjust;

/* The offset that was applied -- either 0 or the value above */
UCHAR KiSystemCallExitAdjusted;

/* Whether the adjustment was already done once */
BOOLEAN KiFastCallCopyDoneOnce;

/* Flush data */
volatile LONG KiTbFlushTimeStamp;

/* CPU Signatures */
static const CHAR CmpIntelID[]     = "GenuineIntel";
static const CHAR CmpAmdID[]       = "AuthenticAMD";
static const CHAR CmpCyrixID[]     = "CyrixInstead";
static const CHAR CmpTransmetaID[] = "GenuineTMx86";
static const CHAR CmpCentaurID[]   = "CentaurHauls";
static const CHAR CmpRiseID[]      = "RiseRiseRise";

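/*
 * Decodes the processor signature returned in CPUID.01H:EAX. Per the
 * Intel SDM, bits 3:0 are the stepping, 7:4 the model, 11:8 the family,
 * 19:16 the extended model and 27:20 the extended family; bits 15:12
 * (the processor type plus a reserved bit) are unused here.
 */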
typedef union _CPU_SIGNATURE
{
    struct
    {
        ULONG Step : 4;
        ULONG Model : 4;
        ULONG Family : 4;
        ULONG Unused : 4;
        ULONG ExtendedModel : 4;
        ULONG ExtendedFamily : 8;
        ULONG Unused2 : 4;
    };
    ULONG AsULONG;
} CPU_SIGNATURE;

/* FX area alignment size */
#define FXSAVE_ALIGN 15

/* SUPPORT ROUTINES FOR MSVC COMPATIBILITY ***********************************/

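/*
 * Cyrix/NSC parts expose their configuration registers through an
 * index/data port pair: the register index is written to I/O port 0x22,
 * and the register contents are then read or written through port 0x23.
 */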
/* NSC/Cyrix CPU configuration register index */
#define CX86_CCR1 0xc1

/* NSC/Cyrix CPU indexed register access macros */
static __inline
UCHAR
getCx86(UCHAR reg)
{
    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
    return READ_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23);
}

static __inline
void
setCx86(UCHAR reg, UCHAR data)
{
    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23, data);
}

/* FUNCTIONS *****************************************************************/

CODE_SEG("INIT")
ULONG
NTAPI
KiGetCpuVendor(VOID)
{
    PKPRCB Prcb = KeGetCurrentPrcb();
    CPU_INFO CpuInfo;

    /* Get the Vendor ID */
    KiCpuId(&CpuInfo, 0);

    /* Copy it to the PRCB and null-terminate it */
    *(ULONG*)&Prcb->VendorString[0] = CpuInfo.Ebx;
    *(ULONG*)&Prcb->VendorString[4] = CpuInfo.Edx;
    *(ULONG*)&Prcb->VendorString[8] = CpuInfo.Ecx;
    Prcb->VendorString[12] = 0;

    /* Now check the CPU Type */
    if (!strcmp(Prcb->VendorString, CmpIntelID))
    {
        return CPU_INTEL;
    }
    else if (!strcmp(Prcb->VendorString, CmpAmdID))
    {
        return CPU_AMD;
    }
    else if (!strcmp(Prcb->VendorString, CmpCyrixID))
    {
        DPRINT1("Cyrix CPU support not fully tested!\n");
        return CPU_CYRIX;
    }
    else if (!strcmp(Prcb->VendorString, CmpTransmetaID))
    {
        DPRINT1("Transmeta CPU support not fully tested!\n");
        return CPU_TRANSMETA;
    }
    else if (!strcmp(Prcb->VendorString, CmpCentaurID))
    {
        DPRINT1("Centaur CPU support not fully tested!\n");
        return CPU_CENTAUR;
    }
    else if (!strcmp(Prcb->VendorString, CmpRiseID))
    {
        DPRINT1("Rise CPU support not fully tested!\n");
        return CPU_RISE;
    }

    /* Unknown CPU */
    DPRINT1("%s CPU support not fully tested!\n", Prcb->VendorString);
    return CPU_UNKNOWN;
}

CODE_SEG("INIT")
VOID
NTAPI
KiSetProcessorType(VOID)
{
    CPU_INFO CpuInfo;
    CPU_SIGNATURE CpuSignature;
    BOOLEAN ExtendModel;
    ULONG Stepping, Type;

    /* Do CPUID 1 now */
    KiCpuId(&CpuInfo, 1);

    /*
     * Get the Stepping and Type. The stepping contains both the
     * Model and the Step, while the Type contains the returned Family.
     *
     * For the stepping, we convert this: zzzzzzxy into this: x0y
     * (e.g. Model 3, Step 4 is encoded as 0x0304).
     */
    CpuSignature.AsULONG = CpuInfo.Eax;
    Stepping = CpuSignature.Model;
    ExtendModel = (CpuSignature.Family == 15);
#if ( (NTDDI_VERSION >= NTDDI_WINXPSP2) && (NTDDI_VERSION < NTDDI_WS03) ) || (NTDDI_VERSION >= NTDDI_WS03SP1)
    if (CpuSignature.Family == 6)
    {
        ULONG Vendor = KiGetCpuVendor();
        ExtendModel |= (Vendor == CPU_INTEL);
#if (NTDDI_VERSION >= NTDDI_WIN8)
        ExtendModel |= (Vendor == CPU_CENTAUR);
#endif
    }
#endif
    if (ExtendModel)
    {
        /* Add ExtendedModel to distinguish from non-extended values. */
        Stepping |= (CpuSignature.ExtendedModel << 4);
    }
    Stepping = (Stepping << 8) | CpuSignature.Step;
    Type = CpuSignature.Family;
    if (CpuSignature.Family == 15)
    {
        /* Add ExtendedFamily to distinguish from non-extended values.
         * It must not be larger than 0xF0 to avoid overflow. */
        Type += min(CpuSignature.ExtendedFamily, 0xF0);
    }

    /* Save them in the PRCB */
    KeGetCurrentPrcb()->CpuID = TRUE;
    KeGetCurrentPrcb()->CpuType = (UCHAR)Type;
    KeGetCurrentPrcb()->CpuStep = (USHORT)Stepping;
}

CODE_SEG("INIT")
ULONG
NTAPI
KiGetFeatureBits(VOID)
{
    PKPRCB Prcb = KeGetCurrentPrcb();
    ULONG Vendor;
    ULONG FeatureBits = KF_WORKING_PTE;
    CPU_INFO CpuInfo, DummyCpuInfo;
    UCHAR Ccr1;
    BOOLEAN ExtendedCPUID = TRUE;
    ULONG CpuFeatures = 0;

    /* Get the Vendor ID */
    Vendor = KiGetCpuVendor();

    /* Make sure we got a valid vendor ID at least. */
    if (!Vendor) return FeatureBits;

    /* Get the CPUID Info. Features are in Reg[3]. */
    KiCpuId(&CpuInfo, 1);

    /* Set the initial APIC ID */
    Prcb->InitialApicId = (UCHAR)(CpuInfo.Ebx >> 24);

    switch (Vendor)
    {
        /* Intel CPUs */
        case CPU_INTEL:

            /* Check if it's a P6 */
            if (Prcb->CpuType == 6)
            {
                /* Perform the special sequence to get the MicroCode Signature */
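                /* MSR 0x8B is IA32_BIOS_SIGN_ID: writing zero and executing
                   CPUID(1) makes the CPU latch the currently loaded microcode
                   revision, which the subsequent read then returns. */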
                __writemsr(0x8B, 0);
                KiCpuId(&DummyCpuInfo, 1);
                Prcb->UpdateSignature.QuadPart = __readmsr(0x8B);
            }
            else if (Prcb->CpuType == 5)
            {
                /* On P5, enable workaround for the LOCK errata. */
                KiI386PentiumLockErrataPresent = TRUE;
            }

            /* Check for broken P6 with bad SMP PTE implementation */
            if (((CpuInfo.Eax & 0x0FF0) == 0x0610 && (CpuInfo.Eax & 0x000F) <= 0x9) ||
                ((CpuInfo.Eax & 0x0FF0) == 0x0630 && (CpuInfo.Eax & 0x000F) <= 0x4))
            {
                /* Remove support for correct PTE support. */
                FeatureBits &= ~KF_WORKING_PTE;
            }

            /* Check if the CPU is too old to support SYSENTER */
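            /* CPUID.01H:EDX bit 11 (0x800) is the SEP flag. Early P6
               steppings (below Model 3, Stepping 3) set it even though
               SYSENTER/SYSEXIT are not usable there, per Intel's guidance. */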
            if ((Prcb->CpuType < 6) ||
                ((Prcb->CpuType == 6) && (Prcb->CpuStep < 0x0303)))
            {
                /* Disable it */
                CpuInfo.Edx &= ~0x800;
            }

            break;

        /* AMD CPUs */
        case CPU_AMD:

            /* Check if this is a K5 or K6 (Family 5) */
            if ((CpuInfo.Eax & 0x0F00) == 0x0500)
            {
                /* Get the Model Number */
                switch (CpuInfo.Eax & 0x00F0)
                {
                    /* Model 1: K5 - 5k86 (initial models) */
                    case 0x0010:

                        /* Steps 0 through 3 don't support PGE; later steps do */
                        if ((CpuInfo.Eax & 0x000F) > 0x03) break;

                    /* Model 0: K5 - SSA5 */
                    case 0x0000:

                        /* Model 0 doesn't support PGE at all. */
                        CpuInfo.Edx &= ~0x2000;
                        break;

                    /* Model 8: K6-2 */
                    case 0x0080:

                        /* K6-2, Step 8 and over have support for MTRR. */
                        if ((CpuInfo.Eax & 0x000F) >= 0x8) FeatureBits |= KF_AMDK6MTRR;
                        break;

                    /* Model 9: K6-III
                       Model D: K6-2+, K6-III+ */
                    case 0x0090:
                    case 0x00D0:

                        FeatureBits |= KF_AMDK6MTRR;
                        break;
                }
            }
            else if ((CpuInfo.Eax & 0x0F00) < 0x0500)
            {
                /* Families below 5 don't support PGE, PSE or CMOV at all */
                CpuInfo.Edx &= ~(0x08 | 0x2000 | 0x8000);

                /* They also don't support advanced CPUID functions. */
                ExtendedCPUID = FALSE;
            }

            break;

        /* Cyrix CPUs */
        case CPU_CYRIX:

            /* Workaround the "COMA" bug on 6x family of Cyrix CPUs */
            if (Prcb->CpuType == 6 &&
                Prcb->CpuStep <= 1)
            {
                /* Get CCR1 value */
                Ccr1 = getCx86(CX86_CCR1);

                /* Enable the NO_LOCK bit */
                Ccr1 |= 0x10;

                /* Set the new CCR1 value */
                setCx86(CX86_CCR1, Ccr1);
            }

            break;

        /* Transmeta CPUs */
        case CPU_TRANSMETA:

            /* Enable CMPXCHG8B if the family/model/stepping signature is 5.4.2 or higher */
            if ((CpuInfo.Eax & 0x0FFF) >= 0x0542)
            {
                __writemsr(0x80860004, __readmsr(0x80860004) | 0x0100);
                FeatureBits |= KF_CMPXCHG8B;
            }

            break;

        /* Centaur, IDT, Rise and VIA CPUs */
        case CPU_CENTAUR:
        case CPU_RISE:

            /* These CPUs don't report the presence of CMPXCHG8B through CPUID.
               However, this feature exists and operates properly without any additional steps. */
            FeatureBits |= KF_CMPXCHG8B;

            break;
    }

    /* Set the current features */
    CpuFeatures = CpuInfo.Edx;

    /* Convert all CPUID Feature bits into our format */
    if (CpuFeatures & X86_FEATURE_VME) FeatureBits |= KF_V86_VIS | KF_CR4;
    if (CpuFeatures & X86_FEATURE_PSE) FeatureBits |= KF_LARGE_PAGE | KF_CR4;
    if (CpuFeatures & X86_FEATURE_TSC) FeatureBits |= KF_RDTSC;
    if (CpuFeatures & X86_FEATURE_CX8) FeatureBits |= KF_CMPXCHG8B;
    if (CpuFeatures & X86_FEATURE_SYSCALL) FeatureBits |= KF_FAST_SYSCALL;
    if (CpuFeatures & X86_FEATURE_MTTR) FeatureBits |= KF_MTRR;
    if (CpuFeatures & X86_FEATURE_PGE) FeatureBits |= KF_GLOBAL_PAGE | KF_CR4;
    if (CpuFeatures & X86_FEATURE_CMOV) FeatureBits |= KF_CMOV;
    if (CpuFeatures & X86_FEATURE_PAT) FeatureBits |= KF_PAT;
    if (CpuFeatures & X86_FEATURE_DS) FeatureBits |= KF_DTS;
    if (CpuFeatures & X86_FEATURE_MMX) FeatureBits |= KF_MMX;
    if (CpuFeatures & X86_FEATURE_FXSR) FeatureBits |= KF_FXSR;
    if (CpuFeatures & X86_FEATURE_SSE) FeatureBits |= KF_XMMI;
    if (CpuFeatures & X86_FEATURE_SSE2) FeatureBits |= KF_XMMI64;

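    /* When HTT is set, CPUID.01H:EBX bits 23:16 give the number of logical
       processors in the physical package. */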
    /* Check if the CPU has hyper-threading */
    if (CpuFeatures & X86_FEATURE_HT)
    {
        /* Set the number of logical CPUs */
        Prcb->LogicalProcessorsPerPhysicalProcessor = (UCHAR)(CpuInfo.Ebx >> 16);
        if (Prcb->LogicalProcessorsPerPhysicalProcessor > 1)
        {
            /* This package exposes multiple logical processors */
            KiSMTProcessorsPresent = TRUE;
        }
    }
    else
    {
        /* We only have a single CPU */
        Prcb->LogicalProcessorsPerPhysicalProcessor = 1;
    }

    /* Check if CPUID 0x80000000 is supported */
    if (ExtendedCPUID)
    {
        /* Do the call */
        KiCpuId(&CpuInfo, 0x80000000);
        if ((CpuInfo.Eax & 0xffffff00) == 0x80000000)
        {
            /* Check if CPUID 0x80000001 is supported */
            if (CpuInfo.Eax >= 0x80000001)
            {
                /* Check which extended features are available. */
                KiCpuId(&CpuInfo, 0x80000001);

                /* Check if NX-bit is supported */
                if (CpuInfo.Edx & X86_FEATURE_NX) FeatureBits |= KF_NX_BIT;

                /* Now handle the vendor-specific extended features */
                switch (Vendor)
                {
                    case CPU_AMD:
                    case CPU_CENTAUR:
                        if (CpuInfo.Edx & 0x80000000) FeatureBits |= KF_3DNOW;
                        break;
                }
            }
        }
    }

    /* Return the Feature Bits */
    return FeatureBits;
}

#if DBG
CODE_SEG("INIT")
VOID
KiReportCpuFeatures(VOID)
{
    ULONG CpuFeatures = 0;
    CPU_INFO CpuInfo;

    if (KiGetCpuVendor())
    {
        KiCpuId(&CpuInfo, 1);
        CpuFeatures = CpuInfo.Edx;
    }

    DPRINT1("Supported CPU features: ");

#define print_kf_bit(kf_value) if (KeFeatureBits & kf_value) DbgPrint(#kf_value " ")
    print_kf_bit(KF_V86_VIS);
    print_kf_bit(KF_RDTSC);
    print_kf_bit(KF_CR4);
    print_kf_bit(KF_CMOV);
    print_kf_bit(KF_GLOBAL_PAGE);
    print_kf_bit(KF_LARGE_PAGE);
    print_kf_bit(KF_MTRR);
    print_kf_bit(KF_CMPXCHG8B);
    print_kf_bit(KF_MMX);
    print_kf_bit(KF_WORKING_PTE);
    print_kf_bit(KF_PAT);
    print_kf_bit(KF_FXSR);
    print_kf_bit(KF_FAST_SYSCALL);
    print_kf_bit(KF_XMMI);
    print_kf_bit(KF_3DNOW);
    print_kf_bit(KF_AMDK6MTRR);
    print_kf_bit(KF_XMMI64);
    print_kf_bit(KF_DTS);
    print_kf_bit(KF_NX_BIT);
    print_kf_bit(KF_NX_DISABLED);
    print_kf_bit(KF_NX_ENABLED);
#undef print_kf_bit

#define print_cf(cpu_flag) if (CpuFeatures & cpu_flag) DbgPrint(#cpu_flag " ")
    print_cf(X86_FEATURE_PAE);
    print_cf(X86_FEATURE_APIC);
    print_cf(X86_FEATURE_HT);
#undef print_cf

    DbgPrint("\n");
}
#endif // DBG

CODE_SEG("INIT")
VOID
NTAPI
KiGetCacheInformation(VOID)
{
    PKIPCR Pcr = (PKIPCR)KeGetPcr();
    CPU_INFO CpuInfo;
    ULONG CacheRequests = 0, i;
    ULONG CurrentRegister;
    UCHAR RegisterByte, Associativity = 0;
    ULONG Size, CacheLine = 64, CurrentSize = 0;
    BOOLEAN FirstPass = TRUE;

    /* Set default L2 size */
    Pcr->SecondLevelCacheSize = 0;

    /* Check the Vendor ID */
    switch (KiGetCpuVendor())
    {
        /* Handle Intel case */
        case CPU_INTEL:

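            /* CPUID leaf 2 returns a stream of one-byte cache and TLB
               descriptors in EAX..EDX. The low byte of EAX is the number
               of times the leaf must be queried, and a register whose
               bit 31 is set contains no valid descriptors (Intel SDM). */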
            /* Check if we support CPUID 2 */
            KiCpuId(&CpuInfo, 0);
            if (CpuInfo.Eax >= 2)
            {
                /* We need to loop for the number of times CPUID will tell us to */
                do
                {
                    /* Do the CPUID call */
                    KiCpuId(&CpuInfo, 2);

                    /* Check if it was the first call */
                    if (FirstPass)
                    {
                        /*
                         * The number of times to loop is the first byte. Read
                         * it and then destroy it so we don't get confused.
                         */
                        CacheRequests = CpuInfo.Eax & 0xFF;
                        CpuInfo.Eax &= 0xFFFFFF00;

                        /* Don't go over this again */
                        FirstPass = FALSE;
                    }

                    /* Loop all 4 registers */
                    for (i = 0; i < 4; i++)
                    {
                        /* Get the current register */
                        CurrentRegister = CpuInfo.AsUINT32[i];

                        /*
                         * If the upper bit is set, then this register should
                         * be skipped.
                         */
                        if (CurrentRegister & 0x80000000) continue;

                        /* Keep looping for every byte inside this register */
                        while (CurrentRegister)
                        {
                            /* Read a byte, skip a byte. */
                            RegisterByte = (UCHAR)(CurrentRegister & 0xFF);
                            CurrentRegister >>= 8;
                            if (!RegisterByte) continue;

                            Size = 0;
                            switch (RegisterByte)
                            {
                                case 0x06:
                                case 0x08:
                                    KePrefetchNTAGranularity = 32;
                                    break;
                                case 0x09:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0x0a:
                                case 0x0c:
                                    KePrefetchNTAGranularity = 32;
                                    break;
                                case 0x0d:
                                case 0x0e:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0x1d:
                                    Size = 128 * 1024;
                                    Associativity = 2;
                                    break;
                                case 0x21:
                                    Size = 256 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0x24:
                                    Size = 1024 * 1024;
                                    Associativity = 16;
                                    break;
                                case 0x2c:
                                case 0x30:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0x41:
                                case 0x42:
                                case 0x43:
                                case 0x44:
                                case 0x45:
                                    Size = (1 << (RegisterByte - 0x41)) * 128 * 1024;
                                    Associativity = 4;
                                    break;
                                case 0x48:
                                    Size = 3 * 1024 * 1024;
                                    Associativity = 12;
                                    break;
                                case 0x49:
                                    Size = 4 * 1024 * 1024;
                                    Associativity = 16;
                                    break;
                                case 0x4e:
                                    Size = 6 * 1024 * 1024;
                                    Associativity = 24;
                                    break;
                                case 0x60:
                                case 0x66:
                                case 0x67:
                                case 0x68:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0x78:
                                    Size = 1024 * 1024;
                                    Associativity = 4;
                                    break;
                                case 0x79:
                                case 0x7a:
                                case 0x7b:
                                case 0x7c:
                                case 0x7d:
                                    Size = (1 << (RegisterByte - 0x79)) * 128 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0x7f:
                                    Size = 512 * 1024;
                                    Associativity = 2;
                                    break;
                                case 0x80:
                                    Size = 512 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0x82:
                                case 0x83:
                                case 0x84:
                                case 0x85:
                                    Size = (1 << (RegisterByte - 0x82)) * 256 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0x86:
                                    Size = 512 * 1024;
                                    Associativity = 4;
                                    break;
                                case 0x87:
                                    Size = 1024 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0xf0:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0xf1:
                                    KePrefetchNTAGranularity = 128;
                                    break;
                            }
                            if (Size && (Size / Associativity) > CurrentSize)
                            {
                                /* Set the L2 Cache Size and Associativity */
                                CurrentSize = Size / Associativity;
                                Pcr->SecondLevelCacheSize = Size;
                                Pcr->SecondLevelCacheAssociativity = Associativity;
                            }
                        }
                    }
                } while (--CacheRequests);
            }
            break;

        case CPU_AMD:

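            /* AMD reports L1 information in CPUID 0x80000005 (ECX[7:0] is
               the D-cache line size) and L2 information in 0x80000006
               (ECX[7:0] line size, ECX[15:12] associativity code,
               ECX[31:16] cache size in KB). */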
            /* Check if we support CPUID 0x80000005 */
            KiCpuId(&CpuInfo, 0x80000000);
            if (CpuInfo.Eax >= 0x80000005)
            {
                /* Get L1 size first */
                KiCpuId(&CpuInfo, 0x80000005);
                KePrefetchNTAGranularity = CpuInfo.Ecx & 0xFF;

                /* Check if we support CPUID 0x80000006 */
                KiCpuId(&CpuInfo, 0x80000000);
                if (CpuInfo.Eax >= 0x80000006)
                {
                    /* Get 2nd level cache and tlb size */
                    KiCpuId(&CpuInfo, 0x80000006);

                    /* Cache line size */
                    CacheLine = CpuInfo.Ecx & 0xFF;

                    /* Hardcode associativity */
                    RegisterByte = (CpuInfo.Ecx >> 12) & 0xFF;
                    switch (RegisterByte)
                    {
                        case 2:
                            Associativity = 2;
                            break;

                        case 4:
                            Associativity = 4;
                            break;

                        case 6:
                            Associativity = 8;
                            break;

                        case 8:
                        case 15:
                            Associativity = 16;
                            break;

                        default:
                            Associativity = 1;
                            break;
                    }

                    /* Compute size */
                    Size = (CpuInfo.Ecx >> 16) << 10;

                    /* Hack for Model 6, Stepping 0x300 */
                    if ((KeGetCurrentPrcb()->CpuType == 6) &&
                        (KeGetCurrentPrcb()->CpuStep == 0x300))
                    {
                        /* Stick 64K in there */
                        Size = 64 * 1024;
                    }

                    /* Set the L2 Cache Size and associativity */
                    Pcr->SecondLevelCacheSize = Size;
                    Pcr->SecondLevelCacheAssociativity = Associativity;
                }
            }
            break;

        case CPU_CYRIX:
        case CPU_TRANSMETA:
        case CPU_CENTAUR:
        case CPU_RISE:

            /* FIXME */
            break;
    }

    /* Set the cache line */
    if (CacheLine > KeLargestCacheLine) KeLargestCacheLine = CacheLine;
    DPRINT1("Prefetch Cache: %lu bytes\tL2 Cache: %lu bytes\tL2 Cache Line: %lu bytes\tL2 Cache Associativity: %lu\n",
            KePrefetchNTAGranularity,
            Pcr->SecondLevelCacheSize,
            KeLargestCacheLine,
            Pcr->SecondLevelCacheAssociativity);
}

CODE_SEG("INIT")
VOID
NTAPI
KiSetCR0Bits(VOID)
{
    ULONG Cr0;

    /* Save current CR0 */
    Cr0 = __readcr0();

    /* If this is a 486, enable Write-Protection */
    if (KeGetCurrentPrcb()->CpuType > 3) Cr0 |= CR0_WP;

    /* Set new Cr0 */
    __writecr0(Cr0);
}

CODE_SEG("INIT")
VOID
NTAPI
KiInitializeTSS2(IN PKTSS Tss,
                 IN PKGDTENTRY TssEntry OPTIONAL)
{
    PUCHAR p;

    /* Make sure the GDT Entry is valid */
    if (TssEntry)
    {
        /* Set the Limit */
        TssEntry->LimitLow = sizeof(KTSS) - 1;
        TssEntry->HighWord.Bits.LimitHi = 0;
    }

    /* Now clear the I/O Map */
    ASSERT(IOPM_COUNT == 1);
    RtlFillMemory(Tss->IoMaps[0].IoMap, IOPM_FULL_SIZE, 0xFF);

    /* Initialize Interrupt Direction Maps */
    p = (PUCHAR)(Tss->IoMaps[0].DirectionMap);
    RtlZeroMemory(p, IOPM_DIRECTION_MAP_SIZE);

    /* Add DPMI support for interrupts */
    p[0] = 4;
    p[3] = 0x18;
    p[4] = 0x18;

    /* Initialize the default Interrupt Direction Map */
    p = Tss->IntDirectionMap;
    RtlZeroMemory(Tss->IntDirectionMap, IOPM_DIRECTION_MAP_SIZE);

    /* Add DPMI support */
    p[0] = 4;
    p[3] = 0x18;
    p[4] = 0x18;
}

VOID
NTAPI
KiInitializeTSS(IN PKTSS Tss)
{
    /* Set an invalid map base */
    Tss->IoMapBase = KiComputeIopmOffset(IO_ACCESS_MAP_NONE);

    /* Disable traps during Task Switches */
    Tss->Flags = 0;

    /* Set LDT and Ring 0 SS */
    Tss->LDT = 0;
    Tss->Ss0 = KGDT_R0_DATA;
}

CODE_SEG("INIT")
VOID
FASTCALL
Ki386InitializeTss(IN PKTSS Tss,
                   IN PKIDTENTRY Idt,
                   IN PKGDTENTRY Gdt)
{
    PKGDTENTRY TssEntry, TaskGateEntry;

    /* Initialize the boot TSS. */
    TssEntry = &Gdt[KGDT_TSS / sizeof(KGDTENTRY)];
    TssEntry->HighWord.Bits.Type = I386_TSS;
    TssEntry->HighWord.Bits.Pres = 1;
    TssEntry->HighWord.Bits.Dpl = 0;
    KiInitializeTSS2(Tss, TssEntry);
    KiInitializeTSS(Tss);

    /* Load the task register */
    Ke386SetTr(KGDT_TSS);

    /* Setup the Task Gate for Double Fault Traps */
    TaskGateEntry = (PKGDTENTRY)&Idt[8];
    TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
    TaskGateEntry->HighWord.Bits.Pres = 1;
    TaskGateEntry->HighWord.Bits.Dpl = 0;
    ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_DF_TSS;

    /* Initialize the TSS used for handling double faults. */
    Tss = (PKTSS)KiDoubleFaultTSS;
    KiInitializeTSS(Tss);
    Tss->CR3 = __readcr3();
    Tss->Esp0 = KiDoubleFaultStack;
    Tss->Esp = KiDoubleFaultStack;
    Tss->Eip = PtrToUlong(KiTrap08);
    Tss->Cs = KGDT_R0_CODE;
    Tss->Fs = KGDT_R0_PCR;
    Tss->Ss = Ke386GetSs();
    Tss->Es = KGDT_R3_DATA | RPL_MASK;
    Tss->Ds = KGDT_R3_DATA | RPL_MASK;

    /* Setup the Double Trap TSS entry in the GDT */
    TssEntry = &Gdt[KGDT_DF_TSS / sizeof(KGDTENTRY)];
    TssEntry->HighWord.Bits.Type = I386_TSS;
    TssEntry->HighWord.Bits.Pres = 1;
    TssEntry->HighWord.Bits.Dpl = 0;
    TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
    TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
    TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
    TssEntry->LimitLow = KTSS_IO_MAPS;

    /* Now setup the NMI Task Gate */
    TaskGateEntry = (PKGDTENTRY)&Idt[2];
    TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
    TaskGateEntry->HighWord.Bits.Pres = 1;
    TaskGateEntry->HighWord.Bits.Dpl = 0;
    ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_NMI_TSS;

    /* Initialize the actual TSS */
    Tss = (PKTSS)KiNMITSS;
    KiInitializeTSS(Tss);
    Tss->CR3 = __readcr3();
    Tss->Esp0 = KiDoubleFaultStack;
    Tss->Esp = KiDoubleFaultStack;
    Tss->Eip = PtrToUlong(KiTrap02);
    Tss->Cs = KGDT_R0_CODE;
    Tss->Fs = KGDT_R0_PCR;
    Tss->Ss = Ke386GetSs();
    Tss->Es = KGDT_R3_DATA | RPL_MASK;
    Tss->Ds = KGDT_R3_DATA | RPL_MASK;

    /* And its associated TSS Entry */
    TssEntry = &Gdt[KGDT_NMI_TSS / sizeof(KGDTENTRY)];
    TssEntry->HighWord.Bits.Type = I386_TSS;
    TssEntry->HighWord.Bits.Pres = 1;
    TssEntry->HighWord.Bits.Dpl = 0;
    TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
    TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
    TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
    TssEntry->LimitLow = KTSS_IO_MAPS;
}

VOID
NTAPI
KeFlushCurrentTb(VOID)
{

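    /* Note: reloading CR3 does not flush TLB entries for global pages;
       flushing those requires toggling CR4.PGE, as the global-page-aware
       path below does. */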
#if !defined(_GLOBAL_PAGES_ARE_AWESOME_)

    /* Flush the TLB by resetting CR3 */
    __writecr3(__readcr3());

#else

    /* Check if global pages are enabled */
    if (KeFeatureBits & KF_GLOBAL_PAGE)
    {
        ULONG Cr4;

        /* Disable PGE (Note: may not have been enabled yet) */
        Cr4 = __readcr4();
        __writecr4(Cr4 & ~CR4_PGE);

        /* Flush everything */
        __writecr3(__readcr3());

        /* Re-enable PGE */
        __writecr4(Cr4);
    }
    else
    {
        /* No global pages, resetting CR3 is enough */
        __writecr3(__readcr3());
    }

#endif

}

VOID
NTAPI
KiRestoreProcessorControlState(PKPROCESSOR_STATE ProcessorState)
{
    PKGDTENTRY TssEntry;

    //
    // Restore the CR registers
    //
    __writecr0(ProcessorState->SpecialRegisters.Cr0);
    Ke386SetCr2(ProcessorState->SpecialRegisters.Cr2);
    __writecr3(ProcessorState->SpecialRegisters.Cr3);
    if (KeFeatureBits & KF_CR4) __writecr4(ProcessorState->SpecialRegisters.Cr4);

    //
    // Restore the DR registers
    //
    __writedr(0, ProcessorState->SpecialRegisters.KernelDr0);
    __writedr(1, ProcessorState->SpecialRegisters.KernelDr1);
    __writedr(2, ProcessorState->SpecialRegisters.KernelDr2);
    __writedr(3, ProcessorState->SpecialRegisters.KernelDr3);
    __writedr(6, ProcessorState->SpecialRegisters.KernelDr6);
    __writedr(7, ProcessorState->SpecialRegisters.KernelDr7);

    //
    // Restore GDT and IDT
    //
    Ke386SetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
    __lidt(&ProcessorState->SpecialRegisters.Idtr.Limit);

    //
    // Clear the busy flag so we don't crash if we reload the same selector
    //
    TssEntry = (PKGDTENTRY)(ProcessorState->SpecialRegisters.Gdtr.Base +
                            ProcessorState->SpecialRegisters.Tr);
    TssEntry->HighWord.Bytes.Flags1 &= ~0x2;

    //
    // Restore TSS and LDT
    //
    Ke386SetTr(ProcessorState->SpecialRegisters.Tr);
    Ke386SetLocalDescriptorTable(ProcessorState->SpecialRegisters.Ldtr);
}

VOID
NTAPI
KiSaveProcessorControlState(OUT PKPROCESSOR_STATE ProcessorState)
{
    /* Save the CR registers */
    ProcessorState->SpecialRegisters.Cr0 = __readcr0();
    ProcessorState->SpecialRegisters.Cr2 = __readcr2();
    ProcessorState->SpecialRegisters.Cr3 = __readcr3();
    ProcessorState->SpecialRegisters.Cr4 = (KeFeatureBits & KF_CR4) ?
                                           __readcr4() : 0;

    /* Save the DR registers */
    ProcessorState->SpecialRegisters.KernelDr0 = __readdr(0);
    ProcessorState->SpecialRegisters.KernelDr1 = __readdr(1);
    ProcessorState->SpecialRegisters.KernelDr2 = __readdr(2);
    ProcessorState->SpecialRegisters.KernelDr3 = __readdr(3);
    ProcessorState->SpecialRegisters.KernelDr6 = __readdr(6);
    ProcessorState->SpecialRegisters.KernelDr7 = __readdr(7);
    __writedr(7, 0);

    /* Save GDT, IDT, LDT and TSS */
    Ke386GetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
    __sidt(&ProcessorState->SpecialRegisters.Idtr.Limit);
    ProcessorState->SpecialRegisters.Tr = Ke386GetTr();
    Ke386GetLocalDescriptorTable(&ProcessorState->SpecialRegisters.Ldtr);
}

CODE_SEG("INIT")
VOID
NTAPI
KiInitializeMachineType(VOID)
{
    /* Set the Machine Type we got from NTLDR */
    KeI386MachineType = KeLoaderBlock->u.I386.MachineType & 0x000FF;
}

CODE_SEG("INIT")
ULONG_PTR
NTAPI
KiLoadFastSyscallMachineSpecificRegisters(IN ULONG_PTR Context)
{
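    /* SYSENTER MSRs: 0x174 is IA32_SYSENTER_CS, 0x175 is IA32_SYSENTER_ESP
       and 0x176 is IA32_SYSENTER_EIP. */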
    /* Set CS and ESP */
    __writemsr(0x174, KGDT_R0_CODE);
    __writemsr(0x175, (ULONG_PTR)KeGetCurrentPrcb()->DpcStack);

    /* Set the SYSENTER entry point */
    __writemsr(0x176, (ULONG_PTR)KiFastCallEntry);
    return 0;
}

CODE_SEG("INIT")
VOID
NTAPI
KiRestoreFastSyscallReturnState(VOID)
{
    /* Check if the CPU Supports fast system call */
    if (KeFeatureBits & KF_FAST_SYSCALL)
    {
        /* Check if it has been disabled */
        if (KiFastSystemCallDisable)
        {
            /* Disable fast system call */
            KeFeatureBits &= ~KF_FAST_SYSCALL;
            KiFastCallExitHandler = KiSystemCallTrapReturn;
            DPRINT1("Support for SYSENTER disabled.\n");
        }
        else
        {
            /* Do an IPI to enable it */
            KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);

            /* It's enabled, so use the proper exit stub */
            KiFastCallExitHandler = KiSystemCallSysExitReturn;
            DPRINT("Support for SYSENTER detected.\n");
        }
    }
    else
    {
        /* Use the IRET handler */
        KiFastCallExitHandler = KiSystemCallTrapReturn;
        DPRINT1("No support for SYSENTER detected.\n");
    }
}

CODE_SEG("INIT")
ULONG_PTR
NTAPI
Ki386EnableDE(IN ULONG_PTR Context)
{
    /* Enable DE */
    __writecr4(__readcr4() | CR4_DE);
    return 0;
}

CODE_SEG("INIT")
ULONG_PTR
NTAPI
Ki386EnableFxsr(IN ULONG_PTR Context)
{
    /* Enable FXSR */
    __writecr4(__readcr4() | CR4_FXSR);
    return 0;
}

CODE_SEG("INIT")
ULONG_PTR
NTAPI
Ki386EnableXMMIExceptions(IN ULONG_PTR Context)
{
    PKIDTENTRY IdtEntry;

    /* Get the IDT Entry for Interrupt 0x13 */
    IdtEntry = &((PKIPCR)KeGetPcr())->IDT[0x13];

    /* Set it up */
    IdtEntry->Selector = KGDT_R0_CODE;
    IdtEntry->Offset = ((ULONG_PTR)KiTrap13 & 0xFFFF);
    IdtEntry->ExtendedOffset = ((ULONG_PTR)KiTrap13 >> 16) & 0xFFFF;
    ((PKIDT_ACCESS)&IdtEntry->Access)->Dpl = 0;
    ((PKIDT_ACCESS)&IdtEntry->Access)->Present = 1;
    ((PKIDT_ACCESS)&IdtEntry->Access)->SegmentType = I386_INTERRUPT_GATE;

    /* Enable XMMI exceptions */
    __writecr4(__readcr4() | CR4_XMMEXCPT);
    return 0;
}

CODE_SEG("INIT")
VOID
NTAPI
KiI386PentiumLockErrataFixup(VOID)
{
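    /*
     * This works around the Pentium "F00F" lock erratum: a malformed
     * LOCK CMPXCHG8B instruction hangs the CPU because the resulting
     * invalid-opcode exception cannot be delivered while the bus is
     * locked. Placing the first seven IDT entries on a write-protected
     * page turns the lockup into a recoverable page fault.
     */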
    KDESCRIPTOR IdtDescriptor = {0, 0, 0};
    PKIDTENTRY NewIdt, NewIdt2;
    PMMPTE PointerPte;

    /* Allocate memory for a new IDT */
    NewIdt = ExAllocatePool(NonPagedPool, 2 * PAGE_SIZE);

    /* Put everything after the first 7 entries on a new page */
    NewIdt2 = (PVOID)((ULONG_PTR)NewIdt + PAGE_SIZE - (7 * sizeof(KIDTENTRY)));

    /* Disable interrupts */
    _disable();

    /* Get the current IDT and copy it */
    __sidt(&IdtDescriptor.Limit);
    RtlCopyMemory(NewIdt2,
                  (PVOID)IdtDescriptor.Base,
                  IdtDescriptor.Limit + 1);
    IdtDescriptor.Base = (ULONG)NewIdt2;

    /* Set the new IDT */
    __lidt(&IdtDescriptor.Limit);
    ((PKIPCR)KeGetPcr())->IDT = NewIdt2;

    /* Restore interrupts */
    _enable();

    /* Set the first 7 entries as read-only to produce a fault */
    PointerPte = MiAddressToPte(NewIdt);
    ASSERT(PointerPte->u.Hard.Write == 1);
    PointerPte->u.Hard.Write = 0;
    KeInvalidateTlbEntry(NewIdt);
}

BOOLEAN
NTAPI
KeInvalidateAllCaches(VOID)
{
    /* Only supported on Pentium Pro and higher */
    if (KeI386CpuType < 6) return FALSE;

    /* Invalidate all caches */
    __wbinvd();
    return TRUE;
}

VOID
NTAPI
KiSaveProcessorState(IN PKTRAP_FRAME TrapFrame,
                     IN PKEXCEPTION_FRAME ExceptionFrame)
{
    PKPRCB Prcb = KeGetCurrentPrcb();

    //
    // Save full context
    //
    Prcb->ProcessorState.ContextFrame.ContextFlags = CONTEXT_FULL |
                                                     CONTEXT_DEBUG_REGISTERS;
    KeTrapFrameToContext(TrapFrame, NULL, &Prcb->ProcessorState.ContextFrame);

    //
    // Save control registers
    //
    KiSaveProcessorControlState(&Prcb->ProcessorState);
}

CODE_SEG("INIT")
BOOLEAN
NTAPI
KiIsNpxErrataPresent(VOID)
{
    static double Value1 = 4195835.0, Value2 = 3145727.0;
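    /* These are the canonical operands for detecting the Pentium FDIV
       erratum: on an affected FPU, 4195835 / 3145727 * 3145727 does not
       equal 4195835, so the subtraction below yields a nonzero result. */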
    INT ErrataPresent;
    ULONG Cr0;

    /* Interrupts have to be disabled here. */
    ASSERT(!(__readeflags() & EFLAGS_INTERRUPT_MASK));

    /* Read CR0 and remove FPU flags */
    Cr0 = __readcr0();
    __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));

    /* Initialize FPU state */
    Ke386FnInit();

    /* Multiply the magic values and divide, we should get the result back */
#ifdef __GNUC__
    __asm__ __volatile__
    (
        "fldl %1\n\t"
        "fdivl %2\n\t"
        "fmull %2\n\t"
        "fldl %1\n\t"
        "fsubp\n\t"
        "fistpl %0\n\t"
        : "=m" (ErrataPresent)
        : "m" (Value1),
          "m" (Value2)
    );
#else
    __asm
    {
        fld Value1
        fdiv Value2
        fmul Value2
        fld Value1
        fsubp st(1), st(0)
        fistp ErrataPresent
    };
#endif

    /* Restore CR0 */
    __writecr0(Cr0);

    /* Return if there's an errata */
    return ErrataPresent != 0;
}

VOID
NTAPI
KiFlushNPXState(IN PFLOATING_SAVE_AREA SaveArea)
{
    ULONG EFlags, Cr0;
    PKTHREAD Thread, NpxThread;
    PFX_SAVE_AREA FxSaveArea;

    /* Save volatiles and disable interrupts */
    EFlags = __readeflags();
    _disable();

    /* Save the PCR and get the current thread */
    Thread = KeGetCurrentThread();

    /* Check if we're already loaded */
    if (Thread->NpxState != NPX_STATE_LOADED)
    {
        /* If there's nothing to load, quit */
        if (!SaveArea)
        {
            /* Restore interrupt state and return */
            __writeeflags(EFlags);
            return;
        }

        /* Need FXSR support for this */
        ASSERT(KeI386FxsrPresent == TRUE);

        /* Check for sane CR0 */
        Cr0 = __readcr0();
        if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
        {
            /* Mask out FPU flags */
            __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
        }

        /* Get the NPX thread and check its FPU state */
        NpxThread = KeGetCurrentPrcb()->NpxThread;
        if ((NpxThread) && (NpxThread->NpxState == NPX_STATE_LOADED))
        {
            /* Get the FX frame and store the state there */
            FxSaveArea = KiGetThreadNpxArea(NpxThread);
            Ke386FxSave(FxSaveArea);

            /* NPX thread has lost its state */
            NpxThread->NpxState = NPX_STATE_NOT_LOADED;
        }

        /* Now load NPX state from the NPX area */
        FxSaveArea = KiGetThreadNpxArea(Thread);
        Ke386FxStore(FxSaveArea);
    }
    else
    {
        /* Check for sane CR0 */
        Cr0 = __readcr0();
        if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
        {
            /* Mask out FPU flags */
            __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
        }

        /* Get FX frame */
        FxSaveArea = KiGetThreadNpxArea(Thread);
        Thread->NpxState = NPX_STATE_NOT_LOADED;

        /* Save state if supported by CPU */
        if (KeI386FxsrPresent) Ke386FxSave(FxSaveArea);
    }

    /* Now save the FN state wherever it was requested */
    if (SaveArea) Ke386FnSave(SaveArea);

    /* Clear NPX thread */
    KeGetCurrentPrcb()->NpxThread = NULL;

    /* Add the CR0 from the NPX frame */
    Cr0 |= NPX_STATE_NOT_LOADED;
    Cr0 |= FxSaveArea->Cr0NpxState;
    __writecr0(Cr0);

    /* Restore interrupt state */
    __writeeflags(EFlags);
}

/* PUBLIC FUNCTIONS **********************************************************/

/*
 * @implemented
 */
VOID
NTAPI
KiCoprocessorError(VOID)
{
    PFX_SAVE_AREA NpxArea;

    /* Get the FPU area */
    NpxArea = KiGetThreadNpxArea(KeGetCurrentThread());

    /* Set CR0_TS */
    NpxArea->Cr0NpxState = CR0_TS;
    __writecr0(__readcr0() | CR0_TS);
}

/**
 * @brief
 * Saves the floating point unit state of the
 * calling thread.
 *
 * @param[out] Save
 * Receives the saved floating point context.
 * The contents of this structure are opaque
 * to the caller.
 *
 * @return
 * Returns STATUS_SUCCESS if the function has
 * successfully completed its operations.
 * STATUS_INSUFFICIENT_RESOURCES is returned
 * if the function couldn't allocate memory
 * for the FPU state information.
 *
 * @remarks
 * The function saves the FPU state in one of
 * two ways. A plain FPU save (FNSAVE) is
 * performed if the system doesn't have SSE/SSE2;
 * otherwise the FPU, MMX and SSE state is
 * saved with FXSAVE.
 */
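/*
 * Typical usage in a driver (a minimal sketch; any further error
 * handling is up to the caller):
 *
 *     KFLOATING_SAVE FloatSave;
 *     NTSTATUS Status = KeSaveFloatingPointState(&FloatSave);
 *     if (NT_SUCCESS(Status))
 *     {
 *         // ... perform x87/MMX/SSE computations here ...
 *         KeRestoreFloatingPointState(&FloatSave);
 *     }
 */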
#if defined(__clang__)
__attribute__((__target__("sse")))
#endif
NTSTATUS
NTAPI
KeSaveFloatingPointState(
    _Out_ PKFLOATING_SAVE Save)
{
    PFLOATING_SAVE_CONTEXT FsContext;
    PFX_SAVE_AREA FxSaveAreaFrame;
    PKPRCB CurrentPrcb;

    /* Sanity checks */
    ASSERT(Save);
    ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
    ASSERT(KeI386NpxPresent);

    /* Initialize the floating point context */
    FsContext = ExAllocatePoolWithTag(NonPagedPool,
                                      sizeof(FLOATING_SAVE_CONTEXT),
                                      TAG_FLOATING_POINT_CONTEXT);
    if (!FsContext)
    {
        /* Bail out if we failed */
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    /*
     * Allocate some memory pool for the buffer. The size
     * of this allocated buffer is the FX area plus the
     * alignment requirement needed for FXSAVE, as a 16-byte
     * aligned pointer is compulsory in order to save the
     * FPU state.
     */
    FsContext->Buffer = ExAllocatePoolWithTag(NonPagedPool,
                                              sizeof(FX_SAVE_AREA) + FXSAVE_ALIGN,
                                              TAG_FLOATING_POINT_FX);
    if (!FsContext->Buffer)
    {
        /* Bail out if we failed */
        ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    /*
     * Now cache the allocated buffer in the save area,
     * aligned up to a 16-byte boundary. This is necessary
     * because the pool allocator only guarantees an 8-byte
     * aligned pointer, even though we requested extra room
     * for the alignment. Aligning the returned pointer in
     * place would break freeing it later, so we keep the
     * original buffer pointer and store the aligned copy
     * separately, leaving the allocation itself untouched.
     */
    FsContext->PfxSaveArea = ALIGN_UP_POINTER_BY(FsContext->Buffer, 16);

    /* Disable interrupts and get the current processor control region */
    _disable();
    CurrentPrcb = KeGetCurrentPrcb();

    /* Store the current thread to context */
    FsContext->CurrentThread = KeGetCurrentThread();

    /*
     * If the FPU (aka Numeric Processor eXtension) state
     * currently belongs to some other thread, save that
     * state into its owner's NPX area and take over
     * ownership, so the scheduler knows the FPU state
     * now belongs to this thread.
     */
    if (FsContext->CurrentThread != CurrentPrcb->NpxThread)
    {
        if ((CurrentPrcb->NpxThread != NULL) &&
            (CurrentPrcb->NpxThread->NpxState == NPX_STATE_LOADED))
        {
            /* Get the FX frame */
            FxSaveAreaFrame = KiGetThreadNpxArea(CurrentPrcb->NpxThread);

            /* Save the FPU state */
            Ke386SaveFpuState(FxSaveAreaFrame);

            /* NPX thread has lost its state */
            CurrentPrcb->NpxThread->NpxState = NPX_STATE_NOT_LOADED;
            FxSaveAreaFrame->NpxSavedCpu = 0;
        }

        /* The new NPX thread is the current thread */
        CurrentPrcb->NpxThread = FsContext->CurrentThread;
    }

    /* Perform the save */
    Ke386SaveFpuState(FsContext->PfxSaveArea);

    /* Store the NPX IRQL */
    FsContext->OldNpxIrql = FsContext->CurrentThread->Header.NpxIrql;

    /* Set the current IRQL to NPX */
    FsContext->CurrentThread->Header.NpxIrql = KeGetCurrentIrql();

    /* Initialize the FPU */
    Ke386FnInit();

    /* Enable interrupts back */
    _enable();

    /* Give the saved FPU context to the caller */
    *((PVOID *) Save) = FsContext;
    return STATUS_SUCCESS;
}

/**
 * @brief
 * Restores the original FPU state context that has
 * been saved by a prior call to KeSaveFloatingPointState.
 * Callers are expected to restore the floating point
 * state by calling this function when they've finished
 * doing FPU operations.
 *
 * @param[in] Save
 * The saved floating point context that is to be given
 * to the function to restore the FPU state.
 *
 * @return
 * Returns STATUS_SUCCESS indicating the function
 * has fully completed its operations.
 */
#if defined(__clang__)
__attribute__((__target__("sse")))
#endif
NTSTATUS
NTAPI
KeRestoreFloatingPointState(
    _In_ PKFLOATING_SAVE Save)
{
    PFLOATING_SAVE_CONTEXT FsContext;

    /* Sanity checks */
    ASSERT(Save);
    ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
    ASSERT(KeI386NpxPresent);

    /* Cache the saved FS context */
    FsContext = *((PVOID *) Save);

    /*
     * Before restoring the saved FPU state, validate
     * that the state context was saved properly. First,
     * check that we are in the same calling thread.
     */
    if (FsContext->CurrentThread != KeGetCurrentThread())
    {
        /*
         * This isn't the thread that saved the
         * FPU state context, crash the system!
         */
        KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
                     0x2,
                     (ULONG_PTR)FsContext->CurrentThread,
                     (ULONG_PTR)KeGetCurrentThread(),
                     0);
    }

    /* Are we under the same NPX interrupt level? */
    if (FsContext->CurrentThread->Header.NpxIrql != KeGetCurrentIrql())
    {
        /* The interrupt level has changed, crash the system! */
        KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
                     0x1,
                     (ULONG_PTR)FsContext->CurrentThread->Header.NpxIrql,
                     (ULONG_PTR)KeGetCurrentIrql(),
                     0);
    }

    /* Disable interrupts */
    _disable();

    /*
     * The saved FPU state context is valid, so it's
     * time to restore it. First, clear any pending
     * FPU exceptions.
     */
    Ke386ClearFpExceptions();

    /* Restore the state */
    Ke386RestoreFpuState(FsContext->PfxSaveArea);

    /* Give the saved NPX IRQL back to the NPX thread */
    FsContext->CurrentThread->Header.NpxIrql = FsContext->OldNpxIrql;

    /* Enable interrupts back */
    _enable();

    /* We're done, free the allocated area and context */
    ExFreePoolWithTag(FsContext->Buffer, TAG_FLOATING_POINT_FX);
    ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);

    return STATUS_SUCCESS;
}

/*
 * @implemented
 */
ULONG
NTAPI
KeGetRecommendedSharedDataAlignment(VOID)
{
    /* Return the global variable */
    return KeLargestCacheLine;
}

VOID
NTAPI
KiFlushTargetEntireTb(IN PKIPI_CONTEXT PacketContext,
                      IN PVOID Ignored1,
                      IN PVOID Ignored2,
                      IN PVOID Ignored3)
{
    /* Signal this packet as done */
    KiIpiSignalPacketDone(PacketContext);

    /* Flush the TB for the Current CPU */
    KeFlushCurrentTb();
}

/*
 * @implemented
 */
VOID
NTAPI
KeFlushEntireTb(IN BOOLEAN Invalid,
                IN BOOLEAN AllProcessors)
{
    KIRQL OldIrql;
#ifdef CONFIG_SMP
    KAFFINITY TargetAffinity;
    PKPRCB Prcb = KeGetCurrentPrcb();
#endif

    /* Raise the IRQL for the TB Flush */
    OldIrql = KeRaiseIrqlToSynchLevel();

#ifdef CONFIG_SMP
    /* FIXME: Use KiTbFlushTimeStamp to synchronize TB flush */

    /* Get the current processor affinity, and exclude ourselves */
    TargetAffinity = KeActiveProcessors;
    TargetAffinity &= ~Prcb->SetMember;

    /* Make sure this is MP */
    if (TargetAffinity)
    {
        /* Send an IPI TB flush to the other processors */
        KiIpiSendPacket(TargetAffinity,
                        KiFlushTargetEntireTb,
                        NULL,
                        0,
                        NULL);
    }
#endif

    /* Flush the TB for the Current CPU, and update the flush stamp */
    KeFlushCurrentTb();

#ifdef CONFIG_SMP
    /* If this is MP, wait for the other processors to finish */
    if (TargetAffinity)
    {
        /* Sanity check */
        ASSERT(Prcb == KeGetCurrentPrcb());

        /* FIXME: TODO */
        ASSERTMSG("Not yet implemented\n", FALSE);
    }
#endif

    /* Update the flush stamp and return to original IRQL */
    InterlockedExchangeAdd(&KiTbFlushTimeStamp, 1);
    KeLowerIrql(OldIrql);
}

/*
 * @implemented
 */
VOID
NTAPI
KeSetDmaIoCoherency(IN ULONG Coherency)
{
    /* Save the coherency globally */
    KiDmaIoCoherency = Coherency;
}

/*
 * @implemented
 */
KAFFINITY
NTAPI
KeQueryActiveProcessors(VOID)
{
    PAGED_CODE();

    /* Simply return the active processor mask */
    return KeActiveProcessors;
}

/*
 * @implemented
 */
VOID
__cdecl
KeSaveStateForHibernate(IN PKPROCESSOR_STATE State)
{
    /* Capture the context */
    RtlCaptureContext(&State->ContextFrame);

    /* Capture the control state */
    KiSaveProcessorControlState(State);
}