/*
 * PROJECT:         ReactOS Kernel
 * LICENSE:         GPL - See COPYING in the top level directory
 * FILE:            ntoskrnl/ke/i386/cpu.c
 * PURPOSE:         Routines for CPU-level support
 * PROGRAMMERS:     Alex Ionescu (alex.ionescu@reactos.org)
 */

/* INCLUDES *****************************************************************/

#include <ntoskrnl.h>
#define NDEBUG
#include <debug.h>

#include <xmmintrin.h>

/* GLOBALS *******************************************************************/

/* The TSS to use for Double Fault Traps (INT 0x8) */
UCHAR KiDoubleFaultTSS[KTSS_IO_MAPS];

/* The TSS to use for NMI Fault Traps (INT 0x2) */
UCHAR KiNMITSS[KTSS_IO_MAPS];

/* CPU Features and Flags */
ULONG KeI386CpuType;
ULONG KeI386CpuStep;
ULONG KiFastSystemCallDisable = 0;
ULONG KeI386NpxPresent = TRUE;
ULONG KiMXCsrMask = 0;
ULONG MxcsrFeatureMask = 0;
ULONG KeI386XMMIPresent = 0;
ULONG KeI386FxsrPresent = 0;
ULONG KeI386MachineType;
ULONG Ke386Pae = FALSE;
ULONG Ke386NoExecute = FALSE;
ULONG KeLargestCacheLine = 0x40;
ULONG KeDcacheFlushCount = 0;
ULONG KeIcacheFlushCount = 0;
ULONG KiDmaIoCoherency = 0;
ULONG KePrefetchNTAGranularity = 32;
BOOLEAN KiI386PentiumLockErrataPresent;
BOOLEAN KiSMTProcessorsPresent;

/* The distance between SYSEXIT and IRETD return modes */
UCHAR KiSystemCallExitAdjust;

/* The offset that was applied -- either 0 or the value above */
UCHAR KiSystemCallExitAdjusted;

/* Whether the adjustment was already done once */
BOOLEAN KiFastCallCopyDoneOnce;

/* Flush data */
volatile LONG KiTbFlushTimeStamp;

/* CPU Signatures */
static const CHAR CmpIntelID[]     = "GenuineIntel";
static const CHAR CmpAmdID[]       = "AuthenticAMD";
static const CHAR CmpCyrixID[]     = "CyrixInstead";
static const CHAR CmpTransmetaID[] = "GenuineTMx86";
static const CHAR CmpCentaurID[]   = "CentaurHauls";
static const CHAR CmpRiseID[]      = "RiseRiseRise";

typedef union _CPU_SIGNATURE
{
    struct
    {
        ULONG Step : 4;
        ULONG Model : 4;
        ULONG Family : 4;
        ULONG Unused : 4;
        ULONG ExtendedModel : 4;
        ULONG ExtendedFamily : 8;
        ULONG Unused2 : 4;
    };
    ULONG AsULONG;
} CPU_SIGNATURE;

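/*
 * Illustrative decode: a CPUID.1 EAX value of 0x000906EA yields
 * Step = 0xA, Model = 0xE, Family = 6, ExtendedModel = 9 and
 * ExtendedFamily = 0; KiSetProcessorType below turns this into
 * CpuType = 6 and CpuStep = 0x9E0A (effective model 0x9E, step 0xA).
 */
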
/* FX area alignment size */
#define FXSAVE_ALIGN 15

/* SUPPORT ROUTINES FOR MSVC COMPATIBILITY ***********************************/

/* NSC/Cyrix CPU configuration register index */
#define CX86_CCR1 0xc1

/* NSC/Cyrix CPU indexed register access macros */
static __inline
UCHAR
getCx86(UCHAR reg)
{
    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
    return READ_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23);
}

static __inline
void
setCx86(UCHAR reg, UCHAR data)
{
    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x22, reg);
    WRITE_PORT_UCHAR((PUCHAR)(ULONG_PTR)0x23, data);
}

/* FUNCTIONS *****************************************************************/

CODE_SEG("INIT")
ULONG
NTAPI
KiGetCpuVendor(VOID)
{
    PKPRCB Prcb = KeGetCurrentPrcb();
    CPU_INFO CpuInfo;

    /* Get the Vendor ID */
    KiCpuId(&CpuInfo, 0);

    /* Copy it to the PRCB and null-terminate it. CPUID returns the
       vendor string in EBX, EDX, ECX order ("Genu", "ineI", "ntel"). */
    *(ULONG*)&Prcb->VendorString[0] = CpuInfo.Ebx;
    *(ULONG*)&Prcb->VendorString[4] = CpuInfo.Edx;
    *(ULONG*)&Prcb->VendorString[8] = CpuInfo.Ecx;
    Prcb->VendorString[12] = 0;

    /* Now check the CPU Type */
    if (!strcmp(Prcb->VendorString, CmpIntelID))
    {
        return CPU_INTEL;
    }
    else if (!strcmp(Prcb->VendorString, CmpAmdID))
    {
        return CPU_AMD;
    }
    else if (!strcmp(Prcb->VendorString, CmpCyrixID))
    {
        DPRINT1("Cyrix CPU support not fully tested!\n");
        return CPU_CYRIX;
    }
    else if (!strcmp(Prcb->VendorString, CmpTransmetaID))
    {
        DPRINT1("Transmeta CPU support not fully tested!\n");
        return CPU_TRANSMETA;
    }
    else if (!strcmp(Prcb->VendorString, CmpCentaurID))
    {
        DPRINT1("Centaur CPU support not fully tested!\n");
        return CPU_CENTAUR;
    }
    else if (!strcmp(Prcb->VendorString, CmpRiseID))
    {
        DPRINT1("Rise CPU support not fully tested!\n");
        return CPU_RISE;
    }

    /* Unknown CPU */
    DPRINT1("%s CPU support not fully tested!\n", Prcb->VendorString);
    return CPU_UNKNOWN;
}

CODE_SEG("INIT")
VOID
NTAPI
KiSetProcessorType(VOID)
{
    CPU_INFO CpuInfo;
    CPU_SIGNATURE CpuSignature;
    BOOLEAN ExtendModel;
    ULONG Stepping, Type;

    /* Do CPUID 1 now */
    KiCpuId(&CpuInfo, 1);

    /*
     * Get the Stepping and Type. The stepping contains both the
     * Model and the Step, while the Type contains the returned Family.
     *
     * For the stepping, we convert this: zzzzzzxy into this: x0y
     */
    CpuSignature.AsULONG = CpuInfo.Eax;
    Stepping = CpuSignature.Model;
    ExtendModel = (CpuSignature.Family == 15);
#if ( (NTDDI_VERSION >= NTDDI_WINXPSP2) && (NTDDI_VERSION < NTDDI_WS03) ) || (NTDDI_VERSION >= NTDDI_WS03SP1)
    if (CpuSignature.Family == 6)
    {
        ULONG Vendor = KiGetCpuVendor();
        ExtendModel |= (Vendor == CPU_INTEL);
#if (NTDDI_VERSION >= NTDDI_WIN8)
        ExtendModel |= (Vendor == CPU_CENTAUR);
#endif
    }
#endif
    if (ExtendModel)
    {
        /* Add ExtendedModel to distinguish from non-extended values. */
        Stepping |= (CpuSignature.ExtendedModel << 4);
    }
    Stepping = (Stepping << 8) | CpuSignature.Step;
    Type = CpuSignature.Family;
    if (CpuSignature.Family == 15)
    {
        /* Add ExtendedFamily to distinguish from non-extended values.
         * It must not be larger than 0xF0 to avoid overflow. */
        Type += min(CpuSignature.ExtendedFamily, 0xF0);
    }

    /* Save them in the PRCB */
    KeGetCurrentPrcb()->CpuID = TRUE;
    KeGetCurrentPrcb()->CpuType = (UCHAR)Type;
    KeGetCurrentPrcb()->CpuStep = (USHORT)Stepping;
}

CODE_SEG("INIT")
ULONG64
NTAPI
KiGetFeatureBits(VOID)
{
    PKPRCB Prcb = KeGetCurrentPrcb();
    ULONG Vendor;
    ULONG64 FeatureBits = KF_WORKING_PTE;
    CPU_INFO CpuInfo, DummyCpuInfo;
    UCHAR Ccr1;
    BOOLEAN ExtendedCPUID = TRUE;
    ULONG CpuFeatures = 0;

    /* Get the Vendor ID */
    Vendor = KiGetCpuVendor();

    /* Make sure we got a valid vendor ID at least. */
    if (!Vendor) return FeatureBits;

    /* Get the CPUID Info. Features are in Reg[3]. */
    KiCpuId(&CpuInfo, 1);

    /* Set the initial APIC ID */
    Prcb->InitialApicId = (UCHAR)(CpuInfo.Ebx >> 24);

    switch (Vendor)
    {
        /* Intel CPUs */
        case CPU_INTEL:

            /* Check if it's a P6 */
            if (Prcb->CpuType == 6)
            {
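                /*
                 * MSR 0x8B is IA32_BIOS_SIGN_ID: per the documented Intel
                 * procedure, it is cleared, CPUID leaf 1 is executed, and
                 * the MSR is read back; the microcode update signature is
                 * then in its high 32 bits.
                 */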
                /* Perform the special sequence to get the MicroCode Signature */
                __writemsr(0x8B, 0);
                KiCpuId(&DummyCpuInfo, 1);
                Prcb->UpdateSignature.QuadPart = __readmsr(0x8B);
            }
            else if (Prcb->CpuType == 5)
            {
                /* On P5, enable workaround for the LOCK errata. */
                KiI386PentiumLockErrataPresent = TRUE;
            }

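            /*
             * These signature masks match early Pentium Pro (family 6,
             * model 1, steppings 9 and below) and early Pentium II
             * (family 6, model 3, steppings 4 and below) parts.
             */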
            /* Check for broken P6 with bad SMP PTE implementation */
            if (((CpuInfo.Eax & 0x0FF0) == 0x0610 && (CpuInfo.Eax & 0x000F) <= 0x9) ||
                ((CpuInfo.Eax & 0x0FF0) == 0x0630 && (CpuInfo.Eax & 0x000F) <= 0x4))
            {
                /* Remove support for correct PTE support. */
                FeatureBits &= ~KF_WORKING_PTE;
            }

            /* Check if the CPU is too old to support SYSENTER */
            if ((Prcb->CpuType < 6) ||
                ((Prcb->CpuType == 6) && (Prcb->CpuStep < 0x0303)))
            {
                /* Disable it */
                CpuInfo.Edx &= ~0x800;
            }

            break;

        /* AMD CPUs */
        case CPU_AMD:

            /* Check if this is a K5 or K6. (family 5) */
            if ((CpuInfo.Eax & 0x0F00) == 0x0500)
            {
                /* Get the Model Number */
                switch (CpuInfo.Eax & 0x00F0)
                {
                    /* Model 1: K5 - 5k86 (initial models) */
                    case 0x0010:

                        /* Check if this is Step 0 or 1. They don't support PGE */
                        if ((CpuInfo.Eax & 0x000F) > 0x03) break;
                        /* Fall through */

                    /* Model 0: K5 - SSA5 */
                    case 0x0000:

                        /* Model 0 doesn't support PGE at all. */
                        CpuInfo.Edx &= ~0x2000;
                        break;

                    /* Model 8: K6-2 */
                    case 0x0080:

                        /* K6-2, Step 8 and over have support for MTRR. */
                        if ((CpuInfo.Eax & 0x000F) >= 0x8) FeatureBits |= KF_AMDK6MTRR;
                        break;

                    /* Model 9: K6-III
                       Model D: K6-2+, K6-III+ */
                    case 0x0090:
                    case 0x00D0:

                        FeatureBits |= KF_AMDK6MTRR;
                        break;
                }
            }
            else if ((CpuInfo.Eax & 0x0F00) < 0x0500)
            {
                /* Families below 5 don't support PGE, PSE or CMOV at all */
                CpuInfo.Edx &= ~(0x08 | 0x2000 | 0x8000);

                /* They also don't support advanced CPUID functions. */
                ExtendedCPUID = FALSE;
            }

            break;

        /* Cyrix CPUs */
        case CPU_CYRIX:

            /* Workaround the "COMA" bug on 6x family of Cyrix CPUs */
            if (Prcb->CpuType == 6 &&
                Prcb->CpuStep <= 1)
            {
                /* Get CCR1 value */
                Ccr1 = getCx86(CX86_CCR1);

                /* Enable the NO_LOCK bit */
                Ccr1 |= 0x10;

                /* Set the new CCR1 value */
                setCx86(CX86_CCR1, Ccr1);
            }

            break;

        /* Transmeta CPUs */
        case CPU_TRANSMETA:

            /* Enable CMPXCHG8B if the family (>= 5), model and stepping (>= 4.2) support it */
            if ((CpuInfo.Eax & 0x0FFF) >= 0x0542)
            {
                __writemsr(0x80860004, __readmsr(0x80860004) | 0x0100);
                FeatureBits |= KF_CMPXCHG8B;
            }

            break;

        /* Centaur, IDT, Rise and VIA CPUs */
        case CPU_CENTAUR:
        case CPU_RISE:

            /* These CPUs don't report the presence of CMPXCHG8B through CPUID.
               However, this feature exists and operates properly without any additional steps. */
            FeatureBits |= KF_CMPXCHG8B;

            break;
    }

    /* Get some features from ECX */
    if (CpuInfo.Ecx & X86_FEATURE_SSE3) FeatureBits |= KF_SSE3;
    if (CpuInfo.Ecx & X86_FEATURE_SSSE3) FeatureBits |= KF_SSSE3;
    if (CpuInfo.Ecx & X86_FEATURE_SSE4_1) FeatureBits |= KF_SSE4_1;
    if (CpuInfo.Ecx & X86_FEATURE_SSE4_2) FeatureBits |= KF_SSE4_2;
    if (CpuInfo.Ecx & X86_FEATURE_XSAVE) FeatureBits |= KF_XSTATE;
    if (CpuInfo.Ecx & X86_FEATURE_RDRAND) FeatureBits |= KF_RDRAND;

    /* Set the current features */
    CpuFeatures = CpuInfo.Edx;

    /* Convert all CPUID Feature bits into our format */
    if (CpuFeatures & X86_FEATURE_VME) FeatureBits |= KF_V86_VIS | KF_CR4;
    if (CpuFeatures & X86_FEATURE_PSE) FeatureBits |= KF_LARGE_PAGE | KF_CR4;
    if (CpuFeatures & X86_FEATURE_TSC) FeatureBits |= KF_RDTSC;
    if (CpuFeatures & X86_FEATURE_CX8) FeatureBits |= KF_CMPXCHG8B;
    if (CpuFeatures & X86_FEATURE_SYSCALL) FeatureBits |= KF_FAST_SYSCALL;
    if (CpuFeatures & X86_FEATURE_MTTR) FeatureBits |= KF_MTRR;
    if (CpuFeatures & X86_FEATURE_PGE) FeatureBits |= KF_GLOBAL_PAGE | KF_CR4;
    if (CpuFeatures & X86_FEATURE_CMOV) FeatureBits |= KF_CMOV;
    if (CpuFeatures & X86_FEATURE_PAT) FeatureBits |= KF_PAT;
    if (CpuFeatures & X86_FEATURE_DS) FeatureBits |= KF_DTS;
    if (CpuFeatures & X86_FEATURE_MMX) FeatureBits |= KF_MMX;
    if (CpuFeatures & X86_FEATURE_FXSR) FeatureBits |= KF_FXSR;
    if (CpuFeatures & X86_FEATURE_SSE) FeatureBits |= KF_XMMI;
    if (CpuFeatures & X86_FEATURE_SSE2) FeatureBits |= KF_XMMI64;

    /* Check if the CPU has hyper-threading */
    if (CpuFeatures & X86_FEATURE_HT)
    {
        /* Set the number of logical CPUs */
        Prcb->LogicalProcessorsPerPhysicalProcessor = (UCHAR)(CpuInfo.Ebx >> 16);
        if (Prcb->LogicalProcessorsPerPhysicalProcessor > 1)
        {
            /* More than one logical processor (SMT or multi-core) */
            KiSMTProcessorsPresent = TRUE;
        }
    }
    else
    {
        /* We only have a single CPU */
        Prcb->LogicalProcessorsPerPhysicalProcessor = 1;
    }

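    /*
     * CPUID 0x80000000 returns the highest supported extended leaf in
     * EAX; the mask check below guards against old CPUs that echo
     * garbage instead of a value in the 0x800000xx range for
     * unsupported leaves.
     */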
    /* Check if CPUID 0x80000000 is supported */
    if (ExtendedCPUID)
    {
        /* Do the call */
        KiCpuId(&CpuInfo, 0x80000000);
        if ((CpuInfo.Eax & 0xffffff00) == 0x80000000)
        {
            /* Check if CPUID 0x80000001 is supported */
            if (CpuInfo.Eax >= 0x80000001)
            {
                /* Check which extended features are available. */
                KiCpuId(&CpuInfo, 0x80000001);

                /* Check if NX-bit is supported */
                if (CpuInfo.Edx & X86_FEATURE_NX) FeatureBits |= KF_NX_BIT;

                /* Now handle each feature for each CPU vendor */
                switch (Vendor)
                {
                    case CPU_AMD:
                    case CPU_CENTAUR:
                        if (CpuInfo.Edx & 0x80000000) FeatureBits |= KF_3DNOW;
                        break;
                }
            }
        }
    }

    /* Return the Feature Bits */
    return FeatureBits;
}

#if DBG
CODE_SEG("INIT")
VOID
KiReportCpuFeatures(VOID)
{
    ULONG CpuFeatures = 0;
    CPU_INFO CpuInfo;

    if (KiGetCpuVendor())
    {
        KiCpuId(&CpuInfo, 1);
        CpuFeatures = CpuInfo.Edx;
    }

    DPRINT1("Supported CPU features: ");

#define print_kf_bit(kf_value) if (KeFeatureBits & kf_value) DbgPrint(#kf_value " ")
    print_kf_bit(KF_V86_VIS);
    print_kf_bit(KF_RDTSC);
    print_kf_bit(KF_CR4);
    print_kf_bit(KF_CMOV);
    print_kf_bit(KF_GLOBAL_PAGE);
    print_kf_bit(KF_LARGE_PAGE);
    print_kf_bit(KF_MTRR);
    print_kf_bit(KF_CMPXCHG8B);
    print_kf_bit(KF_MMX);
    print_kf_bit(KF_WORKING_PTE);
    print_kf_bit(KF_PAT);
    print_kf_bit(KF_FXSR);
    print_kf_bit(KF_FAST_SYSCALL);
    print_kf_bit(KF_XMMI);
    print_kf_bit(KF_3DNOW);
    print_kf_bit(KF_AMDK6MTRR);
    print_kf_bit(KF_XMMI64);
    print_kf_bit(KF_DTS);
    print_kf_bit(KF_NX_BIT);
    print_kf_bit(KF_NX_DISABLED);
    print_kf_bit(KF_NX_ENABLED);
#undef print_kf_bit

#define print_cf(cpu_flag) if (CpuFeatures & cpu_flag) DbgPrint(#cpu_flag " ")
    print_cf(X86_FEATURE_PAE);
    print_cf(X86_FEATURE_APIC);
    print_cf(X86_FEATURE_HT);
#undef print_cf

    DbgPrint("\n");
}
#endif // DBG

CODE_SEG("INIT")
VOID
NTAPI
KiGetCacheInformation(VOID)
{
    PKIPCR Pcr = (PKIPCR)KeGetPcr();
    CPU_INFO CpuInfo;
    ULONG CacheRequests = 0, i;
    ULONG CurrentRegister;
    UCHAR RegisterByte, Associativity = 0;
    ULONG Size, CacheLine = 64, CurrentSize = 0;
    BOOLEAN FirstPass = TRUE;

    /* Set default L2 size */
    Pcr->SecondLevelCacheSize = 0;

    /* Check the Vendor ID */
    switch (KiGetCpuVendor())
    {
        /* Handle Intel case */
        case CPU_INTEL:

            /* Check if we support CPUID 2 */
            KiCpuId(&CpuInfo, 0);
            if (CpuInfo.Eax >= 2)
            {
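                /*
                 * CPUID leaf 2 packs cache/TLB descriptors one per byte
                 * into EAX, EBX, ECX and EDX: the low byte of EAX is the
                 * number of times the leaf must be queried, a register
                 * with bit 31 set holds no valid descriptors, and a zero
                 * byte is an empty slot.
                 */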
                /* We need to loop for the number of times CPUID will tell us to */
                do
                {
                    /* Do the CPUID call */
                    KiCpuId(&CpuInfo, 2);

                    /* Check if it was the first call */
                    if (FirstPass)
                    {
                        /*
                         * The number of times to loop is the first byte. Read
                         * it and then destroy it so we don't get confused.
                         */
                        CacheRequests = CpuInfo.Eax & 0xFF;
                        CpuInfo.Eax &= 0xFFFFFF00;

                        /* Don't go over this again */
                        FirstPass = FALSE;
                    }

                    /* Loop all 4 registers */
                    for (i = 0; i < 4; i++)
                    {
                        /* Get the current register */
                        CurrentRegister = CpuInfo.AsUINT32[i];

                        /*
                         * If the upper bit is set, then this register should
                         * be skipped.
                         */
                        if (CurrentRegister & 0x80000000) continue;

                        /* Keep looping for every byte inside this register */
                        while (CurrentRegister)
                        {
                            /* Read a byte, skip a byte. */
                            RegisterByte = (UCHAR)(CurrentRegister & 0xFF);
                            CurrentRegister >>= 8;
                            if (!RegisterByte) continue;

                            Size = 0;
                            switch (RegisterByte)
                            {
                                case 0x06:
                                case 0x08:
                                    KePrefetchNTAGranularity = 32;
                                    break;
                                case 0x09:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0x0a:
                                case 0x0c:
                                    KePrefetchNTAGranularity = 32;
                                    break;
                                case 0x0d:
                                case 0x0e:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0x1d:
                                    Size = 128 * 1024;
                                    Associativity = 2;
                                    break;
                                case 0x21:
                                    Size = 256 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0x24:
                                    Size = 1024 * 1024;
                                    Associativity = 16;
                                    break;
                                case 0x2c:
                                case 0x30:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0x41:
                                case 0x42:
                                case 0x43:
                                case 0x44:
                                case 0x45:
                                    Size = (1 << (RegisterByte - 0x41)) * 128 * 1024;
                                    Associativity = 4;
                                    break;
                                case 0x48:
                                    Size = 3 * 1024 * 1024;
                                    Associativity = 12;
                                    break;
                                case 0x49:
                                    Size = 4 * 1024 * 1024;
                                    Associativity = 16;
                                    break;
                                case 0x4e:
                                    Size = 6 * 1024 * 1024;
                                    Associativity = 24;
                                    break;
                                case 0x60:
                                case 0x66:
                                case 0x67:
                                case 0x68:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0x78:
                                    Size = 1024 * 1024;
                                    Associativity = 4;
                                    break;
                                case 0x79:
                                case 0x7a:
                                case 0x7b:
                                case 0x7c:
                                case 0x7d:
                                    Size = (1 << (RegisterByte - 0x79)) * 128 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0x7f:
                                    Size = 512 * 1024;
                                    Associativity = 2;
                                    break;
                                case 0x80:
                                    Size = 512 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0x82:
                                case 0x83:
                                case 0x84:
                                case 0x85:
                                    Size = (1 << (RegisterByte - 0x82)) * 256 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0x86:
                                    Size = 512 * 1024;
                                    Associativity = 4;
                                    break;
                                case 0x87:
                                    Size = 1024 * 1024;
                                    Associativity = 8;
                                    break;
                                case 0xf0:
                                    KePrefetchNTAGranularity = 64;
                                    break;
                                case 0xf1:
                                    KePrefetchNTAGranularity = 128;
                                    break;
                            }
                            if (Size && (Size / Associativity) > CurrentSize)
                            {
                                /* Set the L2 Cache Size and Associativity */
                                CurrentSize = Size / Associativity;
                                Pcr->SecondLevelCacheSize = Size;
                                Pcr->SecondLevelCacheAssociativity = Associativity;
                            }
                        }
                    }
                } while (--CacheRequests);
            }
            break;

        case CPU_AMD:

            /* Check if we support CPUID 0x80000005 */
            KiCpuId(&CpuInfo, 0x80000000);
            if (CpuInfo.Eax >= 0x80000005)
            {
                /* Get L1 size first */
                KiCpuId(&CpuInfo, 0x80000005);
                KePrefetchNTAGranularity = CpuInfo.Ecx & 0xFF;

                /* Check if we support CPUID 0x80000006 */
                KiCpuId(&CpuInfo, 0x80000000);
                if (CpuInfo.Eax >= 0x80000006)
                {
                    /* Get 2nd level cache and tlb size */
                    KiCpuId(&CpuInfo, 0x80000006);

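                    /*
                     * CPUID 0x80000006 ECX layout per AMD: bits 31:16
                     * hold the L2 size in KB, bits 15:12 an encoded
                     * associativity (decoded below), and bits 7:0 the
                     * line size in bytes.
                     */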
                    /* Cache line size */
                    CacheLine = CpuInfo.Ecx & 0xFF;

                    /* Hardcode associativity */
                    RegisterByte = (CpuInfo.Ecx >> 12) & 0xFF;
                    switch (RegisterByte)
                    {
                        case 2:
                            Associativity = 2;
                            break;

                        case 4:
                            Associativity = 4;
                            break;

                        case 6:
                            Associativity = 8;
                            break;

                        case 8:
                        case 15:
                            Associativity = 16;
                            break;

                        default:
                            Associativity = 1;
                            break;
                    }

                    /* Compute size */
                    Size = (CpuInfo.Ecx >> 16) << 10;

                    /* Hack for Model 6, Stepping 300 */
                    if ((KeGetCurrentPrcb()->CpuType == 6) &&
                        (KeGetCurrentPrcb()->CpuStep == 0x300))
                    {
                        /* Stick 64K in there */
                        Size = 64 * 1024;
                    }

                    /* Set the L2 Cache Size and associativity */
                    Pcr->SecondLevelCacheSize = Size;
                    Pcr->SecondLevelCacheAssociativity = Associativity;
                }
            }
            break;

        case CPU_CYRIX:
        case CPU_TRANSMETA:
        case CPU_CENTAUR:
        case CPU_RISE:

            /* FIXME */
            break;
    }

    /* Set the cache line */
    if (CacheLine > KeLargestCacheLine) KeLargestCacheLine = CacheLine;
    DPRINT1("Prefetch Cache: %lu bytes\tL2 Cache: %lu bytes\tL2 Cache Line: %lu bytes\tL2 Cache Associativity: %lu\n",
            KePrefetchNTAGranularity,
            Pcr->SecondLevelCacheSize,
            KeLargestCacheLine,
            Pcr->SecondLevelCacheAssociativity);
}

CODE_SEG("INIT")
VOID
NTAPI
KiSetCR0Bits(VOID)
{
    ULONG Cr0;

    /* Save current CR0 */
    Cr0 = __readcr0();

    /* If this is a 486 or later, enable Write-Protection */
    if (KeGetCurrentPrcb()->CpuType > 3) Cr0 |= CR0_WP;

    /* Set new Cr0 */
    __writecr0(Cr0);
}

CODE_SEG("INIT")
VOID
NTAPI
KiInitializeTSS2(IN PKTSS Tss,
                 IN PKGDTENTRY TssEntry OPTIONAL)
{
    PUCHAR p;

    /* Make sure the GDT Entry is valid */
    if (TssEntry)
    {
        /* Set the Limit */
        TssEntry->LimitLow = sizeof(KTSS) - 1;
        TssEntry->HighWord.Bits.LimitHi = 0;
    }

    /* Now clear the I/O Map */
    ASSERT(IOPM_COUNT == 1);
    RtlFillMemory(Tss->IoMaps[0].IoMap, IOPM_FULL_SIZE, 0xFF);

    /* Initialize Interrupt Direction Maps */
    p = (PUCHAR)(Tss->IoMaps[0].DirectionMap);
    RtlZeroMemory(p, IOPM_DIRECTION_MAP_SIZE);

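    /*
     * In the direction map each bit stands for one interrupt vector:
     * p[0] = 4 sets vector 0x02 (NMI), while p[3]/p[4] = 0x18 set
     * vectors 0x1B/0x1C and 0x23/0x24, the DOS interrupts that DPMI
     * reflects (Ctrl-Break, timer tick, Ctrl-C and critical error).
     */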
    /* Add DPMI support for interrupts */
    p[0] = 4;
    p[3] = 0x18;
    p[4] = 0x18;

    /* Initialize the default Interrupt Direction Map */
    p = Tss->IntDirectionMap;
    RtlZeroMemory(Tss->IntDirectionMap, IOPM_DIRECTION_MAP_SIZE);

    /* Add DPMI support */
    p[0] = 4;
    p[3] = 0x18;
    p[4] = 0x18;
}

VOID
NTAPI
KiInitializeTSS(IN PKTSS Tss)
{
    /* Set an invalid map base */
    Tss->IoMapBase = KiComputeIopmOffset(IO_ACCESS_MAP_NONE);

    /* Disable traps during Task Switches */
    Tss->Flags = 0;

    /* Set LDT and Ring 0 SS */
    Tss->LDT = 0;
    Tss->Ss0 = KGDT_R0_DATA;
}

CODE_SEG("INIT")
VOID
FASTCALL
Ki386InitializeTss(IN PKTSS Tss,
                   IN PKIDTENTRY Idt,
                   IN PKGDTENTRY Gdt)
{
    PKGDTENTRY TssEntry, TaskGateEntry;

    /* Initialize the boot TSS. */
    TssEntry = &Gdt[KGDT_TSS / sizeof(KGDTENTRY)];
    TssEntry->HighWord.Bits.Type = I386_TSS;
    TssEntry->HighWord.Bits.Pres = 1;
    TssEntry->HighWord.Bits.Dpl = 0;
    KiInitializeTSS2(Tss, TssEntry);
    KiInitializeTSS(Tss);

    /* Load the task register */
    Ke386SetTr(KGDT_TSS);

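    /*
     * Double faults and NMIs are dispatched through task gates rather
     * than interrupt gates: the hardware task switch loads a fresh,
     * known-good register and stack state from the TSS even when the
     * faulting context's stack is no longer usable.
     */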
    /* Setup the Task Gate for Double Fault Traps */
    TaskGateEntry = (PKGDTENTRY)&Idt[8];
    TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
    TaskGateEntry->HighWord.Bits.Pres = 1;
    TaskGateEntry->HighWord.Bits.Dpl = 0;
    ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_DF_TSS;

    /* Initialize the TSS used for handling double faults. */
    Tss = (PKTSS)KiDoubleFaultTSS;
    KiInitializeTSS(Tss);
    Tss->CR3 = __readcr3();
    Tss->Esp0 = KiDoubleFaultStack;
    Tss->Esp = KiDoubleFaultStack;
    Tss->Eip = PtrToUlong(KiTrap08);
    Tss->Cs = KGDT_R0_CODE;
    Tss->Fs = KGDT_R0_PCR;
    Tss->Ss = Ke386GetSs();
    Tss->Es = KGDT_R3_DATA | RPL_MASK;
    Tss->Ds = KGDT_R3_DATA | RPL_MASK;

    /* Setup the Double Trap TSS entry in the GDT */
    TssEntry = &Gdt[KGDT_DF_TSS / sizeof(KGDTENTRY)];
    TssEntry->HighWord.Bits.Type = I386_TSS;
    TssEntry->HighWord.Bits.Pres = 1;
    TssEntry->HighWord.Bits.Dpl = 0;
    TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
    TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
    TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
    TssEntry->LimitLow = KTSS_IO_MAPS;

    /* Now setup the NMI Task Gate */
    TaskGateEntry = (PKGDTENTRY)&Idt[2];
    TaskGateEntry->HighWord.Bits.Type = I386_TASK_GATE;
    TaskGateEntry->HighWord.Bits.Pres = 1;
    TaskGateEntry->HighWord.Bits.Dpl = 0;
    ((PKIDTENTRY)TaskGateEntry)->Selector = KGDT_NMI_TSS;

    /* Initialize the actual TSS */
    Tss = (PKTSS)KiNMITSS;
    KiInitializeTSS(Tss);
    Tss->CR3 = __readcr3();
    Tss->Esp0 = KiDoubleFaultStack;
    Tss->Esp = KiDoubleFaultStack;
    Tss->Eip = PtrToUlong(KiTrap02);
    Tss->Cs = KGDT_R0_CODE;
    Tss->Fs = KGDT_R0_PCR;
    Tss->Ss = Ke386GetSs();
    Tss->Es = KGDT_R3_DATA | RPL_MASK;
    Tss->Ds = KGDT_R3_DATA | RPL_MASK;

    /* And its associated TSS Entry */
    TssEntry = &Gdt[KGDT_NMI_TSS / sizeof(KGDTENTRY)];
    TssEntry->HighWord.Bits.Type = I386_TSS;
    TssEntry->HighWord.Bits.Pres = 1;
    TssEntry->HighWord.Bits.Dpl = 0;
    TssEntry->BaseLow = (USHORT)((ULONG_PTR)Tss & 0xFFFF);
    TssEntry->HighWord.Bytes.BaseMid = (UCHAR)((ULONG_PTR)Tss >> 16);
    TssEntry->HighWord.Bytes.BaseHi = (UCHAR)((ULONG_PTR)Tss >> 24);
    TssEntry->LimitLow = KTSS_IO_MAPS;
}

VOID
NTAPI
KeFlushCurrentTb(VOID)
{

#if !defined(_GLOBAL_PAGES_ARE_AWESOME_)

    /* Flush the TLB by resetting CR3 */
    __writecr3(__readcr3());

#else

    /* Check if global pages are enabled */
    if (KeFeatureBits & KF_GLOBAL_PAGE)
    {
        ULONG Cr4;

        /* Disable PGE (Note: may not have been enabled yet) */
        Cr4 = __readcr4();
        __writecr4(Cr4 & ~CR4_PGE);

        /* Flush everything */
        __writecr3(__readcr3());

        /* Re-enable PGE */
        __writecr4(Cr4);
    }
    else
    {
        /* No global pages, resetting CR3 is enough */
        __writecr3(__readcr3());
    }

#endif

}

VOID
NTAPI
KiRestoreProcessorControlState(PKPROCESSOR_STATE ProcessorState)
{
    PKGDTENTRY TssEntry;

    //
    // Restore the CR registers
    //
    __writecr0(ProcessorState->SpecialRegisters.Cr0);
    Ke386SetCr2(ProcessorState->SpecialRegisters.Cr2);
    __writecr3(ProcessorState->SpecialRegisters.Cr3);
    if (KeFeatureBits & KF_CR4) __writecr4(ProcessorState->SpecialRegisters.Cr4);

    //
    // Restore the DR registers
    //
    __writedr(0, ProcessorState->SpecialRegisters.KernelDr0);
    __writedr(1, ProcessorState->SpecialRegisters.KernelDr1);
    __writedr(2, ProcessorState->SpecialRegisters.KernelDr2);
    __writedr(3, ProcessorState->SpecialRegisters.KernelDr3);
    __writedr(6, ProcessorState->SpecialRegisters.KernelDr6);
    __writedr(7, ProcessorState->SpecialRegisters.KernelDr7);

    //
    // Restore GDT and IDT
    //
    Ke386SetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
    __lidt(&ProcessorState->SpecialRegisters.Idtr.Limit);

    //
    // Clear the busy flag so we don't crash if we reload the same selector
    //
    TssEntry = (PKGDTENTRY)(ProcessorState->SpecialRegisters.Gdtr.Base +
                            ProcessorState->SpecialRegisters.Tr);
    TssEntry->HighWord.Bytes.Flags1 &= ~0x2;

    //
    // Restore TSS and LDT
    //
    Ke386SetTr(ProcessorState->SpecialRegisters.Tr);
    Ke386SetLocalDescriptorTable(ProcessorState->SpecialRegisters.Ldtr);
}

VOID
NTAPI
KiSaveProcessorControlState(OUT PKPROCESSOR_STATE ProcessorState)
{
    /* Save the CR registers */
    ProcessorState->SpecialRegisters.Cr0 = __readcr0();
    ProcessorState->SpecialRegisters.Cr2 = __readcr2();
    ProcessorState->SpecialRegisters.Cr3 = __readcr3();
    ProcessorState->SpecialRegisters.Cr4 = (KeFeatureBits & KF_CR4) ?
                                           __readcr4() : 0;

    /* Save the DR registers */
    ProcessorState->SpecialRegisters.KernelDr0 = __readdr(0);
    ProcessorState->SpecialRegisters.KernelDr1 = __readdr(1);
    ProcessorState->SpecialRegisters.KernelDr2 = __readdr(2);
    ProcessorState->SpecialRegisters.KernelDr3 = __readdr(3);
    ProcessorState->SpecialRegisters.KernelDr6 = __readdr(6);
    ProcessorState->SpecialRegisters.KernelDr7 = __readdr(7);
    __writedr(7, 0);

    /* Save GDT, IDT, LDT and TSS */
    Ke386GetGlobalDescriptorTable(&ProcessorState->SpecialRegisters.Gdtr.Limit);
    __sidt(&ProcessorState->SpecialRegisters.Idtr.Limit);
    ProcessorState->SpecialRegisters.Tr = Ke386GetTr();
    Ke386GetLocalDescriptorTable(&ProcessorState->SpecialRegisters.Ldtr);
}

CODE_SEG("INIT")
VOID
NTAPI
KiInitializeMachineType(VOID)
{
    /* Set the Machine Type we got from NTLDR */
    KeI386MachineType = KeLoaderBlock->u.I386.MachineType & 0x000FF;
}

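/*
 * SYSENTER uses three architectural MSRs: 0x174 (IA32_SYSENTER_CS),
 * 0x175 (IA32_SYSENTER_ESP) and 0x176 (IA32_SYSENTER_EIP). Only the
 * CS selector is programmed explicitly; the CPU derives the kernel
 * SS from it (CS + 8) when the instruction executes.
 */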
CODE_SEG("INIT")
ULONG_PTR
NTAPI
KiLoadFastSyscallMachineSpecificRegisters(IN ULONG_PTR Context)
{
    /* Set SYSENTER CS and ESP */
    __writemsr(0x174, KGDT_R0_CODE);
    __writemsr(0x175, (ULONG_PTR)KeGetCurrentPrcb()->DpcStack);

    /* Set SYSENTER EIP to the system call entry point */
    __writemsr(0x176, (ULONG_PTR)KiFastCallEntry);
    return 0;
}

CODE_SEG("INIT")
VOID
NTAPI
KiRestoreFastSyscallReturnState(VOID)
{
    /* Check if the CPU Supports fast system call */
    if (KeFeatureBits & KF_FAST_SYSCALL)
    {
        /* Check if it has been disabled */
        if (KiFastSystemCallDisable)
        {
            /* Disable fast system call */
            KeFeatureBits &= ~KF_FAST_SYSCALL;
            KiFastCallExitHandler = KiSystemCallTrapReturn;
            DPRINT1("Support for SYSENTER disabled.\n");
        }
        else
        {
            /* Do an IPI to enable it */
            KeIpiGenericCall(KiLoadFastSyscallMachineSpecificRegisters, 0);

            /* It's enabled, so use the proper exit stub */
            KiFastCallExitHandler = KiSystemCallSysExitReturn;
            DPRINT("Support for SYSENTER detected.\n");
        }
    }
    else
    {
        /* Use the IRET handler */
        KiFastCallExitHandler = KiSystemCallTrapReturn;
        DPRINT1("No support for SYSENTER detected.\n");
    }
}

CODE_SEG("INIT")
ULONG_PTR
NTAPI
Ki386EnableDE(IN ULONG_PTR Context)
{
    /* Enable DE */
    __writecr4(__readcr4() | CR4_DE);
    return 0;
}

CODE_SEG("INIT")
ULONG_PTR
NTAPI
Ki386EnableFxsr(IN ULONG_PTR Context)
{
    /* Enable FXSR */
    __writecr4(__readcr4() | CR4_FXSR);
    return 0;
}

CODE_SEG("INIT")
ULONG_PTR
NTAPI
Ki386EnableXMMIExceptions(IN ULONG_PTR Context)
{
    PKIDTENTRY IdtEntry;

    /* Get the IDT Entry for Interrupt 0x13 */
    IdtEntry = &((PKIPCR)KeGetPcr())->IDT[0x13];

    /* Set it up */
    IdtEntry->Selector = KGDT_R0_CODE;
    IdtEntry->Offset = ((ULONG_PTR)KiTrap13 & 0xFFFF);
    IdtEntry->ExtendedOffset = ((ULONG_PTR)KiTrap13 >> 16) & 0xFFFF;
    ((PKIDT_ACCESS)&IdtEntry->Access)->Dpl = 0;
    ((PKIDT_ACCESS)&IdtEntry->Access)->Present = 1;
    ((PKIDT_ACCESS)&IdtEntry->Access)->SegmentType = I386_INTERRUPT_GATE;

    /* Enable XMMI exceptions */
    __writecr4(__readcr4() | CR4_XMMEXCPT);
    return 0;
}

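/*
 * Workaround for the Pentium "F00F" lock errata: the invalid,
 * LOCK-prefixed CMPXCHG8B encoding (F0 0F C7 C8) hangs the CPU while
 * it tries to deliver the resulting fault. With IDT entries 0-6 kept
 * on a read-only page, the IDT access performed during the locked
 * sequence raises a recoverable page fault instead of deadlocking.
 */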
CODE_SEG("INIT")
VOID
NTAPI
KiI386PentiumLockErrataFixup(VOID)
{
    KDESCRIPTOR IdtDescriptor = {0, 0, 0};
    PKIDTENTRY NewIdt, NewIdt2;
    PMMPTE PointerPte;

    /* Allocate memory for a new IDT */
    NewIdt = ExAllocatePool(NonPagedPool, 2 * PAGE_SIZE);
    if (!NewIdt) return;

    /* Put everything after the first 7 entries on a new page */
    NewIdt2 = (PVOID)((ULONG_PTR)NewIdt + PAGE_SIZE - (7 * sizeof(KIDTENTRY)));

    /* Disable interrupts */
    _disable();

    /* Get the current IDT and copy it */
    __sidt(&IdtDescriptor.Limit);
    RtlCopyMemory(NewIdt2,
                  (PVOID)IdtDescriptor.Base,
                  IdtDescriptor.Limit + 1);
    IdtDescriptor.Base = (ULONG)NewIdt2;

    /* Set the new IDT */
    __lidt(&IdtDescriptor.Limit);
    ((PKIPCR)KeGetPcr())->IDT = NewIdt2;

    /* Restore interrupts */
    _enable();

    /* Set the first 7 entries as read-only to produce a fault */
    PointerPte = MiAddressToPte(NewIdt);
    ASSERT(PointerPte->u.Hard.Write == 1);
    PointerPte->u.Hard.Write = 0;
    KeInvalidateTlbEntry(NewIdt);
}

BOOLEAN
NTAPI
KeInvalidateAllCaches(VOID)
{
    /* Only supported on Pentium Pro and higher */
    if (KeI386CpuType < 6) return FALSE;

    /* Invalidate all caches */
    __wbinvd();
    return TRUE;
}

VOID
NTAPI
KiSaveProcessorState(IN PKTRAP_FRAME TrapFrame,
                     IN PKEXCEPTION_FRAME ExceptionFrame)
{
    PKPRCB Prcb = KeGetCurrentPrcb();

    //
    // Save full context
    //
    Prcb->ProcessorState.ContextFrame.ContextFlags = CONTEXT_FULL |
                                                     CONTEXT_DEBUG_REGISTERS;
    KeTrapFrameToContext(TrapFrame, NULL, &Prcb->ProcessorState.ContextFrame);

    //
    // Save control registers
    //
    KiSaveProcessorControlState(&Prcb->ProcessorState);
}

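/*
 * Classic Pentium FDIV errata check: on affected parts the division
 * 4195835.0 / 3145727.0 is computed incorrectly, so the expression
 * x - (x / y) * y below yields a non-zero integer instead of 0.
 */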
CODE_SEG("INIT")
BOOLEAN
NTAPI
KiIsNpxErrataPresent(VOID)
{
    static double Value1 = 4195835.0, Value2 = 3145727.0;
    INT ErrataPresent;
    ULONG Cr0;

    /* Interrupts have to be disabled here. */
    ASSERT(!(__readeflags() & EFLAGS_INTERRUPT_MASK));

    /* Read CR0 and remove FPU flags */
    Cr0 = __readcr0();
    __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));

    /* Initialize FPU state */
    Ke386FnInit();

    /* Multiply the magic values and divide, we should get the result back */
#ifdef __GNUC__
    __asm__ __volatile__
    (
        "fldl %1\n\t"
        "fdivl %2\n\t"
        "fmull %2\n\t"
        "fldl %1\n\t"
        "fsubp\n\t"
        "fistpl %0\n\t"
        : "=m" (ErrataPresent)
        : "m" (Value1),
          "m" (Value2)
    );
#else
    __asm
    {
        fld Value1
        fdiv Value2
        fmul Value2
        fld Value1
        fsubp st(1), st(0)
        fistp ErrataPresent
    };
#endif

    /* Restore CR0 */
    __writecr0(Cr0);

    /* Return if there's an errata */
    return ErrataPresent != 0;
}

VOID
NTAPI
KiFlushNPXState(IN PFLOATING_SAVE_AREA SaveArea)
{
    ULONG EFlags, Cr0;
    PKTHREAD Thread, NpxThread;
    PFX_SAVE_AREA FxSaveArea;

    /* Save volatiles and disable interrupts */
    EFlags = __readeflags();
    _disable();

    /* Save the PCR and get the current thread */
    Thread = KeGetCurrentThread();

    /* Check if we're already loaded */
    if (Thread->NpxState != NPX_STATE_LOADED)
    {
        /* If there's nothing to load, quit */
        if (!SaveArea)
        {
            /* Restore interrupt state and return */
            __writeeflags(EFlags);
            return;
        }

        /* Need FXSR support for this */
        ASSERT(KeI386FxsrPresent == TRUE);

        /* Check for sane CR0 */
        Cr0 = __readcr0();
        if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
        {
            /* Mask out FPU flags */
            __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
        }

        /* Get the NPX thread and check its FPU state */
        NpxThread = KeGetCurrentPrcb()->NpxThread;
        if ((NpxThread) && (NpxThread->NpxState == NPX_STATE_LOADED))
        {
            /* Get the FX frame and store the state there */
            FxSaveArea = KiGetThreadNpxArea(NpxThread);
            Ke386FxSave(FxSaveArea);

            /* NPX thread has lost its state */
            NpxThread->NpxState = NPX_STATE_NOT_LOADED;
        }

        /* Now load NPX state from the NPX area */
        FxSaveArea = KiGetThreadNpxArea(Thread);
        Ke386FxStore(FxSaveArea);
    }
    else
    {
        /* Check for sane CR0 */
        Cr0 = __readcr0();
        if (Cr0 & (CR0_MP | CR0_TS | CR0_EM))
        {
            /* Mask out FPU flags */
            __writecr0(Cr0 & ~(CR0_MP | CR0_TS | CR0_EM));
        }

        /* Get FX frame */
        FxSaveArea = KiGetThreadNpxArea(Thread);
        Thread->NpxState = NPX_STATE_NOT_LOADED;

        /* Save state if supported by CPU */
        if (KeI386FxsrPresent) Ke386FxSave(FxSaveArea);
    }

    /* Now save the FN state wherever it was requested */
    if (SaveArea) Ke386FnSave(SaveArea);

    /* Clear NPX thread */
    KeGetCurrentPrcb()->NpxThread = NULL;

    /* Add the CR0 from the NPX frame */
    Cr0 |= NPX_STATE_NOT_LOADED;
    Cr0 |= FxSaveArea->Cr0NpxState;
    __writecr0(Cr0);

    /* Restore interrupt state */
    __writeeflags(EFlags);
}

/* PUBLIC FUNCTIONS **********************************************************/

/*
 * @implemented
 */
VOID
NTAPI
KiCoprocessorError(VOID)
{
    PFX_SAVE_AREA NpxArea;

    /* Get the FPU area */
    NpxArea = KiGetThreadNpxArea(KeGetCurrentThread());

    /* Set CR0_TS */
    NpxArea->Cr0NpxState = CR0_TS;
    __writecr0(__readcr0() | CR0_TS);
}

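/*
 * Illustrative usage of the save/restore pair below from a driver
 * (identifiers local to this sketch):
 *
 *     KFLOATING_SAVE FloatSave;
 *     NTSTATUS Status = KeSaveFloatingPointState(&FloatSave);
 *     if (NT_SUCCESS(Status))
 *     {
 *         // ... FPU/MMX/SSE work at IRQL <= DISPATCH_LEVEL ...
 *         KeRestoreFloatingPointState(&FloatSave);
 *     }
 */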
/**
 * @brief
 * Saves the floating point unit state context of
 * the calling thread.
 *
 * @param[out] Save
 * The saved floating point context, given to the
 * caller at the end of the function's operations.
 * The contents of this structure are opaque to the
 * calling thread.
 *
 * @return
 * Returns STATUS_SUCCESS if the function has
 * successfully completed its operations.
 * STATUS_INSUFFICIENT_RESOURCES is returned
 * if the function couldn't allocate memory
 * for the FPU state information.
 *
 * @remarks
 * The function saves the FPU state in one of two
 * ways: a plain FPU save (FNSAVE) if the system
 * doesn't have SSE/SSE2, otherwise a combined
 * FPU, MMX and SSE state save (FXSAVE).
 */
#if defined(__clang__)
__attribute__((__target__("sse")))
#endif
NTSTATUS
NTAPI
KeSaveFloatingPointState(
    _Out_ PKFLOATING_SAVE Save)
{
    PFLOATING_SAVE_CONTEXT FsContext;
    PFX_SAVE_AREA FxSaveAreaFrame;
    PKPRCB CurrentPrcb;

    /* Sanity checks */
    ASSERT(Save);
    ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
    ASSERT(KeI386NpxPresent);

    /* Initialize the floating point context */
    FsContext = ExAllocatePoolWithTag(NonPagedPool,
                                      sizeof(FLOATING_SAVE_CONTEXT),
                                      TAG_FLOATING_POINT_CONTEXT);
    if (!FsContext)
    {
        /* Bail out if we failed */
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    /*
     * Allocate some memory pool for the buffer. The size
     * of this allocated buffer is the FX area plus the
     * alignment requirement needed for FXSAVE, as a 16-byte
     * aligned pointer is compulsory in order to save the
     * FPU state.
     */
    FsContext->Buffer = ExAllocatePoolWithTag(NonPagedPool,
                                              sizeof(FX_SAVE_AREA) + FXSAVE_ALIGN,
                                              TAG_FLOATING_POINT_FX);
    if (!FsContext->Buffer)
    {
        /* Bail out if we failed */
        ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);
        return STATUS_INSUFFICIENT_RESOURCES;
    }

    /*
     * Now cache the allocated buffer in the save area and
     * align that area to a 16-byte boundary. We have to do
     * this because the pool allocator only guarantees 8-byte
     * alignment; we padded the allocation size above so that
     * an aligned pointer still fits inside the buffer.
     * Aligning the returned pointer in place would cause
     * issues when freeing it afterwards, so we keep the
     * original pointer cached and never touch the allocated
     * buffer any further.
     */
    FsContext->PfxSaveArea = ALIGN_UP_POINTER_BY(FsContext->Buffer, 16);

    /* Disable interrupts and get the current processor control region */
    _disable();
    CurrentPrcb = KeGetCurrentPrcb();

    /* Store the current thread to context */
    FsContext->CurrentThread = KeGetCurrentThread();

    /*
     * Save the previous NPX (Numeric Processor eXtension)
     * thread's state registers into its own context, so that
     * the scheduler knows the current FPU state now belongs
     * to this thread.
     */
    if (FsContext->CurrentThread != CurrentPrcb->NpxThread)
    {
        if ((CurrentPrcb->NpxThread != NULL) &&
            (CurrentPrcb->NpxThread->NpxState == NPX_STATE_LOADED))
        {
            /* Get the FX frame */
            FxSaveAreaFrame = KiGetThreadNpxArea(CurrentPrcb->NpxThread);

            /* Save the FPU state */
            Ke386SaveFpuState(FxSaveAreaFrame);

            /* NPX thread has lost its state */
            CurrentPrcb->NpxThread->NpxState = NPX_STATE_NOT_LOADED;
            FxSaveAreaFrame->NpxSavedCpu = 0;
        }

        /* The new NPX thread is the current thread */
        CurrentPrcb->NpxThread = FsContext->CurrentThread;
    }

    /* Perform the save */
    Ke386SaveFpuState(FsContext->PfxSaveArea);

    /* Store the NPX IRQL */
    FsContext->OldNpxIrql = FsContext->CurrentThread->Header.NpxIrql;

    /* Set the current IRQL to NPX */
    FsContext->CurrentThread->Header.NpxIrql = KeGetCurrentIrql();

    /* Initialize the FPU */
    Ke386FnInit();

    /* Re-enable interrupts */
    _enable();

    /* Give the saved FPU context to the caller */
    *((PVOID *) Save) = FsContext;
    return STATUS_SUCCESS;
}

/**
 * @brief
 * Restores the original FPU state context that has
 * been saved by an API call to KeSaveFloatingPointState.
 * Callers are expected to restore the floating point
 * state by calling this function when they've finished
 * doing FPU operations.
 *
 * @param[in] Save
 * The saved floating point context that is given
 * to the function to restore the FPU state.
 *
 * @return
 * Returns STATUS_SUCCESS indicating the function
 * has fully completed its operations.
 */
#if defined(__clang__)
__attribute__((__target__("sse")))
#endif
NTSTATUS
NTAPI
KeRestoreFloatingPointState(
    _In_ PKFLOATING_SAVE Save)
{
    PFLOATING_SAVE_CONTEXT FsContext;

    /* Sanity checks */
    ASSERT(Save);
    ASSERT(KeGetCurrentIrql() <= DISPATCH_LEVEL);
    ASSERT(KeI386NpxPresent);

    /* Cache the saved FS context */
    FsContext = *((PVOID *) Save);

    /*
     * We have to restore the regular saved FPU
     * state. For this we must first do some
     * validation checks to make sure the state
     * context was saved properly. Check if we
     * are in the same calling thread.
     */
    if (FsContext->CurrentThread != KeGetCurrentThread())
    {
        /*
         * This isn't the thread that saved the
         * FPU state context, crash the system!
         */
        KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
                     0x2,
                     (ULONG_PTR)FsContext->CurrentThread,
                     (ULONG_PTR)KeGetCurrentThread(),
                     0);
    }

    /* Are we under the same NPX interrupt level? */
    if (FsContext->CurrentThread->Header.NpxIrql != KeGetCurrentIrql())
    {
        /* The interrupt level has changed, crash the system! */
        KeBugCheckEx(INVALID_FLOATING_POINT_STATE,
                     0x1,
                     (ULONG_PTR)FsContext->CurrentThread->Header.NpxIrql,
                     (ULONG_PTR)KeGetCurrentIrql(),
                     0);
    }

    /* Disable interrupts */
    _disable();

    /*
     * The saved FPU state context is valid,
     * it's time to restore the state. First,
     * clear FPU exceptions now.
     */
    Ke386ClearFpExceptions();

    /* Restore the state */
    Ke386RestoreFpuState(FsContext->PfxSaveArea);

    /* Give the saved NPX IRQL back to the NPX thread */
    FsContext->CurrentThread->Header.NpxIrql = FsContext->OldNpxIrql;

    /* Re-enable interrupts */
    _enable();

    /* We're done, free the allocated area and context */
    ExFreePoolWithTag(FsContext->Buffer, TAG_FLOATING_POINT_FX);
    ExFreePoolWithTag(FsContext, TAG_FLOATING_POINT_CONTEXT);

    return STATUS_SUCCESS;
}

/*
 * @implemented
 */
ULONG
NTAPI
KeGetRecommendedSharedDataAlignment(VOID)
{
    /* Return the global variable */
    return KeLargestCacheLine;
}

VOID
NTAPI
KiFlushTargetEntireTb(IN PKIPI_CONTEXT PacketContext,
                      IN PVOID Ignored1,
                      IN PVOID Ignored2,
                      IN PVOID Ignored3)
{
    /* Signal this packet as done */
    KiIpiSignalPacketDone(PacketContext);

    /* Flush the TB for the Current CPU */
    KeFlushCurrentTb();
}

/*
 * @implemented
 */
VOID
NTAPI
KeFlushEntireTb(IN BOOLEAN Invalid,
                IN BOOLEAN AllProcessors)
{
    KIRQL OldIrql;
#ifdef CONFIG_SMP
    KAFFINITY TargetAffinity;
    PKPRCB Prcb = KeGetCurrentPrcb();
#endif

    /* Raise the IRQL for the TB Flush */
    OldIrql = KeRaiseIrqlToSynchLevel();

#ifdef CONFIG_SMP
    /* FIXME: Use KiTbFlushTimeStamp to synchronize TB flush */

    /* Get the current processor affinity, and exclude ourselves */
    TargetAffinity = KeActiveProcessors;
    TargetAffinity &= ~Prcb->SetMember;

    /* Make sure this is MP */
    if (TargetAffinity)
    {
        /* Send an IPI TB flush to the other processors */
        KiIpiSendPacket(TargetAffinity,
                        KiFlushTargetEntireTb,
                        NULL,
                        0,
                        NULL);
    }
#endif

    /* Flush the TB for the Current CPU, and update the flush stamp */
    KeFlushCurrentTb();

#ifdef CONFIG_SMP
    /* If this is MP, wait for the other processors to finish */
    if (TargetAffinity)
    {
        /* Sanity check */
        ASSERT(Prcb == KeGetCurrentPrcb());

        /* FIXME: TODO */
        ASSERTMSG("Not yet implemented\n", FALSE);
    }
#endif

    /* Update the flush stamp and return to original IRQL */
    InterlockedExchangeAdd(&KiTbFlushTimeStamp, 1);
    KeLowerIrql(OldIrql);
}

/*
 * @implemented
 */
VOID
NTAPI
KeSetDmaIoCoherency(IN ULONG Coherency)
{
    /* Save the coherency globally */
    KiDmaIoCoherency = Coherency;
}

/*
 * @implemented
 */
VOID
__cdecl
KeSaveStateForHibernate(IN PKPROCESSOR_STATE State)
{
    /* Capture the context */
    RtlCaptureContext(&State->ContextFrame);

    /* Capture the control state */
    KiSaveProcessorControlState(State);
}