/////////////////////////////////////////////////////////////////////////
// $Id: paging.cc 14328 2021-07-27 19:18:34Z vruppert $
/////////////////////////////////////////////////////////////////////////
//
// Copyright (C) 2001-2021 The Bochs Project
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
/////////////////////////////////////////////////////////////////////////

#define NEED_CPU_REG_SHORTCUTS 1
#include "bochs.h"
#include "cpu.h"
#include "msr.h"
#define LOG_THIS BX_CPU_THIS_PTR

#include "memory/memory-bochs.h"
#include "pc_system.h"

// X86 Registers Which Affect Paging:
// ==================================
//
// CR0:
//   bit 31: PG, Paging (386+)
//   bit 16: WP, Write Protect (486+)
//     0: allow   supervisor level writes into user level RO pages
//     1: inhibit supervisor level writes into user level RO pages
//
// CR3:
//   bit 31..12: PDBR, Page Directory Base Register (386+)
//   bit      4: PCD, Page level Cache Disable (486+)
//     Controls caching of the current page directory. Affects only the
//     processor's internal caches (L1 and L2).
//     This flag is ignored if paging is disabled (PG=0) or caching is disabled (CD=1).
//     Values:
//       0: Page Directory can be cached
//       1: Page Directory not cached
//   bit      3: PWT, Page level Writes Transparent (486+)
//     Controls the write-through or write-back caching policy of the current
//     page directory. Affects only the processor's internal caches (L1 and L2).
//     This flag is ignored if paging is disabled (PG=0) or caching is disabled (CD=1).
//     Values:
//       0: write-back caching enabled
//       1: write-through caching enabled
//
// CR4:
//   bit 4: PSE, Page Size Extension (Pentium+)
//     0: 4KByte pages (typical)
//     1: 4MByte or 2MByte pages
//   bit 5: PAE, Physical Address Extension (Pentium Pro+)
//     0: 32bit physical addresses
//     1: 36bit physical addresses
//   bit 7: PGE, Page Global Enable (Pentium Pro+)
//     The global page feature allows frequently used or shared pages
//     to be marked as global (PDE or PTE bit 8). Global pages are
//     not flushed from the TLB on a task switch or write to CR3.
//     Values:
//       0: disables global page feature
//       1: enables global page feature
//
// page size extension and physical address size extension matrix (legacy mode)
// ==============================================================================
// CR0.PG  CR4.PAE  CR4.PSE  PDPE.PS  PDE.PS | page size   physical address size
// ==============================================================================
//    0       X        X        R       X    |    --          paging disabled
//    1       0        0        R       X    |    4K              32bits
//    1       0        1        R       0    |    4K              32bits
//    1       0        1        R       1    |    4M              32bits
//    1       1        X        R       0    |    4K              36bits
//    1       1        X        R       1    |    2M              36bits

// page size extension and physical address size extension matrix (long mode)
// ==============================================================================
// CR0.PG  CR4.PAE  CR4.PSE  PDPE.PS  PDE.PS | page size   physical address size
// ==============================================================================
//    1       1        X        0       0    |    4K              52bits
//    1       1        X        0       1    |    2M              52bits
//    1       1        X        1       -    |    1G              52bits
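//
// Illustrative sketch (not part of the emulator; summarizes the two matrices
// above): how the effective page size falls out of CR4.PAE, CR4.PSE, PDPE.PS
// and PDE.PS once paging is enabled. The helper name is hypothetical.
//
//   unsigned page_shift(bool pae, bool pse, bool pdpe_ps, bool pde_ps)
//   {
//     if (pae && pdpe_ps) return 30;               // 1G (long mode only)
//     if (pae)            return pde_ps ? 21 : 12; // 2M or 4K
//     if (pse && pde_ps)  return 22;               // 4M
//     return 12;                                   // 4K
//   }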

// Page Directory/Table Entry Fields Defined:
// ==========================================
// NX: No Execute
//   This bit controls the ability to execute code from all physical
//   pages mapped by the table entry.
//     0: Code can be executed from the mapped physical pages
//     1: Code cannot be executed
//   The NX bit can only be set when the no-execute page-protection
//   feature is enabled by setting EFER.NXE=1. If EFER.NXE=0, the
//   NX bit is treated as reserved. In this case, a #PF occurs if the
//   NX bit is not cleared to zero.
//
// G: Global flag
//   Indicates a global page when set. When a page is marked
//   global and the PGE flag in CR4 is set, the page table or
//   directory entry for the page is not invalidated in the TLB
//   when CR3 is loaded or a task switch occurs. Only software
//   clears and sets this flag. For page directory entries that
//   point to page tables, this flag is ignored and the global
//   characteristics of a page are set in the page table entries.
//
// PS: Page Size flag
//   Only used in page directory entries. When PS=0, the page
//   size is 4KBytes and the page directory entry points to a
//   page table. When PS=1, the page size is 4MBytes for
//   normal 32-bit addressing and 2MBytes if extended physical
//   addressing is used.
//
// PAT: Page-Attribute Table
//   This bit is only present in the lowest level of the page
//   translation hierarchy. The PAT bit is the high-order bit
//   of a 3-bit index into the PAT register. The other two
//   bits involved in forming the index are the PCD and PWT
//   bits.
//
// D: Dirty bit:
//   The processor sets the Dirty bit in the 2nd-level page table before a
//   write operation to an address mapped by that page table entry.
//   The Dirty bit in directory entries is undefined.
//
// A: Accessed bit:
//   The processor sets the Accessed bits in both levels of page tables before
//   a read/write operation to a page.
//
// PCD: Page level Cache Disable
//   Controls caching of individual pages or page tables.
//   This allows a per-page based mechanism to disable caching, for
//   those pages which contain memory mapped IO, or otherwise
//   should not be cached. The processor ignores this flag if paging
//   is not used (CR0.PG=0) or the cache disable bit is set (CR0.CD=1).
//   Values:
//     0: page or page table can be cached
//     1: page or page table is not cached (prevented)
//
// PWT: Page level Write Through
//   Controls the write-through or write-back caching policy of individual
//   pages or page tables. The processor ignores this flag if paging
//   is not used (CR0.PG=0) or the cache disable bit is set (CR0.CD=1).
//   Values:
//     0: write-back caching
//     1: write-through caching
//
// U/S: User/Supervisor level
//   0: Supervisor level - for the OS, drivers, etc.
//   1: User level - application code and data
//
// R/W: Read/Write access
//   0: read-only access
//   1: read/write access
//
// P: Present
//   0: Not present
//   1: Present
// ==========================================
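//
// Illustrative sketch (not used by the emulator): decoding the common fields
// of a 32-bit legacy PTE/PDE with plain bit masks. The PG_* constant names
// are hypothetical, chosen for this example only.
//
//   const Bit32u PG_P   = 1 << 0;  // Present
//   const Bit32u PG_RW  = 1 << 1;  // Read/Write
//   const Bit32u PG_US  = 1 << 2;  // User/Supervisor
//   const Bit32u PG_PWT = 1 << 3;  // Page-level Write-Through
//   const Bit32u PG_PCD = 1 << 4;  // Page-level Cache Disable
//   const Bit32u PG_A   = 1 << 5;  // Accessed
//   const Bit32u PG_D   = 1 << 6;  // Dirty (leaf entries only)
//   const Bit32u PG_PS  = 1 << 7;  // Page Size (PDE) / PAT (PTE)
//   const Bit32u PG_G   = 1 << 8;  // Global (if CR4.PGE=1)
//
//   bool pte_present(Bit32u entry)  { return (entry & PG_P)  != 0; }
//   bool pte_writable(Bit32u entry) { return (entry & PG_RW) != 0; }
//   Bit32u pte_frame(Bit32u entry)  { return entry & 0xfffff000; } // 4K frame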

// Combined page directory/page table protection:
// ==============================================
// There is one column for the combined effect on a 386
// and one column for the combined effect on a 486+ CPU.
// The 386 CPU behavior is not supported by Bochs.
//
// +----------------+-----------------+----------------+----------------+
// | Page Directory | Page Table      | Combined 386   | Combined 486+  |
// | Privilege Type | Privilege Type  | Privilege Type | Privilege Type |
// |----------------+-----------------+----------------+----------------|
// | User       R   | User       R    | User       R   | User       R   |
// | User       R   | User       RW   | User       R   | User       R   |
// | User       RW  | User       R    | User       R   | User       R   |
// | User       RW  | User       RW   | User       RW  | User       RW  |
// | User       R   | Supervisor R    | User       R   | Supervisor RW  |
// | User       R   | Supervisor RW   | User       R   | Supervisor RW  |
// | User       RW  | Supervisor R    | User       R   | Supervisor RW  |
// | User       RW  | Supervisor RW   | User       RW  | Supervisor RW  |
// | Supervisor R   | User       R    | User       R   | Supervisor RW  |
// | Supervisor R   | User       RW   | User       R   | Supervisor RW  |
// | Supervisor RW  | User       R    | User       R   | Supervisor RW  |
// | Supervisor RW  | User       RW   | User       RW  | Supervisor RW  |
// | Supervisor R   | Supervisor R    | Supervisor RW  | Supervisor RW  |
// | Supervisor R   | Supervisor RW   | Supervisor RW  | Supervisor RW  |
// | Supervisor RW  | Supervisor R    | Supervisor RW  | Supervisor RW  |
// | Supervisor RW  | Supervisor RW   | Supervisor RW  | Supervisor RW  |
// +----------------+-----------------+----------------+----------------+
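//
// Illustrative sketch (mirrors how the walk code below combines levels on
// 486+): the effective U/S and R/W of a translation are the bitwise AND of
// the corresponding bits from every walked level, so a page is user
// accessible only if every level allows user access, and writable only if
// every level allows writes. The "Supervisor RW" entries above for read-only
// supervisor pages reflect CR0.WP handling, which is applied later via the
// priv_check[] lookup rather than here.
//
//   Bit32u combined = BX_COMBINED_ACCESS_WRITE | BX_COMBINED_ACCESS_USER;
//   combined &= pde_entry;  // keep U/S (bit 2) and R/W (bit 1)
//   combined &= pte_entry;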

// Page Fault Error Code Format:
// =============================
//
// bits 31..4: Reserved
// bit  3: RSVD (Pentium Pro+)
//   0: fault was not caused by a reserved bit violation
//   1: fault was caused by reserved bits set to 1 in a page directory
//      when the PSE or PAE flags in CR4 are set to 1
// bit  2: U/S (386+)
//   0: fault originated when in supervisor mode
//   1: fault originated when in user mode
// bit  1: R/W (386+)
//   0: access causing the fault was a read
//   1: access causing the fault was a write
// bit  0: P (386+)
//   0: fault caused by a nonpresent page
//   1: fault caused by a page level protection violation
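//
// Illustrative sketch (matches the encoding used by page_fault() further
// down; the helper name is hypothetical): building a page-fault error code
// from its components.
//
//   Bit32u make_pf_error_code(bool present, bool write, bool user, bool rsvd)
//   {
//     return (present ? 1 : 0)       // bit 0: P
//          | ((write ? 1 : 0) << 1)  // bit 1: R/W
//          | ((user  ? 1 : 0) << 2)  // bit 2: U/S
//          | ((rsvd  ? 1 : 0) << 3); // bit 3: RSVD
//   }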

// Some paging related notes:
// ==========================
//
// - When the processor is running at supervisor level, all pages are both
//   readable and writable (write-protect ignored). When running at user
//   level, only pages which belong to the user level are accessible;
//   read/write & read-only pages are readable, read/write pages are writable.
//
// - If the Present bit is 0 in either level of page table, an
//   access which uses these entries will generate a page fault.
//
// - The (A)ccessed bit is used to report read or write access to a page
//   or 2nd level page table.
//
// - The (D)irty bit is used to report write access to a page.
//
// - A processor running at CPL=0,1,2 maps to U/S=0.
//   A processor running at CPL=3     maps to U/S=1.

// bit [11] of the TLB lpf is used as the TLB_NoHostPtr valid indication
#define TLB_LPFOf(laddr) AlignedAccessLPFOf(laddr, 0x7ff)

#if BX_CPU_LEVEL >= 4
#  define BX_PRIV_CHECK_SIZE 32
#else
#  define BX_PRIV_CHECK_SIZE 16
#endif

// The 'priv_check' array is used to decide if the current access
// has the proper paging permissions. An index is formed, based
// on parameters such as the access type and level, the write protect
// flag and values cached in the TLB. The format of the index into this
// array is:
//
//   |4 |3 |2 |1 |0 |
//   |wp|us|us|rw|rw|
//    |  |  |  |  |
//    |  |  |  |  +---> r/w of current access
//    |  |  +--+------> u/s,r/w combined of page dir & table (cached)
//    |  +------------> u/s of current access
//    +---------------> Current CR0.WP value
//
//                                                          CR0.WP = 0    CR0.WP = 1
// -----------------------------------------------------------------------------------
// 0 0 0 0 | sys read from supervisor page             | Allowed     | Allowed
// 0 0 0 1 | sys write to read only supervisor page    | Allowed     | Not Allowed
// 0 0 1 0 | sys read from supervisor page             | Allowed     | Allowed
// 0 0 1 1 | sys write to supervisor page              | Allowed     | Allowed
// 0 1 0 0 | sys read from read only user page         | Allowed     | Allowed
// 0 1 0 1 | sys write to read only user page          | Allowed     | Not Allowed
// 0 1 1 0 | sys read from user page                   | Allowed     | Allowed
// 0 1 1 1 | sys write to user page                    | Allowed     | Allowed
// 1 0 0 0 | user read from read only supervisor page  | Not Allowed | Not Allowed
// 1 0 0 1 | user write to read only supervisor page   | Not Allowed | Not Allowed
// 1 0 1 0 | user read from supervisor page            | Not Allowed | Not Allowed
// 1 0 1 1 | user write to supervisor page             | Not Allowed | Not Allowed
// 1 1 0 0 | user read from read only user page        | Allowed     | Allowed
// 1 1 0 1 | user write to read only user page         | Not Allowed | Not Allowed
// 1 1 1 0 | user read from user page                  | Allowed     | Allowed
// 1 1 1 1 | user write to user page                   | Allowed     | Allowed
//

/* 0xff0bbb0b */
static const Bit8u priv_check[BX_PRIV_CHECK_SIZE] =
{
  1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1,
#if BX_CPU_LEVEL >= 4
  1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 1, 1
#endif
};
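
// Illustrative sketch (mirrors the lookups in the translate_linear_*
// functions below): forming the priv_check index for an access.
//
//   // combined_access carries the ANDed U/S (bit 2) and R/W (bit 1) of all
//   // walked levels; isWrite supplies bit 0 of the access type.
//   unsigned priv_index = (wp << 4) |                // CR0.WP
//                         (user << 3) |              // u/s of current access
//                         (combined_access | (unsigned) isWrite);
//   bool allowed = priv_check[priv_index];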

// The 'priv_check' array for shadow stack accesses
//
//   |3 |2 |1 |0 |
//   |us|us|rw|rw|
//    |  |  |  |
//    |  |  |  +---> r/w of current access
//    |  +--+------> u/s,r/w combined of page dir & table (cached)
//    +------------> u/s of current access
//
// -------------------------------------------------------------------
// 0 0 0 0 | sys read from supervisor page             | Allowed
// 0 0 0 1 | sys write to read only supervisor page    | Allowed : shadow stack page looks like read only page
// 0 0 1 0 | sys read from supervisor page             | Allowed
// 0 0 1 1 | sys write to supervisor page              | Allowed
// 0 1 0 0 | sys read from read only user page         | Not Allowed : supervisor-mode shadow-stack access is not allowed to a user-mode page
// 0 1 0 1 | sys write to read only user page          | Not Allowed : supervisor-mode shadow-stack access is not allowed to a user-mode page
// 0 1 1 0 | sys read from user page                   | Not Allowed : supervisor-mode shadow-stack access is not allowed to a user-mode page
// 0 1 1 1 | sys write to user page                    | Not Allowed : supervisor-mode shadow-stack access is not allowed to a user-mode page
// 1 0 0 0 | user read from read only supervisor page  | Not Allowed : user-mode shadow-stack access is not allowed to a supervisor-mode page
// 1 0 0 1 | user write to read only supervisor page   | Not Allowed : user-mode shadow-stack access is not allowed to a supervisor-mode page
// 1 0 1 0 | user read from supervisor page            | Not Allowed : user-mode shadow-stack access is not allowed to a supervisor-mode page
// 1 0 1 1 | user write to supervisor page             | Not Allowed : user-mode shadow-stack access is not allowed to a supervisor-mode page
// 1 1 0 0 | user read from read only user page        | Allowed
// 1 1 0 1 | user write to read only user page         | Allowed : shadow stack page looks like read only page
// 1 1 1 0 | user read from user page                  | Allowed
// 1 1 1 1 | user write to user page                   | Allowed
//

const Bit64u BX_PAGING_PHY_ADDRESS_RESERVED_BITS = BX_PHY_ADDRESS_RESERVED_BITS & BX_CONST64(0xfffffffffffff);

const Bit64u PAGE_DIRECTORY_NX_BIT = BX_CONST64(0x8000000000000000);

const Bit64u BX_CR3_PAGING_MASK = BX_CONST64(0x000ffffffffff000);

// Each entry in the TLB cache has 3 fields:
//
//   lpf: Linear Page Frame (page aligned linear address of page)
//     bits 32..12  Linear page frame
//     bit  11      0: TLB HostPtr access allowed, 1: not allowed
//     bit  10...0  Invalidate index
//
//   ppf: Physical Page Frame (page aligned phy address of page)
//
//   hostPageAddr:
//     Host Page Frame address used for direct access to
//     the mem.vector[] space allocated for the guest physical
//     memory. If this is zero, it means that a pointer
//     to the host space could not be generated, likely because
//     that page of memory is not standard memory (it might
//     be memory mapped IO, ROM, etc).
//
//   accessBits:
//
//     bit 31: Page is a global page.
//
//     The following bits are used for a very efficient permissions
//     check. The goal is to be able, using only the current privilege
//     level and access type, to determine if the page tables allow the
//     access to occur or at least should rewalk the page tables. On
//     the first read access, permissions are set to only read, so a
//     rewalk is necessary when a subsequent write fails the tests.
//     This allows for the dirty bit to be set properly, but for the
//     test to be efficient. Note that the CR0.WP flag is not present.
//     The values in the following flags are based on the current CR0.WP
//     value, necessitating a TLB flush when CR0.WP changes.
//
//     The test bit:
//       OK = 1 << ((S<<2) | (W<<1) | U)
//
//     where S:1=Shadow Stack (CET)
//           W:1=Write, 0=Read;
//           U:1=CPL3, 0=CPL0-2
//
//     Thus for reads, it is:
//       OK = 0x01 << ( U )
//     for writes:
//       OK = 0x04 << ( U )
//     for shadow stack reads:
//       OK = 0x10 << ( U )
//     for shadow stack writes:
//       OK = 0x40 << ( U )
//
//     bit 3: Write from User   privilege is OK
//     bit 2: Write from System privilege is OK
//     bit 1: Read  from User   privilege is OK
//     bit 0: Read  from System privilege is OK
//
//     Note that the TLB should have the TLB_NoHostPtr bit set in the lpf when
//     direct access through the host pointer is NOT allowed for the page.
//     A memory operation asking for direct access through the host pointer
//     will not set the TLB_NoHostPtr bit in its lpf and will thus get a TLB
//     miss result when the direct access is not allowed.
//

const Bit32u TLB_NoHostPtr = 0x800; /* set this bit when direct access is NOT allowed */
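
// Illustrative sketch (mirrors the fast-path check in translate_linear()
// below): testing a cached TLB entry's accessBits for the current access.
//
//   // S = shadow stack access, W = write, U = user (CPL==3)
//   unsigned needed = 1 << ((S << 2) | (W << 1) | U);
//   if (tlbEntry->accessBits & needed) {
//     // permission granted without rewalking the page tables
//   }
//   else {
//     // insufficient cached rights: rewalk the page tables, maybe #PF
//   }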

#include "cpustats.h"

// ==============================================================

void BX_CPU_C::TLB_flush(void)
{
  INC_TLBFLUSH_STAT(tlbGlobalFlushes);

  invalidate_prefetch_q();
  invalidate_stack_cache();

  BX_CPU_THIS_PTR DTLB.flush();
  BX_CPU_THIS_PTR ITLB.flush();

#if BX_SUPPORT_MONITOR_MWAIT
  // invalidating the TLB might change the translation for a monitored page
  // and cause a subsequent MWAIT instruction to wait forever
  BX_CPU_THIS_PTR monitor.reset_monitor();
#endif

  // break all links between traces
  BX_CPU_THIS_PTR iCache.breakLinks();
}

#if BX_CPU_LEVEL >= 6
void BX_CPU_C::TLB_flushNonGlobal(void)
{
  INC_TLBFLUSH_STAT(tlbNonGlobalFlushes);

  invalidate_prefetch_q();
  invalidate_stack_cache();

  BX_CPU_THIS_PTR DTLB.flushNonGlobal();
  BX_CPU_THIS_PTR ITLB.flushNonGlobal();

#if BX_SUPPORT_MONITOR_MWAIT
  // invalidating the TLB might change the translation for a monitored page
  // and cause a subsequent MWAIT instruction to wait forever
  BX_CPU_THIS_PTR monitor.reset_monitor();
#endif

  // break all links between traces
  BX_CPU_THIS_PTR iCache.breakLinks();
}
#endif

void BX_CPU_C::TLB_invlpg(bx_address laddr)
{
  invalidate_prefetch_q();
  invalidate_stack_cache();

  BX_DEBUG(("TLB_invlpg(0x" FMT_ADDRX "): invalidate TLB entry", laddr));
  BX_CPU_THIS_PTR DTLB.invlpg(laddr);
  BX_CPU_THIS_PTR ITLB.invlpg(laddr);

#if BX_SUPPORT_MONITOR_MWAIT
  // invalidating the TLB entry might change the translation for the monitored
  // page and cause a subsequent MWAIT instruction to wait forever
  BX_CPU_THIS_PTR monitor.reset_monitor();
#endif

  // break all links between traces
  BX_CPU_THIS_PTR iCache.breakLinks();
}

void BX_CPP_AttrRegparmN(1) BX_CPU_C::INVLPG(bxInstruction_c* i)
{
  // CPL is always 0 in real mode
  if (/* !real_mode() && */ CPL!=0) {
    BX_ERROR(("%s: privilege check failed, generate #GP(0)", i->getIaOpcodeNameShort()));
    exception(BX_GP_EXCEPTION, 0);
  }

  bx_address eaddr = BX_CPU_RESOLVE_ADDR(i);
  bx_address laddr = get_laddr(i->seg(), eaddr);

#if BX_SUPPORT_VMX
  if (BX_CPU_THIS_PTR in_vmx_guest) {
    if (VMEXIT(VMX_VM_EXEC_CTRL2_INVLPG_VMEXIT)) VMexit(VMX_VMEXIT_INVLPG, laddr);
  }
#endif

#if BX_SUPPORT_SVM
  if (BX_CPU_THIS_PTR in_svm_guest) {
    if (SVM_INTERCEPT(SVM_INTERCEPT0_INVLPG))
      Svm_Vmexit(SVM_VMEXIT_INVLPG, BX_SUPPORT_SVM_EXTENSION(BX_CPUID_SVM_DECODE_ASSIST) ? laddr : 0);
  }
#endif

#if BX_SUPPORT_X86_64
  if (IsCanonical(laddr))
#endif
  {
    BX_INSTR_TLB_CNTRL(BX_CPU_ID, BX_INSTR_INVLPG, laddr);
    TLB_invlpg(laddr);
  }

  BX_NEXT_TRACE(i);
}

// error checking order - page not present, reserved bits, protection
enum {
  ERROR_NOT_PRESENT  = 0x00,
  ERROR_PROTECTION   = 0x01,
  ERROR_WRITE_ACCESS = 0x02,
  ERROR_USER_ACCESS  = 0x04,
  ERROR_RESERVED     = 0x08,
  ERROR_CODE_ACCESS  = 0x10,
  ERROR_PKEY         = 0x20,
  ERROR_SHADOW_STACK = 0x40,
};

void BX_CPU_C::page_fault(unsigned fault, bx_address laddr, unsigned user, unsigned rw)
{
  unsigned isWrite = rw & 1;

  Bit32u error_code = fault | (user << 2) | (isWrite << 1);
#if BX_CPU_LEVEL >= 6
  if (rw == BX_EXECUTE) {
    if (BX_CPU_THIS_PTR cr4.get_SMEP())
      error_code |= ERROR_CODE_ACCESS; // I/D = 1
    if (BX_CPU_THIS_PTR cr4.get_PAE() && BX_CPU_THIS_PTR efer.get_NXE())
      error_code |= ERROR_CODE_ACCESS;
  }
#endif
#if BX_SUPPORT_CET
  bool is_shadow_stack = (rw & 4) != 0;
  if (is_shadow_stack)
    error_code |= ERROR_SHADOW_STACK;
#endif

#if BX_SUPPORT_SVM
  SvmInterceptException(BX_HARDWARE_EXCEPTION, BX_PF_EXCEPTION, error_code, 1, laddr); // before the CR2 was modified
#endif

#if BX_SUPPORT_VMX
  VMexit_Event(BX_HARDWARE_EXCEPTION, BX_PF_EXCEPTION, error_code, 1, laddr); // before the CR2 was modified
#endif

  BX_CPU_THIS_PTR cr2 = laddr;

#if BX_SUPPORT_X86_64
  BX_DEBUG(("page fault for address %08x%08x @ %08x%08x",
            GET32H(laddr), GET32L(laddr), GET32H(RIP), GET32L(RIP)));
#else
  BX_DEBUG(("page fault for address %08x @ %08x", laddr, EIP));
#endif

  exception(BX_PF_EXCEPTION, error_code);
}

enum {
  BX_LEVEL_PML4  = 3,
  BX_LEVEL_PDPTE = 2,
  BX_LEVEL_PDE   = 1,
  BX_LEVEL_PTE   = 0
};

static const char *bx_paging_level[4] = { "PTE", "PDE", "PDPE", "PML4" }; // keep it 4 letters

// combined_access legend:
// -----------------------
// 00    |
// 01    | R/W
// 02    | U/S
// 03    |
// 07    | Shadow Stack
// 08    | Global
// 11-09 | memtype (3 bits)

enum {
  BX_COMBINED_ACCESS_WRITE = 0x2,
  BX_COMBINED_ACCESS_USER  = 0x4,
  BX_COMBINED_SHADOW_STACK = 0x80,
  BX_COMBINED_GLOBAL_PAGE  = 0x100,
};
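
// Illustrative sketch (follows the legend above): unpacking the fields that a
// translate_linear_* function folds into the low bits of its return value.
//
//   bool writable  = (combined_access & BX_COMBINED_ACCESS_WRITE) != 0;
//   bool user_page = (combined_access & BX_COMBINED_ACCESS_USER)  != 0;
//   unsigned memtype = (combined_access >> 9) & 0x7; // bits 11..9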

#if BX_CPU_LEVEL >= 6

// Format of a Long Mode Non-Leaf Entry
// -----------------------------------------------------------
// 00    | Present (P)
// 01    | R/W
// 02    | U/S
// 03    | Page-Level Write-Through (PWT)
// 04    | Page-Level Cache-Disable (PCD)
// 05    | Accessed (A)
// 06    | (ignored)
// 07    | Page Size (PS), must be 0 if no Large Page on the level
// 11-08 | (ignored)
// PA-12 | Physical address of 4-KByte aligned page-directory-pointer table
// 51-PA | Reserved (must be zero)
// 62-52 | (ignored)
// 63    | Execute-Disable (XD) (if EFER.NXE=1, reserved otherwise)
// -----------------------------------------------------------

const Bit64u PAGING_PAE_RESERVED_BITS = BX_PAGING_PHY_ADDRESS_RESERVED_BITS;

// in legacy PAE mode bits [62:52] are reserved; bit 63 is the NX bit (reserved if EFER.NXE=0)
const Bit64u PAGING_LEGACY_PAE_RESERVED_BITS = BX_PAGING_PHY_ADDRESS_RESERVED_BITS | BX_CONST64(0x7ff0000000000000);

// Format of a PDPTE that References a 1-GByte Page
// -----------------------------------------------------------
// 00    | Present (P)
// 01    | R/W
// 02    | U/S
// 03    | Page-Level Write-Through (PWT)
// 04    | Page-Level Cache-Disable (PCD)
// 05    | Accessed (A)
// 06    | (ignored)
// 07    | Page Size, must be 1 to indicate a 1-GByte Page
// 08    | Global (G) (if CR4.PGE=1, ignored otherwise)
// 11-09 | (ignored)
// 12    | PAT (if PAT is supported, reserved otherwise)
// 29-13 | Reserved (must be zero)
// PA-30 | Physical address of the 1-GByte page
// 51-PA | Reserved (must be zero)
// 62-52 | (ignored)
// 63    | Execute-Disable (XD) (if EFER.NXE=1, reserved otherwise)
// -----------------------------------------------------------

const Bit64u PAGING_PAE_PDPTE1G_RESERVED_BITS = BX_PAGING_PHY_ADDRESS_RESERVED_BITS | BX_CONST64(0x3FFFE000);

// Format of a PAE PDE that Maps a 2-MByte Page
// -----------------------------------------------------------
// 00    | Present (P)
// 01    | R/W
// 02    | U/S
// 03    | Page-Level Write-Through (PWT)
// 04    | Page-Level Cache-Disable (PCD)
// 05    | Accessed (A)
// 06    | Dirty (D)
// 07    | Page Size (PS), must be 1 to indicate a 2-MByte Page
// 08    | Global (G) (if CR4.PGE=1, ignored otherwise)
// 11-09 | (ignored)
// 12    | PAT (if PAT is supported, reserved otherwise)
// 20-13 | Reserved (must be zero)
// PA-21 | Physical address of the 2-MByte page
// 51-PA | Reserved (must be zero)
// 62-52 | ignored in long mode, reserved (must be 0) in legacy PAE mode
// 63    | Execute-Disable (XD) (if EFER.NXE=1, reserved otherwise)
// -----------------------------------------------------------

const Bit64u PAGING_PAE_PDE2M_RESERVED_BITS = BX_PAGING_PHY_ADDRESS_RESERVED_BITS | BX_CONST64(0x001FE000);

// Format of a PAE PTE that Maps a 4-KByte Page
// -----------------------------------------------------------
// 00    | Present (P)
// 01    | R/W
// 02    | U/S
// 03    | Page-Level Write-Through (PWT)
// 04    | Page-Level Cache-Disable (PCD)
// 05    | Accessed (A)
// 06    | Dirty (D)
// 07    | PAT (if PAT is supported, reserved otherwise)
// 08    | Global (G) (if CR4.PGE=1, ignored otherwise)
// 11-09 | (ignored)
// PA-12 | Physical address of the 4-KByte page
// 51-PA | Reserved (must be zero)
// 62-52 | ignored in long mode, reserved (must be 0) in legacy PAE mode
// 63    | Execute-Disable (XD) (if EFER.NXE=1, reserved otherwise)
// -----------------------------------------------------------

int BX_CPU_C::check_entry_PAE(const char *s, Bit64u entry, Bit64u reserved, unsigned rw, bool *nx_fault)
{
  if (!(entry & 0x1)) {
    BX_DEBUG(("PAE %s: entry not present", s));
    return ERROR_NOT_PRESENT;
  }

  if (entry & reserved) {
    BX_DEBUG(("PAE %s: reserved bit is set 0x" FMT_ADDRX64, s, entry));
    return ERROR_RESERVED | ERROR_PROTECTION;
  }

  if (entry & PAGE_DIRECTORY_NX_BIT) {
    if (rw == BX_EXECUTE) {
      BX_DEBUG(("PAE %s: non-executable page fault occurred", s));
      *nx_fault = true;
    }
  }

  return -1;
}

#if BX_SUPPORT_MEMTYPE
BX_CPP_INLINE Bit32u calculate_pcd_pwt(Bit32u entry)
{
  Bit32u pcd_pwt = (entry >> 3) & 0x3; // PCD, PWT are stored in bits 3 and 4
  return pcd_pwt;
}

// extract PCD, PWT and PAT bits from a page table entry
BX_CPP_INLINE Bit32u calculate_pat(Bit32u entry, Bit32u lpf_mask)
{
  Bit32u pcd_pwt = calculate_pcd_pwt(entry);
  // PAT is stored in bit 12 for large pages and in bit 7 for small pages
  Bit32u pat = ((lpf_mask < 0x1000) ? (entry >> 7) : (entry >> 12)) & 0x1;
  return pcd_pwt | (pat << 2);
}
#endif
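
// Illustrative note (follows calculate_pat() above): the 3-bit PAT index is
// PAT<<2 | PCD<<1 | PWT and, assuming the usual x86 PAT mechanism, selects
// one of the eight memory-type fields of the IA32_PAT MSR.
//
//   Bit32u pat_index = calculate_pat(leaf_entry, lpf_mask); // 0..7
//   // memtype_by_pat() maps this index to the programmed memory type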

#if BX_SUPPORT_X86_64

// Translate a linear address to a physical address in long mode
bx_phy_address BX_CPU_C::translate_linear_long_mode(bx_address laddr, Bit32u &lpf_mask, Bit32u &pkey, unsigned user, unsigned rw)
{
  bx_phy_address ppf = BX_CPU_THIS_PTR cr3 & BX_CR3_PAGING_MASK;

  bx_phy_address entry_addr[4];
  Bit64u entry[4];
  BxMemtype entry_memtype[4] = { 0 };

  bool nx_fault = false;
  int leaf;

  Bit64u offset_mask = BX_CONST64(0x0000ffffffffffff);
  lpf_mask = 0xfff;
  Bit32u combined_access = (BX_COMBINED_ACCESS_WRITE | BX_COMBINED_ACCESS_USER);
  Bit64u curr_entry = BX_CPU_THIS_PTR cr3;

  Bit64u reserved = PAGING_PAE_RESERVED_BITS;
  if (! BX_CPU_THIS_PTR efer.get_NXE())
    reserved |= PAGE_DIRECTORY_NX_BIT;

  for (leaf = BX_LEVEL_PML4;; --leaf) {
    entry_addr[leaf] = ppf + ((laddr >> (9 + 9*leaf)) & 0xff8);
#if BX_SUPPORT_VMX >= 2
    if (BX_CPU_THIS_PTR in_vmx_guest) {
      if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_ENABLE))
        entry_addr[leaf] = translate_guest_physical(entry_addr[leaf], laddr, 1, 1, BX_READ);
    }
#endif
#if BX_SUPPORT_SVM
    if (BX_CPU_THIS_PTR in_svm_guest && SVM_NESTED_PAGING_ENABLED) {
      entry_addr[leaf] = nested_walk(entry_addr[leaf], BX_RW, 1);
    }
#endif

#if BX_SUPPORT_MEMTYPE
    entry_memtype[leaf] = resolve_memtype(memtype_by_mtrr(entry_addr[leaf]), memtype_by_pat(calculate_pcd_pwt((Bit32u) curr_entry)));
#endif
    access_read_physical(entry_addr[leaf], 8, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 8, entry_memtype[leaf], BX_READ, (BX_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));

    offset_mask >>= 9;

    curr_entry = entry[leaf];
    int fault = check_entry_PAE(bx_paging_level[leaf], curr_entry, reserved, rw, &nx_fault);
    if (fault >= 0)
      page_fault(fault, laddr, user, rw);

    ppf = curr_entry & BX_CONST64(0x000ffffffffff000);

    if (leaf == BX_LEVEL_PTE) break;

    if (curr_entry & 0x80) {
      if (leaf > (BX_LEVEL_PDE + !!is_cpu_extension_supported(BX_ISA_1G_PAGES))) {
        BX_DEBUG(("long mode %s: PS bit set!", bx_paging_level[leaf]));
        page_fault(ERROR_RESERVED | ERROR_PROTECTION, laddr, user, rw);
      }

      ppf &= BX_CONST64(0x000fffffffffe000);
      if (ppf & offset_mask) {
        BX_DEBUG(("long mode %s: reserved bit is set: 0x" FMT_ADDRX64, bx_paging_level[leaf], curr_entry));
        page_fault(ERROR_RESERVED | ERROR_PROTECTION, laddr, user, rw);
      }

      lpf_mask = (Bit32u) offset_mask;
      break;
    }

    combined_access &= curr_entry; // U/S and R/W
  }

  bool isWrite = (rw & 1); // write or r-m-w

#if BX_SUPPORT_PKEYS
  if (rw != BX_EXECUTE) {
    if (BX_CPU_THIS_PTR cr4.get_PKE()) {
      pkey = (entry[leaf] >> 59) & 0xf;

      // check if the accessDisable bit is set
      if (user) {
        if (BX_CPU_THIS_PTR pkru & (1<<(pkey*2))) {
          BX_ERROR(("protection key access not allowed PKRU=%x pkey=%d", BX_CPU_THIS_PTR pkru, pkey));
          page_fault(ERROR_PROTECTION | ERROR_PKEY, laddr, user, rw);
        }
      }

      // check if the writeDisable bit is set
      if (BX_CPU_THIS_PTR pkru & (1<<(pkey*2+1))) {
        if (isWrite && (user || BX_CPU_THIS_PTR cr0.get_WP())) {
          BX_ERROR(("protection key write not allowed PKRU=%x pkey=%d", BX_CPU_THIS_PTR pkru, pkey));
          page_fault(ERROR_PROTECTION | ERROR_PKEY, laddr, user, rw);
        }
      }
    }

    if (BX_CPU_THIS_PTR cr4.get_PKS() && !user) {
      pkey = (entry[leaf] >> 59) & 0xf;

      // check if the accessDisable bit is set
      if (BX_CPU_THIS_PTR pkrs & (1<<(pkey*2))) {
        BX_ERROR(("protection key access not allowed PKRS=%x pkey=%d", BX_CPU_THIS_PTR pkrs, pkey));
        page_fault(ERROR_PROTECTION | ERROR_PKEY, laddr, user, rw);
      }

      // check if the writeDisable bit is set
      if (BX_CPU_THIS_PTR pkrs & (1<<(pkey*2+1))) {
        if (isWrite && BX_CPU_THIS_PTR cr0.get_WP()) {
          BX_ERROR(("protection key write not allowed PKRS=%x pkey=%d", BX_CPU_THIS_PTR pkrs, pkey));
          page_fault(ERROR_PROTECTION | ERROR_PKEY, laddr, user, rw);
        }
      }
    }
  }
#endif

#if BX_SUPPORT_CET
  bool shadow_stack = (rw & 4) != 0;
  if (shadow_stack) {
    // shadow stack pages:
    //  - R/W bit=1 in every paging structure entry except the leaf
    //  - R/W bit=0 and Dirty=1 for the leaf entry
    bool shadow_stack_page = ((combined_access & BX_COMBINED_ACCESS_WRITE) != 0) && ((entry[leaf] & 0x40) != 0) && ((entry[leaf] & 0x02) == 0);
    if (!shadow_stack_page) {
      BX_DEBUG(("shadow stack access to a non shadow stack page CA=%x entry=%x", combined_access, Bit32u(entry[leaf] & 0xfff)));
      page_fault(ERROR_PROTECTION, laddr, user, rw);
    }

    combined_access &= entry[leaf]; // U/S and R/W

    // must be to a shadow stack page, check that U/S matches
    if ((combined_access & BX_COMBINED_ACCESS_USER) ^ (user << 2)) {
      BX_DEBUG(("shadow stack U/S access mismatch"));
      page_fault(ERROR_PROTECTION, laddr, user, rw);
    }
    combined_access |= BX_COMBINED_SHADOW_STACK;
  }
  else
#endif
  {
    combined_access &= entry[leaf]; // U/S and R/W

    unsigned priv_index = (BX_CPU_THIS_PTR cr0.get_WP() << 4) |  // bit 4
                          (user<<3) |                            // bit 3
                          (combined_access | (unsigned)isWrite); // bit 2,1,0

    if (!priv_check[priv_index] || nx_fault)
      page_fault(ERROR_PROTECTION, laddr, user, rw);
  }

  if (BX_CPU_THIS_PTR cr4.get_SMEP() && rw == BX_EXECUTE && !user) {
    if (combined_access & BX_COMBINED_ACCESS_USER)
      page_fault(ERROR_PROTECTION, laddr, user, rw);
  }

  // SMAP protections are disabled if EFLAGS.AC=1
  if (BX_CPU_THIS_PTR cr4.get_SMAP() && ! BX_CPU_THIS_PTR get_AC() && rw != BX_EXECUTE && ! user) {
    if (combined_access & BX_COMBINED_ACCESS_USER)
      page_fault(ERROR_PROTECTION, laddr, user, rw);
  }

  if (BX_CPU_THIS_PTR cr4.get_PGE())
    combined_access |= (entry[leaf] & BX_COMBINED_GLOBAL_PAGE);

#if BX_SUPPORT_MEMTYPE
  combined_access |= (memtype_by_pat(calculate_pat((Bit32u) entry[leaf], lpf_mask)) << 9);
#endif

  // Update A/D bits if needed
  update_access_dirty_PAE(entry_addr, entry, entry_memtype, BX_LEVEL_PML4, leaf, isWrite);

  return (ppf | combined_access);
}

#endif

void BX_CPU_C::update_access_dirty_PAE(bx_phy_address *entry_addr, Bit64u *entry, BxMemtype *entry_memtype, unsigned max_level, unsigned leaf, unsigned write)
{
  // Update A bit if needed
  for (unsigned level=max_level; level > leaf; level--) {
    if (!(entry[level] & 0x20)) {
      entry[level] |= 0x20;
      access_write_physical(entry_addr[level], 8, &entry[level]);
      BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[level], 8, entry_memtype[level], BX_WRITE,
            (BX_PTE_ACCESS + level), (Bit8u*)(&entry[level]));
    }
  }

  // Update A/D bits if needed
  if (!(entry[leaf] & 0x20) || (write && !(entry[leaf] & 0x40))) {
    entry[leaf] |= (0x20 | (write<<6)); // Update A and possibly D bits
    access_write_physical(entry_addr[leaf], 8, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 8, entry_memtype[leaf], BX_WRITE,
            (BX_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));
  }
}

// Format of a Legacy PAE PDPTR entry (PDPTE)
// -----------------------------------------------------------
// 00    | Present (P)
// 02-01 | Reserved (must be zero)
// 03    | Page-Level Write-Through (PWT) (486+), 0=reserved otherwise
// 04    | Page-Level Cache-Disable (PCD) (486+), 0=reserved otherwise
// 08-05 | Reserved (must be zero)
// 11-09 | (ignored)
// PA-12 | Physical address of 4-KByte aligned page directory
// 63-PA | Reserved (must be zero)
// -----------------------------------------------------------

const Bit64u PAGING_PAE_PDPTE_RESERVED_BITS = BX_PAGING_PHY_ADDRESS_RESERVED_BITS | BX_CONST64(0xFFF00000000001E6);

bool BX_CPP_AttrRegparmN(1) BX_CPU_C::CheckPDPTR(bx_phy_address cr3_val)
{
  // with Nested Paging PDPTRs are not loaded for guest page tables but
  // accessed on demand as part of the guest page walk
#if BX_SUPPORT_SVM
  if (BX_CPU_THIS_PTR in_svm_guest && SVM_NESTED_PAGING_ENABLED)
    return 1;
#endif

  cr3_val &= 0xffffffe0;
#if BX_SUPPORT_VMX >= 2
  if (BX_CPU_THIS_PTR in_vmx_guest) {
    if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_ENABLE))
      cr3_val = translate_guest_physical(cr3_val, 0, 0, 1, BX_READ);
  }
#endif

  Bit64u pdptr[4];
  unsigned n;

  for (n=0; n<4; n++) {
    // read and check PDPTE entries
    bx_phy_address pdpe_entry_addr = (bx_phy_address) (cr3_val | (n << 3));
    access_read_physical(pdpe_entry_addr, 8, &(pdptr[n]));
    BX_NOTIFY_PHY_MEMORY_ACCESS(pdpe_entry_addr, 8, BX_MEMTYPE_INVALID, BX_READ, (BX_PDPTR0_ACCESS + n), (Bit8u*) &(pdptr[n]));

    if (pdptr[n] & 0x1) {
      if (pdptr[n] & PAGING_PAE_PDPTE_RESERVED_BITS) return 0;
    }
  }

  // load new PDPTRs
  for (n=0; n<4; n++)
    BX_CPU_THIS_PTR PDPTR_CACHE.entry[n] = pdptr[n];

  return 1; /* PDPTRs are fine */
}

#if BX_SUPPORT_VMX >= 2
bool BX_CPP_AttrRegparmN(1) BX_CPU_C::CheckPDPTR(Bit64u *pdptr)
{
  for (unsigned n=0; n<4; n++) {
    if (pdptr[n] & 0x1) {
      if (pdptr[n] & PAGING_PAE_PDPTE_RESERVED_BITS) return 0;
    }
  }

  return 1; /* PDPTRs are fine */
}
#endif

bx_phy_address BX_CPU_C::translate_linear_load_PDPTR(bx_address laddr, unsigned user, unsigned rw)
{
  unsigned index = (laddr >> 30) & 0x3;
  Bit64u pdptr;

#if BX_SUPPORT_SVM
  if (BX_CPU_THIS_PTR in_svm_guest && SVM_NESTED_PAGING_ENABLED)
  {
    bx_phy_address cr3_val = BX_CPU_THIS_PTR cr3 & 0xffffffe0;
    cr3_val = nested_walk(cr3_val, BX_RW, 1);

    bx_phy_address pdpe_entry_addr = (bx_phy_address) (cr3_val | (index << 3));
    access_read_physical(pdpe_entry_addr, 8, &pdptr);
    BX_NOTIFY_PHY_MEMORY_ACCESS(pdpe_entry_addr, 8, BX_MEMTYPE_INVALID, BX_READ, (BX_PDPTR0_ACCESS + index), (Bit8u*) &pdptr);

    if (pdptr & 0x1) {
      if (pdptr & PAGING_PAE_PDPTE_RESERVED_BITS) {
        BX_DEBUG(("PAE PDPTE%d entry reserved bits set: 0x" FMT_ADDRX64, index, pdptr));
        page_fault(ERROR_RESERVED | ERROR_PROTECTION, laddr, user, rw);
      }
    }
  }
  else
#endif
  {
    pdptr = BX_CPU_THIS_PTR PDPTR_CACHE.entry[index];
  }

  if (! (pdptr & 0x1)) {
    BX_DEBUG(("PAE PDPTE entry not present!"));
    page_fault(ERROR_NOT_PRESENT, laddr, user, rw);
  }

  return pdptr;
}

// Translate a linear address to a physical address in PAE paging mode
bx_phy_address BX_CPU_C::translate_linear_PAE(bx_address laddr, Bit32u &lpf_mask, unsigned user, unsigned rw)
{
  bx_phy_address entry_addr[2];
  Bit64u entry[2];
  BxMemtype entry_memtype[2] = { 0 };
  bool nx_fault = false;
  int leaf;

  lpf_mask = 0xfff;
  Bit32u combined_access = (BX_COMBINED_ACCESS_WRITE | BX_COMBINED_ACCESS_USER);

  Bit64u reserved = PAGING_LEGACY_PAE_RESERVED_BITS;
  if (! BX_CPU_THIS_PTR efer.get_NXE())
    reserved |= PAGE_DIRECTORY_NX_BIT;

  Bit64u pdpte = translate_linear_load_PDPTR(laddr, user, rw);
  bx_phy_address ppf = pdpte & BX_CONST64(0x000ffffffffff000);
  Bit64u curr_entry = pdpte;

  for (leaf = BX_LEVEL_PDE;; --leaf) {
    entry_addr[leaf] = ppf + ((laddr >> (9 + 9*leaf)) & 0xff8);
#if BX_SUPPORT_VMX >= 2
    if (BX_CPU_THIS_PTR in_vmx_guest) {
      if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_ENABLE))
        entry_addr[leaf] = translate_guest_physical(entry_addr[leaf], laddr, 1, 1, BX_READ);
    }
#endif
#if BX_SUPPORT_SVM
    if (BX_CPU_THIS_PTR in_svm_guest && SVM_NESTED_PAGING_ENABLED) {
      entry_addr[leaf] = nested_walk(entry_addr[leaf], BX_RW, 1);
    }
#endif

#if BX_SUPPORT_MEMTYPE
    entry_memtype[leaf] = resolve_memtype(memtype_by_mtrr(entry_addr[leaf]), memtype_by_pat(calculate_pcd_pwt((Bit32u) curr_entry)));
#endif
    access_read_physical(entry_addr[leaf], 8, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 8, entry_memtype[leaf], BX_READ, (BX_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));

    curr_entry = entry[leaf];
    int fault = check_entry_PAE(bx_paging_level[leaf], curr_entry, reserved, rw, &nx_fault);
    if (fault >= 0)
      page_fault(fault, laddr, user, rw);

    ppf = curr_entry & BX_CONST64(0x000ffffffffff000);

    if (leaf == BX_LEVEL_PTE) break;

    // Ignore CR4.PSE in PAE mode
    if (curr_entry & 0x80) {
      if (curr_entry & PAGING_PAE_PDE2M_RESERVED_BITS) {
        BX_DEBUG(("PAE PDE2M: reserved bit is set PDE=0x" FMT_ADDRX64, curr_entry));
        page_fault(ERROR_RESERVED | ERROR_PROTECTION, laddr, user, rw);
      }

      // Make up the physical page frame address
      ppf = (bx_phy_address)(curr_entry & BX_CONST64(0x000fffffffe00000));
      lpf_mask = 0x1fffff;
      break;
    }

    combined_access &= curr_entry; // U/S and R/W
  }

  bool isWrite = (rw & 1); // write or r-m-w

#if BX_SUPPORT_CET
  bool shadow_stack = (rw & 4) != 0;
  if (shadow_stack) {
    // shadow stack pages:
    //  - R/W bit=1 in every paging structure entry except the leaf
    //  - R/W bit=0 and Dirty=1 for the leaf entry
    bool shadow_stack_page = ((combined_access & BX_COMBINED_ACCESS_WRITE) != 0) && ((entry[leaf] & 0x40) != 0) && ((entry[leaf] & 0x02) == 0);
    if (!shadow_stack_page)
      page_fault(ERROR_PROTECTION, laddr, user, rw);

    combined_access &= entry[leaf]; // U/S and R/W

    // must be to a shadow stack page, check that U/S matches
    if ((combined_access & BX_COMBINED_ACCESS_USER) ^ (user << 2)) {
      BX_DEBUG(("shadow stack U/S access mismatch"));
      page_fault(ERROR_PROTECTION, laddr, user, rw);
    }
    combined_access |= BX_COMBINED_SHADOW_STACK;
  }
  else
#endif
  {
    combined_access &= entry[leaf]; // U/S and R/W

    unsigned priv_index = (BX_CPU_THIS_PTR cr0.get_WP() << 4) |  // bit 4
                          (user<<3) |                            // bit 3
                          (combined_access | (unsigned)isWrite); // bit 2,1,0

    if (!priv_check[priv_index] || nx_fault)
      page_fault(ERROR_PROTECTION, laddr, user, rw);
  }

  if (BX_CPU_THIS_PTR cr4.get_SMEP() && rw == BX_EXECUTE && !user) {
    if (combined_access & BX_COMBINED_ACCESS_USER)
      page_fault(ERROR_PROTECTION, laddr, user, rw);
  }

  // SMAP protections are disabled if EFLAGS.AC=1
  if (BX_CPU_THIS_PTR cr4.get_SMAP() && ! BX_CPU_THIS_PTR get_AC() && rw != BX_EXECUTE && ! user) {
    if (combined_access & BX_COMBINED_ACCESS_USER)
      page_fault(ERROR_PROTECTION, laddr, user, rw);
  }

  if (BX_CPU_THIS_PTR cr4.get_PGE())
    combined_access |= (entry[leaf] & BX_COMBINED_GLOBAL_PAGE); // G

#if BX_SUPPORT_MEMTYPE
  combined_access |= (memtype_by_pat(calculate_pat((Bit32u) entry[leaf], lpf_mask)) << 9);
#endif

  // Update A/D bits if needed
  update_access_dirty_PAE(entry_addr, entry, entry_memtype, BX_LEVEL_PDE, leaf, isWrite);

  return (ppf | combined_access);
}

#endif

// Format of a PDE that Maps a 4-MByte Page
// -----------------------------------------------------------
// 00    | Present (P)
// 01    | R/W
// 02    | U/S
// 03    | Page-Level Write-Through (PWT)
// 04    | Page-Level Cache-Disable (PCD)
// 05    | Accessed (A)
// 06    | Dirty (D)
// 07    | Page size, must be 1 to indicate a 4-MByte page
// 08    | Global (G) (if CR4.PGE=1, ignored otherwise)
// 11-09 | (ignored)
// 12    | PAT (if PAT is supported, reserved otherwise)
// PA-13 | Bits PA-32 of physical address of the 4-MByte page
// 21-PA | Reserved (must be zero)
// 31-22 | Bits 31-22 of physical address of the 4-MByte page
// -----------------------------------------------------------

#if BX_PHY_ADDRESS_WIDTH > 40
const Bit32u PAGING_PDE4M_RESERVED_BITS = 0; // there are no reserved bits in PDE4M when the physical address is wider than 40 bits
#else
const Bit32u PAGING_PDE4M_RESERVED_BITS = ((1 << (41-BX_PHY_ADDRESS_WIDTH))-1) << (13 + BX_PHY_ADDRESS_WIDTH - 32);
#endif
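
// Worked example of the formula above: for BX_PHY_ADDRESS_WIDTH = 36 it gives
// ((1 << (41-36)) - 1) << (13 + 36 - 32) = 0x1f << 17 = 0x003e0000, i.e.
// bits 21..17 are reserved while bits 16..13 hold physical address bits
// 35..32 of the 4-MByte frame.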

// Translate a linear address to a physical address in legacy paging mode
bx_phy_address BX_CPU_C::translate_linear_legacy(bx_address laddr, Bit32u &lpf_mask, unsigned user, unsigned rw)
{
  bx_phy_address entry_addr[2], ppf = (Bit32u) BX_CPU_THIS_PTR cr3 & BX_CR3_PAGING_MASK;
  Bit32u entry[2];
  BxMemtype entry_memtype[2] = { 0 };
  int leaf;

  lpf_mask = 0xfff;
  Bit32u combined_access = (BX_COMBINED_ACCESS_WRITE | BX_COMBINED_ACCESS_USER);
  Bit32u curr_entry = (Bit32u) BX_CPU_THIS_PTR cr3;

  for (leaf = BX_LEVEL_PDE;; --leaf) {
    entry_addr[leaf] = ppf + ((laddr >> (10 + 10*leaf)) & 0xffc);
#if BX_SUPPORT_VMX >= 2
    if (BX_CPU_THIS_PTR in_vmx_guest) {
      if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_ENABLE))
        entry_addr[leaf] = translate_guest_physical(entry_addr[leaf], laddr, 1, 1, BX_READ);
    }
#endif
#if BX_SUPPORT_SVM
    if (BX_CPU_THIS_PTR in_svm_guest && SVM_NESTED_PAGING_ENABLED) {
      entry_addr[leaf] = nested_walk(entry_addr[leaf], BX_RW, 1);
    }
#endif

#if BX_SUPPORT_MEMTYPE
    entry_memtype[leaf] = resolve_memtype(memtype_by_mtrr(entry_addr[leaf]), memtype_by_pat(calculate_pcd_pwt(curr_entry)));
#endif
    access_read_physical(entry_addr[leaf], 4, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 4, entry_memtype[leaf], BX_READ, (BX_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));

    curr_entry = entry[leaf];
    if (!(curr_entry & 0x1)) {
      BX_DEBUG(("%s: entry not present", bx_paging_level[leaf]));
      page_fault(ERROR_NOT_PRESENT, laddr, user, rw);
    }

    ppf = curr_entry & 0xfffff000;

    if (leaf == BX_LEVEL_PTE) break;

#if BX_CPU_LEVEL >= 5
    if ((curr_entry & 0x80) != 0 && BX_CPU_THIS_PTR cr4.get_PSE()) {
      // 4M paging, only if CR4.PSE enabled, ignore PDE.PS otherwise
      if (curr_entry & PAGING_PDE4M_RESERVED_BITS) {
        BX_DEBUG(("PSE PDE4M: reserved bit is set: PDE=0x%08x", entry[BX_LEVEL_PDE]));
        page_fault(ERROR_RESERVED | ERROR_PROTECTION, laddr, user, rw);
      }

      // make up the physical frame number
      ppf = (curr_entry & 0xffc00000);
#if BX_PHY_ADDRESS_WIDTH > 32
      ppf |= ((bx_phy_address)(curr_entry & 0x003fe000)) << 19;
#endif
      lpf_mask = 0x3fffff;
      break;
    }
#endif

    combined_access &= curr_entry; // U/S and R/W
  }

  bool isWrite = (rw & 1); // write or r-m-w

#if BX_SUPPORT_CET
  bool shadow_stack = (rw & 4) != 0;
  if (shadow_stack) {
    // shadow stack pages:
    //  - R/W bit=1 in every paging structure entry except the leaf
    //  - R/W bit=0 and Dirty=1 for the leaf entry
    bool shadow_stack_page = ((combined_access & BX_COMBINED_ACCESS_WRITE) != 0) && ((entry[leaf] & 0x40) != 0) && ((entry[leaf] & 0x02) == 0);
    if (!shadow_stack_page)
      page_fault(ERROR_PROTECTION, laddr, user, rw);

    combined_access &= entry[leaf]; // U/S and R/W

    // must be to a shadow stack page, check that U/S matches
    if ((combined_access & BX_COMBINED_ACCESS_USER) ^ (user << 2)) {
      BX_DEBUG(("shadow stack U/S access mismatch"));
      page_fault(ERROR_PROTECTION, laddr, user, rw);
    }
    combined_access |= BX_COMBINED_SHADOW_STACK;
  }
  else
#endif
  {
    combined_access &= entry[leaf]; // U/S and R/W

    unsigned priv_index =
#if BX_CPU_LEVEL >= 4
        (BX_CPU_THIS_PTR cr0.get_WP() << 4) |  // bit 4
#endif
        (user<<3) |                            // bit 3
        (combined_access | (unsigned)isWrite); // bit 2,1,0

    if (!priv_check[priv_index])
      page_fault(ERROR_PROTECTION, laddr, user, rw);
  }

#if BX_CPU_LEVEL >= 6
  if (BX_CPU_THIS_PTR cr4.get_SMEP() && rw == BX_EXECUTE && !user) {
    if (combined_access & BX_COMBINED_ACCESS_USER)
      page_fault(ERROR_PROTECTION, laddr, user, rw);
  }

  // SMAP protections are disabled if EFLAGS.AC=1
  if (BX_CPU_THIS_PTR cr4.get_SMAP() && ! BX_CPU_THIS_PTR get_AC() && rw != BX_EXECUTE && ! user) {
    if (combined_access & BX_COMBINED_ACCESS_USER)
      page_fault(ERROR_PROTECTION, laddr, user, rw);
  }

  if (BX_CPU_THIS_PTR cr4.get_PGE())
    combined_access |= (entry[leaf] & BX_COMBINED_GLOBAL_PAGE);

#if BX_SUPPORT_MEMTYPE
  combined_access |= (memtype_by_pat(calculate_pat(entry[leaf], lpf_mask)) << 9);
#endif

#endif

  update_access_dirty(entry_addr, entry, entry_memtype, leaf, isWrite);

  return (ppf | combined_access);
}

void BX_CPU_C::update_access_dirty(bx_phy_address *entry_addr, Bit32u *entry, BxMemtype *entry_memtype, unsigned leaf, unsigned write)
{
  if (leaf == BX_LEVEL_PTE) {
    // Update PDE A bit if needed
    if (!(entry[BX_LEVEL_PDE] & 0x20)) {
      entry[BX_LEVEL_PDE] |= 0x20;
      access_write_physical(entry_addr[BX_LEVEL_PDE], 4, &entry[BX_LEVEL_PDE]);
      BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[BX_LEVEL_PDE], 4, entry_memtype[BX_LEVEL_PDE], BX_WRITE, BX_PDE_ACCESS, (Bit8u*)(&entry[BX_LEVEL_PDE]));
    }
  }

  // Update A/D bits if needed
  if (!(entry[leaf] & 0x20) || (write && !(entry[leaf] & 0x40))) {
    entry[leaf] |= (0x20 | (write<<6)); // Update A and possibly D bits
    access_write_physical(entry_addr[leaf], 4, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 4, entry_memtype[leaf], BX_WRITE, (BX_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));
  }
}

// Translate a linear address to a physical address
bx_phy_address BX_CPU_C::translate_linear(bx_TLB_entry *tlbEntry, bx_address laddr, unsigned user, unsigned rw)
{
#if BX_SUPPORT_X86_64
  if (! long_mode()) laddr &= 0xffffffff;
#endif

  bx_phy_address paddress, ppf, poffset = PAGE_OFFSET(laddr);
  unsigned isWrite = rw & 1; // write or r-m-w
  unsigned isExecute = (rw == BX_EXECUTE);
  unsigned isShadowStack = (rw & 4); // 4 if shadow stack access and 0 otherwise
  bx_address lpf = LPFOf(laddr);

  INC_TLB_STAT(tlbLookups);
  if (isExecute)
    INC_TLB_STAT(tlbExecuteLookups);
  if (isWrite)
    INC_TLB_STAT(tlbWriteLookups);

  // the TLB was already looked up for a code access
  if (! isExecute && TLB_LPFOf(tlbEntry->lpf) == lpf)
  {
    paddress = tlbEntry->ppf | poffset;

#if BX_SUPPORT_PKEYS
    if (isWrite) {
      if (tlbEntry->accessBits & (1 << (isShadowStack | (isWrite<<1) | user)) & BX_CPU_THIS_PTR wr_pkey[tlbEntry->pkey])
        return paddress;
    }
    else {
      if (tlbEntry->accessBits & (1 << (isShadowStack | user)) & BX_CPU_THIS_PTR rd_pkey[tlbEntry->pkey])
        return paddress;
    }
#else
    if (tlbEntry->accessBits & (1 << (isShadowStack | (isWrite<<1) | user)))
      return paddress;
#endif

    // The current access does not have permission according to the info
    // in our TLB cache entry. Re-walk the page tables, in case there is
    // updated information in the memory image, and let the long path code
    // generate an exception if one is warranted.

    // Invalidate the TLB entry before the re-walk, as the re-walk may end with a page fault.
    // The entry will be reinitialized later if the page walk succeeds.
    tlbEntry->invalidate();
  }

  INC_TLB_STAT(tlbMisses);
  if (isExecute)
    INC_TLB_STAT(tlbExecuteMisses);
  if (isWrite)
    INC_TLB_STAT(tlbWriteMisses);

  Bit32u lpf_mask = 0xfff; // 4K pages
  Bit32u combined_access = BX_COMBINED_ACCESS_WRITE | BX_COMBINED_ACCESS_USER;
#if BX_SUPPORT_X86_64
  Bit32u pkey = 0;
#endif

  if (BX_CPU_THIS_PTR cr0.get_PG())
  {
    BX_DEBUG(("page walk for%s address 0x" FMT_LIN_ADDRX, isShadowStack ? " shadow stack" : "", laddr));

#if BX_CPU_LEVEL >= 6
#if BX_SUPPORT_X86_64
    if (long_mode())
      paddress = translate_linear_long_mode(laddr, lpf_mask, pkey, user, rw);
    else
#endif
      if (BX_CPU_THIS_PTR cr4.get_PAE())
        paddress = translate_linear_PAE(laddr, lpf_mask, user, rw);
      else
#endif
        paddress = translate_linear_legacy(laddr, lpf_mask, user, rw);

    // translate_linear functions return the combined U/S and R/W bits, the Global
    // Page bit, and also the effective page tables memory type in the lower 12
    // bits of the physical address.
    //   Bit 1       - R/W bit
    //   Bit 2       - U/S bit
    //   Bit 9,10,11 - Effective Memory Type from page tables
    combined_access = paddress & lpf_mask;
    paddress = (paddress & ~((Bit64u) lpf_mask)) | (laddr & lpf_mask);

#if BX_CPU_LEVEL >= 5
    if (lpf_mask > 0xfff) {
      if (isExecute)
        BX_CPU_THIS_PTR ITLB.split_large = true;
      else
        BX_CPU_THIS_PTR DTLB.split_large = true;
    }
#endif
  }
  else {
    // no paging
    paddress = (bx_phy_address) laddr;
    combined_access |= (BX_MEMTYPE_WB << 9); // the effective memory type by paging is WB
  }
  // Calculate physical memory address and fill in TLB cache entry
#if BX_SUPPORT_VMX >= 2
  if (BX_CPU_THIS_PTR in_vmx_guest) {
    if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_ENABLE)) {
      paddress = translate_guest_physical(paddress, laddr, 1, 0, rw, isShadowStack & !user);
    }
  }
#endif
#if BX_SUPPORT_SVM
  if (BX_CPU_THIS_PTR in_svm_guest && SVM_NESTED_PAGING_ENABLED) {
    // hack: ignore isExecute attribute in SMM mode under SVM virtualization
    if (BX_CPU_THIS_PTR in_smm && rw == BX_EXECUTE) rw = BX_READ;

    paddress = nested_walk(paddress, rw, 0);
  }
#endif
  paddress = A20ADDR(paddress);
  ppf = PPFOf(paddress);

  // direct memory access is NOT allowed by default
  tlbEntry->lpf = lpf | TLB_NoHostPtr;
  tlbEntry->lpf_mask = lpf_mask;
#if BX_SUPPORT_PKEYS
  tlbEntry->pkey = pkey;
#endif
  tlbEntry->ppf = ppf;
  tlbEntry->accessBits = 0;

  if (isExecute) {
    tlbEntry->accessBits |= TLB_SysExecuteOK;
  }
  else {
#if BX_SUPPORT_CET
    if (isShadowStack) {
      tlbEntry->accessBits |= TLB_SysReadOK | TLB_SysReadShadowStackOK;
      if (isWrite)
        tlbEntry->accessBits |= TLB_SysWriteShadowStackOK;
    }
    else
#endif
    {
      tlbEntry->accessBits |= TLB_SysReadOK;
      if (isWrite)
        tlbEntry->accessBits |= TLB_SysWriteOK;
    }
  }

  if (! BX_CPU_THIS_PTR cr0.get_PG()
#if BX_SUPPORT_VMX >= 2
        && ! (BX_CPU_THIS_PTR in_vmx_guest && SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_ENABLE))
#endif
#if BX_SUPPORT_SVM
        && ! (BX_CPU_THIS_PTR in_svm_guest && SVM_NESTED_PAGING_ENABLED)
#endif
     ) {
    if (isExecute)
      tlbEntry->accessBits |= TLB_UserExecuteOK;
    else
      tlbEntry->accessBits |= TLB_UserReadOK | TLB_UserWriteOK;
  }
  else {
    if ((combined_access & BX_COMBINED_ACCESS_USER) != 0) {

      if (user) {
        if (isExecute) {
          tlbEntry->accessBits |= TLB_UserExecuteOK;
        }
        else {
#if BX_SUPPORT_CET
          if (isShadowStack) {
            tlbEntry->accessBits |= TLB_UserReadOK | TLB_UserReadShadowStackOK;
            if (isWrite)
              tlbEntry->accessBits |= TLB_UserWriteShadowStackOK;
          }
          else
#endif
          {
            tlbEntry->accessBits |= TLB_UserReadOK;
            if (isWrite)
              tlbEntry->accessBits |= TLB_UserWriteOK;
          }
        }
      }

#if BX_CPU_LEVEL >= 6
      if (isExecute) {
        if (BX_CPU_THIS_PTR cr4.get_SMEP())
          tlbEntry->accessBits &= ~TLB_SysExecuteOK;
      }
      else {
        if (BX_CPU_THIS_PTR cr4.get_SMAP())
          tlbEntry->accessBits &= ~(TLB_SysReadOK | TLB_SysWriteOK);
      }
#endif

#if BX_SUPPORT_CET
      // system shadow stack accesses cannot access user pages
      tlbEntry->accessBits &= ~(TLB_SysReadShadowStackOK | TLB_SysWriteShadowStackOK);
#endif
    }
  }

#if BX_CPU_LEVEL >= 6
  if (combined_access & BX_COMBINED_GLOBAL_PAGE) // Global bit
    tlbEntry->accessBits |= TLB_GlobalPage;
#endif

  // Attempt to get a host pointer to this physical page. Put that
  // pointer in the TLB cache. Note if the request is vetoed, NULL
  // will be returned, and it's OK to OR zero in anyway.
  tlbEntry->hostPageAddr = BX_CPU_THIS_PTR getHostMemAddr(ppf, rw);
  if (tlbEntry->hostPageAddr) {
    // All access allowed also via direct pointer
#if BX_X86_DEBUGGER
    if (! hwbreakpoint_check(laddr, BX_HWDebugMemW, BX_HWDebugMemRW))
#endif
      tlbEntry->lpf = lpf; // allow direct access with HostPtr
  }

#if BX_SUPPORT_MEMTYPE
  tlbEntry->memtype = resolve_memtype(memtype_by_mtrr(tlbEntry->ppf), combined_access >> 9 /* effective page tables memory type */);
#endif

  return paddress;
}
1499
get_memtype_name(BxMemtype memtype)1500 const char *get_memtype_name(BxMemtype memtype)
1501 {
1502 static const char *mem_type_string[9] = { "UC", "WC", "RESERVED2", "RESERVED3", "WT", "WP", "WB", "UC-", "INVALID" };
1503 if (memtype > BX_MEMTYPE_INVALID) memtype = BX_MEMTYPE_INVALID;
1504 return mem_type_string[memtype];
1505 }
1506
1507 #if BX_SUPPORT_MEMTYPE
memtype_by_mtrr(bx_phy_address pAddr)1508 BxMemtype BX_CPP_AttrRegparmN(1) BX_CPU_C::memtype_by_mtrr(bx_phy_address pAddr)
1509 {
1510 #if BX_CPU_LEVEL >= 6
1511 if (is_cpu_extension_supported(BX_ISA_MTRR)) {
1512 const Bit32u BX_MTRR_DEFTYPE_FIXED_MTRR_ENABLE_MASK = (1 << 10);
1513 const Bit32u BX_MTRR_ENABLE_MASK = (1 << 11);
1514
1515 if (BX_CPU_THIS_PTR msr.mtrr_deftype & BX_MTRR_ENABLE_MASK) {
1516 // fixed range MTRR take priority over variable range MTRR when enabled
1517 if (pAddr < 0x100000 && (BX_CPU_THIS_PTR msr.mtrr_deftype & BX_MTRR_DEFTYPE_FIXED_MTRR_ENABLE_MASK)) {
1518 if (pAddr < 0x80000) {
1519 unsigned index = (pAddr >> 16) & 0x7;
1520 return (BxMemtype) BX_CPU_THIS_PTR msr.mtrrfix64k.ubyte(index);
1521 }
1522 if (pAddr < 0xc0000) {
1523 unsigned index = ((pAddr - 0x80000) >> 14) & 0xf;
1524 return (BxMemtype) BX_CPU_THIS_PTR msr.mtrrfix16k[index >> 3].ubyte(index & 0x7);
1525 }
1526 else {
1527 unsigned index = (pAddr - 0xc0000) >> 12;
1528 return (BxMemtype) BX_CPU_THIS_PTR msr.mtrrfix4k [index >> 3].ubyte(index & 0x7);
1529 }
1530 }
1531
1532 int memtype = -1;
1533
1534 for (unsigned i=0; i < BX_NUM_VARIABLE_RANGE_MTRRS; i++) {
1535 Bit64u base = BX_CPU_THIS_PTR msr.mtrrphys[i*2];
1536 Bit64u mask = BX_CPU_THIS_PTR msr.mtrrphys[i*2 + 1];
1537 if ((mask & BX_MTRR_ENABLE_MASK) == 0) continue;
1538 mask = PPFOf(mask);
1539 if ((pAddr & mask) == (base & mask)) {
1540 //
1541 // Matched variable MTRR, check overlap rules:
1542 // - if two or more variable memory ranges match and the memory types are identical,
1543 // then that memory type is used.
1544 // - if two or more variable memory ranges match and one of the memory types is UC,
1545 // the UC memory type used.
1546 // - if two or more variable memory ranges match and the memory types are WT and WB,
1547 // the WT memory type is used.
1548 // - For overlaps not defined by the above rules, processor behavior is undefined.
1549 //
1550 BxMemtype curr_memtype = BxMemtype(base & 0xff);
1551 if (curr_memtype == BX_MEMTYPE_UC)
1552 return BX_MEMTYPE_UC;
1553
1554 if (memtype == -1) {
1555 memtype = curr_memtype; // first match
1556 }
1557 else if (memtype != (int) curr_memtype) {
1558 if (curr_memtype == BX_MEMTYPE_WT && memtype == BX_MEMTYPE_WB)
1559 memtype = BX_MEMTYPE_WT;
1560 else if (curr_memtype == BX_MEMTYPE_WB && memtype == BX_MEMTYPE_WT)
1561 memtype = BX_MEMTYPE_WT;
1562 else
1563 memtype = BX_MEMTYPE_INVALID;
1564 }
1565 }
1566 }
1567
1568 if (memtype != -1)
1569 return BxMemtype(memtype);
1570
1571 // didn't match any variable range MTRR, return default memory type
1572 return BxMemtype(BX_CPU_THIS_PTR msr.mtrr_deftype & 0xff);
1573 }
1574
1575 // return UC memory type when MTRRs are not enabled
1576 return BX_MEMTYPE_UC;
1577 }
1578 #endif
1579
1580 // return INVALID memory type when MTRRs are not supported
1581 return BX_MEMTYPE_INVALID;
1582 }
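
// Illustrative sketch (not part of Bochs itself): how the variable-range
// match test above behaves. A PHYSBASE/PHYSMASK pair matches an address when
// every mask-selected address bit equals the corresponding base bit, so a
// single pair can describe any power-of-two-sized, naturally aligned window.
// The helper name below is hypothetical, for illustration only.
static bool mtrr_variable_range_matches_example(Bit64u base, Bit64u mask, bx_phy_address pAddr)
{
  // e.g. a base of 0xC0000000 (memory type in the low byte) with a mask
  // selecting a 256MB-aligned window matches every address whose masked
  // bits agree with the base
  return (pAddr & PPFOf(mask)) == (base & PPFOf(mask));
}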

BxMemtype BX_CPP_AttrRegparmN(1) BX_CPU_C::memtype_by_pat(unsigned pat)
{
  return (BxMemtype) BX_CPU_THIS_PTR msr.pat.ubyte(pat);
}

BxMemtype BX_CPP_AttrRegparmN(2) BX_CPU_C::resolve_memtype(BxMemtype mtrr_memtype, BxMemtype pat_memtype)
{
  if (BX_CPU_THIS_PTR cr0.get_CD())
    return BX_MEMTYPE_UC;

  if (mtrr_memtype == BX_MEMTYPE_INVALID) // causes the MTRR memory type to be ignored
    mtrr_memtype = BX_MEMTYPE_WB;

  switch(pat_memtype) {
    case BX_MEMTYPE_UC:
    case BX_MEMTYPE_WC:
      return pat_memtype;

    case BX_MEMTYPE_WT:
    case BX_MEMTYPE_WP:
      if (mtrr_memtype == BX_MEMTYPE_WC) return BX_MEMTYPE_UC;
      return (mtrr_memtype < pat_memtype) ? mtrr_memtype : pat_memtype;

    case BX_MEMTYPE_WB:
      return mtrr_memtype;

    case BX_MEMTYPE_UC_WEAK:
      return (mtrr_memtype == BX_MEMTYPE_WC) ? BX_MEMTYPE_WC : BX_MEMTYPE_UC;

    default:
      BX_PANIC(("unexpected PAT memory type: %u", (unsigned) pat_memtype));
  }

  return BX_MEMTYPE_INVALID; // keep compiler happy
}
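
// Illustrative sketch (an assumption, not a Bochs API): the 3-bit index that
// memtype_by_pat() consumes is assembled from the paging-entry flags
// PWT (bit 3), PCD (bit 4) and PAT (bit 7 in a 4K leaf PTE), selecting one
// of the eight memory-type fields of the IA32_PAT MSR.
static unsigned pat_index_from_pte_example(Bit64u pte)
{
  unsigned pwt = (unsigned)(pte >> 3) & 1; // page-level write-through
  unsigned pcd = (unsigned)(pte >> 4) & 1; // page-level cache disable
  unsigned pat = (unsigned)(pte >> 7) & 1; // PAT bit (4K leaf PTE layout)
  return (pat << 2) | (pcd << 1) | pwt;    // index 0..7 into IA32_PAT
}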
#endif

#if BX_SUPPORT_SVM

void BX_CPU_C::nested_page_fault(unsigned fault, bx_phy_address guest_paddr, unsigned rw, unsigned is_page_walk)
{
  unsigned isWrite = rw & 1;

  Bit64u error_code = fault | (1 << 2) | (isWrite << 1);
  if (rw == BX_EXECUTE)
    error_code |= ERROR_CODE_ACCESS; // I/D = 1

  if (is_page_walk)
    error_code |= BX_CONST64(1) << 33;
  else
    error_code |= BX_CONST64(1) << 32;

  Svm_Vmexit(SVM_VMEXIT_NPF, error_code, guest_paddr);
}

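// Worked example (illustrative, mirroring the error_code assembly above):
// a guest write that hits a protection fault while the CPU walks the
// guest's own page tables gets the fault code plus U (bit 2) and W (bit 1)
// in the low bits and bit 33 flagging the page-walk origin. The helper is
// hypothetical and unused by the emulator.
static Bit64u npf_error_code_example(void)
{
  Bit64u ec = ERROR_PROTECTION | (1 << 2) | (1 << 1); // protection fault, user, write
  ec |= BX_CONST64(1) << 33;                          // fault during guest page-table walk
  return ec;
}
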
bx_phy_address BX_CPU_C::nested_walk_long_mode(bx_phy_address guest_paddr, unsigned rw, bool is_page_walk)
{
  bx_phy_address entry_addr[4];
  Bit64u entry[4];
  BxMemtype entry_memtype[4] = { BX_MEMTYPE_INVALID };
  bool nx_fault = false;
  int leaf;

  SVM_CONTROLS *ctrls = &BX_CPU_THIS_PTR vmcb.ctrls;
  SVM_HOST_STATE *host_state = &BX_CPU_THIS_PTR vmcb.host_state;
  bx_phy_address ppf = ctrls->ncr3 & BX_CR3_PAGING_MASK;
  Bit64u offset_mask = BX_CONST64(0x0000ffffffffffff);
  unsigned combined_access = BX_COMBINED_ACCESS_WRITE | BX_COMBINED_ACCESS_USER;

  Bit64u reserved = PAGING_PAE_RESERVED_BITS;
  if (! host_state->efer.get_NXE())
    reserved |= PAGE_DIRECTORY_NX_BIT;

  for (leaf = BX_LEVEL_PML4;; --leaf) {
    entry_addr[leaf] = ppf + ((guest_paddr >> (9 + 9*leaf)) & 0xff8);
    access_read_physical(entry_addr[leaf], 8, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 8, BX_MEMTYPE_INVALID, BX_READ, (BX_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));
    offset_mask >>= 9;

    Bit64u curr_entry = entry[leaf];
    int fault = check_entry_PAE(bx_paging_level[leaf], curr_entry, reserved, rw, &nx_fault);
    if (fault >= 0)
      nested_page_fault(fault, guest_paddr, rw, is_page_walk);

    combined_access &= curr_entry; // U/S and R/W
    ppf = curr_entry & BX_CONST64(0x000ffffffffff000);

    if (leaf == BX_LEVEL_PTE) break;

    if (curr_entry & 0x80) {
      if (leaf > (BX_LEVEL_PDE + !!is_cpu_extension_supported(BX_ISA_1G_PAGES))) {
        BX_DEBUG(("Nested PAE Walk %s: PS bit set !", bx_paging_level[leaf]));
        nested_page_fault(ERROR_RESERVED | ERROR_PROTECTION, guest_paddr, rw, is_page_walk);
      }

      ppf &= BX_CONST64(0x000fffffffffe000);
      if (ppf & offset_mask) {
        BX_DEBUG(("Nested PAE Walk %s: reserved bit is set: 0x" FMT_ADDRX64, bx_paging_level[leaf], curr_entry));
        nested_page_fault(ERROR_RESERVED | ERROR_PROTECTION, guest_paddr, rw, is_page_walk);
      }

      break;
    }
  }

  bool isWrite = (rw & 1); // write or r-m-w

  unsigned priv_index = (1<<3) /* user */ | (combined_access | isWrite);

  if (!priv_check[priv_index] || nx_fault)
    nested_page_fault(ERROR_PROTECTION, guest_paddr, rw, is_page_walk);

  // Update A/D bits if needed
  update_access_dirty_PAE(entry_addr, entry, entry_memtype, BX_LEVEL_PML4, leaf, isWrite);

  // Make up the physical page frame address
  return ppf | (bx_phy_address)(guest_paddr & offset_mask);
}

bx_phy_address BX_CPU_C::nested_walk_PAE(bx_phy_address guest_paddr, unsigned rw, bool is_page_walk)
{
  bx_phy_address entry_addr[2];
  Bit64u entry[2];
  BxMemtype entry_memtype[2] = { BX_MEMTYPE_INVALID };
  bool nx_fault = false;
  int leaf;

  unsigned combined_access = BX_COMBINED_ACCESS_WRITE | BX_COMBINED_ACCESS_USER;

  SVM_CONTROLS *ctrls = &BX_CPU_THIS_PTR vmcb.ctrls;
  SVM_HOST_STATE *host_state = &BX_CPU_THIS_PTR vmcb.host_state;
  bx_phy_address ncr3 = ctrls->ncr3 & 0xffffffe0;
  unsigned index = (guest_paddr >> 30) & 0x3;
  Bit64u pdptr;

  bx_phy_address pdpe_entry_addr = (bx_phy_address) (ncr3 | (index << 3));
  access_read_physical(pdpe_entry_addr, 8, &pdptr);
  BX_NOTIFY_PHY_MEMORY_ACCESS(pdpe_entry_addr, 8, BX_MEMTYPE_INVALID, BX_READ, (BX_PDPTR0_ACCESS + index), (Bit8u*) &pdptr);

  if (! (pdptr & 0x1)) {
    BX_DEBUG(("Nested PAE Walk PDPTE%d entry not present !", index));
    nested_page_fault(ERROR_NOT_PRESENT, guest_paddr, rw, is_page_walk);
  }

  if (pdptr & PAGING_PAE_PDPTE_RESERVED_BITS) {
    BX_DEBUG(("Nested PAE Walk PDPTE%d entry reserved bits set: 0x" FMT_ADDRX64, index, pdptr));
    nested_page_fault(ERROR_RESERVED | ERROR_PROTECTION, guest_paddr, rw, is_page_walk);
  }

  Bit64u reserved = PAGING_LEGACY_PAE_RESERVED_BITS;
  if (! host_state->efer.get_NXE())
    reserved |= PAGE_DIRECTORY_NX_BIT;

  bx_phy_address ppf = pdptr & BX_CONST64(0x000ffffffffff000);

  for (leaf = BX_LEVEL_PDE;; --leaf) {
    entry_addr[leaf] = ppf + ((guest_paddr >> (9 + 9*leaf)) & 0xff8);
    access_read_physical(entry_addr[leaf], 8, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 8, BX_MEMTYPE_INVALID, BX_READ, (BX_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));

    Bit64u curr_entry = entry[leaf];
    int fault = check_entry_PAE(bx_paging_level[leaf], curr_entry, reserved, rw, &nx_fault);
    if (fault >= 0)
      nested_page_fault(fault, guest_paddr, rw, is_page_walk);

    combined_access &= curr_entry; // U/S and R/W
    ppf = curr_entry & BX_CONST64(0x000ffffffffff000);

    if (leaf == BX_LEVEL_PTE) break;

    // Ignore CR4.PSE in PAE mode
    if (curr_entry & 0x80) {
      if (curr_entry & PAGING_PAE_PDE2M_RESERVED_BITS) {
        BX_DEBUG(("PAE PDE2M: reserved bit is set PDE=0x" FMT_ADDRX64, curr_entry));
        nested_page_fault(ERROR_RESERVED | ERROR_PROTECTION, guest_paddr, rw, is_page_walk);
      }

      // Make up the physical page frame address
      ppf = (bx_phy_address)((curr_entry & BX_CONST64(0x000fffffffe00000)) | (guest_paddr & 0x001ff000));
      break;
    }
  }

  bool isWrite = (rw & 1); // write or r-m-w

  unsigned priv_index = (1<<3) /* user */ | (combined_access | isWrite);

  if (!priv_check[priv_index] || nx_fault)
    nested_page_fault(ERROR_PROTECTION, guest_paddr, rw, is_page_walk);

  // Update A/D bits if needed
  update_access_dirty_PAE(entry_addr, entry, entry_memtype, BX_LEVEL_PDE, leaf, isWrite);

  Bit32u page_offset = PAGE_OFFSET(guest_paddr);
  return ppf | page_offset;
}

bx_phy_address BX_CPU_C::nested_walk_legacy(bx_phy_address guest_paddr, unsigned rw, bool is_page_walk)
{
  bx_phy_address entry_addr[2];
  Bit32u entry[2];
  BxMemtype entry_memtype[2] = { BX_MEMTYPE_INVALID };
  int leaf;

  SVM_CONTROLS *ctrls = &BX_CPU_THIS_PTR vmcb.ctrls;
  SVM_HOST_STATE *host_state = &BX_CPU_THIS_PTR vmcb.host_state;
  bx_phy_address ppf = ctrls->ncr3 & BX_CR3_PAGING_MASK;
  unsigned combined_access = BX_COMBINED_ACCESS_WRITE | BX_COMBINED_ACCESS_USER;

  for (leaf = BX_LEVEL_PDE;; --leaf) {
    entry_addr[leaf] = ppf + ((guest_paddr >> (10 + 10*leaf)) & 0xffc);
    access_read_physical(entry_addr[leaf], 4, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 4, BX_MEMTYPE_INVALID, BX_READ, (BX_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));

    Bit32u curr_entry = entry[leaf];
    if (!(curr_entry & 0x1)) {
      BX_DEBUG(("Nested %s Walk: entry not present", bx_paging_level[leaf]));
      nested_page_fault(ERROR_NOT_PRESENT, guest_paddr, rw, is_page_walk);
    }

    combined_access &= curr_entry; // U/S and R/W
    ppf = curr_entry & 0xfffff000;

    if (leaf == BX_LEVEL_PTE) break;

    if ((curr_entry & 0x80) != 0 && host_state->cr4.get_PSE()) {
      // 4M paging, only if CR4.PSE enabled, ignore PDE.PS otherwise
      if (curr_entry & PAGING_PDE4M_RESERVED_BITS) {
        BX_DEBUG(("Nested PSE Walk PDE4M: reserved bit is set: PDE=0x%08x", entry[BX_LEVEL_PDE]));
        nested_page_fault(ERROR_RESERVED | ERROR_PROTECTION, guest_paddr, rw, is_page_walk);
      }

      // make up the physical frame number
      ppf = (curr_entry & 0xffc00000) | (guest_paddr & 0x003ff000);
#if BX_PHY_ADDRESS_WIDTH > 32
      ppf |= ((bx_phy_address)(curr_entry & 0x003fe000)) << 19;
#endif
      break;
    }
  }

  bool isWrite = (rw & 1); // write or r-m-w

  unsigned priv_index = (1<<3) /* user */ | (combined_access | isWrite);

  if (!priv_check[priv_index])
    nested_page_fault(ERROR_PROTECTION, guest_paddr, rw, is_page_walk);

  update_access_dirty(entry_addr, entry, entry_memtype, leaf, isWrite);

  Bit32u page_offset = PAGE_OFFSET(guest_paddr);
  return ppf | page_offset;
}

bx_phy_address BX_CPU_C::nested_walk(bx_phy_address guest_paddr, unsigned rw, bool is_page_walk)
{
  SVM_HOST_STATE *host_state = &BX_CPU_THIS_PTR vmcb.host_state;

  BX_DEBUG(("Nested walk for guest paddr 0x" FMT_PHY_ADDRX, guest_paddr));

  if (host_state->efer.get_LMA())
    return nested_walk_long_mode(guest_paddr, rw, is_page_walk);
  else if (host_state->cr4.get_PAE())
    return nested_walk_PAE(guest_paddr, rw, is_page_walk);
  else
    return nested_walk_legacy(guest_paddr, rw, is_page_walk);
}

#endif

#if BX_SUPPORT_VMX >= 2

/* EPT access type */
enum {
  BX_EPT_READ    = 0x01,
  BX_EPT_WRITE   = 0x02,
  BX_EPT_EXECUTE = 0x04
};

/* EPT access mask */
enum {
  BX_EPT_ENTRY_NOT_PRESENT        = 0x00,
  BX_EPT_ENTRY_READ_ONLY          = 0x01,
  BX_EPT_ENTRY_WRITE_ONLY         = 0x02,
  BX_EPT_ENTRY_READ_WRITE         = 0x03,
  BX_EPT_ENTRY_EXECUTE_ONLY       = 0x04,
  BX_EPT_ENTRY_READ_EXECUTE       = 0x05,
  BX_EPT_ENTRY_WRITE_EXECUTE      = 0x06,
  BX_EPT_ENTRY_READ_WRITE_EXECUTE = 0x07
};

#define BX_VMX_EPT_ACCESS_DIRTY_ENABLED                 (BX_CPU_THIS_PTR vmcs.eptptr & 0x40)
#define BX_VMX_EPT_SUPERVISOR_SHADOW_STACK_CTRL_ENABLED (BX_CPU_THIS_PTR vmcs.eptptr & 0x80)

// Format of an EPT Entry
// -----------------------------------------------------------
//    00 | Read access
//    01 | Write access
//    02 | Execute Access
// 05-03 | EPT Memory type (for leaf entries, reserved otherwise)
//    06 | Ignore PAT memory type (for leaf entries, reserved otherwise)
//    07 | Page Size, must be 1 to indicate a Large Page
//    08 | Accessed bit (if supported, ignored otherwise)
//    09 | Dirty bit (for leaf entries, if supported, ignored otherwise)
// 11-10 | (ignored)
// PA-12 | Physical address
// 51-PA | Reserved (must be zero)
// 59-52 | (ignored)
//    60 | Supervisor Shadow Stack Page (CET)
//    61 | Sub Page Protected (SPP)
//    62 | (ignored)
//    63 | Suppress #VE
// -----------------------------------------------------------

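// Minimal decoding sketch for the leaf-entry fields tabulated above
// (illustrative only; the helper below is hypothetical and unused by the
// emulator):
static void decode_ept_leaf_entry_example(Bit64u entry, unsigned *memtype, bool *ignore_pat, bool *large_page)
{
  *memtype    = (unsigned)((entry >> 3) & 0x7); // bits 05-03: EPT memory type
  *ignore_pat = (entry & 0x40) != 0;            // bit 06: ignore PAT memory type
  *large_page = (entry & 0x80) != 0;            // bit 07: page size (large page)
}
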
const Bit64u BX_SUPPRESS_EPT_VIOLATION_EXCEPTION = (BX_CONST64(1) << 63);
const Bit64u BX_SUB_PAGE_PROTECTED = (BX_CONST64(1) << 61);
const Bit64u BX_SUPERVISOR_SHADOW_STACK_PAGE = (BX_CONST64(1) << 60);

const Bit64u PAGING_EPT_RESERVED_BITS = BX_PAGING_PHY_ADDRESS_RESERVED_BITS;

bx_phy_address BX_CPU_C::translate_guest_physical(bx_phy_address guest_paddr, bx_address guest_laddr, bool guest_laddr_valid, bool is_page_walk, unsigned rw, bool supervisor_shadow_stack)
{
  VMCS_CACHE *vm = &BX_CPU_THIS_PTR vmcs;
  bx_phy_address entry_addr[4], ppf = LPFOf(vm->eptptr);
  Bit64u entry[4];
  int leaf;

#if BX_SUPPORT_MEMTYPE
  // The MTRRs have no effect on the memory type used for accesses to the EPT paging structures.
  BxMemtype eptptr_memtype = BX_CPU_THIS_PTR cr0.get_CD() ? (BX_MEMTYPE_UC) : BxMemtype(vm->eptptr & 0x7);
#endif

  Bit32u combined_access = 0x7, access_mask = 0;
  Bit64u offset_mask = BX_CONST64(0x0000ffffffffffff);

  BX_DEBUG(("EPT walk for guest paddr 0x" FMT_PHY_ADDRX, guest_paddr));

  // when EPT A/D is enabled, treat guest page-table accesses as writes
  if (BX_VMX_EPT_ACCESS_DIRTY_ENABLED && is_page_walk && guest_laddr_valid)
    rw = BX_WRITE;

  if (rw == BX_EXECUTE) access_mask |= BX_EPT_EXECUTE;
  if (rw & 1) access_mask |= BX_EPT_WRITE; // write or r-m-w
  if ((rw & 3) == BX_READ) access_mask |= BX_EPT_READ; // correctly handle shadow stack reads

  Bit32u vmexit_reason = 0;

  for (leaf = BX_LEVEL_PML4;; --leaf) {
    entry_addr[leaf] = ppf + ((guest_paddr >> (9 + 9*leaf)) & 0xff8);
    access_read_physical(entry_addr[leaf], 8, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 8, MEMTYPE(eptptr_memtype), BX_READ, (BX_EPT_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));

    offset_mask >>= 9;
    Bit64u curr_entry = entry[leaf];
    Bit32u curr_access_mask = curr_entry & 0x7;

    if (curr_access_mask == BX_EPT_ENTRY_NOT_PRESENT) {
      BX_DEBUG(("EPT %s: not present", bx_paging_level[leaf]));
      vmexit_reason = VMX_VMEXIT_EPT_VIOLATION;
      break;
    }

    if (curr_access_mask == BX_EPT_ENTRY_WRITE_ONLY || curr_access_mask == BX_EPT_ENTRY_WRITE_EXECUTE) {
      BX_DEBUG(("EPT %s: EPT misconfiguration mask=%d", bx_paging_level[leaf], curr_access_mask));
      vmexit_reason = VMX_VMEXIT_EPT_MISCONFIGURATION;
      break;
    }

    extern bool isMemTypeValidMTRR(unsigned memtype);
    if (! isMemTypeValidMTRR((curr_entry >> 3) & 7)) {
      BX_DEBUG(("EPT %s: EPT misconfiguration memtype=%d",
        bx_paging_level[leaf], (unsigned)((curr_entry >> 3) & 7)));
      vmexit_reason = VMX_VMEXIT_EPT_MISCONFIGURATION;
      break;
    }

    if (curr_entry & PAGING_EPT_RESERVED_BITS) {
      BX_DEBUG(("EPT %s: reserved bit is set 0x" FMT_ADDRX64, bx_paging_level[leaf], curr_entry));
      vmexit_reason = VMX_VMEXIT_EPT_MISCONFIGURATION;
      break;
    }

    ppf = curr_entry & BX_CONST64(0x000ffffffffff000);

    if (leaf == BX_LEVEL_PTE) break;

    if (curr_entry & 0x80) {
      if (leaf > (BX_LEVEL_PDE + !!is_cpu_extension_supported(BX_ISA_1G_PAGES))) {
        BX_DEBUG(("EPT %s: PS bit set !", bx_paging_level[leaf]));
        vmexit_reason = VMX_VMEXIT_EPT_MISCONFIGURATION;
        break;
      }

      ppf &= BX_CONST64(0x000fffffffffe000);
      if (ppf & offset_mask) {
        BX_DEBUG(("EPT %s: reserved bit is set: 0x" FMT_ADDRX64, bx_paging_level[leaf], curr_entry));
        vmexit_reason = VMX_VMEXIT_EPT_MISCONFIGURATION;
        break;
      }

      // Make up the physical page frame address
      ppf += (bx_phy_address)(guest_paddr & offset_mask);
      break;
    }

    // EPT non-leaf entry, check for reserved bits
    if ((curr_entry >> 3) & 0xf) {
      BX_DEBUG(("EPT %s: EPT misconfiguration, reserved bits set for non-leaf entry", bx_paging_level[leaf]));
      vmexit_reason = VMX_VMEXIT_EPT_MISCONFIGURATION;
      break;
    }

    combined_access &= curr_access_mask;
  }

  // defer final combined_access calculation (with leaf entry) until CET is handled

  if (!vmexit_reason) {
#if BX_SUPPORT_CET
    if (BX_VMX_EPT_SUPERVISOR_SHADOW_STACK_CTRL_ENABLED && supervisor_shadow_stack) {
      // The EPT.R bit is set in all EPT paging-structure entries controlling the translation
      // The EPT.W bit is set in all EPT paging-structure entries controlling the translation except the leaf entry (allowed for shadow stack write access)
      // The SSS bit (bit 60) is 1 in the EPT paging-structure entry that maps the page
      bool supervisor_shadow_stack_page = ((combined_access & BX_EPT_ENTRY_READ_WRITE) == BX_EPT_ENTRY_READ_WRITE) &&
                                          ((entry[leaf] & BX_EPT_READ) != 0) &&
                                          (((entry[leaf] & BX_EPT_WRITE) == 0) || !(access_mask & BX_EPT_WRITE)) &&
                                          ((entry[leaf] & BX_SUPERVISOR_SHADOW_STACK_PAGE) != 0);
      if (!supervisor_shadow_stack_page) {
        BX_ERROR(("VMEXIT: supervisor shadow stack access to non supervisor shadow stack page"));
        vmexit_reason = VMX_VMEXIT_EPT_VIOLATION;
      }
    }
    else
#endif
    {
      combined_access &= entry[leaf];

      if ((access_mask & combined_access) != access_mask) {
        vmexit_reason = VMX_VMEXIT_EPT_VIOLATION;
        if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_SUBPAGE_WR_PROTECT_CTRL) && (entry[leaf] & BX_SUB_PAGE_PROTECTED) != 0 && leaf == BX_LEVEL_PTE) {
          if ((access_mask & BX_EPT_WRITE) != 0 && (combined_access & BX_EPT_WRITE) == 0 && guest_laddr_valid && ! is_page_walk)
            if (spp_walk(guest_paddr, guest_laddr, MEMTYPE(eptptr_memtype)))
              vmexit_reason = 0;
        }
      }
    }
  }

  if (vmexit_reason) {
    BX_ERROR(("VMEXIT: EPT %s for guest paddr 0x" FMT_PHY_ADDRX " laddr 0x" FMT_ADDRX,
       (vmexit_reason == VMX_VMEXIT_EPT_VIOLATION) ? "violation" : "misconfig", guest_paddr, guest_laddr));

    Bit32u vmexit_qualification = 0;

    // no VMExit qualification for EPT Misconfiguration VMExit
    if (vmexit_reason == VMX_VMEXIT_EPT_VIOLATION) {
      combined_access &= entry[leaf];
      vmexit_qualification = access_mask | (combined_access << 3);
      if (guest_laddr_valid) {
        vmexit_qualification |= (1<<7);
        if (! is_page_walk) vmexit_qualification |= (1<<8);
      }
      if (BX_CPU_THIS_PTR nmi_unblocking_iret)
        vmexit_qualification |= (1 << 12);
#if BX_SUPPORT_CET
      if (rw & 4) // shadow stack access
        vmexit_qualification |= (1 << 13);

      if (BX_VMX_EPT_SUPERVISOR_SHADOW_STACK_CTRL_ENABLED && (entry[leaf] & BX_SUPERVISOR_SHADOW_STACK_PAGE) != 0)
        vmexit_qualification |= (1 << 14);
#endif
      if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_VIOLATION_EXCEPTION)) {
        if ((entry[leaf] & BX_SUPPRESS_EPT_VIOLATION_EXCEPTION) == 0)
          Virtualization_Exception(vmexit_qualification, guest_paddr, guest_laddr);
      }
    }

    VMwrite64(VMCS_64BIT_GUEST_PHYSICAL_ADDR, guest_paddr);
    VMwrite_natural(VMCS_GUEST_LINEAR_ADDR, guest_laddr);
    VMexit(vmexit_reason, vmexit_qualification);
  }

  if (BX_VMX_EPT_ACCESS_DIRTY_ENABLED) {
    // write access and Dirty-bit is not set in the leaf entry
    unsigned dirty_update = (rw & 1) && !(entry[leaf] & 0x200);
    if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_PML_ENABLE))
      vmx_page_modification_logging(guest_paddr, dirty_update);

    update_ept_access_dirty(entry_addr, entry, MEMTYPE(eptptr_memtype), leaf, rw & 1);
  }

  Bit32u page_offset = PAGE_OFFSET(guest_paddr);
  return ppf | page_offset;
}

// Access bit 8, Dirty bit 9
void BX_CPU_C::update_ept_access_dirty(bx_phy_address *entry_addr, Bit64u *entry, BxMemtype eptptr_memtype, unsigned leaf, unsigned write)
{
  // Update A bit if needed
  for (unsigned level=BX_LEVEL_PML4; level > leaf; level--) {
    if (!(entry[level] & 0x100)) {
      entry[level] |= 0x100;
      access_write_physical(entry_addr[level], 8, &entry[level]);
      BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[level], 8, MEMTYPE(eptptr_memtype), BX_WRITE, (BX_EPT_PTE_ACCESS + level), (Bit8u*)(&entry[level]));
    }
  }

  // Update A/D bits if needed
  if (!(entry[leaf] & 0x100) || (write && !(entry[leaf] & 0x200))) {
    entry[leaf] |= (0x100 | (write<<9)); // Update A and possibly D bits
    access_write_physical(entry_addr[leaf], 8, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 8, MEMTYPE(eptptr_memtype), BX_WRITE, (BX_EPT_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));
  }
}

const Bit64u PAGING_SPP_RESERVED_BITS = BX_PAGING_PHY_ADDRESS_RESERVED_BITS | BX_CONST64(0xFFF0000000000FFE);

const Bit32u VMX_SPP_NOT_PRESENT_QUALIFICATION = (1<<11);

bool BX_CPU_C::spp_walk(bx_phy_address guest_paddr, bx_address guest_laddr, BxMemtype memtype)
{
  VMCS_CACHE *vm = &BX_CPU_THIS_PTR vmcs;
  bx_phy_address entry_addr[4], ppf = LPFOf(vm->spptp);
  Bit64u entry[4];
  int leaf;

  BX_DEBUG(("SPP walk for guest paddr 0x" FMT_PHY_ADDRX, guest_paddr));

  Bit32u vmexit_reason = 0;
  Bit32u vmexit_qualification = 0;

  for (leaf = BX_LEVEL_PML4;; --leaf) {
    entry_addr[leaf] = ppf + ((guest_paddr >> (9 + 9*leaf)) & 0xff8);
    access_read_physical(entry_addr[leaf], 8, &entry[leaf]);
    BX_NOTIFY_PHY_MEMORY_ACCESS(entry_addr[leaf], 8, MEMTYPE(memtype), BX_READ, (BX_EPT_SPP_PTE_ACCESS + leaf), (Bit8u*)(&entry[leaf]));

    if (leaf == BX_LEVEL_PTE) break;

    Bit64u curr_entry = entry[leaf];

    if (!(curr_entry & 1)) {
      BX_DEBUG(("SPP %s: not present", bx_paging_level[leaf]));
      vmexit_reason = VMX_VMEXIT_SPP;
      vmexit_qualification = VMX_SPP_NOT_PRESENT_QUALIFICATION;
      break;
    }

    if (curr_entry & PAGING_SPP_RESERVED_BITS) {
      BX_DEBUG(("SPP %s: reserved bit is set 0x" FMT_ADDRX64, bx_paging_level[leaf], curr_entry));
      vmexit_reason = VMX_VMEXIT_SPP;
      break;
    }

    ppf = curr_entry & BX_CONST64(0x000ffffffffff000);
  }

  if (vmexit_reason) {
    BX_ERROR(("VMEXIT: SPP %s for guest paddr 0x" FMT_PHY_ADDRX " laddr 0x" FMT_ADDRX,
       (vmexit_qualification == VMX_SPP_NOT_PRESENT_QUALIFICATION) ? "violation" : "misconfig", guest_paddr, guest_laddr));

    if (BX_CPU_THIS_PTR nmi_unblocking_iret)
      vmexit_qualification |= (1 << 12);

    VMwrite64(VMCS_64BIT_GUEST_PHYSICAL_ADDR, guest_paddr);
    VMwrite_natural(VMCS_GUEST_LINEAR_ADDR, guest_laddr);
    VMexit(vmexit_reason, vmexit_qualification);
  }

  Bit32u spp_bit = 2 * ((guest_paddr & 0xFFF) >> 7);
  return (entry[BX_LEVEL_PTE] >> spp_bit) & 1;
}
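
// Illustrative sketch of the sub-page permission lookup above: the SPP leaf
// entry carries 2 bits for each 128-byte region of a 4K page (32 regions,
// 64 bits total), and the write-permission bit for a region sits at the even
// bit position. The helper below is hypothetical, for illustration only.
static unsigned spp_write_bit_index_example(bx_phy_address guest_paddr)
{
  unsigned region = (guest_paddr & 0xFFF) >> 7; // which 128-byte sub-page region, 0..31
  return 2 * region;                            // even bits hold the write permission
}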

#endif

#if BX_DEBUGGER

void dbg_print_paging_pte(int level, Bit64u entry)
{
  dbg_printf("%4s: 0x%08x%08x", bx_paging_level[level], GET32H(entry), GET32L(entry));

  if (entry & BX_CONST64(0x8000000000000000))
    dbg_printf(" XD");
  else
    dbg_printf("   ");

  if (level == BX_LEVEL_PTE) {
    dbg_printf(" %s %s %s",
      (entry & 0x0100) ? "G" : "g",
      (entry & 0x0080) ? "PAT" : "pat",
      (entry & 0x0040) ? "D" : "d");
  }
  else {
    if (entry & 0x80) {
      dbg_printf(" PS %s %s %s",
        (entry & 0x0100) ? "G" : "g",
        (entry & 0x1000) ? "PAT" : "pat",
        (entry & 0x0040) ? "D" : "d");
    }
    else {
      dbg_printf(" ps        ");
    }
  }

  dbg_printf(" %s %s %s %s %s %s\n",
    (entry & 0x20) ? "A" : "a",
    (entry & 0x10) ? "PCD" : "pcd",
    (entry & 0x08) ? "PWT" : "pwt",
    (entry & 0x04) ? "U" : "S",
    (entry & 0x02) ? "W" : "R",
    (entry & 0x01) ? "P" : "p");
}

#if BX_SUPPORT_VMX >= 2
void dbg_print_ept_paging_pte(int level, Bit64u entry)
{
  dbg_printf("EPT %4s: 0x%08x%08x", bx_paging_level[level], GET32H(entry), GET32L(entry));

  if (level != BX_LEVEL_PTE && (entry & 0x80))
    dbg_printf(" PS");
  else
    dbg_printf("   ");

  dbg_printf(" %s %s %s",
    (entry & 0x04) ? "E" : "e",
    (entry & 0x02) ? "W" : "w",
    (entry & 0x01) ? "R" : "r");

  if (level == BX_LEVEL_PTE || (entry & 0x80)) {
    dbg_printf(" %s %s\n",
      (entry & 0x40) ? "IGNORE_PAT" : "ignore_pat",
      get_memtype_name(BxMemtype((entry >> 3) & 0x7)));
  }
  else {
    dbg_printf("\n");
  }
}
#endif

#endif // BX_DEBUGGER

#if BX_SUPPORT_VMX >= 2
bool BX_CPU_C::dbg_translate_guest_physical(bx_phy_address guest_paddr, bx_phy_address *phy, bool verbose)
{
  VMCS_CACHE *vm = &BX_CPU_THIS_PTR vmcs;
  bx_phy_address pt_address = LPFOf(vm->eptptr);
  Bit64u offset_mask = BX_CONST64(0x0000ffffffffffff);

  for (int level = 3; level >= 0; --level) {
    Bit64u pte;
    pt_address += ((guest_paddr >> (9 + 9*level)) & 0xff8);
    offset_mask >>= 9;
    BX_MEM(0)->readPhysicalPage(BX_CPU_THIS, pt_address, 8, &pte);
#if BX_DEBUGGER
    if (verbose)
      dbg_print_ept_paging_pte(level, pte);
#endif
    switch(pte & 7) {
      case BX_EPT_ENTRY_NOT_PRESENT:
      case BX_EPT_ENTRY_WRITE_ONLY:
      case BX_EPT_ENTRY_WRITE_EXECUTE:
        return 0;
    }
    if (pte & BX_PAGING_PHY_ADDRESS_RESERVED_BITS)
      return 0;

    pt_address = bx_phy_address(pte & BX_CONST64(0x000ffffffffff000));

    if (level == BX_LEVEL_PTE) break;

    if (pte & 0x80) {
      if (level > (BX_LEVEL_PDE + !!is_cpu_extension_supported(BX_ISA_1G_PAGES)))
        return 0;

      pt_address &= BX_CONST64(0x000fffffffffe000);
      if (pt_address & offset_mask) return 0;
      break;
    }
  }

  *phy = pt_address + (bx_phy_address)(guest_paddr & offset_mask);
  return 1;
}
#endif

bool BX_CPU_C::dbg_xlate_linear2phy(bx_address laddr, bx_phy_address *phy, bx_address *lpf_mask, bool verbose)
{
  bx_phy_address paddress;
  bx_address offset_mask = 0xfff;

#if BX_SUPPORT_X86_64
  if (! long_mode()) laddr &= 0xffffffff;
#endif

  if (! BX_CPU_THIS_PTR cr0.get_PG()) {
    paddress = (bx_phy_address) laddr;
  }
  else {
    bx_phy_address pt_address = BX_CPU_THIS_PTR cr3 & BX_CR3_PAGING_MASK;

#if BX_CPU_LEVEL >= 6
    if (BX_CPU_THIS_PTR cr4.get_PAE()) {
      offset_mask = BX_CONST64(0x0000ffffffffffff);

      int level = 3;
      if (! long_mode()) {
        pt_address = BX_CPU_THIS_PTR PDPTR_CACHE.entry[(laddr >> 30) & 3];
        if (! (pt_address & 0x1)) {
          offset_mask = 0x3fffffff;
          goto page_fault;
        }
        offset_mask >>= 18;
        pt_address &= BX_CONST64(0x000ffffffffff000);
        level = 1;
      }

      for (; level >= 0; --level) {
        Bit64u pte;
        pt_address += ((laddr >> (9 + 9*level)) & 0xff8);
        offset_mask >>= 9;
#if BX_SUPPORT_VMX >= 2
        if (BX_CPU_THIS_PTR in_vmx_guest) {
          if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_ENABLE)) {
            if (! dbg_translate_guest_physical(pt_address, &pt_address, verbose))
              goto page_fault;
          }
        }
#endif
        BX_MEM(0)->readPhysicalPage(BX_CPU_THIS, pt_address, 8, &pte);
#if BX_DEBUGGER
        if (verbose)
          dbg_print_paging_pte(level, pte);
#endif
        if (!(pte & 1))
          goto page_fault;
        if (pte & BX_PAGING_PHY_ADDRESS_RESERVED_BITS)
          goto page_fault;
        pt_address = bx_phy_address(pte & BX_CONST64(0x000ffffffffff000));
        if (level == BX_LEVEL_PTE) break;
        if (pte & 0x80) {
          // large page
          pt_address &= BX_CONST64(0x000fffffffffe000);
          if (pt_address & offset_mask)
            goto page_fault;
          if (is_cpu_extension_supported(BX_ISA_1G_PAGES) && level == BX_LEVEL_PDPTE) break;
          if (level == BX_LEVEL_PDE) break;
          goto page_fault;
        }
      }
      paddress = pt_address + (bx_phy_address)(laddr & offset_mask);
    }
    else // not PAE
#endif
    {
      offset_mask = 0xfff;
      for (int level = 1; level >= 0; --level) {
        Bit32u pte;
        pt_address += ((laddr >> (10 + 10*level)) & 0xffc);
#if BX_SUPPORT_VMX >= 2
        if (BX_CPU_THIS_PTR in_vmx_guest) {
          if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_ENABLE)) {
            if (! dbg_translate_guest_physical(pt_address, &pt_address, verbose))
              goto page_fault;
          }
        }
#endif
        BX_MEM(0)->readPhysicalPage(BX_CPU_THIS, pt_address, 4, &pte);
#if BX_DEBUGGER
        if (verbose)
          dbg_print_paging_pte(level, pte);
#endif
        if (!(pte & 1))
          goto page_fault;
        pt_address = pte & 0xfffff000;
#if BX_CPU_LEVEL >= 6
        if (level == BX_LEVEL_PDE && (pte & 0x80) != 0 && BX_CPU_THIS_PTR cr4.get_PSE()) {
          offset_mask = 0x3fffff;
          pt_address = pte & 0xffc00000;
#if BX_PHY_ADDRESS_WIDTH > 32
          pt_address += ((bx_phy_address)(pte & 0x003fe000)) << 19;
#endif
          break;
        }
#endif
      }
      paddress = pt_address + (bx_phy_address)(laddr & offset_mask);
    }
  }
#if BX_SUPPORT_VMX >= 2
  if (BX_CPU_THIS_PTR in_vmx_guest) {
    if (SECONDARY_VMEXEC_CONTROL(VMX_VM_EXEC_CTRL3_EPT_ENABLE)) {
      if (! dbg_translate_guest_physical(paddress, &paddress, verbose))
        goto page_fault;
    }
  }
#endif

  if (lpf_mask)
    *lpf_mask = offset_mask;
  *phy = A20ADDR(paddress);
  return 1;

page_fault:
  if (lpf_mask)
    *lpf_mask = offset_mask;
  *phy = 0;
  return 0;
}
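
// Illustrative sketch of the entry-offset arithmetic used by the walkers
// above: each PAE/long-mode table holds 512 8-byte entries, and the 9-bit
// index for a level starts at linear-address bit 12 + 9*level. Shifting by
// (9 + 9*level) and masking with 0xff8 produces that index already scaled
// by 8, i.e. the byte offset of the entry within the table. The helper name
// is hypothetical.
static bx_address paging_entry_byte_offset_example(bx_address laddr, int level)
{
  bx_address index  = (laddr >> (12 + 9*level)) & 0x1ff; // 9-bit table index
  bx_address offset = (laddr >> (9 + 9*level)) & 0xff8;  // same index, times 8
  BX_ASSERT(offset == index * 8);
  return offset;
}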

int BX_CPU_C::access_write_linear(bx_address laddr, unsigned len, unsigned curr_pl, unsigned xlate_rw, Bit32u ac_mask, void *data)
{
#if BX_SUPPORT_CET
  BX_ASSERT(xlate_rw == BX_WRITE || xlate_rw == BX_SHADOW_STACK_WRITE);
#else
  BX_ASSERT(xlate_rw == BX_WRITE);
#endif

  Bit32u pageOffset = PAGE_OFFSET(laddr);

  bool user = (curr_pl == 3);

  bx_TLB_entry *tlbEntry = BX_DTLB_ENTRY_OF(laddr, 0);

#if BX_SUPPORT_X86_64
  if (! IsCanonical(laddr)) {
    BX_ERROR(("access_write_linear(): canonical failure"));
    return -1;
  }
#endif

#if BX_CPU_LEVEL >= 4 && BX_SUPPORT_ALIGNMENT_CHECK
  if (BX_CPU_THIS_PTR alignment_check() && user) {
    if (pageOffset & ac_mask) {
      BX_ERROR(("access_write_linear(): #AC misaligned access"));
      exception(BX_AC_EXCEPTION, 0);
    }
  }
#endif

  /* check for reference across multiple pages */
  if ((pageOffset + len) <= 4096) {
    // Access within single page.
    BX_CPU_THIS_PTR address_xlation.paddress1 = translate_linear(tlbEntry, laddr, user, xlate_rw);
    BX_CPU_THIS_PTR address_xlation.pages = 1;
#if BX_SUPPORT_MEMTYPE
    BX_CPU_THIS_PTR address_xlation.memtype1 = tlbEntry->get_memtype();
#endif

    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr, BX_CPU_THIS_PTR address_xlation.paddress1,
                                len, tlbEntry->get_memtype(), xlate_rw, (Bit8u*) data);

    access_write_physical(BX_CPU_THIS_PTR address_xlation.paddress1, len, data);

#if BX_X86_DEBUGGER
    hwbreakpoint_match(laddr, len, xlate_rw);
#endif
  }
  else {
    // access across 2 pages
    BX_CPU_THIS_PTR address_xlation.len1 = 4096 - pageOffset;
    BX_CPU_THIS_PTR address_xlation.len2 = len - BX_CPU_THIS_PTR address_xlation.len1;
    BX_CPU_THIS_PTR address_xlation.pages = 2;
    bx_address laddr2 = laddr + BX_CPU_THIS_PTR address_xlation.len1;
#if BX_SUPPORT_X86_64
    if (! long64_mode()) laddr2 &= 0xffffffff; /* handle linear address wrap in legacy mode */
    else {
      if (! IsCanonical(laddr2)) {
        BX_ERROR(("access_write_linear(): canonical failure for second half of page split access"));
        return -1;
      }
    }
#endif

    bx_TLB_entry *tlbEntry2 = BX_DTLB_ENTRY_OF(laddr2, 0);

    BX_CPU_THIS_PTR address_xlation.paddress1 = translate_linear(tlbEntry, laddr, user, xlate_rw);
    BX_CPU_THIS_PTR address_xlation.paddress2 = translate_linear(tlbEntry2, laddr2, user, xlate_rw);
#if BX_SUPPORT_MEMTYPE
    BX_CPU_THIS_PTR address_xlation.memtype1 = tlbEntry->get_memtype();
    BX_CPU_THIS_PTR address_xlation.memtype2 = tlbEntry2->get_memtype();
#endif

#ifdef BX_LITTLE_ENDIAN
    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr, BX_CPU_THIS_PTR address_xlation.paddress1,
        BX_CPU_THIS_PTR address_xlation.len1, tlbEntry->get_memtype(),
        xlate_rw, (Bit8u*) data);
    access_write_physical(BX_CPU_THIS_PTR address_xlation.paddress1,
        BX_CPU_THIS_PTR address_xlation.len1, data);
    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr2, BX_CPU_THIS_PTR address_xlation.paddress2,
        BX_CPU_THIS_PTR address_xlation.len2, tlbEntry2->get_memtype(),
        xlate_rw, ((Bit8u*)data) + BX_CPU_THIS_PTR address_xlation.len1);
    access_write_physical(BX_CPU_THIS_PTR address_xlation.paddress2,
        BX_CPU_THIS_PTR address_xlation.len2,
        ((Bit8u*)data) + BX_CPU_THIS_PTR address_xlation.len1);
#else // BX_BIG_ENDIAN
    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr, BX_CPU_THIS_PTR address_xlation.paddress1,
        BX_CPU_THIS_PTR address_xlation.len1, tlbEntry->get_memtype(),
        xlate_rw, ((Bit8u*)data) + (len - BX_CPU_THIS_PTR address_xlation.len1));
    access_write_physical(BX_CPU_THIS_PTR address_xlation.paddress1,
        BX_CPU_THIS_PTR address_xlation.len1,
        ((Bit8u*)data) + (len - BX_CPU_THIS_PTR address_xlation.len1));
    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr2, BX_CPU_THIS_PTR address_xlation.paddress2,
        BX_CPU_THIS_PTR address_xlation.len2, tlbEntry2->get_memtype(),
        xlate_rw, (Bit8u*) data);
    access_write_physical(BX_CPU_THIS_PTR address_xlation.paddress2,
        BX_CPU_THIS_PTR address_xlation.len2, data);
#endif

#if BX_X86_DEBUGGER
    hwbreakpoint_match(laddr, BX_CPU_THIS_PTR address_xlation.len1, xlate_rw);
    hwbreakpoint_match(laddr2, BX_CPU_THIS_PTR address_xlation.len2, xlate_rw);
#endif
  }

  return 0;
}
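
// Worked example (illustrative) of the page-split arithmetic above: a write
// of len = 8 bytes at page offset 0xFFC crosses the 4K boundary, so
// len1 = 4096 - 0xFFC = 4 bytes land on the first page and len2 = 4 bytes
// spill onto the second. The helper below is hypothetical.
static void split_access_lengths_example(Bit32u pageOffset, unsigned len, unsigned *len1, unsigned *len2)
{
  *len1 = 4096 - pageOffset; // bytes that still fit on the first page
  *len2 = len - *len1;       // remainder that lands on the following page
}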

int BX_CPU_C::access_read_linear(bx_address laddr, unsigned len, unsigned curr_pl, unsigned xlate_rw, Bit32u ac_mask, void *data)
{
#if BX_SUPPORT_CET
  BX_ASSERT(xlate_rw == BX_READ || xlate_rw == BX_RW || xlate_rw == BX_SHADOW_STACK_READ || xlate_rw == BX_SHADOW_STACK_RW);
#else
  BX_ASSERT(xlate_rw == BX_READ || xlate_rw == BX_RW);
#endif

  Bit32u pageOffset = PAGE_OFFSET(laddr);

  bool user = (curr_pl == 3);

#if BX_SUPPORT_X86_64
  if (! IsCanonical(laddr)) {
    BX_ERROR(("access_read_linear(): canonical failure"));
    return -1;
  }
#endif

#if BX_CPU_LEVEL >= 4 && BX_SUPPORT_ALIGNMENT_CHECK
  if (BX_CPU_THIS_PTR alignment_check() && user) {
    if (pageOffset & ac_mask) {
      BX_ERROR(("access_read_linear(): #AC misaligned access"));
      exception(BX_AC_EXCEPTION, 0);
    }
  }
#endif

  bx_TLB_entry *tlbEntry = BX_DTLB_ENTRY_OF(laddr, 0);

  /* check for reference across multiple pages */
  if ((pageOffset + len) <= 4096) {
    // Access within single page.
    BX_CPU_THIS_PTR address_xlation.paddress1 = translate_linear(tlbEntry, laddr, user, xlate_rw);
    BX_CPU_THIS_PTR address_xlation.pages = 1;
#if BX_SUPPORT_MEMTYPE
    BX_CPU_THIS_PTR address_xlation.memtype1 = tlbEntry->get_memtype();
#endif
    access_read_physical(BX_CPU_THIS_PTR address_xlation.paddress1, len, data);
    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr, BX_CPU_THIS_PTR address_xlation.paddress1, len, tlbEntry->get_memtype(), xlate_rw, (Bit8u*) data);

#if BX_X86_DEBUGGER
    hwbreakpoint_match(laddr, len, xlate_rw);
#endif
  }
  else {
    // access across 2 pages
    BX_CPU_THIS_PTR address_xlation.len1 = 4096 - pageOffset;
    BX_CPU_THIS_PTR address_xlation.len2 = len - BX_CPU_THIS_PTR address_xlation.len1;
    BX_CPU_THIS_PTR address_xlation.pages = 2;
    bx_address laddr2 = laddr + BX_CPU_THIS_PTR address_xlation.len1;
#if BX_SUPPORT_X86_64
    if (! long64_mode()) laddr2 &= 0xffffffff; /* handle linear address wrap in legacy mode */
    else {
      if (! IsCanonical(laddr2)) {
        BX_ERROR(("access_read_linear(): canonical failure for second half of page split access"));
        return -1;
      }
    }
#endif

    bx_TLB_entry *tlbEntry2 = BX_DTLB_ENTRY_OF(laddr2, 0);

    BX_CPU_THIS_PTR address_xlation.paddress1 = translate_linear(tlbEntry, laddr, user, xlate_rw);
    BX_CPU_THIS_PTR address_xlation.paddress2 = translate_linear(tlbEntry2, laddr2, user, xlate_rw);
#if BX_SUPPORT_MEMTYPE
    BX_CPU_THIS_PTR address_xlation.memtype1 = tlbEntry->get_memtype();
    BX_CPU_THIS_PTR address_xlation.memtype2 = tlbEntry2->get_memtype();
#endif

#ifdef BX_LITTLE_ENDIAN
    access_read_physical(BX_CPU_THIS_PTR address_xlation.paddress1,
        BX_CPU_THIS_PTR address_xlation.len1, data);
    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr, BX_CPU_THIS_PTR address_xlation.paddress1,
        BX_CPU_THIS_PTR address_xlation.len1, tlbEntry->get_memtype(),
        xlate_rw, (Bit8u*) data);
    access_read_physical(BX_CPU_THIS_PTR address_xlation.paddress2,
        BX_CPU_THIS_PTR address_xlation.len2,
        ((Bit8u*)data) + BX_CPU_THIS_PTR address_xlation.len1);
    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr2, BX_CPU_THIS_PTR address_xlation.paddress2,
        BX_CPU_THIS_PTR address_xlation.len2, tlbEntry2->get_memtype(),
        xlate_rw, ((Bit8u*)data) + BX_CPU_THIS_PTR address_xlation.len1);
#else // BX_BIG_ENDIAN
    access_read_physical(BX_CPU_THIS_PTR address_xlation.paddress1,
        BX_CPU_THIS_PTR address_xlation.len1,
        ((Bit8u*)data) + (len - BX_CPU_THIS_PTR address_xlation.len1));
    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr, BX_CPU_THIS_PTR address_xlation.paddress1,
        BX_CPU_THIS_PTR address_xlation.len1, tlbEntry->get_memtype(),
        xlate_rw, ((Bit8u*)data) + (len - BX_CPU_THIS_PTR address_xlation.len1));
    access_read_physical(BX_CPU_THIS_PTR address_xlation.paddress2,
        BX_CPU_THIS_PTR address_xlation.len2, data);
    BX_NOTIFY_LIN_MEMORY_ACCESS(laddr2, BX_CPU_THIS_PTR address_xlation.paddress2,
        BX_CPU_THIS_PTR address_xlation.len2, tlbEntry2->get_memtype(),
        xlate_rw, (Bit8u*) data);
#endif

#if BX_X86_DEBUGGER
    hwbreakpoint_match(laddr, BX_CPU_THIS_PTR address_xlation.len1, xlate_rw);
    hwbreakpoint_match(laddr2, BX_CPU_THIS_PTR address_xlation.len2, xlate_rw);
#endif
  }

  return 0;
}

void BX_CPU_C::access_write_physical(bx_phy_address paddr, unsigned len, void *data)
{
#if BX_SUPPORT_VMX && BX_SUPPORT_X86_64
  if (is_virtual_apic_page(paddr)) {
    VMX_Virtual_Apic_Write(paddr, len, data);
    return;
  }
#endif

#if BX_SUPPORT_APIC
  if (BX_CPU_THIS_PTR lapic.is_selected(paddr)) {
    BX_CPU_THIS_PTR lapic.write(paddr, data, len);
    return;
  }
#endif

  BX_MEM(0)->writePhysicalPage(BX_CPU_THIS, paddr, len, data);
}

void BX_CPU_C::access_read_physical(bx_phy_address paddr, unsigned len, void *data)
{
#if BX_SUPPORT_VMX && BX_SUPPORT_X86_64
  if (is_virtual_apic_page(paddr)) {
    paddr = VMX_Virtual_Apic_Read(paddr, len, data);
  }
#endif

#if BX_SUPPORT_APIC
  if (BX_CPU_THIS_PTR lapic.is_selected(paddr)) {
    BX_CPU_THIS_PTR lapic.read(paddr, data, len);
    return;
  }
#endif

  BX_MEM(0)->readPhysicalPage(BX_CPU_THIS, paddr, len, data);
}

bx_hostpageaddr_t BX_CPU_C::getHostMemAddr(bx_phy_address paddr, unsigned rw)
{
#if BX_SUPPORT_VMX && BX_SUPPORT_X86_64
  if (is_virtual_apic_page(paddr))
    return 0; // Do not allow direct access to virtual apic page
#endif

#if BX_SUPPORT_APIC
  if (BX_CPU_THIS_PTR lapic.is_selected(paddr))
    return 0; // Vetoed! APIC address space
#endif

  return (bx_hostpageaddr_t) BX_MEM(0)->getHostMemAddr(BX_CPU_THIS, paddr, rw);
}

#if BX_LARGE_RAMFILE
bool BX_CPU_C::check_addr_in_tlb_buffers(const Bit8u *addr, const Bit8u *end)
{
#if BX_SUPPORT_VMX
  if (BX_CPU_THIS_PTR vmcshostptr) {
    if ((BX_CPU_THIS_PTR vmcshostptr >= (const bx_hostpageaddr_t)addr) &&
        (BX_CPU_THIS_PTR vmcshostptr <  (const bx_hostpageaddr_t)end)) return true;
  }
#endif

#if BX_SUPPORT_SVM
  if (BX_CPU_THIS_PTR vmcbhostptr) {
    if ((BX_CPU_THIS_PTR vmcbhostptr >= (const bx_hostpageaddr_t)addr) &&
        (BX_CPU_THIS_PTR vmcbhostptr <  (const bx_hostpageaddr_t)end)) return true;
  }
#endif

  for (unsigned tlb_entry_num=0; tlb_entry_num < BX_DTLB_SIZE; tlb_entry_num++) {
    bx_TLB_entry *tlbEntry = &BX_CPU_THIS_PTR DTLB.entry[tlb_entry_num];
    if (tlbEntry->valid()) {
      if ((tlbEntry->hostPageAddr >= (const bx_hostpageaddr_t)addr) &&
          (tlbEntry->hostPageAddr <  (const bx_hostpageaddr_t)end))
        return true;
    }
  }

  for (unsigned tlb_entry_num=0; tlb_entry_num < BX_ITLB_SIZE; tlb_entry_num++) {
    bx_TLB_entry *tlbEntry = &BX_CPU_THIS_PTR ITLB.entry[tlb_entry_num];
    if (tlbEntry->valid()) {
      if ((tlbEntry->hostPageAddr >= (const bx_hostpageaddr_t)addr) &&
          (tlbEntry->hostPageAddr <  (const bx_hostpageaddr_t)end))
        return true;
    }
  }

  return false;
}
#endif
