1 /* $NetBSD: nvmm_x86_vmx.c,v 1.36.2.15 2020/09/13 11:56:44 martin Exp $ */ 2 3 /* 4 * Copyright (c) 2018-2019 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Maxime Villard. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 35 #include <sys/bitops.h> 36 #include <sys/cpumask.h> 37 #include <sys/globaldata.h> 38 #include <sys/kernel.h> 39 #include <sys/malloc.h> /* contigmalloc, contigfree */ 40 #include <sys/thread2.h> /* lwkt_send_ipiq, lwkt_send_ipiq_mask */ 41 42 #include <vm/vm_map.h> 43 44 #include <machine/cpufunc.h> 45 #include <machine/md_var.h> /* cpu_* */ 46 #include <machine/segments.h> 47 #include <machine/smp.h> /* smp_active_mask */ 48 #include <machine/specialreg.h> 49 50 #include <dev/virtual/nvmm/nvmm_compat.h> 51 #include <dev/virtual/nvmm/nvmm.h> 52 #include <dev/virtual/nvmm/nvmm_internal.h> 53 #include <dev/virtual/nvmm/x86/nvmm_x86.h> 54 55 int _vmx_vmxon(paddr_t *pa); 56 int _vmx_vmxoff(void); 57 int vmx_vmlaunch(uint64_t *gprs); 58 int vmx_vmresume(uint64_t *gprs); 59 60 #define vmx_vmxon(a) \ 61 if (__predict_false(_vmx_vmxon(a) != 0)) { \ 62 panic("%s: VMXON failed", __func__); \ 63 } 64 #define vmx_vmxoff() \ 65 if (__predict_false(_vmx_vmxoff() != 0)) { \ 66 panic("%s: VMXOFF failed", __func__); \ 67 } 68 69 struct ept_desc { 70 uint64_t eptp; 71 uint64_t mbz; 72 } __packed; 73 74 struct vpid_desc { 75 uint64_t vpid; 76 uint64_t addr; 77 } __packed; 78 79 static inline void 80 vmx_invept(uint64_t op, struct ept_desc *desc) 81 { 82 asm volatile ( 83 "invept %[desc],%[op];" 84 "jz vmx_insn_failvalid;" 85 "jc vmx_insn_failinvalid;" 86 : 87 : [desc] "m" (*desc), [op] "r" (op) 88 : "memory", "cc" 89 ); 90 } 91 92 static inline void 93 vmx_invvpid(uint64_t op, struct vpid_desc *desc) 94 { 95 asm volatile ( 96 "invvpid %[desc],%[op];" 97 "jz vmx_insn_failvalid;" 98 "jc vmx_insn_failinvalid;" 99 : 100 : [desc] "m" (*desc), [op] "r" (op) 101 : "memory", "cc" 102 ); 103 } 104 105 static inline uint64_t 106 vmx_vmread(uint64_t field) 107 { 108 uint64_t value; 109 110 asm volatile ( 111 "vmread %[field],%[value];" 112 "jz vmx_insn_failvalid;" 113 "jc vmx_insn_failinvalid;" 114 : [value] "=r" (value) 115 : [field] "r" (field) 116 : "cc" 117 ); 118 119 return value; 120 } 121 122 static inline void 123 vmx_vmwrite(uint64_t field, uint64_t value) 124 { 125 asm volatile ( 126 "vmwrite %[value],%[field];" 127 "jz vmx_insn_failvalid;" 128 "jc vmx_insn_failinvalid;" 129 : 130 : [field] "r" (field), [value] "r" (value) 131 : "cc" 132 ); 133 } 134 135 #ifdef DIAGNOSTIC 136 static inline paddr_t 137 vmx_vmptrst(void) 138 { 139 paddr_t pa; 140 141 asm volatile ( 142 "vmptrst %[pa];" 143 : 144 : [pa] "m" (*(paddr_t *)&pa) 145 : "memory" 146 ); 147 148 return pa; 149 } 150 #endif 151 152 static inline void 153 vmx_vmptrld(paddr_t *pa) 154 { 155 asm volatile ( 156 "vmptrld %[pa];" 157 "jz vmx_insn_failvalid;" 158 "jc vmx_insn_failinvalid;" 159 : 160 : [pa] "m" (*pa) 161 : "memory", "cc" 162 ); 163 } 164 165 static inline void 166 vmx_vmclear(paddr_t *pa) 167 { 168 asm volatile ( 169 "vmclear %[pa];" 170 "jz vmx_insn_failvalid;" 171 "jc vmx_insn_failinvalid;" 172 : 173 : [pa] "m" (*pa) 174 : "memory", "cc" 175 ); 176 } 177 178 #define MSR_IA32_FEATURE_CONTROL 0x003A 179 #define IA32_FEATURE_CONTROL_LOCK __BIT(0) 180 #define IA32_FEATURE_CONTROL_IN_SMX __BIT(1) 181 #define IA32_FEATURE_CONTROL_OUT_SMX __BIT(2) 182 183 #define MSR_IA32_VMX_BASIC 0x0480 184 #define IA32_VMX_BASIC_IDENT __BITS(30,0) 185 #define IA32_VMX_BASIC_DATA_SIZE __BITS(44,32) 186 #define IA32_VMX_BASIC_MEM_WIDTH __BIT(48) 187 #define IA32_VMX_BASIC_DUAL __BIT(49) 188 #define IA32_VMX_BASIC_MEM_TYPE __BITS(53,50) 189 #define MEM_TYPE_UC 0 190 #define MEM_TYPE_WB 6 191 #define IA32_VMX_BASIC_IO_REPORT __BIT(54) 192 #define IA32_VMX_BASIC_TRUE_CTLS __BIT(55) 193 194 #define MSR_IA32_VMX_PINBASED_CTLS 0x0481 195 #define MSR_IA32_VMX_PROCBASED_CTLS 0x0482 196 #define MSR_IA32_VMX_EXIT_CTLS 0x0483 197 #define MSR_IA32_VMX_ENTRY_CTLS 0x0484 198 #define MSR_IA32_VMX_PROCBASED_CTLS2 0x048B 199 200 #define MSR_IA32_VMX_TRUE_PINBASED_CTLS 0x048D 201 #define MSR_IA32_VMX_TRUE_PROCBASED_CTLS 0x048E 202 #define MSR_IA32_VMX_TRUE_EXIT_CTLS 0x048F 203 #define MSR_IA32_VMX_TRUE_ENTRY_CTLS 0x0490 204 205 #define MSR_IA32_VMX_CR0_FIXED0 0x0486 206 #define MSR_IA32_VMX_CR0_FIXED1 0x0487 207 #define MSR_IA32_VMX_CR4_FIXED0 0x0488 208 #define MSR_IA32_VMX_CR4_FIXED1 0x0489 209 210 #define MSR_IA32_VMX_EPT_VPID_CAP 0x048C 211 #define IA32_VMX_EPT_VPID_XO __BIT(0) 212 #define IA32_VMX_EPT_VPID_WALKLENGTH_4 __BIT(6) 213 #define IA32_VMX_EPT_VPID_UC __BIT(8) 214 #define IA32_VMX_EPT_VPID_WB __BIT(14) 215 #define IA32_VMX_EPT_VPID_2MB __BIT(16) 216 #define IA32_VMX_EPT_VPID_1GB __BIT(17) 217 #define IA32_VMX_EPT_VPID_INVEPT __BIT(20) 218 #define IA32_VMX_EPT_VPID_FLAGS_AD __BIT(21) 219 #define IA32_VMX_EPT_VPID_ADVANCED_VMEXIT_INFO __BIT(22) 220 #define IA32_VMX_EPT_VPID_SHSTK __BIT(23) 221 #define IA32_VMX_EPT_VPID_INVEPT_CONTEXT __BIT(25) 222 #define IA32_VMX_EPT_VPID_INVEPT_ALL __BIT(26) 223 #define IA32_VMX_EPT_VPID_INVVPID __BIT(32) 224 #define IA32_VMX_EPT_VPID_INVVPID_ADDR __BIT(40) 225 #define IA32_VMX_EPT_VPID_INVVPID_CONTEXT __BIT(41) 226 #define IA32_VMX_EPT_VPID_INVVPID_ALL __BIT(42) 227 #define IA32_VMX_EPT_VPID_INVVPID_CONTEXT_NOG __BIT(43) 228 229 /* -------------------------------------------------------------------------- */ 230 231 /* 16-bit control fields */ 232 #define VMCS_VPID 0x00000000 233 #define VMCS_PIR_VECTOR 0x00000002 234 #define VMCS_EPTP_INDEX 0x00000004 235 /* 16-bit guest-state fields */ 236 #define VMCS_GUEST_ES_SELECTOR 0x00000800 237 #define VMCS_GUEST_CS_SELECTOR 0x00000802 238 #define VMCS_GUEST_SS_SELECTOR 0x00000804 239 #define VMCS_GUEST_DS_SELECTOR 0x00000806 240 #define VMCS_GUEST_FS_SELECTOR 0x00000808 241 #define VMCS_GUEST_GS_SELECTOR 0x0000080A 242 #define VMCS_GUEST_LDTR_SELECTOR 0x0000080C 243 #define VMCS_GUEST_TR_SELECTOR 0x0000080E 244 #define VMCS_GUEST_INTR_STATUS 0x00000810 245 #define VMCS_PML_INDEX 0x00000812 246 /* 16-bit host-state fields */ 247 #define VMCS_HOST_ES_SELECTOR 0x00000C00 248 #define VMCS_HOST_CS_SELECTOR 0x00000C02 249 #define VMCS_HOST_SS_SELECTOR 0x00000C04 250 #define VMCS_HOST_DS_SELECTOR 0x00000C06 251 #define VMCS_HOST_FS_SELECTOR 0x00000C08 252 #define VMCS_HOST_GS_SELECTOR 0x00000C0A 253 #define VMCS_HOST_TR_SELECTOR 0x00000C0C 254 /* 64-bit control fields */ 255 #define VMCS_IO_BITMAP_A 0x00002000 256 #define VMCS_IO_BITMAP_B 0x00002002 257 #define VMCS_MSR_BITMAP 0x00002004 258 #define VMCS_EXIT_MSR_STORE_ADDRESS 0x00002006 259 #define VMCS_EXIT_MSR_LOAD_ADDRESS 0x00002008 260 #define VMCS_ENTRY_MSR_LOAD_ADDRESS 0x0000200A 261 #define VMCS_EXECUTIVE_VMCS 0x0000200C 262 #define VMCS_PML_ADDRESS 0x0000200E 263 #define VMCS_TSC_OFFSET 0x00002010 264 #define VMCS_VIRTUAL_APIC 0x00002012 265 #define VMCS_APIC_ACCESS 0x00002014 266 #define VMCS_PIR_DESC 0x00002016 267 #define VMCS_VM_CONTROL 0x00002018 268 #define VMCS_EPTP 0x0000201A 269 #define EPTP_TYPE __BITS(2,0) 270 #define EPTP_TYPE_UC 0 271 #define EPTP_TYPE_WB 6 272 #define EPTP_WALKLEN __BITS(5,3) 273 #define EPTP_FLAGS_AD __BIT(6) 274 #define EPTP_SSS __BIT(7) 275 #define EPTP_PHYSADDR __BITS(63,12) 276 #define VMCS_EOI_EXIT0 0x0000201C 277 #define VMCS_EOI_EXIT1 0x0000201E 278 #define VMCS_EOI_EXIT2 0x00002020 279 #define VMCS_EOI_EXIT3 0x00002022 280 #define VMCS_EPTP_LIST 0x00002024 281 #define VMCS_VMREAD_BITMAP 0x00002026 282 #define VMCS_VMWRITE_BITMAP 0x00002028 283 #define VMCS_VIRTUAL_EXCEPTION 0x0000202A 284 #define VMCS_XSS_EXIT_BITMAP 0x0000202C 285 #define VMCS_ENCLS_EXIT_BITMAP 0x0000202E 286 #define VMCS_SUBPAGE_PERM_TABLE_PTR 0x00002030 287 #define VMCS_TSC_MULTIPLIER 0x00002032 288 #define VMCS_ENCLV_EXIT_BITMAP 0x00002036 289 /* 64-bit read-only fields */ 290 #define VMCS_GUEST_PHYSICAL_ADDRESS 0x00002400 291 /* 64-bit guest-state fields */ 292 #define VMCS_LINK_POINTER 0x00002800 293 #define VMCS_GUEST_IA32_DEBUGCTL 0x00002802 294 #define VMCS_GUEST_IA32_PAT 0x00002804 295 #define VMCS_GUEST_IA32_EFER 0x00002806 296 #define VMCS_GUEST_IA32_PERF_GLOBAL_CTRL 0x00002808 297 #define VMCS_GUEST_PDPTE0 0x0000280A 298 #define VMCS_GUEST_PDPTE1 0x0000280C 299 #define VMCS_GUEST_PDPTE2 0x0000280E 300 #define VMCS_GUEST_PDPTE3 0x00002810 301 #define VMCS_GUEST_BNDCFGS 0x00002812 302 #define VMCS_GUEST_RTIT_CTL 0x00002814 303 #define VMCS_GUEST_PKRS 0x00002818 304 /* 64-bit host-state fields */ 305 #define VMCS_HOST_IA32_PAT 0x00002C00 306 #define VMCS_HOST_IA32_EFER 0x00002C02 307 #define VMCS_HOST_IA32_PERF_GLOBAL_CTRL 0x00002C04 308 #define VMCS_HOST_IA32_PKRS 0x00002C06 309 /* 32-bit control fields */ 310 #define VMCS_PINBASED_CTLS 0x00004000 311 #define PIN_CTLS_INT_EXITING __BIT(0) 312 #define PIN_CTLS_NMI_EXITING __BIT(3) 313 #define PIN_CTLS_VIRTUAL_NMIS __BIT(5) 314 #define PIN_CTLS_ACTIVATE_PREEMPT_TIMER __BIT(6) 315 #define PIN_CTLS_PROCESS_POSTED_INTS __BIT(7) 316 #define VMCS_PROCBASED_CTLS 0x00004002 317 #define PROC_CTLS_INT_WINDOW_EXITING __BIT(2) 318 #define PROC_CTLS_USE_TSC_OFFSETTING __BIT(3) 319 #define PROC_CTLS_HLT_EXITING __BIT(7) 320 #define PROC_CTLS_INVLPG_EXITING __BIT(9) 321 #define PROC_CTLS_MWAIT_EXITING __BIT(10) 322 #define PROC_CTLS_RDPMC_EXITING __BIT(11) 323 #define PROC_CTLS_RDTSC_EXITING __BIT(12) 324 #define PROC_CTLS_RCR3_EXITING __BIT(15) 325 #define PROC_CTLS_LCR3_EXITING __BIT(16) 326 #define PROC_CTLS_RCR8_EXITING __BIT(19) 327 #define PROC_CTLS_LCR8_EXITING __BIT(20) 328 #define PROC_CTLS_USE_TPR_SHADOW __BIT(21) 329 #define PROC_CTLS_NMI_WINDOW_EXITING __BIT(22) 330 #define PROC_CTLS_DR_EXITING __BIT(23) 331 #define PROC_CTLS_UNCOND_IO_EXITING __BIT(24) 332 #define PROC_CTLS_USE_IO_BITMAPS __BIT(25) 333 #define PROC_CTLS_MONITOR_TRAP_FLAG __BIT(27) 334 #define PROC_CTLS_USE_MSR_BITMAPS __BIT(28) 335 #define PROC_CTLS_MONITOR_EXITING __BIT(29) 336 #define PROC_CTLS_PAUSE_EXITING __BIT(30) 337 #define PROC_CTLS_ACTIVATE_CTLS2 __BIT(31) 338 #define VMCS_EXCEPTION_BITMAP 0x00004004 339 #define VMCS_PF_ERROR_MASK 0x00004006 340 #define VMCS_PF_ERROR_MATCH 0x00004008 341 #define VMCS_CR3_TARGET_COUNT 0x0000400A 342 #define VMCS_EXIT_CTLS 0x0000400C 343 #define EXIT_CTLS_SAVE_DEBUG_CONTROLS __BIT(2) 344 #define EXIT_CTLS_HOST_LONG_MODE __BIT(9) 345 #define EXIT_CTLS_LOAD_PERFGLOBALCTRL __BIT(12) 346 #define EXIT_CTLS_ACK_INTERRUPT __BIT(15) 347 #define EXIT_CTLS_SAVE_PAT __BIT(18) 348 #define EXIT_CTLS_LOAD_PAT __BIT(19) 349 #define EXIT_CTLS_SAVE_EFER __BIT(20) 350 #define EXIT_CTLS_LOAD_EFER __BIT(21) 351 #define EXIT_CTLS_SAVE_PREEMPT_TIMER __BIT(22) 352 #define EXIT_CTLS_CLEAR_BNDCFGS __BIT(23) 353 #define EXIT_CTLS_CONCEAL_PT __BIT(24) 354 #define EXIT_CTLS_CLEAR_RTIT_CTL __BIT(25) 355 #define EXIT_CTLS_LOAD_CET __BIT(28) 356 #define EXIT_CTLS_LOAD_PKRS __BIT(29) 357 #define VMCS_EXIT_MSR_STORE_COUNT 0x0000400E 358 #define VMCS_EXIT_MSR_LOAD_COUNT 0x00004010 359 #define VMCS_ENTRY_CTLS 0x00004012 360 #define ENTRY_CTLS_LOAD_DEBUG_CONTROLS __BIT(2) 361 #define ENTRY_CTLS_LONG_MODE __BIT(9) 362 #define ENTRY_CTLS_SMM __BIT(10) 363 #define ENTRY_CTLS_DISABLE_DUAL __BIT(11) 364 #define ENTRY_CTLS_LOAD_PERFGLOBALCTRL __BIT(13) 365 #define ENTRY_CTLS_LOAD_PAT __BIT(14) 366 #define ENTRY_CTLS_LOAD_EFER __BIT(15) 367 #define ENTRY_CTLS_LOAD_BNDCFGS __BIT(16) 368 #define ENTRY_CTLS_CONCEAL_PT __BIT(17) 369 #define ENTRY_CTLS_LOAD_RTIT_CTL __BIT(18) 370 #define ENTRY_CTLS_LOAD_CET __BIT(20) 371 #define ENTRY_CTLS_LOAD_PKRS __BIT(22) 372 #define VMCS_ENTRY_MSR_LOAD_COUNT 0x00004014 373 #define VMCS_ENTRY_INTR_INFO 0x00004016 374 #define INTR_INFO_VECTOR __BITS(7,0) 375 #define INTR_INFO_TYPE __BITS(10,8) 376 #define INTR_TYPE_EXT_INT 0 377 #define INTR_TYPE_NMI 2 378 #define INTR_TYPE_HW_EXC 3 379 #define INTR_TYPE_SW_INT 4 380 #define INTR_TYPE_PRIV_SW_EXC 5 381 #define INTR_TYPE_SW_EXC 6 382 #define INTR_TYPE_OTHER 7 383 #define INTR_INFO_ERROR __BIT(11) 384 #define INTR_INFO_VALID __BIT(31) 385 #define VMCS_ENTRY_EXCEPTION_ERROR 0x00004018 386 #define VMCS_ENTRY_INSTRUCTION_LENGTH 0x0000401A 387 #define VMCS_TPR_THRESHOLD 0x0000401C 388 #define VMCS_PROCBASED_CTLS2 0x0000401E 389 #define PROC_CTLS2_VIRT_APIC_ACCESSES __BIT(0) 390 #define PROC_CTLS2_ENABLE_EPT __BIT(1) 391 #define PROC_CTLS2_DESC_TABLE_EXITING __BIT(2) 392 #define PROC_CTLS2_ENABLE_RDTSCP __BIT(3) 393 #define PROC_CTLS2_VIRT_X2APIC __BIT(4) 394 #define PROC_CTLS2_ENABLE_VPID __BIT(5) 395 #define PROC_CTLS2_WBINVD_EXITING __BIT(6) 396 #define PROC_CTLS2_UNRESTRICTED_GUEST __BIT(7) 397 #define PROC_CTLS2_APIC_REG_VIRT __BIT(8) 398 #define PROC_CTLS2_VIRT_INT_DELIVERY __BIT(9) 399 #define PROC_CTLS2_PAUSE_LOOP_EXITING __BIT(10) 400 #define PROC_CTLS2_RDRAND_EXITING __BIT(11) 401 #define PROC_CTLS2_INVPCID_ENABLE __BIT(12) 402 #define PROC_CTLS2_VMFUNC_ENABLE __BIT(13) 403 #define PROC_CTLS2_VMCS_SHADOWING __BIT(14) 404 #define PROC_CTLS2_ENCLS_EXITING __BIT(15) 405 #define PROC_CTLS2_RDSEED_EXITING __BIT(16) 406 #define PROC_CTLS2_PML_ENABLE __BIT(17) 407 #define PROC_CTLS2_EPT_VIOLATION __BIT(18) 408 #define PROC_CTLS2_CONCEAL_VMX_FROM_PT __BIT(19) 409 #define PROC_CTLS2_XSAVES_ENABLE __BIT(20) 410 #define PROC_CTLS2_MODE_BASED_EXEC_EPT __BIT(22) 411 #define PROC_CTLS2_SUBPAGE_PERMISSIONS __BIT(23) 412 #define PROC_CTLS2_PT_USES_GPA __BIT(24) 413 #define PROC_CTLS2_USE_TSC_SCALING __BIT(25) 414 #define PROC_CTLS2_WAIT_PAUSE_ENABLE __BIT(26) 415 #define PROC_CTLS2_ENCLV_EXITING __BIT(28) 416 #define VMCS_PLE_GAP 0x00004020 417 #define VMCS_PLE_WINDOW 0x00004022 418 /* 32-bit read-only data fields */ 419 #define VMCS_INSTRUCTION_ERROR 0x00004400 420 #define VMCS_EXIT_REASON 0x00004402 421 #define VMCS_EXIT_INTR_INFO 0x00004404 422 #define VMCS_EXIT_INTR_ERRCODE 0x00004406 423 #define VMCS_IDT_VECTORING_INFO 0x00004408 424 #define VMCS_IDT_VECTORING_ERROR 0x0000440A 425 #define VMCS_EXIT_INSTRUCTION_LENGTH 0x0000440C 426 #define VMCS_EXIT_INSTRUCTION_INFO 0x0000440E 427 /* 32-bit guest-state fields */ 428 #define VMCS_GUEST_ES_LIMIT 0x00004800 429 #define VMCS_GUEST_CS_LIMIT 0x00004802 430 #define VMCS_GUEST_SS_LIMIT 0x00004804 431 #define VMCS_GUEST_DS_LIMIT 0x00004806 432 #define VMCS_GUEST_FS_LIMIT 0x00004808 433 #define VMCS_GUEST_GS_LIMIT 0x0000480A 434 #define VMCS_GUEST_LDTR_LIMIT 0x0000480C 435 #define VMCS_GUEST_TR_LIMIT 0x0000480E 436 #define VMCS_GUEST_GDTR_LIMIT 0x00004810 437 #define VMCS_GUEST_IDTR_LIMIT 0x00004812 438 #define VMCS_GUEST_ES_ACCESS_RIGHTS 0x00004814 439 #define VMCS_GUEST_CS_ACCESS_RIGHTS 0x00004816 440 #define VMCS_GUEST_SS_ACCESS_RIGHTS 0x00004818 441 #define VMCS_GUEST_DS_ACCESS_RIGHTS 0x0000481A 442 #define VMCS_GUEST_FS_ACCESS_RIGHTS 0x0000481C 443 #define VMCS_GUEST_GS_ACCESS_RIGHTS 0x0000481E 444 #define VMCS_GUEST_LDTR_ACCESS_RIGHTS 0x00004820 445 #define VMCS_GUEST_TR_ACCESS_RIGHTS 0x00004822 446 #define VMCS_GUEST_INTERRUPTIBILITY 0x00004824 447 #define INT_STATE_STI __BIT(0) 448 #define INT_STATE_MOVSS __BIT(1) 449 #define INT_STATE_SMI __BIT(2) 450 #define INT_STATE_NMI __BIT(3) 451 #define INT_STATE_ENCLAVE __BIT(4) 452 #define VMCS_GUEST_ACTIVITY 0x00004826 453 #define VMCS_GUEST_SMBASE 0x00004828 454 #define VMCS_GUEST_IA32_SYSENTER_CS 0x0000482A 455 #define VMCS_PREEMPTION_TIMER_VALUE 0x0000482E 456 /* 32-bit host state fields */ 457 #define VMCS_HOST_IA32_SYSENTER_CS 0x00004C00 458 /* Natural-Width control fields */ 459 #define VMCS_CR0_MASK 0x00006000 460 #define VMCS_CR4_MASK 0x00006002 461 #define VMCS_CR0_SHADOW 0x00006004 462 #define VMCS_CR4_SHADOW 0x00006006 463 #define VMCS_CR3_TARGET0 0x00006008 464 #define VMCS_CR3_TARGET1 0x0000600A 465 #define VMCS_CR3_TARGET2 0x0000600C 466 #define VMCS_CR3_TARGET3 0x0000600E 467 /* Natural-Width read-only fields */ 468 #define VMCS_EXIT_QUALIFICATION 0x00006400 469 #define VMCS_IO_RCX 0x00006402 470 #define VMCS_IO_RSI 0x00006404 471 #define VMCS_IO_RDI 0x00006406 472 #define VMCS_IO_RIP 0x00006408 473 #define VMCS_GUEST_LINEAR_ADDRESS 0x0000640A 474 /* Natural-Width guest-state fields */ 475 #define VMCS_GUEST_CR0 0x00006800 476 #define VMCS_GUEST_CR3 0x00006802 477 #define VMCS_GUEST_CR4 0x00006804 478 #define VMCS_GUEST_ES_BASE 0x00006806 479 #define VMCS_GUEST_CS_BASE 0x00006808 480 #define VMCS_GUEST_SS_BASE 0x0000680A 481 #define VMCS_GUEST_DS_BASE 0x0000680C 482 #define VMCS_GUEST_FS_BASE 0x0000680E 483 #define VMCS_GUEST_GS_BASE 0x00006810 484 #define VMCS_GUEST_LDTR_BASE 0x00006812 485 #define VMCS_GUEST_TR_BASE 0x00006814 486 #define VMCS_GUEST_GDTR_BASE 0x00006816 487 #define VMCS_GUEST_IDTR_BASE 0x00006818 488 #define VMCS_GUEST_DR7 0x0000681A 489 #define VMCS_GUEST_RSP 0x0000681C 490 #define VMCS_GUEST_RIP 0x0000681E 491 #define VMCS_GUEST_RFLAGS 0x00006820 492 #define VMCS_GUEST_PENDING_DBG_EXCEPTIONS 0x00006822 493 #define VMCS_GUEST_IA32_SYSENTER_ESP 0x00006824 494 #define VMCS_GUEST_IA32_SYSENTER_EIP 0x00006826 495 #define VMCS_GUEST_IA32_S_CET 0x00006828 496 #define VMCS_GUEST_SSP 0x0000682A 497 #define VMCS_GUEST_IA32_INTR_SSP_TABLE 0x0000682C 498 /* Natural-Width host-state fields */ 499 #define VMCS_HOST_CR0 0x00006C00 500 #define VMCS_HOST_CR3 0x00006C02 501 #define VMCS_HOST_CR4 0x00006C04 502 #define VMCS_HOST_FS_BASE 0x00006C06 503 #define VMCS_HOST_GS_BASE 0x00006C08 504 #define VMCS_HOST_TR_BASE 0x00006C0A 505 #define VMCS_HOST_GDTR_BASE 0x00006C0C 506 #define VMCS_HOST_IDTR_BASE 0x00006C0E 507 #define VMCS_HOST_IA32_SYSENTER_ESP 0x00006C10 508 #define VMCS_HOST_IA32_SYSENTER_EIP 0x00006C12 509 #define VMCS_HOST_RSP 0x00006C14 510 #define VMCS_HOST_RIP 0x00006C16 511 #define VMCS_HOST_IA32_S_CET 0x00006C18 512 #define VMCS_HOST_SSP 0x00006C1A 513 #define VMCS_HOST_IA32_INTR_SSP_TABLE 0x00006C1C 514 515 /* VMX basic exit reasons. */ 516 #define VMCS_EXITCODE_EXC_NMI 0 517 #define VMCS_EXITCODE_EXT_INT 1 518 #define VMCS_EXITCODE_SHUTDOWN 2 519 #define VMCS_EXITCODE_INIT 3 520 #define VMCS_EXITCODE_SIPI 4 521 #define VMCS_EXITCODE_SMI 5 522 #define VMCS_EXITCODE_OTHER_SMI 6 523 #define VMCS_EXITCODE_INT_WINDOW 7 524 #define VMCS_EXITCODE_NMI_WINDOW 8 525 #define VMCS_EXITCODE_TASK_SWITCH 9 526 #define VMCS_EXITCODE_CPUID 10 527 #define VMCS_EXITCODE_GETSEC 11 528 #define VMCS_EXITCODE_HLT 12 529 #define VMCS_EXITCODE_INVD 13 530 #define VMCS_EXITCODE_INVLPG 14 531 #define VMCS_EXITCODE_RDPMC 15 532 #define VMCS_EXITCODE_RDTSC 16 533 #define VMCS_EXITCODE_RSM 17 534 #define VMCS_EXITCODE_VMCALL 18 535 #define VMCS_EXITCODE_VMCLEAR 19 536 #define VMCS_EXITCODE_VMLAUNCH 20 537 #define VMCS_EXITCODE_VMPTRLD 21 538 #define VMCS_EXITCODE_VMPTRST 22 539 #define VMCS_EXITCODE_VMREAD 23 540 #define VMCS_EXITCODE_VMRESUME 24 541 #define VMCS_EXITCODE_VMWRITE 25 542 #define VMCS_EXITCODE_VMXOFF 26 543 #define VMCS_EXITCODE_VMXON 27 544 #define VMCS_EXITCODE_CR 28 545 #define VMCS_EXITCODE_DR 29 546 #define VMCS_EXITCODE_IO 30 547 #define VMCS_EXITCODE_RDMSR 31 548 #define VMCS_EXITCODE_WRMSR 32 549 #define VMCS_EXITCODE_FAIL_GUEST_INVALID 33 550 #define VMCS_EXITCODE_FAIL_MSR_INVALID 34 551 #define VMCS_EXITCODE_MWAIT 36 552 #define VMCS_EXITCODE_TRAP_FLAG 37 553 #define VMCS_EXITCODE_MONITOR 39 554 #define VMCS_EXITCODE_PAUSE 40 555 #define VMCS_EXITCODE_FAIL_MACHINE_CHECK 41 556 #define VMCS_EXITCODE_TPR_BELOW 43 557 #define VMCS_EXITCODE_APIC_ACCESS 44 558 #define VMCS_EXITCODE_VEOI 45 559 #define VMCS_EXITCODE_GDTR_IDTR 46 560 #define VMCS_EXITCODE_LDTR_TR 47 561 #define VMCS_EXITCODE_EPT_VIOLATION 48 562 #define VMCS_EXITCODE_EPT_MISCONFIG 49 563 #define VMCS_EXITCODE_INVEPT 50 564 #define VMCS_EXITCODE_RDTSCP 51 565 #define VMCS_EXITCODE_PREEMPT_TIMEOUT 52 566 #define VMCS_EXITCODE_INVVPID 53 567 #define VMCS_EXITCODE_WBINVD 54 568 #define VMCS_EXITCODE_XSETBV 55 569 #define VMCS_EXITCODE_APIC_WRITE 56 570 #define VMCS_EXITCODE_RDRAND 57 571 #define VMCS_EXITCODE_INVPCID 58 572 #define VMCS_EXITCODE_VMFUNC 59 573 #define VMCS_EXITCODE_ENCLS 60 574 #define VMCS_EXITCODE_RDSEED 61 575 #define VMCS_EXITCODE_PAGE_LOG_FULL 62 576 #define VMCS_EXITCODE_XSAVES 63 577 #define VMCS_EXITCODE_XRSTORS 64 578 #define VMCS_EXITCODE_SPP 66 579 #define VMCS_EXITCODE_UMWAIT 67 580 #define VMCS_EXITCODE_TPAUSE 68 581 582 /* -------------------------------------------------------------------------- */ 583 584 static void vmx_vcpu_state_provide(struct nvmm_cpu *, uint64_t); 585 static void vmx_vcpu_state_commit(struct nvmm_cpu *); 586 587 #define VMX_MSRLIST_STAR 0 588 #define VMX_MSRLIST_LSTAR 1 589 #define VMX_MSRLIST_CSTAR 2 590 #define VMX_MSRLIST_SFMASK 3 591 #define VMX_MSRLIST_KERNELGSBASE 4 592 #define VMX_MSRLIST_EXIT_NMSR 5 593 #define VMX_MSRLIST_L1DFLUSH 5 594 595 /* On entry, we may do +1 to include L1DFLUSH. */ 596 static size_t vmx_msrlist_entry_nmsr __read_mostly = VMX_MSRLIST_EXIT_NMSR; 597 598 struct vmxon { 599 uint32_t ident; 600 #define VMXON_IDENT_REVISION __BITS(30,0) 601 602 uint8_t data[PAGE_SIZE - 4]; 603 } __packed; 604 605 CTASSERT(sizeof(struct vmxon) == PAGE_SIZE); 606 607 struct vmxoncpu { 608 vaddr_t va; 609 paddr_t pa; 610 }; 611 612 static struct vmxoncpu vmxoncpu[MAXCPUS]; 613 614 struct vmcs { 615 uint32_t ident; 616 #define VMCS_IDENT_REVISION __BITS(30,0) 617 #define VMCS_IDENT_SHADOW __BIT(31) 618 619 uint32_t abort; 620 uint8_t data[PAGE_SIZE - 8]; 621 } __packed; 622 623 CTASSERT(sizeof(struct vmcs) == PAGE_SIZE); 624 625 struct msr_entry { 626 uint32_t msr; 627 uint32_t rsvd; 628 uint64_t val; 629 } __packed; 630 631 #define VPID_MAX 0xFFFF 632 633 /* Make sure we never run out of VPIDs. */ 634 CTASSERT(VPID_MAX-1 >= NVMM_MAX_MACHINES * NVMM_MAX_VCPUS); 635 636 static uint64_t vmx_tlb_flush_op __read_mostly; 637 static uint64_t vmx_ept_flush_op __read_mostly; 638 static uint64_t vmx_eptp_type __read_mostly; 639 640 static uint64_t vmx_pinbased_ctls __read_mostly; 641 static uint64_t vmx_procbased_ctls __read_mostly; 642 static uint64_t vmx_procbased_ctls2 __read_mostly; 643 static uint64_t vmx_entry_ctls __read_mostly; 644 static uint64_t vmx_exit_ctls __read_mostly; 645 646 static uint64_t vmx_cr0_fixed0 __read_mostly; 647 static uint64_t vmx_cr0_fixed1 __read_mostly; 648 static uint64_t vmx_cr4_fixed0 __read_mostly; 649 static uint64_t vmx_cr4_fixed1 __read_mostly; 650 651 extern bool pmap_ept_has_ad; 652 653 #define VMX_PINBASED_CTLS_ONE \ 654 (PIN_CTLS_INT_EXITING| \ 655 PIN_CTLS_NMI_EXITING| \ 656 PIN_CTLS_VIRTUAL_NMIS) 657 658 #define VMX_PINBASED_CTLS_ZERO 0 659 660 #define VMX_PROCBASED_CTLS_ONE \ 661 (PROC_CTLS_USE_TSC_OFFSETTING| \ 662 PROC_CTLS_HLT_EXITING| \ 663 PROC_CTLS_MWAIT_EXITING | \ 664 PROC_CTLS_RDPMC_EXITING | \ 665 PROC_CTLS_RCR8_EXITING | \ 666 PROC_CTLS_LCR8_EXITING | \ 667 PROC_CTLS_UNCOND_IO_EXITING | /* no I/O bitmap */ \ 668 PROC_CTLS_USE_MSR_BITMAPS | \ 669 PROC_CTLS_MONITOR_EXITING | \ 670 PROC_CTLS_ACTIVATE_CTLS2) 671 672 #define VMX_PROCBASED_CTLS_ZERO \ 673 (PROC_CTLS_RCR3_EXITING| \ 674 PROC_CTLS_LCR3_EXITING) 675 676 #define VMX_PROCBASED_CTLS2_ONE \ 677 (PROC_CTLS2_ENABLE_EPT| \ 678 PROC_CTLS2_ENABLE_VPID| \ 679 PROC_CTLS2_UNRESTRICTED_GUEST) 680 681 #define VMX_PROCBASED_CTLS2_ZERO 0 682 683 #define VMX_ENTRY_CTLS_ONE \ 684 (ENTRY_CTLS_LOAD_DEBUG_CONTROLS| \ 685 ENTRY_CTLS_LOAD_EFER| \ 686 ENTRY_CTLS_LOAD_PAT) 687 688 #define VMX_ENTRY_CTLS_ZERO \ 689 (ENTRY_CTLS_SMM| \ 690 ENTRY_CTLS_DISABLE_DUAL) 691 692 #define VMX_EXIT_CTLS_ONE \ 693 (EXIT_CTLS_SAVE_DEBUG_CONTROLS| \ 694 EXIT_CTLS_HOST_LONG_MODE| \ 695 EXIT_CTLS_SAVE_PAT| \ 696 EXIT_CTLS_LOAD_PAT| \ 697 EXIT_CTLS_SAVE_EFER| \ 698 EXIT_CTLS_LOAD_EFER) 699 700 #define VMX_EXIT_CTLS_ZERO 0 701 702 static uint8_t *vmx_asidmap __read_mostly; 703 static uint32_t vmx_maxasid __read_mostly; 704 static kmutex_t vmx_asidlock __cacheline_aligned; 705 706 #define VMX_XCR0_MASK_DEFAULT (XCR0_X87|XCR0_SSE) 707 static uint64_t vmx_xcr0_mask __read_mostly; 708 709 #define VMX_NCPUIDS 32 710 711 #define VMCS_NPAGES 1 712 #define VMCS_SIZE (VMCS_NPAGES * PAGE_SIZE) 713 714 #define MSRBM_NPAGES 1 715 #define MSRBM_SIZE (MSRBM_NPAGES * PAGE_SIZE) 716 717 #define CR4_VALID \ 718 (CR4_VME | \ 719 CR4_PVI | \ 720 CR4_TSD | \ 721 CR4_DE | \ 722 CR4_PSE | \ 723 CR4_PAE | \ 724 CR4_MCE | \ 725 CR4_PGE | \ 726 CR4_PCE | \ 727 CR4_OSFXSR | \ 728 CR4_OSXMMEXCPT | \ 729 CR4_UMIP | \ 730 /* CR4_LA57 excluded */ \ 731 /* CR4_VMXE excluded */ \ 732 /* CR4_SMXE excluded */ \ 733 CR4_FSGSBASE | \ 734 CR4_PCIDE | \ 735 CR4_OSXSAVE | \ 736 CR4_SMEP | \ 737 CR4_SMAP \ 738 /* CR4_PKE excluded */ \ 739 /* CR4_CET excluded */ \ 740 /* CR4_PKS excluded */) 741 #define CR4_INVALID \ 742 (0xFFFFFFFFFFFFFFFFULL & ~CR4_VALID) 743 744 #define EFER_TLB_FLUSH \ 745 (EFER_NXE|EFER_LMA|EFER_LME) 746 #define CR0_TLB_FLUSH \ 747 (CR0_PG|CR0_WP|CR0_CD|CR0_NW) 748 #define CR4_TLB_FLUSH \ 749 (CR4_PSE|CR4_PAE|CR4_PGE|CR4_PCIDE|CR4_SMEP) 750 751 /* -------------------------------------------------------------------------- */ 752 753 struct vmx_machdata { 754 volatile uint64_t mach_htlb_gen; 755 }; 756 757 static const size_t vmx_vcpu_conf_sizes[NVMM_X86_VCPU_NCONF] = { 758 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID)] = 759 sizeof(struct nvmm_vcpu_conf_cpuid), 760 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_TPR)] = 761 sizeof(struct nvmm_vcpu_conf_tpr) 762 }; 763 764 struct vmx_cpudata { 765 /* General */ 766 uint64_t asid; 767 bool gtlb_want_flush; 768 bool gtsc_want_update; 769 uint64_t vcpu_htlb_gen; 770 cpumask_t htlb_want_flush; 771 772 /* VMCS */ 773 struct vmcs *vmcs; 774 paddr_t vmcs_pa; 775 size_t vmcs_refcnt; 776 struct globaldata *vmcs_ci; /* struct cpu_info in NetBSD */ 777 bool vmcs_launched; 778 779 /* MSR bitmap */ 780 uint8_t *msrbm; 781 paddr_t msrbm_pa; 782 783 /* Host state */ 784 uint64_t hxcr0; 785 uint64_t star; 786 uint64_t lstar; 787 uint64_t cstar; 788 uint64_t sfmask; 789 uint64_t kernelgsbase; 790 bool ts_set; 791 mcontext_t hmctx; 792 793 /* Intr state */ 794 bool int_window_exit; 795 bool nmi_window_exit; 796 bool evt_pending; 797 798 /* Guest state */ 799 struct msr_entry *gmsr; 800 paddr_t gmsr_pa; 801 uint64_t gmsr_misc_enable; 802 uint64_t gcr2; 803 uint64_t gcr8; 804 uint64_t gxcr0; 805 uint64_t gprs[NVMM_X64_NGPR]; 806 uint64_t drs[NVMM_X64_NDR]; 807 uint64_t gtsc; 808 union savefpu gfpu __aligned(64); 809 810 /* VCPU configuration. */ 811 bool cpuidpresent[VMX_NCPUIDS]; 812 struct nvmm_vcpu_conf_cpuid cpuid[VMX_NCPUIDS]; 813 struct nvmm_vcpu_conf_tpr tpr; 814 }; 815 816 static const struct { 817 uint64_t selector; 818 uint64_t attrib; 819 uint64_t limit; 820 uint64_t base; 821 } vmx_guest_segs[NVMM_X64_NSEG] = { 822 [NVMM_X64_SEG_ES] = { 823 VMCS_GUEST_ES_SELECTOR, 824 VMCS_GUEST_ES_ACCESS_RIGHTS, 825 VMCS_GUEST_ES_LIMIT, 826 VMCS_GUEST_ES_BASE 827 }, 828 [NVMM_X64_SEG_CS] = { 829 VMCS_GUEST_CS_SELECTOR, 830 VMCS_GUEST_CS_ACCESS_RIGHTS, 831 VMCS_GUEST_CS_LIMIT, 832 VMCS_GUEST_CS_BASE 833 }, 834 [NVMM_X64_SEG_SS] = { 835 VMCS_GUEST_SS_SELECTOR, 836 VMCS_GUEST_SS_ACCESS_RIGHTS, 837 VMCS_GUEST_SS_LIMIT, 838 VMCS_GUEST_SS_BASE 839 }, 840 [NVMM_X64_SEG_DS] = { 841 VMCS_GUEST_DS_SELECTOR, 842 VMCS_GUEST_DS_ACCESS_RIGHTS, 843 VMCS_GUEST_DS_LIMIT, 844 VMCS_GUEST_DS_BASE 845 }, 846 [NVMM_X64_SEG_FS] = { 847 VMCS_GUEST_FS_SELECTOR, 848 VMCS_GUEST_FS_ACCESS_RIGHTS, 849 VMCS_GUEST_FS_LIMIT, 850 VMCS_GUEST_FS_BASE 851 }, 852 [NVMM_X64_SEG_GS] = { 853 VMCS_GUEST_GS_SELECTOR, 854 VMCS_GUEST_GS_ACCESS_RIGHTS, 855 VMCS_GUEST_GS_LIMIT, 856 VMCS_GUEST_GS_BASE 857 }, 858 [NVMM_X64_SEG_GDT] = { 859 0, /* doesn't exist */ 860 0, /* doesn't exist */ 861 VMCS_GUEST_GDTR_LIMIT, 862 VMCS_GUEST_GDTR_BASE 863 }, 864 [NVMM_X64_SEG_IDT] = { 865 0, /* doesn't exist */ 866 0, /* doesn't exist */ 867 VMCS_GUEST_IDTR_LIMIT, 868 VMCS_GUEST_IDTR_BASE 869 }, 870 [NVMM_X64_SEG_LDT] = { 871 VMCS_GUEST_LDTR_SELECTOR, 872 VMCS_GUEST_LDTR_ACCESS_RIGHTS, 873 VMCS_GUEST_LDTR_LIMIT, 874 VMCS_GUEST_LDTR_BASE 875 }, 876 [NVMM_X64_SEG_TR] = { 877 VMCS_GUEST_TR_SELECTOR, 878 VMCS_GUEST_TR_ACCESS_RIGHTS, 879 VMCS_GUEST_TR_LIMIT, 880 VMCS_GUEST_TR_BASE 881 } 882 }; 883 884 /* -------------------------------------------------------------------------- */ 885 886 static uint64_t 887 vmx_get_revision(void) 888 { 889 uint64_t msr; 890 891 msr = rdmsr(MSR_IA32_VMX_BASIC); 892 msr &= IA32_VMX_BASIC_IDENT; 893 894 return msr; 895 } 896 897 static void 898 vmx_vmclear_ipi(void *arg1) 899 { 900 paddr_t vmcs_pa = (paddr_t)arg1; 901 vmx_vmclear(&vmcs_pa); 902 } 903 904 static void 905 vmx_vmclear_remote(struct globaldata *ci, paddr_t vmcs_pa) 906 { 907 #ifdef __NetBSD__ 908 uint64_t xc; 909 int bound; 910 911 KASSERT(kpreempt_disabled()); 912 913 bound = curlwp_bind(); 914 kpreempt_enable(); 915 916 xc = xc_unicast(XC_HIGHPRI, vmx_vmclear_ipi, (void *)vmcs_pa, NULL, ci); 917 xc_wait(xc); 918 919 kpreempt_disable(); 920 curlwp_bindx(bound); 921 #endif /* __NetBSD__ */ 922 923 /* 924 * No need to bind the thread, because any normal kernel thread will 925 * not migrate to another CPU or be preempted (except by an interrupt 926 * thread). 927 */ 928 lwkt_send_ipiq(ci, vmx_vmclear_ipi, (void *)vmcs_pa); 929 /* XXX: need any cpu fence ?? */ 930 } 931 932 static void 933 vmx_vmcs_enter(struct nvmm_cpu *vcpu) 934 { 935 struct vmx_cpudata *cpudata = vcpu->cpudata; 936 struct globaldata *vmcs_ci; 937 938 cpudata->vmcs_refcnt++; 939 if (cpudata->vmcs_refcnt > 1) { 940 KASSERT(kpreempt_disabled()); 941 KASSERT(vmx_vmptrst() == cpudata->vmcs_pa); 942 return; 943 } 944 945 vmcs_ci = cpudata->vmcs_ci; 946 cpudata->vmcs_ci = (void *)0x00FFFFFFFFFFFFFF; /* clobber */ 947 948 kpreempt_disable(); 949 950 if (vmcs_ci == NULL) { 951 /* This VMCS is loaded for the first time. */ 952 vmx_vmclear(&cpudata->vmcs_pa); 953 cpudata->vmcs_launched = false; 954 } else if (vmcs_ci != mycpu) { 955 /* This VMCS is active on a remote CPU. */ 956 vmx_vmclear_remote(vmcs_ci, cpudata->vmcs_pa); 957 cpudata->vmcs_launched = false; 958 } else { 959 /* This VMCS is active on curcpu, nothing to do. */ 960 } 961 962 vmx_vmptrld(&cpudata->vmcs_pa); 963 } 964 965 static void 966 vmx_vmcs_leave(struct nvmm_cpu *vcpu) 967 { 968 struct vmx_cpudata *cpudata = vcpu->cpudata; 969 970 KASSERT(kpreempt_disabled()); 971 KASSERT(vmx_vmptrst() == cpudata->vmcs_pa); 972 KASSERT(cpudata->vmcs_refcnt > 0); 973 cpudata->vmcs_refcnt--; 974 975 if (cpudata->vmcs_refcnt > 0) { 976 return; 977 } 978 979 cpudata->vmcs_ci = mycpu; 980 kpreempt_enable(); 981 } 982 983 static void 984 vmx_vmcs_destroy(struct nvmm_cpu *vcpu) 985 { 986 struct vmx_cpudata *cpudata = vcpu->cpudata; 987 988 KASSERT(kpreempt_disabled()); 989 KASSERT(vmx_vmptrst() == cpudata->vmcs_pa); 990 KASSERT(cpudata->vmcs_refcnt == 1); 991 cpudata->vmcs_refcnt--; 992 993 vmx_vmclear(&cpudata->vmcs_pa); 994 kpreempt_enable(); 995 } 996 997 /* -------------------------------------------------------------------------- */ 998 999 static void 1000 vmx_event_waitexit_enable(struct nvmm_cpu *vcpu, bool nmi) 1001 { 1002 struct vmx_cpudata *cpudata = vcpu->cpudata; 1003 uint64_t ctls1; 1004 1005 ctls1 = vmx_vmread(VMCS_PROCBASED_CTLS); 1006 1007 if (nmi) { 1008 // XXX INT_STATE_NMI? 1009 ctls1 |= PROC_CTLS_NMI_WINDOW_EXITING; 1010 cpudata->nmi_window_exit = true; 1011 } else { 1012 ctls1 |= PROC_CTLS_INT_WINDOW_EXITING; 1013 cpudata->int_window_exit = true; 1014 } 1015 1016 vmx_vmwrite(VMCS_PROCBASED_CTLS, ctls1); 1017 } 1018 1019 static void 1020 vmx_event_waitexit_disable(struct nvmm_cpu *vcpu, bool nmi) 1021 { 1022 struct vmx_cpudata *cpudata = vcpu->cpudata; 1023 uint64_t ctls1; 1024 1025 ctls1 = vmx_vmread(VMCS_PROCBASED_CTLS); 1026 1027 if (nmi) { 1028 ctls1 &= ~PROC_CTLS_NMI_WINDOW_EXITING; 1029 cpudata->nmi_window_exit = false; 1030 } else { 1031 ctls1 &= ~PROC_CTLS_INT_WINDOW_EXITING; 1032 cpudata->int_window_exit = false; 1033 } 1034 1035 vmx_vmwrite(VMCS_PROCBASED_CTLS, ctls1); 1036 } 1037 1038 static inline bool 1039 vmx_excp_has_rf(uint8_t vector) 1040 { 1041 switch (vector) { 1042 case 1: /* #DB */ 1043 case 4: /* #OF */ 1044 case 8: /* #DF */ 1045 case 18: /* #MC */ 1046 return false; 1047 default: 1048 return true; 1049 } 1050 } 1051 1052 static inline int 1053 vmx_excp_has_error(uint8_t vector) 1054 { 1055 switch (vector) { 1056 case 8: /* #DF */ 1057 case 10: /* #TS */ 1058 case 11: /* #NP */ 1059 case 12: /* #SS */ 1060 case 13: /* #GP */ 1061 case 14: /* #PF */ 1062 case 17: /* #AC */ 1063 case 30: /* #SX */ 1064 return 1; 1065 default: 1066 return 0; 1067 } 1068 } 1069 1070 static int 1071 vmx_vcpu_inject(struct nvmm_cpu *vcpu) 1072 { 1073 struct nvmm_comm_page *comm = vcpu->comm; 1074 struct vmx_cpudata *cpudata = vcpu->cpudata; 1075 int type = 0, err = 0, ret = EINVAL; 1076 uint64_t rflags, info, error; 1077 u_int evtype; 1078 uint8_t vector; 1079 1080 evtype = comm->event.type; 1081 vector = comm->event.vector; 1082 error = comm->event.u.excp.error; 1083 __insn_barrier(); 1084 1085 vmx_vmcs_enter(vcpu); 1086 1087 switch (evtype) { 1088 case NVMM_VCPU_EVENT_EXCP: 1089 if (vector == 2 || vector >= 32) 1090 goto out; 1091 if (vector == 3 || vector == 0) 1092 goto out; 1093 if (vmx_excp_has_rf(vector)) { 1094 rflags = vmx_vmread(VMCS_GUEST_RFLAGS); 1095 vmx_vmwrite(VMCS_GUEST_RFLAGS, rflags | PSL_RF); 1096 } 1097 type = INTR_TYPE_HW_EXC; 1098 err = vmx_excp_has_error(vector); 1099 break; 1100 case NVMM_VCPU_EVENT_INTR: 1101 type = INTR_TYPE_EXT_INT; 1102 if (vector == 2) { 1103 type = INTR_TYPE_NMI; 1104 vmx_event_waitexit_enable(vcpu, true); 1105 } 1106 err = 0; 1107 break; 1108 default: 1109 goto out; 1110 } 1111 1112 info = 1113 __SHIFTIN((uint64_t)vector, INTR_INFO_VECTOR) | 1114 __SHIFTIN((uint64_t)type, INTR_INFO_TYPE) | 1115 __SHIFTIN((uint64_t)err, INTR_INFO_ERROR) | 1116 __SHIFTIN((uint64_t)1, INTR_INFO_VALID); 1117 vmx_vmwrite(VMCS_ENTRY_INTR_INFO, info); 1118 vmx_vmwrite(VMCS_ENTRY_EXCEPTION_ERROR, error); 1119 1120 cpudata->evt_pending = true; 1121 ret = 0; 1122 1123 out: 1124 vmx_vmcs_leave(vcpu); 1125 return ret; 1126 } 1127 1128 static void 1129 vmx_inject_ud(struct nvmm_cpu *vcpu) 1130 { 1131 struct nvmm_comm_page *comm = vcpu->comm; 1132 int ret __diagused; 1133 1134 comm->event.type = NVMM_VCPU_EVENT_EXCP; 1135 comm->event.vector = 6; 1136 comm->event.u.excp.error = 0; 1137 1138 ret = vmx_vcpu_inject(vcpu); 1139 KASSERT(ret == 0); 1140 } 1141 1142 static void 1143 vmx_inject_gp(struct nvmm_cpu *vcpu) 1144 { 1145 struct nvmm_comm_page *comm = vcpu->comm; 1146 int ret __diagused; 1147 1148 comm->event.type = NVMM_VCPU_EVENT_EXCP; 1149 comm->event.vector = 13; 1150 comm->event.u.excp.error = 0; 1151 1152 ret = vmx_vcpu_inject(vcpu); 1153 KASSERT(ret == 0); 1154 } 1155 1156 static inline int 1157 vmx_vcpu_event_commit(struct nvmm_cpu *vcpu) 1158 { 1159 if (__predict_true(!vcpu->comm->event_commit)) { 1160 return 0; 1161 } 1162 vcpu->comm->event_commit = false; 1163 return vmx_vcpu_inject(vcpu); 1164 } 1165 1166 static inline void 1167 vmx_inkernel_advance(void) 1168 { 1169 uint64_t rip, inslen, intstate, rflags; 1170 1171 /* 1172 * Maybe we should also apply single-stepping and debug exceptions. 1173 * Matters for guest-ring3, because it can execute 'cpuid' under a 1174 * debugger. 1175 */ 1176 1177 inslen = vmx_vmread(VMCS_EXIT_INSTRUCTION_LENGTH); 1178 rip = vmx_vmread(VMCS_GUEST_RIP); 1179 vmx_vmwrite(VMCS_GUEST_RIP, rip + inslen); 1180 1181 rflags = vmx_vmread(VMCS_GUEST_RFLAGS); 1182 vmx_vmwrite(VMCS_GUEST_RFLAGS, rflags & ~PSL_RF); 1183 1184 intstate = vmx_vmread(VMCS_GUEST_INTERRUPTIBILITY); 1185 vmx_vmwrite(VMCS_GUEST_INTERRUPTIBILITY, 1186 intstate & ~(INT_STATE_STI|INT_STATE_MOVSS)); 1187 } 1188 1189 static void 1190 vmx_exit_invalid(struct nvmm_vcpu_exit *exit, uint64_t code) 1191 { 1192 exit->u.inv.hwcode = code; 1193 exit->reason = NVMM_VCPU_EXIT_INVALID; 1194 } 1195 1196 static void 1197 vmx_exit_exc_nmi(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1198 struct nvmm_vcpu_exit *exit) 1199 { 1200 uint64_t qual; 1201 1202 qual = vmx_vmread(VMCS_EXIT_INTR_INFO); 1203 1204 if ((qual & INTR_INFO_VALID) == 0) { 1205 goto error; 1206 } 1207 if (__SHIFTOUT(qual, INTR_INFO_TYPE) != INTR_TYPE_NMI) { 1208 goto error; 1209 } 1210 1211 exit->reason = NVMM_VCPU_EXIT_NONE; 1212 return; 1213 1214 error: 1215 vmx_exit_invalid(exit, VMCS_EXITCODE_EXC_NMI); 1216 } 1217 1218 #define VMX_CPUID_MAX_BASIC 0x16 1219 #define VMX_CPUID_MAX_HYPERVISOR 0x40000000 1220 #define VMX_CPUID_MAX_EXTENDED 0x80000008 1221 static uint32_t vmx_cpuid_max_basic __read_mostly; 1222 static uint32_t vmx_cpuid_max_extended __read_mostly; 1223 1224 static void 1225 vmx_inkernel_exec_cpuid(struct vmx_cpudata *cpudata, uint64_t eax, uint64_t ecx) 1226 { 1227 u_int descs[4]; 1228 1229 x86_cpuid2(eax, ecx, descs); 1230 cpudata->gprs[NVMM_X64_GPR_RAX] = descs[0]; 1231 cpudata->gprs[NVMM_X64_GPR_RBX] = descs[1]; 1232 cpudata->gprs[NVMM_X64_GPR_RCX] = descs[2]; 1233 cpudata->gprs[NVMM_X64_GPR_RDX] = descs[3]; 1234 } 1235 1236 static void 1237 vmx_inkernel_handle_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1238 uint64_t eax, uint64_t ecx) 1239 { 1240 struct vmx_cpudata *cpudata = vcpu->cpudata; 1241 unsigned int ncpus; 1242 uint64_t cr4; 1243 1244 if (eax < 0x40000000) { 1245 if (__predict_false(eax > vmx_cpuid_max_basic)) { 1246 eax = vmx_cpuid_max_basic; 1247 vmx_inkernel_exec_cpuid(cpudata, eax, ecx); 1248 } 1249 } else if (eax < 0x80000000) { 1250 if (__predict_false(eax > VMX_CPUID_MAX_HYPERVISOR)) { 1251 eax = vmx_cpuid_max_basic; 1252 vmx_inkernel_exec_cpuid(cpudata, eax, ecx); 1253 } 1254 } else { 1255 if (__predict_false(eax > vmx_cpuid_max_extended)) { 1256 eax = vmx_cpuid_max_basic; 1257 vmx_inkernel_exec_cpuid(cpudata, eax, ecx); 1258 } 1259 } 1260 1261 switch (eax) { 1262 case 0x00000000: 1263 cpudata->gprs[NVMM_X64_GPR_RAX] = vmx_cpuid_max_basic; 1264 break; 1265 case 0x00000001: 1266 cpudata->gprs[NVMM_X64_GPR_RAX] &= nvmm_cpuid_00000001.eax; 1267 1268 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~CPUID_LOCAL_APIC_ID; 1269 cpudata->gprs[NVMM_X64_GPR_RBX] |= __SHIFTIN(vcpu->cpuid, 1270 CPUID_LOCAL_APIC_ID); 1271 1272 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000001.ecx; 1273 cpudata->gprs[NVMM_X64_GPR_RCX] |= CPUID2_RAZ; 1274 if (vmx_procbased_ctls2 & PROC_CTLS2_INVPCID_ENABLE) { 1275 cpudata->gprs[NVMM_X64_GPR_RCX] |= CPUID2_PCID; 1276 } 1277 1278 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000001.edx; 1279 1280 /* CPUID2_OSXSAVE depends on CR4. */ 1281 cr4 = vmx_vmread(VMCS_GUEST_CR4); 1282 if (!(cr4 & CR4_OSXSAVE)) { 1283 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~CPUID2_OSXSAVE; 1284 } 1285 break; 1286 case 0x00000002: 1287 break; 1288 case 0x00000003: 1289 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1290 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1291 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1292 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1293 break; 1294 case 0x00000004: /* Deterministic Cache Parameters */ 1295 break; /* TODO? */ 1296 case 0x00000005: /* MONITOR/MWAIT */ 1297 case 0x00000006: /* Thermal and Power Management */ 1298 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1299 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1300 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1301 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1302 break; 1303 case 0x00000007: /* Structured Extended Feature Flags Enumeration */ 1304 switch (ecx) { 1305 case 0: 1306 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1307 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_00000007.ebx; 1308 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000007.ecx; 1309 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000007.edx; 1310 if (vmx_procbased_ctls2 & PROC_CTLS2_INVPCID_ENABLE) { 1311 cpudata->gprs[NVMM_X64_GPR_RBX] |= CPUID_SEF_INVPCID; 1312 } 1313 break; 1314 default: 1315 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1316 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1317 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1318 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1319 break; 1320 } 1321 break; 1322 case 0x00000008: /* Empty */ 1323 case 0x00000009: /* Direct Cache Access Information */ 1324 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1325 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1326 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1327 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1328 break; 1329 case 0x0000000A: /* Architectural Performance Monitoring */ 1330 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1331 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1332 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1333 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1334 break; 1335 case 0x0000000B: /* Extended Topology Enumeration */ 1336 switch (ecx) { 1337 case 0: /* Threads */ 1338 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1339 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1340 cpudata->gprs[NVMM_X64_GPR_RCX] = 1341 __SHIFTIN(ecx, CPUID_TOP_LVLNUM) | 1342 __SHIFTIN(CPUID_TOP_LVLTYPE_SMT, CPUID_TOP_LVLTYPE); 1343 cpudata->gprs[NVMM_X64_GPR_RDX] = vcpu->cpuid; 1344 break; 1345 case 1: /* Cores */ 1346 ncpus = atomic_load_acq_int(&mach->ncpus); 1347 cpudata->gprs[NVMM_X64_GPR_RAX] = ilog2(ncpus); 1348 cpudata->gprs[NVMM_X64_GPR_RBX] = ncpus; 1349 cpudata->gprs[NVMM_X64_GPR_RCX] = 1350 __SHIFTIN(ecx, CPUID_TOP_LVLNUM) | 1351 __SHIFTIN(CPUID_TOP_LVLTYPE_CORE, CPUID_TOP_LVLTYPE); 1352 cpudata->gprs[NVMM_X64_GPR_RDX] = vcpu->cpuid; 1353 break; 1354 default: 1355 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1356 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1357 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; /* LVLTYPE_INVAL */ 1358 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1359 break; 1360 } 1361 break; 1362 case 0x0000000C: /* Empty */ 1363 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1364 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1365 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1366 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1367 break; 1368 case 0x0000000D: /* Processor Extended State Enumeration */ 1369 if (vmx_xcr0_mask == 0) { 1370 break; 1371 } 1372 switch (ecx) { 1373 case 0: 1374 cpudata->gprs[NVMM_X64_GPR_RAX] = vmx_xcr0_mask & 0xFFFFFFFF; 1375 if (cpudata->gxcr0 & XCR0_SSE) { 1376 cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct savexmm64); 1377 } else { 1378 cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct save87); 1379 } 1380 cpudata->gprs[NVMM_X64_GPR_RBX] += 64; /* XSAVE header */ 1381 cpudata->gprs[NVMM_X64_GPR_RCX] = sizeof(struct savexmm64) + 64; 1382 cpudata->gprs[NVMM_X64_GPR_RDX] = vmx_xcr0_mask >> 32; 1383 break; 1384 case 1: 1385 cpudata->gprs[NVMM_X64_GPR_RAX] &= 1386 (CPUID_PES1_XSAVEOPT | CPUID_PES1_XSAVEC | 1387 CPUID_PES1_XGETBV); 1388 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1389 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1390 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1391 break; 1392 default: 1393 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1394 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1395 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1396 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1397 break; 1398 } 1399 break; 1400 case 0x0000000E: /* Empty */ 1401 case 0x0000000F: /* Intel RDT Monitoring Enumeration */ 1402 case 0x00000010: /* Intel RDT Allocation Enumeration */ 1403 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1404 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1405 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1406 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1407 break; 1408 case 0x00000011: /* Empty */ 1409 case 0x00000012: /* Intel SGX Capability Enumeration */ 1410 case 0x00000013: /* Empty */ 1411 case 0x00000014: /* Intel Processor Trace Enumeration */ 1412 cpudata->gprs[NVMM_X64_GPR_RAX] = 0; 1413 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1414 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1415 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1416 break; 1417 case 0x00000015: /* TSC and Nominal Core Crystal Clock Information */ 1418 case 0x00000016: /* Processor Frequency Information */ 1419 break; 1420 1421 case 0x40000000: /* Hypervisor Information */ 1422 cpudata->gprs[NVMM_X64_GPR_RAX] = VMX_CPUID_MAX_HYPERVISOR; 1423 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1424 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1425 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1426 memcpy(&cpudata->gprs[NVMM_X64_GPR_RBX], "___ ", 4); 1427 memcpy(&cpudata->gprs[NVMM_X64_GPR_RCX], "NVMM", 4); 1428 memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4); 1429 break; 1430 1431 case 0x80000000: 1432 cpudata->gprs[NVMM_X64_GPR_RAX] = vmx_cpuid_max_extended; 1433 break; 1434 case 0x80000001: 1435 cpudata->gprs[NVMM_X64_GPR_RAX] &= nvmm_cpuid_80000001.eax; 1436 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000001.ebx; 1437 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000001.ecx; 1438 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000001.edx; 1439 break; 1440 case 0x80000002: /* Processor Brand String */ 1441 case 0x80000003: /* Processor Brand String */ 1442 case 0x80000004: /* Processor Brand String */ 1443 case 0x80000005: /* Reserved Zero */ 1444 case 0x80000006: /* Cache Information */ 1445 break; 1446 case 0x80000007: /* TSC Information */ 1447 cpudata->gprs[NVMM_X64_GPR_RAX] &= nvmm_cpuid_80000007.eax; 1448 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000007.ebx; 1449 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000007.ecx; 1450 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000007.edx; 1451 break; 1452 case 0x80000008: /* Address Sizes */ 1453 cpudata->gprs[NVMM_X64_GPR_RAX] &= nvmm_cpuid_80000008.eax; 1454 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000008.ebx; 1455 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000008.ecx; 1456 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000008.edx; 1457 break; 1458 1459 default: 1460 break; 1461 } 1462 } 1463 1464 static void 1465 vmx_exit_insn(struct nvmm_vcpu_exit *exit, uint64_t reason) 1466 { 1467 uint64_t inslen, rip; 1468 1469 inslen = vmx_vmread(VMCS_EXIT_INSTRUCTION_LENGTH); 1470 rip = vmx_vmread(VMCS_GUEST_RIP); 1471 exit->u.insn.npc = rip + inslen; 1472 exit->reason = reason; 1473 } 1474 1475 static void 1476 vmx_exit_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1477 struct nvmm_vcpu_exit *exit) 1478 { 1479 struct vmx_cpudata *cpudata = vcpu->cpudata; 1480 struct nvmm_vcpu_conf_cpuid *cpuid; 1481 uint64_t eax, ecx; 1482 size_t i; 1483 1484 eax = cpudata->gprs[NVMM_X64_GPR_RAX]; 1485 ecx = cpudata->gprs[NVMM_X64_GPR_RCX]; 1486 vmx_inkernel_exec_cpuid(cpudata, eax, ecx); 1487 vmx_inkernel_handle_cpuid(mach, vcpu, eax, ecx); 1488 1489 for (i = 0; i < VMX_NCPUIDS; i++) { 1490 if (!cpudata->cpuidpresent[i]) { 1491 continue; 1492 } 1493 cpuid = &cpudata->cpuid[i]; 1494 if (cpuid->leaf != eax) { 1495 continue; 1496 } 1497 1498 if (cpuid->exit) { 1499 vmx_exit_insn(exit, NVMM_VCPU_EXIT_CPUID); 1500 return; 1501 } 1502 KASSERT(cpuid->mask); 1503 1504 /* del */ 1505 cpudata->gprs[NVMM_X64_GPR_RAX] &= ~cpuid->u.mask.del.eax; 1506 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~cpuid->u.mask.del.ebx; 1507 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~cpuid->u.mask.del.ecx; 1508 cpudata->gprs[NVMM_X64_GPR_RDX] &= ~cpuid->u.mask.del.edx; 1509 1510 /* set */ 1511 cpudata->gprs[NVMM_X64_GPR_RAX] |= cpuid->u.mask.set.eax; 1512 cpudata->gprs[NVMM_X64_GPR_RBX] |= cpuid->u.mask.set.ebx; 1513 cpudata->gprs[NVMM_X64_GPR_RCX] |= cpuid->u.mask.set.ecx; 1514 cpudata->gprs[NVMM_X64_GPR_RDX] |= cpuid->u.mask.set.edx; 1515 1516 break; 1517 } 1518 1519 vmx_inkernel_advance(); 1520 exit->reason = NVMM_VCPU_EXIT_NONE; 1521 } 1522 1523 static void 1524 vmx_exit_hlt(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1525 struct nvmm_vcpu_exit *exit) 1526 { 1527 struct vmx_cpudata *cpudata = vcpu->cpudata; 1528 uint64_t rflags; 1529 1530 if (cpudata->int_window_exit) { 1531 rflags = vmx_vmread(VMCS_GUEST_RFLAGS); 1532 if (rflags & PSL_I) { 1533 vmx_event_waitexit_disable(vcpu, false); 1534 } 1535 } 1536 1537 vmx_inkernel_advance(); 1538 exit->reason = NVMM_VCPU_EXIT_HALTED; 1539 } 1540 1541 #define VMX_QUAL_CR_NUM __BITS(3,0) 1542 #define VMX_QUAL_CR_TYPE __BITS(5,4) 1543 #define CR_TYPE_WRITE 0 1544 #define CR_TYPE_READ 1 1545 #define CR_TYPE_CLTS 2 1546 #define CR_TYPE_LMSW 3 1547 #define VMX_QUAL_CR_LMSW_OPMEM __BIT(6) 1548 #define VMX_QUAL_CR_GPR __BITS(11,8) 1549 #define VMX_QUAL_CR_LMSW_SRC __BIT(31,16) 1550 1551 static inline int 1552 vmx_check_cr(uint64_t crval, uint64_t fixed0, uint64_t fixed1) 1553 { 1554 /* Bits set to 1 in fixed0 are fixed to 1. */ 1555 if ((crval & fixed0) != fixed0) { 1556 return -1; 1557 } 1558 /* Bits set to 0 in fixed1 are fixed to 0. */ 1559 if (crval & ~fixed1) { 1560 return -1; 1561 } 1562 return 0; 1563 } 1564 1565 static int 1566 vmx_inkernel_handle_cr0(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1567 uint64_t qual) 1568 { 1569 struct vmx_cpudata *cpudata = vcpu->cpudata; 1570 uint64_t type, gpr, cr0; 1571 uint64_t efer, ctls1; 1572 1573 type = __SHIFTOUT(qual, VMX_QUAL_CR_TYPE); 1574 if (type != CR_TYPE_WRITE) { 1575 return -1; 1576 } 1577 1578 gpr = __SHIFTOUT(qual, VMX_QUAL_CR_GPR); 1579 KASSERT(gpr < 16); 1580 1581 if (gpr == NVMM_X64_GPR_RSP) { 1582 gpr = vmx_vmread(VMCS_GUEST_RSP); 1583 } else { 1584 gpr = cpudata->gprs[gpr]; 1585 } 1586 1587 cr0 = gpr | CR0_NE | CR0_ET; 1588 cr0 &= ~(CR0_NW|CR0_CD); 1589 1590 if (vmx_check_cr(cr0, vmx_cr0_fixed0, vmx_cr0_fixed1) == -1) { 1591 return -1; 1592 } 1593 1594 /* 1595 * XXX Handle 32bit PAE paging, need to set PDPTEs, fetched manually 1596 * from CR3. 1597 */ 1598 1599 if (cr0 & CR0_PG) { 1600 ctls1 = vmx_vmread(VMCS_ENTRY_CTLS); 1601 efer = vmx_vmread(VMCS_GUEST_IA32_EFER); 1602 if (efer & EFER_LME) { 1603 ctls1 |= ENTRY_CTLS_LONG_MODE; 1604 efer |= EFER_LMA; 1605 } else { 1606 ctls1 &= ~ENTRY_CTLS_LONG_MODE; 1607 efer &= ~EFER_LMA; 1608 } 1609 vmx_vmwrite(VMCS_GUEST_IA32_EFER, efer); 1610 vmx_vmwrite(VMCS_ENTRY_CTLS, ctls1); 1611 } 1612 1613 vmx_vmwrite(VMCS_GUEST_CR0, cr0); 1614 vmx_inkernel_advance(); 1615 return 0; 1616 } 1617 1618 static int 1619 vmx_inkernel_handle_cr4(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1620 uint64_t qual) 1621 { 1622 struct vmx_cpudata *cpudata = vcpu->cpudata; 1623 uint64_t type, gpr, cr4; 1624 1625 type = __SHIFTOUT(qual, VMX_QUAL_CR_TYPE); 1626 if (type != CR_TYPE_WRITE) { 1627 return -1; 1628 } 1629 1630 gpr = __SHIFTOUT(qual, VMX_QUAL_CR_GPR); 1631 KASSERT(gpr < 16); 1632 1633 if (gpr == NVMM_X64_GPR_RSP) { 1634 gpr = vmx_vmread(VMCS_GUEST_RSP); 1635 } else { 1636 gpr = cpudata->gprs[gpr]; 1637 } 1638 1639 if (gpr & CR4_INVALID) { 1640 return -1; 1641 } 1642 cr4 = gpr | CR4_VMXE; 1643 if (vmx_check_cr(cr4, vmx_cr4_fixed0, vmx_cr4_fixed1) == -1) { 1644 return -1; 1645 } 1646 1647 if ((vmx_vmread(VMCS_GUEST_CR4) ^ cr4) & CR4_TLB_FLUSH) { 1648 cpudata->gtlb_want_flush = true; 1649 } 1650 1651 vmx_vmwrite(VMCS_GUEST_CR4, cr4); 1652 vmx_inkernel_advance(); 1653 return 0; 1654 } 1655 1656 static int 1657 vmx_inkernel_handle_cr8(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1658 uint64_t qual, struct nvmm_vcpu_exit *exit) 1659 { 1660 struct vmx_cpudata *cpudata = vcpu->cpudata; 1661 uint64_t type, gpr; 1662 bool write; 1663 1664 type = __SHIFTOUT(qual, VMX_QUAL_CR_TYPE); 1665 if (type == CR_TYPE_WRITE) { 1666 write = true; 1667 } else if (type == CR_TYPE_READ) { 1668 write = false; 1669 } else { 1670 return -1; 1671 } 1672 1673 gpr = __SHIFTOUT(qual, VMX_QUAL_CR_GPR); 1674 KASSERT(gpr < 16); 1675 1676 if (write) { 1677 if (gpr == NVMM_X64_GPR_RSP) { 1678 cpudata->gcr8 = vmx_vmread(VMCS_GUEST_RSP); 1679 } else { 1680 cpudata->gcr8 = cpudata->gprs[gpr]; 1681 } 1682 if (cpudata->tpr.exit_changed) { 1683 exit->reason = NVMM_VCPU_EXIT_TPR_CHANGED; 1684 } 1685 } else { 1686 if (gpr == NVMM_X64_GPR_RSP) { 1687 vmx_vmwrite(VMCS_GUEST_RSP, cpudata->gcr8); 1688 } else { 1689 cpudata->gprs[gpr] = cpudata->gcr8; 1690 } 1691 } 1692 1693 vmx_inkernel_advance(); 1694 return 0; 1695 } 1696 1697 static void 1698 vmx_exit_cr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1699 struct nvmm_vcpu_exit *exit) 1700 { 1701 uint64_t qual; 1702 int ret; 1703 1704 exit->reason = NVMM_VCPU_EXIT_NONE; 1705 1706 qual = vmx_vmread(VMCS_EXIT_QUALIFICATION); 1707 1708 switch (__SHIFTOUT(qual, VMX_QUAL_CR_NUM)) { 1709 case 0: 1710 ret = vmx_inkernel_handle_cr0(mach, vcpu, qual); 1711 break; 1712 case 4: 1713 ret = vmx_inkernel_handle_cr4(mach, vcpu, qual); 1714 break; 1715 case 8: 1716 ret = vmx_inkernel_handle_cr8(mach, vcpu, qual, exit); 1717 break; 1718 default: 1719 ret = -1; 1720 break; 1721 } 1722 1723 if (ret == -1) { 1724 vmx_inject_gp(vcpu); 1725 } 1726 } 1727 1728 #define VMX_QUAL_IO_SIZE __BITS(2,0) 1729 #define IO_SIZE_8 0 1730 #define IO_SIZE_16 1 1731 #define IO_SIZE_32 3 1732 #define VMX_QUAL_IO_IN __BIT(3) 1733 #define VMX_QUAL_IO_STR __BIT(4) 1734 #define VMX_QUAL_IO_REP __BIT(5) 1735 #define VMX_QUAL_IO_DX __BIT(6) 1736 #define VMX_QUAL_IO_PORT __BITS(31,16) 1737 1738 #define VMX_INFO_IO_ADRSIZE __BITS(9,7) 1739 #define IO_ADRSIZE_16 0 1740 #define IO_ADRSIZE_32 1 1741 #define IO_ADRSIZE_64 2 1742 #define VMX_INFO_IO_SEG __BITS(17,15) 1743 1744 static void 1745 vmx_exit_io(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1746 struct nvmm_vcpu_exit *exit) 1747 { 1748 uint64_t qual, info, inslen, rip; 1749 1750 qual = vmx_vmread(VMCS_EXIT_QUALIFICATION); 1751 info = vmx_vmread(VMCS_EXIT_INSTRUCTION_INFO); 1752 1753 exit->reason = NVMM_VCPU_EXIT_IO; 1754 1755 exit->u.io.in = (qual & VMX_QUAL_IO_IN) != 0; 1756 exit->u.io.port = __SHIFTOUT(qual, VMX_QUAL_IO_PORT); 1757 1758 KASSERT(__SHIFTOUT(info, VMX_INFO_IO_SEG) < 6); 1759 exit->u.io.seg = __SHIFTOUT(info, VMX_INFO_IO_SEG); 1760 1761 if (__SHIFTOUT(info, VMX_INFO_IO_ADRSIZE) == IO_ADRSIZE_64) { 1762 exit->u.io.address_size = 8; 1763 } else if (__SHIFTOUT(info, VMX_INFO_IO_ADRSIZE) == IO_ADRSIZE_32) { 1764 exit->u.io.address_size = 4; 1765 } else if (__SHIFTOUT(info, VMX_INFO_IO_ADRSIZE) == IO_ADRSIZE_16) { 1766 exit->u.io.address_size = 2; 1767 } 1768 1769 if (__SHIFTOUT(qual, VMX_QUAL_IO_SIZE) == IO_SIZE_32) { 1770 exit->u.io.operand_size = 4; 1771 } else if (__SHIFTOUT(qual, VMX_QUAL_IO_SIZE) == IO_SIZE_16) { 1772 exit->u.io.operand_size = 2; 1773 } else if (__SHIFTOUT(qual, VMX_QUAL_IO_SIZE) == IO_SIZE_8) { 1774 exit->u.io.operand_size = 1; 1775 } 1776 1777 exit->u.io.rep = (qual & VMX_QUAL_IO_REP) != 0; 1778 exit->u.io.str = (qual & VMX_QUAL_IO_STR) != 0; 1779 1780 if (exit->u.io.in && exit->u.io.str) { 1781 exit->u.io.seg = NVMM_X64_SEG_ES; 1782 } 1783 1784 inslen = vmx_vmread(VMCS_EXIT_INSTRUCTION_LENGTH); 1785 rip = vmx_vmread(VMCS_GUEST_RIP); 1786 exit->u.io.npc = rip + inslen; 1787 1788 vmx_vcpu_state_provide(vcpu, 1789 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 1790 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 1791 } 1792 1793 static const uint64_t msr_ignore_list[] = { 1794 MSR_BIOS_SIGN, 1795 MSR_IA32_PLATFORM_ID 1796 }; 1797 1798 static bool 1799 vmx_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1800 struct nvmm_vcpu_exit *exit) 1801 { 1802 struct vmx_cpudata *cpudata = vcpu->cpudata; 1803 uint64_t val; 1804 size_t i; 1805 1806 if (exit->reason == NVMM_VCPU_EXIT_RDMSR) { 1807 if (exit->u.rdmsr.msr == MSR_CR_PAT) { 1808 val = vmx_vmread(VMCS_GUEST_IA32_PAT); 1809 cpudata->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF); 1810 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1811 goto handled; 1812 } 1813 if (exit->u.rdmsr.msr == MSR_MISC_ENABLE) { 1814 val = cpudata->gmsr_misc_enable; 1815 cpudata->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF); 1816 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1817 goto handled; 1818 } 1819 if (exit->u.rdmsr.msr == MSR_IA32_ARCH_CAPABILITIES) { 1820 u_int descs[4]; 1821 if (cpuid_level < 7) { 1822 goto error; 1823 } 1824 x86_cpuid(7, descs); 1825 if (!(descs[3] & CPUID_SEF_ARCH_CAP)) { 1826 goto error; 1827 } 1828 val = rdmsr(MSR_IA32_ARCH_CAPABILITIES); 1829 val &= (IA32_ARCH_RDCL_NO | 1830 IA32_ARCH_SSB_NO | 1831 IA32_ARCH_MDS_NO | 1832 IA32_ARCH_TAA_NO); 1833 cpudata->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF); 1834 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1835 goto handled; 1836 } 1837 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1838 if (msr_ignore_list[i] != exit->u.rdmsr.msr) 1839 continue; 1840 val = 0; 1841 cpudata->gprs[NVMM_X64_GPR_RAX] = (val & 0xFFFFFFFF); 1842 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1843 goto handled; 1844 } 1845 } else { 1846 if (exit->u.wrmsr.msr == MSR_TSC) { 1847 cpudata->gtsc = exit->u.wrmsr.val; 1848 cpudata->gtsc_want_update = true; 1849 goto handled; 1850 } 1851 if (exit->u.wrmsr.msr == MSR_CR_PAT) { 1852 val = exit->u.wrmsr.val; 1853 if (__predict_false(!nvmm_x86_pat_validate(val))) { 1854 goto error; 1855 } 1856 vmx_vmwrite(VMCS_GUEST_IA32_PAT, val); 1857 goto handled; 1858 } 1859 if (exit->u.wrmsr.msr == MSR_MISC_ENABLE) { 1860 /* Don't care. */ 1861 goto handled; 1862 } 1863 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1864 if (msr_ignore_list[i] != exit->u.wrmsr.msr) 1865 continue; 1866 goto handled; 1867 } 1868 } 1869 1870 return false; 1871 1872 handled: 1873 vmx_inkernel_advance(); 1874 return true; 1875 1876 error: 1877 vmx_inject_gp(vcpu); 1878 return true; 1879 } 1880 1881 static void 1882 vmx_exit_rdmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1883 struct nvmm_vcpu_exit *exit) 1884 { 1885 struct vmx_cpudata *cpudata = vcpu->cpudata; 1886 uint64_t inslen, rip; 1887 1888 exit->reason = NVMM_VCPU_EXIT_RDMSR; 1889 exit->u.rdmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1890 1891 if (vmx_inkernel_handle_msr(mach, vcpu, exit)) { 1892 exit->reason = NVMM_VCPU_EXIT_NONE; 1893 return; 1894 } 1895 1896 inslen = vmx_vmread(VMCS_EXIT_INSTRUCTION_LENGTH); 1897 rip = vmx_vmread(VMCS_GUEST_RIP); 1898 exit->u.rdmsr.npc = rip + inslen; 1899 1900 vmx_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1901 } 1902 1903 static void 1904 vmx_exit_wrmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1905 struct nvmm_vcpu_exit *exit) 1906 { 1907 struct vmx_cpudata *cpudata = vcpu->cpudata; 1908 uint64_t rdx, rax, inslen, rip; 1909 1910 rdx = cpudata->gprs[NVMM_X64_GPR_RDX]; 1911 rax = cpudata->gprs[NVMM_X64_GPR_RAX]; 1912 1913 exit->reason = NVMM_VCPU_EXIT_WRMSR; 1914 exit->u.wrmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1915 exit->u.wrmsr.val = (rdx << 32) | (rax & 0xFFFFFFFF); 1916 1917 if (vmx_inkernel_handle_msr(mach, vcpu, exit)) { 1918 exit->reason = NVMM_VCPU_EXIT_NONE; 1919 return; 1920 } 1921 1922 inslen = vmx_vmread(VMCS_EXIT_INSTRUCTION_LENGTH); 1923 rip = vmx_vmread(VMCS_GUEST_RIP); 1924 exit->u.wrmsr.npc = rip + inslen; 1925 1926 vmx_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1927 } 1928 1929 static void 1930 vmx_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1931 struct nvmm_vcpu_exit *exit) 1932 { 1933 struct vmx_cpudata *cpudata = vcpu->cpudata; 1934 uint64_t val; 1935 1936 exit->reason = NVMM_VCPU_EXIT_NONE; 1937 1938 val = (cpudata->gprs[NVMM_X64_GPR_RDX] << 32) | 1939 (cpudata->gprs[NVMM_X64_GPR_RAX] & 0xFFFFFFFF); 1940 1941 if (__predict_false(cpudata->gprs[NVMM_X64_GPR_RCX] != 0)) { 1942 goto error; 1943 } else if (__predict_false((val & ~vmx_xcr0_mask) != 0)) { 1944 goto error; 1945 } else if (__predict_false((val & XCR0_X87) == 0)) { 1946 goto error; 1947 } 1948 1949 cpudata->gxcr0 = val; 1950 1951 vmx_inkernel_advance(); 1952 return; 1953 1954 error: 1955 vmx_inject_gp(vcpu); 1956 } 1957 1958 #define VMX_EPT_VIOLATION_READ __BIT(0) 1959 #define VMX_EPT_VIOLATION_WRITE __BIT(1) 1960 #define VMX_EPT_VIOLATION_EXECUTE __BIT(2) 1961 1962 static void 1963 vmx_exit_epf(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1964 struct nvmm_vcpu_exit *exit) 1965 { 1966 uint64_t perm; 1967 gpaddr_t gpa; 1968 1969 gpa = vmx_vmread(VMCS_GUEST_PHYSICAL_ADDRESS); 1970 1971 exit->reason = NVMM_VCPU_EXIT_MEMORY; 1972 perm = vmx_vmread(VMCS_EXIT_QUALIFICATION); 1973 if (perm & VMX_EPT_VIOLATION_WRITE) 1974 exit->u.mem.prot = PROT_WRITE; 1975 else if (perm & VMX_EPT_VIOLATION_EXECUTE) 1976 exit->u.mem.prot = PROT_EXEC; 1977 else 1978 exit->u.mem.prot = PROT_READ; 1979 exit->u.mem.gpa = gpa; 1980 exit->u.mem.inst_len = 0; 1981 1982 vmx_vcpu_state_provide(vcpu, 1983 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 1984 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 1985 } 1986 1987 /* -------------------------------------------------------------------------- */ 1988 1989 static void 1990 vmx_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu) 1991 { 1992 struct vmx_cpudata *cpudata = vcpu->cpudata; 1993 1994 cpudata->ts_set = (rcr0() & CR0_TS) != 0; 1995 1996 #ifdef __NetBSD__ 1997 fpu_area_save(&cpudata->hfpu, vmx_xcr0_mask); 1998 fpu_area_restore(&cpudata->gfpu, vmx_xcr0_mask); 1999 #else /* DragonFly */ 2000 /* 2001 * NOTE: Host FPU state depends on whether the user program used the 2002 * FPU or not. Need to use npxpush()/npxpop() to handle this. 2003 */ 2004 npxpush(&cpudata->hmctx); 2005 clts(); 2006 fpurstor(&cpudata->gfpu, vmx_xcr0_mask); 2007 #endif 2008 2009 if (vmx_xcr0_mask != 0) { 2010 cpudata->hxcr0 = rdxcr(0); 2011 wrxcr(0, cpudata->gxcr0); 2012 } 2013 } 2014 2015 static void 2016 vmx_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu) 2017 { 2018 struct vmx_cpudata *cpudata = vcpu->cpudata; 2019 2020 if (vmx_xcr0_mask != 0) { 2021 cpudata->gxcr0 = rdxcr(0); 2022 wrxcr(0, cpudata->hxcr0); 2023 } 2024 2025 #ifdef __NetBSD__ 2026 fpu_area_save(&cpudata->gfpu, vmx_xcr0_mask); 2027 fpu_area_restore(&cpudata->hfpu, vmx_xcr0_mask); 2028 #else /* DragonFly */ 2029 fpusave(&cpudata->gfpu, vmx_xcr0_mask); 2030 stts(); 2031 npxpop(&cpudata->hmctx); 2032 #endif 2033 2034 if (cpudata->ts_set) { 2035 stts(); 2036 } 2037 } 2038 2039 static void 2040 vmx_vcpu_guest_dbregs_enter(struct nvmm_cpu *vcpu) 2041 { 2042 struct vmx_cpudata *cpudata = vcpu->cpudata; 2043 2044 x86_dbregs_save(curlwp); 2045 2046 ldr7(0); 2047 2048 ldr0(cpudata->drs[NVMM_X64_DR_DR0]); 2049 ldr1(cpudata->drs[NVMM_X64_DR_DR1]); 2050 ldr2(cpudata->drs[NVMM_X64_DR_DR2]); 2051 ldr3(cpudata->drs[NVMM_X64_DR_DR3]); 2052 ldr6(cpudata->drs[NVMM_X64_DR_DR6]); 2053 } 2054 2055 static void 2056 vmx_vcpu_guest_dbregs_leave(struct nvmm_cpu *vcpu) 2057 { 2058 struct vmx_cpudata *cpudata = vcpu->cpudata; 2059 2060 cpudata->drs[NVMM_X64_DR_DR0] = rdr0(); 2061 cpudata->drs[NVMM_X64_DR_DR1] = rdr1(); 2062 cpudata->drs[NVMM_X64_DR_DR2] = rdr2(); 2063 cpudata->drs[NVMM_X64_DR_DR3] = rdr3(); 2064 cpudata->drs[NVMM_X64_DR_DR6] = rdr6(); 2065 2066 x86_dbregs_restore(curlwp); 2067 } 2068 2069 static void 2070 vmx_vcpu_guest_misc_enter(struct nvmm_cpu *vcpu) 2071 { 2072 struct vmx_cpudata *cpudata = vcpu->cpudata; 2073 2074 /* This gets restored automatically by the CPU. */ 2075 vmx_vmwrite(VMCS_HOST_FS_BASE, rdmsr(MSR_FSBASE)); 2076 vmx_vmwrite(VMCS_HOST_CR3, rcr3()); 2077 vmx_vmwrite(VMCS_HOST_CR4, rcr4()); 2078 2079 cpudata->kernelgsbase = rdmsr(MSR_KERNELGSBASE); 2080 } 2081 2082 static void 2083 vmx_vcpu_guest_misc_leave(struct nvmm_cpu *vcpu) 2084 { 2085 struct vmx_cpudata *cpudata = vcpu->cpudata; 2086 2087 wrmsr(MSR_STAR, cpudata->star); 2088 wrmsr(MSR_LSTAR, cpudata->lstar); 2089 wrmsr(MSR_CSTAR, cpudata->cstar); 2090 wrmsr(MSR_SFMASK, cpudata->sfmask); 2091 wrmsr(MSR_KERNELGSBASE, cpudata->kernelgsbase); 2092 } 2093 2094 /* -------------------------------------------------------------------------- */ 2095 2096 #define VMX_INVVPID_ADDRESS 0 2097 #define VMX_INVVPID_CONTEXT 1 2098 #define VMX_INVVPID_ALL 2 2099 #define VMX_INVVPID_CONTEXT_NOGLOBAL 3 2100 2101 #define VMX_INVEPT_CONTEXT 1 2102 #define VMX_INVEPT_ALL 2 2103 2104 static inline void 2105 vmx_gtlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 2106 { 2107 struct vmx_cpudata *cpudata = vcpu->cpudata; 2108 2109 if (vcpu->hcpu_last != hcpu) { 2110 cpudata->gtlb_want_flush = true; 2111 } 2112 } 2113 2114 static inline void 2115 vmx_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 2116 { 2117 struct vmx_cpudata *cpudata = vcpu->cpudata; 2118 struct ept_desc ept_desc; 2119 2120 if (__predict_true(!CPUMASK_TESTBIT(cpudata->htlb_want_flush, hcpu))) { 2121 return; 2122 } 2123 2124 ept_desc.eptp = vmx_vmread(VMCS_EPTP); 2125 ept_desc.mbz = 0; 2126 vmx_invept(vmx_ept_flush_op, &ept_desc); 2127 ATOMIC_CPUMASK_NANDBIT(cpudata->htlb_want_flush, hcpu); 2128 } 2129 2130 static inline uint64_t 2131 vmx_htlb_flush(struct vmx_machdata *machdata, struct vmx_cpudata *cpudata) 2132 { 2133 struct ept_desc ept_desc; 2134 uint64_t machgen; 2135 2136 machgen = machdata->mach_htlb_gen; 2137 if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) { 2138 return machgen; 2139 } 2140 2141 ATOMIC_CPUMASK_ORMASK(cpudata->htlb_want_flush, smp_active_mask); 2142 2143 ept_desc.eptp = vmx_vmread(VMCS_EPTP); 2144 ept_desc.mbz = 0; 2145 vmx_invept(vmx_ept_flush_op, &ept_desc); 2146 2147 return machgen; 2148 } 2149 2150 static inline void 2151 vmx_htlb_flush_ack(struct vmx_cpudata *cpudata, uint64_t machgen) 2152 { 2153 cpudata->vcpu_htlb_gen = machgen; 2154 ATOMIC_CPUMASK_NANDBIT(cpudata->htlb_want_flush, mycpuid); 2155 } 2156 2157 static inline void 2158 vmx_exit_evt(struct vmx_cpudata *cpudata) 2159 { 2160 uint64_t info, err, inslen; 2161 2162 cpudata->evt_pending = false; 2163 2164 info = vmx_vmread(VMCS_IDT_VECTORING_INFO); 2165 if (__predict_true((info & INTR_INFO_VALID) == 0)) { 2166 return; 2167 } 2168 err = vmx_vmread(VMCS_IDT_VECTORING_ERROR); 2169 2170 vmx_vmwrite(VMCS_ENTRY_INTR_INFO, info); 2171 vmx_vmwrite(VMCS_ENTRY_EXCEPTION_ERROR, err); 2172 2173 switch (__SHIFTOUT(info, INTR_INFO_TYPE)) { 2174 case INTR_TYPE_SW_INT: 2175 case INTR_TYPE_PRIV_SW_EXC: 2176 case INTR_TYPE_SW_EXC: 2177 inslen = vmx_vmread(VMCS_EXIT_INSTRUCTION_LENGTH); 2178 vmx_vmwrite(VMCS_ENTRY_INSTRUCTION_LENGTH, inslen); 2179 } 2180 2181 cpudata->evt_pending = true; 2182 } 2183 2184 static int 2185 vmx_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 2186 struct nvmm_vcpu_exit *exit) 2187 { 2188 struct nvmm_comm_page *comm = vcpu->comm; 2189 struct vmx_machdata *machdata = mach->machdata; 2190 struct vmx_cpudata *cpudata = vcpu->cpudata; 2191 struct vpid_desc vpid_desc; 2192 struct globaldata *gd; 2193 uint64_t exitcode; 2194 uint64_t intstate; 2195 uint64_t machgen; 2196 int hcpu, s, ret; 2197 bool launched; 2198 2199 vmx_vmcs_enter(vcpu); 2200 2201 vmx_vcpu_state_commit(vcpu); 2202 comm->state_cached = 0; 2203 2204 if (__predict_false(vmx_vcpu_event_commit(vcpu) != 0)) { 2205 vmx_vmcs_leave(vcpu); 2206 return EINVAL; 2207 } 2208 2209 gd = mycpu; 2210 hcpu = gd->gd_cpuid; 2211 launched = cpudata->vmcs_launched; 2212 2213 vmx_gtlb_catchup(vcpu, hcpu); 2214 vmx_htlb_catchup(vcpu, hcpu); 2215 2216 if (vcpu->hcpu_last != hcpu) { 2217 #ifdef __NetBSD__ 2218 vmx_vmwrite(VMCS_HOST_TR_SELECTOR, ci->ci_tss_sel); 2219 vmx_vmwrite(VMCS_HOST_TR_BASE, (uint64_t)ci->ci_tss); 2220 vmx_vmwrite(VMCS_HOST_GDTR_BASE, (uint64_t)ci->ci_gdt); 2221 #else /* DragonFly */ 2222 vmx_vmwrite(VMCS_HOST_TR_SELECTOR, GSEL(GPROC0_SEL, SEL_KPL)); 2223 vmx_vmwrite(VMCS_HOST_TR_BASE, 2224 (uint64_t)&gd->gd_prvspace->common_tss); 2225 vmx_vmwrite(VMCS_HOST_GDTR_BASE, (uint64_t)&gdt[hcpu * NGDT]); 2226 #endif /* __NetBSD__ */ 2227 vmx_vmwrite(VMCS_HOST_GS_BASE, rdmsr(MSR_GSBASE)); 2228 cpudata->gtsc_want_update = true; 2229 vcpu->hcpu_last = hcpu; 2230 } 2231 2232 vmx_vcpu_guest_dbregs_enter(vcpu); 2233 vmx_vcpu_guest_misc_enter(vcpu); 2234 2235 while (1) { 2236 if (cpudata->gtlb_want_flush) { 2237 vpid_desc.vpid = cpudata->asid; 2238 vpid_desc.addr = 0; 2239 vmx_invvpid(vmx_tlb_flush_op, &vpid_desc); 2240 cpudata->gtlb_want_flush = false; 2241 } 2242 2243 if (__predict_false(cpudata->gtsc_want_update)) { 2244 vmx_vmwrite(VMCS_TSC_OFFSET, cpudata->gtsc - rdtsc()); 2245 cpudata->gtsc_want_update = false; 2246 } 2247 2248 s = splhigh(); 2249 machgen = vmx_htlb_flush(machdata, cpudata); 2250 vmx_vcpu_guest_fpu_enter(vcpu); 2251 lcr2(cpudata->gcr2); 2252 if (launched) { 2253 ret = vmx_vmresume(cpudata->gprs); 2254 } else { 2255 ret = vmx_vmlaunch(cpudata->gprs); 2256 } 2257 cpudata->gcr2 = rcr2(); 2258 vmx_vcpu_guest_fpu_leave(vcpu); 2259 vmx_htlb_flush_ack(cpudata, machgen); 2260 splx(s); 2261 2262 if (__predict_false(ret != 0)) { 2263 vmx_exit_invalid(exit, -1); 2264 break; 2265 } 2266 vmx_exit_evt(cpudata); 2267 2268 launched = true; 2269 2270 exitcode = vmx_vmread(VMCS_EXIT_REASON); 2271 exitcode &= __BITS(15,0); 2272 2273 switch (exitcode) { 2274 case VMCS_EXITCODE_EXC_NMI: 2275 vmx_exit_exc_nmi(mach, vcpu, exit); 2276 break; 2277 case VMCS_EXITCODE_EXT_INT: 2278 exit->reason = NVMM_VCPU_EXIT_NONE; 2279 break; 2280 case VMCS_EXITCODE_CPUID: 2281 vmx_exit_cpuid(mach, vcpu, exit); 2282 break; 2283 case VMCS_EXITCODE_HLT: 2284 vmx_exit_hlt(mach, vcpu, exit); 2285 break; 2286 case VMCS_EXITCODE_CR: 2287 vmx_exit_cr(mach, vcpu, exit); 2288 break; 2289 case VMCS_EXITCODE_IO: 2290 vmx_exit_io(mach, vcpu, exit); 2291 break; 2292 case VMCS_EXITCODE_RDMSR: 2293 vmx_exit_rdmsr(mach, vcpu, exit); 2294 break; 2295 case VMCS_EXITCODE_WRMSR: 2296 vmx_exit_wrmsr(mach, vcpu, exit); 2297 break; 2298 case VMCS_EXITCODE_SHUTDOWN: 2299 exit->reason = NVMM_VCPU_EXIT_SHUTDOWN; 2300 break; 2301 case VMCS_EXITCODE_MONITOR: 2302 vmx_exit_insn(exit, NVMM_VCPU_EXIT_MONITOR); 2303 break; 2304 case VMCS_EXITCODE_MWAIT: 2305 vmx_exit_insn(exit, NVMM_VCPU_EXIT_MWAIT); 2306 break; 2307 case VMCS_EXITCODE_XSETBV: 2308 vmx_exit_xsetbv(mach, vcpu, exit); 2309 break; 2310 case VMCS_EXITCODE_RDPMC: 2311 case VMCS_EXITCODE_RDTSCP: 2312 case VMCS_EXITCODE_INVVPID: 2313 case VMCS_EXITCODE_INVEPT: 2314 case VMCS_EXITCODE_VMCALL: 2315 case VMCS_EXITCODE_VMCLEAR: 2316 case VMCS_EXITCODE_VMLAUNCH: 2317 case VMCS_EXITCODE_VMPTRLD: 2318 case VMCS_EXITCODE_VMPTRST: 2319 case VMCS_EXITCODE_VMREAD: 2320 case VMCS_EXITCODE_VMRESUME: 2321 case VMCS_EXITCODE_VMWRITE: 2322 case VMCS_EXITCODE_VMXOFF: 2323 case VMCS_EXITCODE_VMXON: 2324 vmx_inject_ud(vcpu); 2325 exit->reason = NVMM_VCPU_EXIT_NONE; 2326 break; 2327 case VMCS_EXITCODE_EPT_VIOLATION: 2328 vmx_exit_epf(mach, vcpu, exit); 2329 break; 2330 case VMCS_EXITCODE_INT_WINDOW: 2331 vmx_event_waitexit_disable(vcpu, false); 2332 exit->reason = NVMM_VCPU_EXIT_INT_READY; 2333 break; 2334 case VMCS_EXITCODE_NMI_WINDOW: 2335 vmx_event_waitexit_disable(vcpu, true); 2336 exit->reason = NVMM_VCPU_EXIT_NMI_READY; 2337 break; 2338 default: 2339 vmx_exit_invalid(exit, exitcode); 2340 break; 2341 } 2342 2343 /* If no reason to return to userland, keep rolling. */ 2344 if (nvmm_return_needed()) { 2345 break; 2346 } 2347 if (exit->reason != NVMM_VCPU_EXIT_NONE) { 2348 break; 2349 } 2350 } 2351 2352 cpudata->vmcs_launched = launched; 2353 2354 cpudata->gtsc = vmx_vmread(VMCS_TSC_OFFSET) + rdtsc(); 2355 2356 vmx_vcpu_guest_misc_leave(vcpu); 2357 vmx_vcpu_guest_dbregs_leave(vcpu); 2358 2359 exit->exitstate.rflags = vmx_vmread(VMCS_GUEST_RFLAGS); 2360 exit->exitstate.cr8 = cpudata->gcr8; 2361 intstate = vmx_vmread(VMCS_GUEST_INTERRUPTIBILITY); 2362 exit->exitstate.int_shadow = 2363 (intstate & (INT_STATE_STI|INT_STATE_MOVSS)) != 0; 2364 exit->exitstate.int_window_exiting = cpudata->int_window_exit; 2365 exit->exitstate.nmi_window_exiting = cpudata->nmi_window_exit; 2366 exit->exitstate.evt_pending = cpudata->evt_pending; 2367 2368 vmx_vmcs_leave(vcpu); 2369 2370 return 0; 2371 } 2372 2373 /* -------------------------------------------------------------------------- */ 2374 2375 static int 2376 vmx_memalloc(paddr_t *pa, vaddr_t *va, size_t npages) 2377 { 2378 #ifdef __NetBSD__ 2379 struct pglist pglist; 2380 paddr_t _pa; 2381 vaddr_t _va; 2382 size_t i; 2383 int ret; 2384 2385 ret = uvm_pglistalloc(npages * PAGE_SIZE, 0, ~0UL, PAGE_SIZE, 0, 2386 &pglist, 1, 0); 2387 if (ret != 0) 2388 return ENOMEM; 2389 _pa = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist)); 2390 _va = uvm_km_alloc(kernel_map, npages * PAGE_SIZE, 0, 2391 UVM_KMF_VAONLY | UVM_KMF_NOWAIT); 2392 if (_va == 0) 2393 goto error; 2394 2395 for (i = 0; i < npages; i++) { 2396 pmap_kenter_pa(_va + i * PAGE_SIZE, _pa + i * PAGE_SIZE, 2397 VM_PROT_READ | VM_PROT_WRITE, PMAP_WRITE_BACK); 2398 } 2399 pmap_update(pmap_kernel()); 2400 2401 memset((void *)_va, 0, npages * PAGE_SIZE); 2402 2403 *pa = _pa; 2404 *va = _va; 2405 return 0; 2406 2407 error: 2408 for (i = 0; i < npages; i++) { 2409 uvm_pagefree(PHYS_TO_VM_PAGE(_pa + i * PAGE_SIZE)); 2410 } 2411 return ENOMEM; 2412 2413 #else /* DragonFly */ 2414 void *addr; 2415 2416 addr = contigmalloc(npages * PAGE_SIZE, M_NVMM, M_WAITOK | M_ZERO, 2417 0, ~0UL, PAGE_SIZE, 0); 2418 if (addr == NULL) 2419 return ENOMEM; 2420 2421 *va = (vaddr_t)addr; 2422 *pa = vtophys(addr); 2423 return 0; 2424 #endif /* __NetBSD__ */ 2425 } 2426 2427 static void 2428 vmx_memfree(paddr_t pa __unused, vaddr_t va, size_t npages) 2429 { 2430 #ifdef __NetBSD__ 2431 size_t i; 2432 2433 pmap_kremove(va, npages * PAGE_SIZE); 2434 pmap_update(pmap_kernel()); 2435 uvm_km_free(kernel_map, va, npages * PAGE_SIZE, UVM_KMF_VAONLY); 2436 for (i = 0; i < npages; i++) { 2437 uvm_pagefree(PHYS_TO_VM_PAGE(pa + i * PAGE_SIZE)); 2438 } 2439 #else /* DragonFly */ 2440 contigfree((void *)va, npages * PAGE_SIZE, M_NVMM); 2441 #endif /* __NetBSD__ */ 2442 } 2443 2444 /* -------------------------------------------------------------------------- */ 2445 2446 static void 2447 vmx_vcpu_msr_allow(uint8_t *bitmap, uint64_t msr, bool read, bool write) 2448 { 2449 uint64_t byte; 2450 uint8_t bitoff; 2451 2452 if (msr < 0x00002000) { 2453 /* Range 1 */ 2454 byte = ((msr - 0x00000000) / 8) + 0; 2455 } else if (msr >= 0xC0000000 && msr < 0xC0002000) { 2456 /* Range 2 */ 2457 byte = ((msr - 0xC0000000) / 8) + 1024; 2458 } else { 2459 panic("%s: wrong range", __func__); 2460 } 2461 2462 bitoff = (msr & 0x7); 2463 2464 if (read) { 2465 bitmap[byte] &= ~__BIT(bitoff); 2466 } 2467 if (write) { 2468 bitmap[2048 + byte] &= ~__BIT(bitoff); 2469 } 2470 } 2471 2472 #define VMX_SEG_ATTRIB_TYPE __BITS(3,0) 2473 #define VMX_SEG_ATTRIB_S __BIT(4) 2474 #define VMX_SEG_ATTRIB_DPL __BITS(6,5) 2475 #define VMX_SEG_ATTRIB_P __BIT(7) 2476 #define VMX_SEG_ATTRIB_AVL __BIT(12) 2477 #define VMX_SEG_ATTRIB_L __BIT(13) 2478 #define VMX_SEG_ATTRIB_DEF __BIT(14) 2479 #define VMX_SEG_ATTRIB_G __BIT(15) 2480 #define VMX_SEG_ATTRIB_UNUSABLE __BIT(16) 2481 2482 static void 2483 vmx_vcpu_setstate_seg(const struct nvmm_x64_state_seg *segs, int idx) 2484 { 2485 uint64_t attrib; 2486 2487 attrib = 2488 __SHIFTIN(segs[idx].attrib.type, VMX_SEG_ATTRIB_TYPE) | 2489 __SHIFTIN(segs[idx].attrib.s, VMX_SEG_ATTRIB_S) | 2490 __SHIFTIN(segs[idx].attrib.dpl, VMX_SEG_ATTRIB_DPL) | 2491 __SHIFTIN(segs[idx].attrib.p, VMX_SEG_ATTRIB_P) | 2492 __SHIFTIN(segs[idx].attrib.avl, VMX_SEG_ATTRIB_AVL) | 2493 __SHIFTIN(segs[idx].attrib.l, VMX_SEG_ATTRIB_L) | 2494 __SHIFTIN(segs[idx].attrib.def, VMX_SEG_ATTRIB_DEF) | 2495 __SHIFTIN(segs[idx].attrib.g, VMX_SEG_ATTRIB_G) | 2496 (!segs[idx].attrib.p ? VMX_SEG_ATTRIB_UNUSABLE : 0); 2497 2498 if (idx != NVMM_X64_SEG_GDT && idx != NVMM_X64_SEG_IDT) { 2499 vmx_vmwrite(vmx_guest_segs[idx].selector, segs[idx].selector); 2500 vmx_vmwrite(vmx_guest_segs[idx].attrib, attrib); 2501 } 2502 vmx_vmwrite(vmx_guest_segs[idx].limit, segs[idx].limit); 2503 vmx_vmwrite(vmx_guest_segs[idx].base, segs[idx].base); 2504 } 2505 2506 static void 2507 vmx_vcpu_getstate_seg(struct nvmm_x64_state_seg *segs, int idx) 2508 { 2509 uint64_t selector = 0, attrib = 0, base, limit; 2510 2511 if (idx != NVMM_X64_SEG_GDT && idx != NVMM_X64_SEG_IDT) { 2512 selector = vmx_vmread(vmx_guest_segs[idx].selector); 2513 attrib = vmx_vmread(vmx_guest_segs[idx].attrib); 2514 } 2515 limit = vmx_vmread(vmx_guest_segs[idx].limit); 2516 base = vmx_vmread(vmx_guest_segs[idx].base); 2517 2518 segs[idx].selector = selector; 2519 segs[idx].limit = limit; 2520 segs[idx].base = base; 2521 segs[idx].attrib.type = __SHIFTOUT(attrib, VMX_SEG_ATTRIB_TYPE); 2522 segs[idx].attrib.s = __SHIFTOUT(attrib, VMX_SEG_ATTRIB_S); 2523 segs[idx].attrib.dpl = __SHIFTOUT(attrib, VMX_SEG_ATTRIB_DPL); 2524 segs[idx].attrib.p = __SHIFTOUT(attrib, VMX_SEG_ATTRIB_P); 2525 segs[idx].attrib.avl = __SHIFTOUT(attrib, VMX_SEG_ATTRIB_AVL); 2526 segs[idx].attrib.l = __SHIFTOUT(attrib, VMX_SEG_ATTRIB_L); 2527 segs[idx].attrib.def = __SHIFTOUT(attrib, VMX_SEG_ATTRIB_DEF); 2528 segs[idx].attrib.g = __SHIFTOUT(attrib, VMX_SEG_ATTRIB_G); 2529 if (attrib & VMX_SEG_ATTRIB_UNUSABLE) { 2530 segs[idx].attrib.p = 0; 2531 } 2532 } 2533 2534 static inline bool 2535 vmx_state_tlb_flush(const struct nvmm_x64_state *state, uint64_t flags) 2536 { 2537 uint64_t cr0, cr3, cr4, efer; 2538 2539 if (flags & NVMM_X64_STATE_CRS) { 2540 cr0 = vmx_vmread(VMCS_GUEST_CR0); 2541 if ((cr0 ^ state->crs[NVMM_X64_CR_CR0]) & CR0_TLB_FLUSH) { 2542 return true; 2543 } 2544 cr3 = vmx_vmread(VMCS_GUEST_CR3); 2545 if (cr3 != state->crs[NVMM_X64_CR_CR3]) { 2546 return true; 2547 } 2548 cr4 = vmx_vmread(VMCS_GUEST_CR4); 2549 if ((cr4 ^ state->crs[NVMM_X64_CR_CR4]) & CR4_TLB_FLUSH) { 2550 return true; 2551 } 2552 } 2553 2554 if (flags & NVMM_X64_STATE_MSRS) { 2555 efer = vmx_vmread(VMCS_GUEST_IA32_EFER); 2556 if ((efer ^ 2557 state->msrs[NVMM_X64_MSR_EFER]) & EFER_TLB_FLUSH) { 2558 return true; 2559 } 2560 } 2561 2562 return false; 2563 } 2564 2565 static void 2566 vmx_vcpu_setstate(struct nvmm_cpu *vcpu) 2567 { 2568 struct nvmm_comm_page *comm = vcpu->comm; 2569 const struct nvmm_x64_state *state = &comm->state; 2570 struct vmx_cpudata *cpudata = vcpu->cpudata; 2571 struct savexmm64 *fpustate; 2572 uint64_t ctls1, intstate; 2573 uint64_t flags; 2574 2575 flags = comm->state_wanted; 2576 2577 vmx_vmcs_enter(vcpu); 2578 2579 if (vmx_state_tlb_flush(state, flags)) { 2580 cpudata->gtlb_want_flush = true; 2581 } 2582 2583 if (flags & NVMM_X64_STATE_SEGS) { 2584 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_CS); 2585 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_DS); 2586 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_ES); 2587 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_FS); 2588 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_GS); 2589 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_SS); 2590 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_GDT); 2591 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_IDT); 2592 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_LDT); 2593 vmx_vcpu_setstate_seg(state->segs, NVMM_X64_SEG_TR); 2594 } 2595 2596 CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs)); 2597 if (flags & NVMM_X64_STATE_GPRS) { 2598 memcpy(cpudata->gprs, state->gprs, sizeof(state->gprs)); 2599 2600 vmx_vmwrite(VMCS_GUEST_RIP, state->gprs[NVMM_X64_GPR_RIP]); 2601 vmx_vmwrite(VMCS_GUEST_RSP, state->gprs[NVMM_X64_GPR_RSP]); 2602 vmx_vmwrite(VMCS_GUEST_RFLAGS, state->gprs[NVMM_X64_GPR_RFLAGS]); 2603 } 2604 2605 if (flags & NVMM_X64_STATE_CRS) { 2606 /* 2607 * CR0_NE and CR4_VMXE are mandatory. 2608 */ 2609 vmx_vmwrite(VMCS_GUEST_CR0, 2610 state->crs[NVMM_X64_CR_CR0] | CR0_NE); 2611 cpudata->gcr2 = state->crs[NVMM_X64_CR_CR2]; 2612 vmx_vmwrite(VMCS_GUEST_CR3, state->crs[NVMM_X64_CR_CR3]); // XXX PDPTE? 2613 vmx_vmwrite(VMCS_GUEST_CR4, 2614 (state->crs[NVMM_X64_CR_CR4] & CR4_VALID) | CR4_VMXE); 2615 cpudata->gcr8 = state->crs[NVMM_X64_CR_CR8]; 2616 2617 if (vmx_xcr0_mask != 0) { 2618 /* Clear illegal XCR0 bits, set mandatory X87 bit. */ 2619 cpudata->gxcr0 = state->crs[NVMM_X64_CR_XCR0]; 2620 cpudata->gxcr0 &= vmx_xcr0_mask; 2621 cpudata->gxcr0 |= XCR0_X87; 2622 } 2623 } 2624 2625 CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs)); 2626 if (flags & NVMM_X64_STATE_DRS) { 2627 memcpy(cpudata->drs, state->drs, sizeof(state->drs)); 2628 2629 cpudata->drs[NVMM_X64_DR_DR6] &= 0xFFFFFFFF; 2630 vmx_vmwrite(VMCS_GUEST_DR7, cpudata->drs[NVMM_X64_DR_DR7]); 2631 } 2632 2633 if (flags & NVMM_X64_STATE_MSRS) { 2634 cpudata->gmsr[VMX_MSRLIST_STAR].val = 2635 state->msrs[NVMM_X64_MSR_STAR]; 2636 cpudata->gmsr[VMX_MSRLIST_LSTAR].val = 2637 state->msrs[NVMM_X64_MSR_LSTAR]; 2638 cpudata->gmsr[VMX_MSRLIST_CSTAR].val = 2639 state->msrs[NVMM_X64_MSR_CSTAR]; 2640 cpudata->gmsr[VMX_MSRLIST_SFMASK].val = 2641 state->msrs[NVMM_X64_MSR_SFMASK]; 2642 cpudata->gmsr[VMX_MSRLIST_KERNELGSBASE].val = 2643 state->msrs[NVMM_X64_MSR_KERNELGSBASE]; 2644 2645 vmx_vmwrite(VMCS_GUEST_IA32_EFER, 2646 state->msrs[NVMM_X64_MSR_EFER]); 2647 vmx_vmwrite(VMCS_GUEST_IA32_PAT, 2648 state->msrs[NVMM_X64_MSR_PAT]); 2649 vmx_vmwrite(VMCS_GUEST_IA32_SYSENTER_CS, 2650 state->msrs[NVMM_X64_MSR_SYSENTER_CS]); 2651 vmx_vmwrite(VMCS_GUEST_IA32_SYSENTER_ESP, 2652 state->msrs[NVMM_X64_MSR_SYSENTER_ESP]); 2653 vmx_vmwrite(VMCS_GUEST_IA32_SYSENTER_EIP, 2654 state->msrs[NVMM_X64_MSR_SYSENTER_EIP]); 2655 2656 cpudata->gtsc = state->msrs[NVMM_X64_MSR_TSC]; 2657 cpudata->gtsc_want_update = true; 2658 2659 /* ENTRY_CTLS_LONG_MODE must match EFER_LMA. */ 2660 ctls1 = vmx_vmread(VMCS_ENTRY_CTLS); 2661 if (state->msrs[NVMM_X64_MSR_EFER] & EFER_LMA) { 2662 ctls1 |= ENTRY_CTLS_LONG_MODE; 2663 } else { 2664 ctls1 &= ~ENTRY_CTLS_LONG_MODE; 2665 } 2666 vmx_vmwrite(VMCS_ENTRY_CTLS, ctls1); 2667 } 2668 2669 if (flags & NVMM_X64_STATE_INTR) { 2670 intstate = vmx_vmread(VMCS_GUEST_INTERRUPTIBILITY); 2671 intstate &= ~(INT_STATE_STI|INT_STATE_MOVSS); 2672 if (state->intr.int_shadow) { 2673 intstate |= INT_STATE_MOVSS; 2674 } 2675 vmx_vmwrite(VMCS_GUEST_INTERRUPTIBILITY, intstate); 2676 2677 if (state->intr.int_window_exiting) { 2678 vmx_event_waitexit_enable(vcpu, false); 2679 } else { 2680 vmx_event_waitexit_disable(vcpu, false); 2681 } 2682 2683 if (state->intr.nmi_window_exiting) { 2684 vmx_event_waitexit_enable(vcpu, true); 2685 } else { 2686 vmx_event_waitexit_disable(vcpu, true); 2687 } 2688 } 2689 2690 CTASSERT(sizeof(cpudata->gfpu) == sizeof(state->fpu)); 2691 if (flags & NVMM_X64_STATE_FPU) { 2692 memcpy(&cpudata->gfpu, &state->fpu, sizeof(state->fpu)); 2693 2694 fpustate = &cpudata->gfpu.sv_xmm64; 2695 fpustate->sv_env.en_mxcsr_mask &= x86_fpu_mxcsr_mask; 2696 fpustate->sv_env.en_mxcsr &= fpustate->sv_env.en_mxcsr_mask; 2697 2698 #ifdef __NetBSD__ 2699 if (vmx_xcr0_mask != 0) { 2700 /* Reset XSTATE_BV, to force a reload. */ 2701 cpudata->gfpu.xsh_xstate_bv = vmx_xcr0_mask; 2702 } 2703 #endif /* __NetBSD__ */ 2704 } 2705 2706 vmx_vmcs_leave(vcpu); 2707 2708 comm->state_wanted = 0; 2709 comm->state_cached |= flags; 2710 } 2711 2712 static void 2713 vmx_vcpu_getstate(struct nvmm_cpu *vcpu) 2714 { 2715 struct nvmm_comm_page *comm = vcpu->comm; 2716 struct nvmm_x64_state *state = &comm->state; 2717 struct vmx_cpudata *cpudata = vcpu->cpudata; 2718 uint64_t intstate, flags; 2719 2720 flags = comm->state_wanted; 2721 2722 vmx_vmcs_enter(vcpu); 2723 2724 if (flags & NVMM_X64_STATE_SEGS) { 2725 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_CS); 2726 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_DS); 2727 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_ES); 2728 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_FS); 2729 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_GS); 2730 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_SS); 2731 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_GDT); 2732 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_IDT); 2733 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_LDT); 2734 vmx_vcpu_getstate_seg(state->segs, NVMM_X64_SEG_TR); 2735 } 2736 2737 CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs)); 2738 if (flags & NVMM_X64_STATE_GPRS) { 2739 memcpy(state->gprs, cpudata->gprs, sizeof(state->gprs)); 2740 2741 state->gprs[NVMM_X64_GPR_RIP] = vmx_vmread(VMCS_GUEST_RIP); 2742 state->gprs[NVMM_X64_GPR_RSP] = vmx_vmread(VMCS_GUEST_RSP); 2743 state->gprs[NVMM_X64_GPR_RFLAGS] = vmx_vmread(VMCS_GUEST_RFLAGS); 2744 } 2745 2746 if (flags & NVMM_X64_STATE_CRS) { 2747 state->crs[NVMM_X64_CR_CR0] = vmx_vmread(VMCS_GUEST_CR0); 2748 state->crs[NVMM_X64_CR_CR2] = cpudata->gcr2; 2749 state->crs[NVMM_X64_CR_CR3] = vmx_vmread(VMCS_GUEST_CR3); 2750 state->crs[NVMM_X64_CR_CR4] = vmx_vmread(VMCS_GUEST_CR4); 2751 state->crs[NVMM_X64_CR_CR8] = cpudata->gcr8; 2752 state->crs[NVMM_X64_CR_XCR0] = cpudata->gxcr0; 2753 2754 /* Hide VMXE. */ 2755 state->crs[NVMM_X64_CR_CR4] &= ~CR4_VMXE; 2756 } 2757 2758 CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs)); 2759 if (flags & NVMM_X64_STATE_DRS) { 2760 memcpy(state->drs, cpudata->drs, sizeof(state->drs)); 2761 2762 state->drs[NVMM_X64_DR_DR7] = vmx_vmread(VMCS_GUEST_DR7); 2763 } 2764 2765 if (flags & NVMM_X64_STATE_MSRS) { 2766 state->msrs[NVMM_X64_MSR_STAR] = 2767 cpudata->gmsr[VMX_MSRLIST_STAR].val; 2768 state->msrs[NVMM_X64_MSR_LSTAR] = 2769 cpudata->gmsr[VMX_MSRLIST_LSTAR].val; 2770 state->msrs[NVMM_X64_MSR_CSTAR] = 2771 cpudata->gmsr[VMX_MSRLIST_CSTAR].val; 2772 state->msrs[NVMM_X64_MSR_SFMASK] = 2773 cpudata->gmsr[VMX_MSRLIST_SFMASK].val; 2774 state->msrs[NVMM_X64_MSR_KERNELGSBASE] = 2775 cpudata->gmsr[VMX_MSRLIST_KERNELGSBASE].val; 2776 state->msrs[NVMM_X64_MSR_EFER] = 2777 vmx_vmread(VMCS_GUEST_IA32_EFER); 2778 state->msrs[NVMM_X64_MSR_PAT] = 2779 vmx_vmread(VMCS_GUEST_IA32_PAT); 2780 state->msrs[NVMM_X64_MSR_SYSENTER_CS] = 2781 vmx_vmread(VMCS_GUEST_IA32_SYSENTER_CS); 2782 state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = 2783 vmx_vmread(VMCS_GUEST_IA32_SYSENTER_ESP); 2784 state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = 2785 vmx_vmread(VMCS_GUEST_IA32_SYSENTER_EIP); 2786 state->msrs[NVMM_X64_MSR_TSC] = cpudata->gtsc; 2787 } 2788 2789 if (flags & NVMM_X64_STATE_INTR) { 2790 intstate = vmx_vmread(VMCS_GUEST_INTERRUPTIBILITY); 2791 state->intr.int_shadow = 2792 (intstate & (INT_STATE_STI|INT_STATE_MOVSS)) != 0; 2793 state->intr.int_window_exiting = cpudata->int_window_exit; 2794 state->intr.nmi_window_exiting = cpudata->nmi_window_exit; 2795 state->intr.evt_pending = cpudata->evt_pending; 2796 } 2797 2798 CTASSERT(sizeof(cpudata->gfpu) == sizeof(state->fpu)); 2799 if (flags & NVMM_X64_STATE_FPU) { 2800 memcpy(&state->fpu, &cpudata->gfpu, sizeof(state->fpu)); 2801 } 2802 2803 vmx_vmcs_leave(vcpu); 2804 2805 comm->state_wanted = 0; 2806 comm->state_cached |= flags; 2807 } 2808 2809 static void 2810 vmx_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags) 2811 { 2812 vcpu->comm->state_wanted = flags; 2813 vmx_vcpu_getstate(vcpu); 2814 } 2815 2816 static void 2817 vmx_vcpu_state_commit(struct nvmm_cpu *vcpu) 2818 { 2819 vcpu->comm->state_wanted = vcpu->comm->state_commit; 2820 vcpu->comm->state_commit = 0; 2821 vmx_vcpu_setstate(vcpu); 2822 } 2823 2824 /* -------------------------------------------------------------------------- */ 2825 2826 static void 2827 vmx_asid_alloc(struct nvmm_cpu *vcpu) 2828 { 2829 struct vmx_cpudata *cpudata = vcpu->cpudata; 2830 size_t i, oct, bit; 2831 2832 mutex_enter(&vmx_asidlock); 2833 2834 for (i = 0; i < vmx_maxasid; i++) { 2835 oct = i / 8; 2836 bit = i % 8; 2837 2838 if (vmx_asidmap[oct] & __BIT(bit)) { 2839 continue; 2840 } 2841 2842 cpudata->asid = i; 2843 2844 vmx_asidmap[oct] |= __BIT(bit); 2845 vmx_vmwrite(VMCS_VPID, i); 2846 mutex_exit(&vmx_asidlock); 2847 return; 2848 } 2849 2850 mutex_exit(&vmx_asidlock); 2851 2852 panic("%s: impossible", __func__); 2853 } 2854 2855 static void 2856 vmx_asid_free(struct nvmm_cpu *vcpu) 2857 { 2858 size_t oct, bit; 2859 uint64_t asid; 2860 2861 asid = vmx_vmread(VMCS_VPID); 2862 2863 oct = asid / 8; 2864 bit = asid % 8; 2865 2866 mutex_enter(&vmx_asidlock); 2867 vmx_asidmap[oct] &= ~__BIT(bit); 2868 mutex_exit(&vmx_asidlock); 2869 } 2870 2871 static void 2872 vmx_vcpu_init(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2873 { 2874 struct vmx_cpudata *cpudata = vcpu->cpudata; 2875 struct vmcs *vmcs = cpudata->vmcs; 2876 struct msr_entry *gmsr = cpudata->gmsr; 2877 extern uint8_t vmx_resume_rip; 2878 uint64_t rev, eptp; 2879 2880 rev = vmx_get_revision(); 2881 2882 memset(vmcs, 0, VMCS_SIZE); 2883 vmcs->ident = __SHIFTIN(rev, VMCS_IDENT_REVISION); 2884 vmcs->abort = 0; 2885 2886 vmx_vmcs_enter(vcpu); 2887 2888 /* No link pointer. */ 2889 vmx_vmwrite(VMCS_LINK_POINTER, 0xFFFFFFFFFFFFFFFF); 2890 2891 /* Install the CTLSs. */ 2892 vmx_vmwrite(VMCS_PINBASED_CTLS, vmx_pinbased_ctls); 2893 vmx_vmwrite(VMCS_PROCBASED_CTLS, vmx_procbased_ctls); 2894 vmx_vmwrite(VMCS_PROCBASED_CTLS2, vmx_procbased_ctls2); 2895 vmx_vmwrite(VMCS_ENTRY_CTLS, vmx_entry_ctls); 2896 vmx_vmwrite(VMCS_EXIT_CTLS, vmx_exit_ctls); 2897 2898 /* Allow direct access to certain MSRs. */ 2899 memset(cpudata->msrbm, 0xFF, MSRBM_SIZE); 2900 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_EFER, true, true); 2901 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_STAR, true, true); 2902 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_LSTAR, true, true); 2903 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_CSTAR, true, true); 2904 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_SFMASK, true, true); 2905 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_KERNELGSBASE, true, true); 2906 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_CS, true, true); 2907 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_ESP, true, true); 2908 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_EIP, true, true); 2909 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_FSBASE, true, true); 2910 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_GSBASE, true, true); 2911 vmx_vcpu_msr_allow(cpudata->msrbm, MSR_TSC, true, false); 2912 vmx_vmwrite(VMCS_MSR_BITMAP, (uint64_t)cpudata->msrbm_pa); 2913 2914 /* 2915 * List of Guest MSRs loaded on VMENTRY, saved on VMEXIT. This 2916 * includes the L1D_FLUSH MSR, to mitigate L1TF. 2917 */ 2918 gmsr[VMX_MSRLIST_STAR].msr = MSR_STAR; 2919 gmsr[VMX_MSRLIST_STAR].val = 0; 2920 gmsr[VMX_MSRLIST_LSTAR].msr = MSR_LSTAR; 2921 gmsr[VMX_MSRLIST_LSTAR].val = 0; 2922 gmsr[VMX_MSRLIST_CSTAR].msr = MSR_CSTAR; 2923 gmsr[VMX_MSRLIST_CSTAR].val = 0; 2924 gmsr[VMX_MSRLIST_SFMASK].msr = MSR_SFMASK; 2925 gmsr[VMX_MSRLIST_SFMASK].val = 0; 2926 gmsr[VMX_MSRLIST_KERNELGSBASE].msr = MSR_KERNELGSBASE; 2927 gmsr[VMX_MSRLIST_KERNELGSBASE].val = 0; 2928 gmsr[VMX_MSRLIST_L1DFLUSH].msr = MSR_IA32_FLUSH_CMD; 2929 gmsr[VMX_MSRLIST_L1DFLUSH].val = IA32_FLUSH_CMD_L1D_FLUSH; 2930 vmx_vmwrite(VMCS_ENTRY_MSR_LOAD_ADDRESS, cpudata->gmsr_pa); 2931 vmx_vmwrite(VMCS_EXIT_MSR_STORE_ADDRESS, cpudata->gmsr_pa); 2932 vmx_vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, vmx_msrlist_entry_nmsr); 2933 vmx_vmwrite(VMCS_EXIT_MSR_STORE_COUNT, VMX_MSRLIST_EXIT_NMSR); 2934 2935 /* Force CR0_NW and CR0_CD to zero, CR0_ET to one. */ 2936 vmx_vmwrite(VMCS_CR0_MASK, CR0_NW|CR0_CD|CR0_ET); 2937 vmx_vmwrite(VMCS_CR0_SHADOW, CR0_ET); 2938 2939 /* Force unsupported CR4 fields to zero. */ 2940 vmx_vmwrite(VMCS_CR4_MASK, CR4_INVALID); 2941 vmx_vmwrite(VMCS_CR4_SHADOW, 0); 2942 2943 /* Set the Host state for resuming. */ 2944 vmx_vmwrite(VMCS_HOST_RIP, (uint64_t)&vmx_resume_rip); 2945 vmx_vmwrite(VMCS_HOST_CS_SELECTOR, GSEL(GCODE_SEL, SEL_KPL)); 2946 vmx_vmwrite(VMCS_HOST_SS_SELECTOR, GSEL(GDATA_SEL, SEL_KPL)); 2947 vmx_vmwrite(VMCS_HOST_DS_SELECTOR, GSEL(GDATA_SEL, SEL_KPL)); 2948 vmx_vmwrite(VMCS_HOST_ES_SELECTOR, GSEL(GDATA_SEL, SEL_KPL)); 2949 vmx_vmwrite(VMCS_HOST_FS_SELECTOR, 0); 2950 vmx_vmwrite(VMCS_HOST_GS_SELECTOR, 0); 2951 vmx_vmwrite(VMCS_HOST_IA32_SYSENTER_CS, 0); 2952 vmx_vmwrite(VMCS_HOST_IA32_SYSENTER_ESP, 0); 2953 vmx_vmwrite(VMCS_HOST_IA32_SYSENTER_EIP, 0); 2954 #ifdef __NetBSD__ 2955 vmx_vmwrite(VMCS_HOST_IDTR_BASE, (uint64_t)idt); 2956 #else /* DragonFly */ 2957 vmx_vmwrite(VMCS_HOST_IDTR_BASE, (uint64_t)r_idt_arr[mycpuid].rd_base); 2958 #endif /* __NetBSD__ */ 2959 vmx_vmwrite(VMCS_HOST_IA32_PAT, rdmsr(MSR_CR_PAT)); 2960 vmx_vmwrite(VMCS_HOST_IA32_EFER, rdmsr(MSR_EFER)); 2961 vmx_vmwrite(VMCS_HOST_CR0, rcr0()); 2962 2963 /* Generate ASID. */ 2964 vmx_asid_alloc(vcpu); 2965 2966 /* Enable Extended Paging, 4-Level. */ 2967 eptp = 2968 __SHIFTIN(vmx_eptp_type, EPTP_TYPE) | 2969 __SHIFTIN(4-1, EPTP_WALKLEN) | 2970 (pmap_ept_has_ad ? EPTP_FLAGS_AD : 0) | 2971 vtophys(vmspace_pmap(mach->vm)->pm_pml4); 2972 vmx_vmwrite(VMCS_EPTP, eptp); 2973 2974 /* Init IA32_MISC_ENABLE. */ 2975 cpudata->gmsr_misc_enable = rdmsr(MSR_MISC_ENABLE); 2976 cpudata->gmsr_misc_enable &= 2977 ~(IA32_MISC_PERFMON_EN|IA32_MISC_EISST_EN|IA32_MISC_MWAIT_EN); 2978 cpudata->gmsr_misc_enable |= 2979 (IA32_MISC_BTS_UNAVAIL|IA32_MISC_PEBS_UNAVAIL); 2980 2981 #ifdef __NetBSD__ 2982 /* Init XSAVE header. */ 2983 cpudata->gfpu.xsh_xstate_bv = vmx_xcr0_mask; 2984 cpudata->gfpu.xsh_xcomp_bv = 0; 2985 #endif /* __NetBSD__ */ 2986 2987 /* These MSRs are static. */ 2988 cpudata->star = rdmsr(MSR_STAR); 2989 cpudata->lstar = rdmsr(MSR_LSTAR); 2990 cpudata->cstar = rdmsr(MSR_CSTAR); 2991 cpudata->sfmask = rdmsr(MSR_SFMASK); 2992 2993 /* Install the RESET state. */ 2994 memcpy(&vcpu->comm->state, &nvmm_x86_reset_state, 2995 sizeof(nvmm_x86_reset_state)); 2996 vcpu->comm->state_wanted = NVMM_X64_STATE_ALL; 2997 vcpu->comm->state_cached = 0; 2998 vmx_vcpu_setstate(vcpu); 2999 3000 vmx_vmcs_leave(vcpu); 3001 } 3002 3003 static int 3004 vmx_vcpu_create(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 3005 { 3006 struct vmx_cpudata *cpudata; 3007 int error; 3008 3009 /* Allocate the VMX cpudata. */ 3010 cpudata = (struct vmx_cpudata *)uvm_km_alloc(kernel_map, 3011 roundup(sizeof(*cpudata), PAGE_SIZE), 0, 3012 UVM_KMF_WIRED|UVM_KMF_ZERO); 3013 if (cpudata == NULL) 3014 return ENOMEM; 3015 3016 vcpu->cpudata = cpudata; 3017 3018 /* VMCS */ 3019 error = vmx_memalloc(&cpudata->vmcs_pa, (vaddr_t *)&cpudata->vmcs, 3020 VMCS_NPAGES); 3021 if (error) 3022 goto error; 3023 3024 /* MSR Bitmap */ 3025 error = vmx_memalloc(&cpudata->msrbm_pa, (vaddr_t *)&cpudata->msrbm, 3026 MSRBM_NPAGES); 3027 if (error) 3028 goto error; 3029 3030 /* Guest MSR List */ 3031 error = vmx_memalloc(&cpudata->gmsr_pa, (vaddr_t *)&cpudata->gmsr, 1); 3032 if (error) 3033 goto error; 3034 3035 CPUMASK_ASSZERO(cpudata->htlb_want_flush); 3036 3037 /* Init the VCPU info. */ 3038 vmx_vcpu_init(mach, vcpu); 3039 3040 return 0; 3041 3042 error: 3043 if (cpudata->vmcs_pa) { 3044 vmx_memfree(cpudata->vmcs_pa, (vaddr_t)cpudata->vmcs, 3045 VMCS_NPAGES); 3046 } 3047 if (cpudata->msrbm_pa) { 3048 vmx_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, 3049 MSRBM_NPAGES); 3050 } 3051 if (cpudata->gmsr_pa) { 3052 vmx_memfree(cpudata->gmsr_pa, (vaddr_t)cpudata->gmsr, 1); 3053 } 3054 uvm_km_free(kernel_map, (vaddr_t)cpudata, 3055 roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED); 3056 return error; 3057 } 3058 3059 static void 3060 vmx_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 3061 { 3062 struct vmx_cpudata *cpudata = vcpu->cpudata; 3063 3064 vmx_vmcs_enter(vcpu); 3065 vmx_asid_free(vcpu); 3066 vmx_vmcs_destroy(vcpu); 3067 3068 #ifdef __NetBSD__ 3069 kcpuset_destroy(cpudata->htlb_want_flush); 3070 #endif 3071 3072 vmx_memfree(cpudata->vmcs_pa, (vaddr_t)cpudata->vmcs, VMCS_NPAGES); 3073 vmx_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, MSRBM_NPAGES); 3074 vmx_memfree(cpudata->gmsr_pa, (vaddr_t)cpudata->gmsr, 1); 3075 uvm_km_free(kernel_map, (vaddr_t)cpudata, 3076 roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED); 3077 } 3078 3079 /* -------------------------------------------------------------------------- */ 3080 3081 static int 3082 vmx_vcpu_configure_cpuid(struct vmx_cpudata *cpudata, void *data) 3083 { 3084 struct nvmm_vcpu_conf_cpuid *cpuid = data; 3085 size_t i; 3086 3087 if (__predict_false(cpuid->mask && cpuid->exit)) { 3088 return EINVAL; 3089 } 3090 if (__predict_false(cpuid->mask && 3091 ((cpuid->u.mask.set.eax & cpuid->u.mask.del.eax) || 3092 (cpuid->u.mask.set.ebx & cpuid->u.mask.del.ebx) || 3093 (cpuid->u.mask.set.ecx & cpuid->u.mask.del.ecx) || 3094 (cpuid->u.mask.set.edx & cpuid->u.mask.del.edx)))) { 3095 return EINVAL; 3096 } 3097 3098 /* If unset, delete, to restore the default behavior. */ 3099 if (!cpuid->mask && !cpuid->exit) { 3100 for (i = 0; i < VMX_NCPUIDS; i++) { 3101 if (!cpudata->cpuidpresent[i]) { 3102 continue; 3103 } 3104 if (cpudata->cpuid[i].leaf == cpuid->leaf) { 3105 cpudata->cpuidpresent[i] = false; 3106 } 3107 } 3108 return 0; 3109 } 3110 3111 /* If already here, replace. */ 3112 for (i = 0; i < VMX_NCPUIDS; i++) { 3113 if (!cpudata->cpuidpresent[i]) { 3114 continue; 3115 } 3116 if (cpudata->cpuid[i].leaf == cpuid->leaf) { 3117 memcpy(&cpudata->cpuid[i], cpuid, 3118 sizeof(struct nvmm_vcpu_conf_cpuid)); 3119 return 0; 3120 } 3121 } 3122 3123 /* Not here, insert. */ 3124 for (i = 0; i < VMX_NCPUIDS; i++) { 3125 if (!cpudata->cpuidpresent[i]) { 3126 cpudata->cpuidpresent[i] = true; 3127 memcpy(&cpudata->cpuid[i], cpuid, 3128 sizeof(struct nvmm_vcpu_conf_cpuid)); 3129 return 0; 3130 } 3131 } 3132 3133 return ENOBUFS; 3134 } 3135 3136 static int 3137 vmx_vcpu_configure_tpr(struct vmx_cpudata *cpudata, void *data) 3138 { 3139 struct nvmm_vcpu_conf_tpr *tpr = data; 3140 3141 memcpy(&cpudata->tpr, tpr, sizeof(*tpr)); 3142 return 0; 3143 } 3144 3145 static int 3146 vmx_vcpu_configure(struct nvmm_cpu *vcpu, uint64_t op, void *data) 3147 { 3148 struct vmx_cpudata *cpudata = vcpu->cpudata; 3149 3150 switch (op) { 3151 case NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID): 3152 return vmx_vcpu_configure_cpuid(cpudata, data); 3153 case NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_TPR): 3154 return vmx_vcpu_configure_tpr(cpudata, data); 3155 default: 3156 return EINVAL; 3157 } 3158 } 3159 3160 /* -------------------------------------------------------------------------- */ 3161 3162 static void 3163 vmx_tlb_flush(struct pmap *pm) 3164 { 3165 struct nvmm_machine *mach = pm->pm_data; 3166 struct vmx_machdata *machdata = mach->machdata; 3167 3168 atomic_inc_64(&machdata->mach_htlb_gen); 3169 3170 /* Generates IPIs, which cause #VMEXITs. */ 3171 pmap_tlb_shootdown(pmap_kernel(), -1, PTE_G, TLBSHOOT_UPDATE); 3172 } 3173 3174 static void 3175 vmx_machine_create(struct nvmm_machine *mach) 3176 { 3177 struct pmap *pmap = vmspace_pmap(mach->vm); 3178 struct vmx_machdata *machdata; 3179 3180 /* Convert to EPT. */ 3181 pmap_ept_transform(pmap); 3182 3183 /* Fill in pmap info. */ 3184 pmap->pm_data = (void *)mach; 3185 pmap->pm_tlb_flush = vmx_tlb_flush; 3186 3187 machdata = kmem_zalloc(sizeof(struct vmx_machdata), KM_SLEEP); 3188 mach->machdata = machdata; 3189 3190 /* Start with an hTLB flush everywhere. */ 3191 machdata->mach_htlb_gen = 1; 3192 } 3193 3194 static void 3195 vmx_machine_destroy(struct nvmm_machine *mach) 3196 { 3197 struct vmx_machdata *machdata = mach->machdata; 3198 3199 kmem_free(machdata, sizeof(struct vmx_machdata)); 3200 } 3201 3202 static int 3203 vmx_machine_configure(struct nvmm_machine *mach, uint64_t op, void *data) 3204 { 3205 panic("%s: impossible", __func__); 3206 } 3207 3208 /* -------------------------------------------------------------------------- */ 3209 3210 #define CTLS_ONE_ALLOWED(msrval, bitoff) \ 3211 ((msrval & __BIT(32 + bitoff)) != 0) 3212 #define CTLS_ZERO_ALLOWED(msrval, bitoff) \ 3213 ((msrval & __BIT(bitoff)) == 0) 3214 3215 static int 3216 vmx_check_ctls(uint64_t msr_ctls, uint64_t msr_true_ctls, uint64_t set_one) 3217 { 3218 uint64_t basic, val, true_val; 3219 bool has_true; 3220 size_t i; 3221 3222 basic = rdmsr(MSR_IA32_VMX_BASIC); 3223 has_true = (basic & IA32_VMX_BASIC_TRUE_CTLS) != 0; 3224 3225 val = rdmsr(msr_ctls); 3226 if (has_true) { 3227 true_val = rdmsr(msr_true_ctls); 3228 } else { 3229 true_val = val; 3230 } 3231 3232 for (i = 0; i < 32; i++) { 3233 if (!(set_one & __BIT(i))) { 3234 continue; 3235 } 3236 if (!CTLS_ONE_ALLOWED(true_val, i)) { 3237 return -1; 3238 } 3239 } 3240 3241 return 0; 3242 } 3243 3244 static int 3245 vmx_init_ctls(uint64_t msr_ctls, uint64_t msr_true_ctls, 3246 uint64_t set_one, uint64_t set_zero, uint64_t *res) 3247 { 3248 uint64_t basic, val, true_val; 3249 bool one_allowed, zero_allowed, has_true; 3250 size_t i; 3251 3252 basic = rdmsr(MSR_IA32_VMX_BASIC); 3253 has_true = (basic & IA32_VMX_BASIC_TRUE_CTLS) != 0; 3254 3255 val = rdmsr(msr_ctls); 3256 if (has_true) { 3257 true_val = rdmsr(msr_true_ctls); 3258 } else { 3259 true_val = val; 3260 } 3261 3262 for (i = 0; i < 32; i++) { 3263 one_allowed = CTLS_ONE_ALLOWED(true_val, i); 3264 zero_allowed = CTLS_ZERO_ALLOWED(true_val, i); 3265 3266 if (zero_allowed && !one_allowed) { 3267 if (set_one & __BIT(i)) 3268 return -1; 3269 *res &= ~__BIT(i); 3270 } else if (one_allowed && !zero_allowed) { 3271 if (set_zero & __BIT(i)) 3272 return -1; 3273 *res |= __BIT(i); 3274 } else { 3275 if (set_zero & __BIT(i)) { 3276 *res &= ~__BIT(i); 3277 } else if (set_one & __BIT(i)) { 3278 *res |= __BIT(i); 3279 } else if (!has_true) { 3280 *res &= ~__BIT(i); 3281 } else if (CTLS_ZERO_ALLOWED(val, i)) { 3282 *res &= ~__BIT(i); 3283 } else if (CTLS_ONE_ALLOWED(val, i)) { 3284 *res |= __BIT(i); 3285 } else { 3286 return -1; 3287 } 3288 } 3289 } 3290 3291 return 0; 3292 } 3293 3294 static bool 3295 vmx_ident(void) 3296 { 3297 uint64_t msr; 3298 int ret; 3299 3300 if (!(cpu_feature2 & CPUID2_VMX)) { 3301 return false; 3302 } 3303 3304 msr = rdmsr(MSR_IA32_FEATURE_CONTROL); 3305 if ((msr & IA32_FEATURE_CONTROL_LOCK) != 0 && 3306 (msr & IA32_FEATURE_CONTROL_OUT_SMX) == 0) { 3307 printf("NVMM: VMX disabled in BIOS\n"); 3308 return false; 3309 } 3310 3311 msr = rdmsr(MSR_IA32_VMX_BASIC); 3312 if ((msr & IA32_VMX_BASIC_IO_REPORT) == 0) { 3313 printf("NVMM: I/O reporting not supported\n"); 3314 return false; 3315 } 3316 if (__SHIFTOUT(msr, IA32_VMX_BASIC_MEM_TYPE) != MEM_TYPE_WB) { 3317 printf("NVMM: WB memory not supported\n"); 3318 return false; 3319 } 3320 3321 /* PG and PE are reported, even if Unrestricted Guests is supported. */ 3322 vmx_cr0_fixed0 = rdmsr(MSR_IA32_VMX_CR0_FIXED0) & ~(CR0_PG|CR0_PE); 3323 vmx_cr0_fixed1 = rdmsr(MSR_IA32_VMX_CR0_FIXED1) | (CR0_PG|CR0_PE); 3324 ret = vmx_check_cr(rcr0(), vmx_cr0_fixed0, vmx_cr0_fixed1); 3325 if (ret == -1) { 3326 printf("NVMM: CR0 requirements not satisfied\n"); 3327 return false; 3328 } 3329 3330 vmx_cr4_fixed0 = rdmsr(MSR_IA32_VMX_CR4_FIXED0); 3331 vmx_cr4_fixed1 = rdmsr(MSR_IA32_VMX_CR4_FIXED1); 3332 ret = vmx_check_cr(rcr4() | CR4_VMXE, vmx_cr4_fixed0, vmx_cr4_fixed1); 3333 if (ret == -1) { 3334 printf("NVMM: CR4 requirements not satisfied\n"); 3335 return false; 3336 } 3337 3338 /* Init the CTLSs right now, and check for errors. */ 3339 ret = vmx_init_ctls( 3340 MSR_IA32_VMX_PINBASED_CTLS, MSR_IA32_VMX_TRUE_PINBASED_CTLS, 3341 VMX_PINBASED_CTLS_ONE, VMX_PINBASED_CTLS_ZERO, 3342 &vmx_pinbased_ctls); 3343 if (ret == -1) { 3344 printf("NVMM: pin-based-ctls requirements not satisfied\n"); 3345 return false; 3346 } 3347 ret = vmx_init_ctls( 3348 MSR_IA32_VMX_PROCBASED_CTLS, MSR_IA32_VMX_TRUE_PROCBASED_CTLS, 3349 VMX_PROCBASED_CTLS_ONE, VMX_PROCBASED_CTLS_ZERO, 3350 &vmx_procbased_ctls); 3351 if (ret == -1) { 3352 printf("NVMM: proc-based-ctls requirements not satisfied\n"); 3353 return false; 3354 } 3355 ret = vmx_init_ctls( 3356 MSR_IA32_VMX_PROCBASED_CTLS2, MSR_IA32_VMX_PROCBASED_CTLS2, 3357 VMX_PROCBASED_CTLS2_ONE, VMX_PROCBASED_CTLS2_ZERO, 3358 &vmx_procbased_ctls2); 3359 if (ret == -1) { 3360 printf("NVMM: proc-based-ctls2 requirements not satisfied\n"); 3361 return false; 3362 } 3363 ret = vmx_check_ctls( 3364 MSR_IA32_VMX_PROCBASED_CTLS2, MSR_IA32_VMX_PROCBASED_CTLS2, 3365 PROC_CTLS2_INVPCID_ENABLE); 3366 if (ret != -1) { 3367 vmx_procbased_ctls2 |= PROC_CTLS2_INVPCID_ENABLE; 3368 } 3369 ret = vmx_init_ctls( 3370 MSR_IA32_VMX_ENTRY_CTLS, MSR_IA32_VMX_TRUE_ENTRY_CTLS, 3371 VMX_ENTRY_CTLS_ONE, VMX_ENTRY_CTLS_ZERO, 3372 &vmx_entry_ctls); 3373 if (ret == -1) { 3374 printf("NVMM: entry-ctls requirements not satisfied\n"); 3375 return false; 3376 } 3377 ret = vmx_init_ctls( 3378 MSR_IA32_VMX_EXIT_CTLS, MSR_IA32_VMX_TRUE_EXIT_CTLS, 3379 VMX_EXIT_CTLS_ONE, VMX_EXIT_CTLS_ZERO, 3380 &vmx_exit_ctls); 3381 if (ret == -1) { 3382 printf("NVMM: exit-ctls requirements not satisfied\n"); 3383 return false; 3384 } 3385 3386 msr = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 3387 if ((msr & IA32_VMX_EPT_VPID_WALKLENGTH_4) == 0) { 3388 printf("NVMM: 4-level page tree not supported\n"); 3389 return false; 3390 } 3391 if ((msr & IA32_VMX_EPT_VPID_INVEPT) == 0) { 3392 printf("NVMM: INVEPT not supported\n"); 3393 return false; 3394 } 3395 if ((msr & IA32_VMX_EPT_VPID_INVVPID) == 0) { 3396 printf("NVMM: INVVPID not supported\n"); 3397 return false; 3398 } 3399 if ((msr & IA32_VMX_EPT_VPID_FLAGS_AD) != 0) { 3400 pmap_ept_has_ad = true; 3401 } else { 3402 pmap_ept_has_ad = false; 3403 } 3404 if (!(msr & IA32_VMX_EPT_VPID_UC) && !(msr & IA32_VMX_EPT_VPID_WB)) { 3405 printf("NVMM: EPT UC/WB memory types not supported\n"); 3406 return false; 3407 } 3408 3409 return true; 3410 } 3411 3412 static void 3413 vmx_init_asid(uint32_t maxasid) 3414 { 3415 size_t allocsz; 3416 3417 mutex_init(&vmx_asidlock, MUTEX_DEFAULT, IPL_NONE); 3418 3419 vmx_maxasid = maxasid; 3420 allocsz = roundup(maxasid, 8) / 8; 3421 vmx_asidmap = kmem_zalloc(allocsz, KM_SLEEP); 3422 3423 /* ASID 0 is reserved for the host. */ 3424 vmx_asidmap[0] |= __BIT(0); 3425 } 3426 3427 static void 3428 vmx_change_cpu(void *arg1) 3429 { 3430 bool enable = arg1 != NULL; 3431 uint64_t msr, cr4; 3432 3433 if (enable) { 3434 msr = rdmsr(MSR_IA32_FEATURE_CONTROL); 3435 if ((msr & IA32_FEATURE_CONTROL_LOCK) == 0) { 3436 /* Lock now, with VMX-outside-SMX enabled. */ 3437 wrmsr(MSR_IA32_FEATURE_CONTROL, msr | 3438 IA32_FEATURE_CONTROL_LOCK | 3439 IA32_FEATURE_CONTROL_OUT_SMX); 3440 } 3441 } 3442 3443 if (!enable) { 3444 vmx_vmxoff(); 3445 } 3446 3447 cr4 = rcr4(); 3448 if (enable) { 3449 cr4 |= CR4_VMXE; 3450 } else { 3451 cr4 &= ~CR4_VMXE; 3452 } 3453 lcr4(cr4); 3454 3455 if (enable) { 3456 vmx_vmxon(&vmxoncpu[mycpuid].pa); 3457 } 3458 } 3459 3460 static void 3461 vmx_init_l1tf(void) 3462 { 3463 u_int descs[4]; 3464 uint64_t msr; 3465 3466 if (cpuid_level < 7) { 3467 return; 3468 } 3469 3470 x86_cpuid(7, descs); 3471 3472 if (descs[3] & CPUID_SEF_ARCH_CAP) { 3473 msr = rdmsr(MSR_IA32_ARCH_CAPABILITIES); 3474 if (msr & IA32_ARCH_SKIP_L1DFL_VMENTRY) { 3475 /* No mitigation needed. */ 3476 return; 3477 } 3478 } 3479 3480 if (descs[3] & CPUID_SEF_L1D_FLUSH) { 3481 /* Enable hardware mitigation. */ 3482 vmx_msrlist_entry_nmsr += 1; 3483 } 3484 } 3485 3486 static void 3487 vmx_init(void) 3488 { 3489 uint64_t msr; 3490 struct vmxon *vmxon; 3491 uint32_t revision; 3492 u_int descs[4]; 3493 paddr_t pa; 3494 vaddr_t va; 3495 int i, error; 3496 3497 /* Init the ASID bitmap (VPID). */ 3498 vmx_init_asid(VPID_MAX); 3499 3500 /* Init the XCR0 mask. */ 3501 vmx_xcr0_mask = VMX_XCR0_MASK_DEFAULT & x86_xsave_features; 3502 3503 /* Init the max basic CPUID leaf. */ 3504 vmx_cpuid_max_basic = uimin(cpuid_level, VMX_CPUID_MAX_BASIC); 3505 3506 /* Init the max extended CPUID leaf. */ 3507 x86_cpuid(0x80000000, descs); 3508 vmx_cpuid_max_extended = uimin(descs[0], VMX_CPUID_MAX_EXTENDED); 3509 3510 /* Init the TLB flush op, the EPT flush op and the EPTP type. */ 3511 msr = rdmsr(MSR_IA32_VMX_EPT_VPID_CAP); 3512 if ((msr & IA32_VMX_EPT_VPID_INVVPID_CONTEXT) != 0) { 3513 vmx_tlb_flush_op = VMX_INVVPID_CONTEXT; 3514 } else { 3515 vmx_tlb_flush_op = VMX_INVVPID_ALL; 3516 } 3517 if ((msr & IA32_VMX_EPT_VPID_INVEPT_CONTEXT) != 0) { 3518 vmx_ept_flush_op = VMX_INVEPT_CONTEXT; 3519 } else { 3520 vmx_ept_flush_op = VMX_INVEPT_ALL; 3521 } 3522 if ((msr & IA32_VMX_EPT_VPID_WB) != 0) { 3523 vmx_eptp_type = EPTP_TYPE_WB; 3524 } else { 3525 vmx_eptp_type = EPTP_TYPE_UC; 3526 } 3527 3528 /* Init the L1TF mitigation. */ 3529 vmx_init_l1tf(); 3530 3531 memset(vmxoncpu, 0, sizeof(vmxoncpu)); 3532 revision = vmx_get_revision(); 3533 3534 for (i = 0; i < ncpus; i++) { 3535 error = vmx_memalloc(&pa, &va, 1); 3536 if (error) { 3537 panic("%s: out of memory", __func__); 3538 } 3539 vmxoncpu[i].pa = pa; 3540 vmxoncpu[i].va = va; 3541 3542 vmxon = (struct vmxon *)vmxoncpu[i].va; 3543 vmxon->ident = __SHIFTIN(revision, VMXON_IDENT_REVISION); 3544 } 3545 3546 #ifdef __NetBSD__ 3547 uint64_t xc; 3548 xc = xc_broadcast(0, vmx_change_cpu, (void *)true, NULL); 3549 xc_wait(xc); 3550 #endif /* __NetBSD__ */ 3551 3552 lwkt_send_ipiq_mask(smp_active_mask, vmx_change_cpu, (void *)true); 3553 /* XXX: need any cpu fence ?? */ 3554 } 3555 3556 static void 3557 vmx_fini_asid(void) 3558 { 3559 size_t allocsz; 3560 3561 allocsz = roundup(vmx_maxasid, 8) / 8; 3562 kmem_free(vmx_asidmap, allocsz); 3563 3564 mutex_destroy(&vmx_asidlock); 3565 } 3566 3567 static void 3568 vmx_fini(void) 3569 { 3570 size_t i; 3571 3572 #ifdef __NetBSD__ 3573 uint64_t xc; 3574 xc = xc_broadcast(0, vmx_change_cpu, (void *)false, NULL); 3575 xc_wait(xc); 3576 #endif /* __NetBSD__ */ 3577 3578 lwkt_send_ipiq_mask(smp_active_mask, vmx_change_cpu, (void *)false); 3579 /* XXX: need any cpu fence ?? */ 3580 3581 for (i = 0; i < MAXCPUS; i++) { 3582 if (vmxoncpu[i].pa != 0) 3583 vmx_memfree(vmxoncpu[i].pa, vmxoncpu[i].va, 1); 3584 } 3585 3586 vmx_fini_asid(); 3587 } 3588 3589 static void 3590 vmx_capability(struct nvmm_capability *cap) 3591 { 3592 cap->arch.mach_conf_support = 0; 3593 cap->arch.vcpu_conf_support = 3594 NVMM_CAP_ARCH_VCPU_CONF_CPUID | 3595 NVMM_CAP_ARCH_VCPU_CONF_TPR; 3596 cap->arch.xcr0_mask = vmx_xcr0_mask; 3597 cap->arch.mxcsr_mask = x86_fpu_mxcsr_mask; 3598 cap->arch.conf_cpuid_maxops = VMX_NCPUIDS; 3599 } 3600 3601 const struct nvmm_impl nvmm_x86_vmx = { 3602 .name = "x86-vmx", 3603 .ident = vmx_ident, 3604 .init = vmx_init, 3605 .fini = vmx_fini, 3606 .capability = vmx_capability, 3607 .mach_conf_max = NVMM_X86_MACH_NCONF, 3608 .mach_conf_sizes = NULL, 3609 .vcpu_conf_max = NVMM_X86_VCPU_NCONF, 3610 .vcpu_conf_sizes = vmx_vcpu_conf_sizes, 3611 .state_size = sizeof(struct nvmm_x64_state), 3612 .machine_create = vmx_machine_create, 3613 .machine_destroy = vmx_machine_destroy, 3614 .machine_configure = vmx_machine_configure, 3615 .vcpu_create = vmx_vcpu_create, 3616 .vcpu_destroy = vmx_vcpu_destroy, 3617 .vcpu_configure = vmx_vcpu_configure, 3618 .vcpu_setstate = vmx_vcpu_setstate, 3619 .vcpu_getstate = vmx_vcpu_getstate, 3620 .vcpu_inject = vmx_vcpu_inject, 3621 .vcpu_run = vmx_vcpu_run 3622 }; 3623