/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 Oxide Computer Company
 */

#ifndef _VMM_H_
#define	_VMM_H_

enum vm_suspend_how {
	VM_SUSPEND_NONE,
	VM_SUSPEND_RESET,
	VM_SUSPEND_POWEROFF,
	VM_SUSPEND_HALT,
	VM_SUSPEND_TRIPLEFAULT,
	VM_SUSPEND_LAST
};

/*
 * Identifiers for architecturally defined registers.
 */
enum vm_reg_name {
	VM_REG_GUEST_RAX,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,
	VM_REG_GUEST_CR0,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,
	VM_REG_GUEST_ES,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,
	VM_REG_GUEST_EFER,
	VM_REG_GUEST_CR2,
	VM_REG_GUEST_PDPTE0,
	VM_REG_GUEST_PDPTE1,
	VM_REG_GUEST_PDPTE2,
	VM_REG_GUEST_PDPTE3,
	VM_REG_GUEST_INTR_SHADOW,
	VM_REG_GUEST_DR0,
	VM_REG_GUEST_DR1,
	VM_REG_GUEST_DR2,
	VM_REG_GUEST_DR3,
	VM_REG_GUEST_DR6,
	VM_REG_GUEST_ENTRY_INST_LENGTH,
	VM_REG_GUEST_XCR0,
	VM_REG_LAST
};

enum x2apic_state {
	X2APIC_DISABLED,
	X2APIC_ENABLED,
	X2APIC_STATE_LAST
};

/*
 * Layout of an "intinfo" value, as established by the masks and shifts below:
 *   bits  0-7	vector
 *   bits  8-10	type (one of the VM_INTINFO_HWINTR/NMI/HWEXCP/SWINTR values)
 *   bit   11	an error code accompanies the event
 *   bits 12-30	reserved
 *   bit   31	valid (an event is pending)
 *   bits 32+	error code
 */
#define	VM_INTINFO_MASK_VECTOR	0xffUL
#define	VM_INTINFO_MASK_TYPE	0x700UL
#define	VM_INTINFO_MASK_RSVD	0x7ffff000UL
#define	VM_INTINFO_SHIFT_ERRCODE	32

#define	VM_INTINFO_VECTOR(val)	((val) & VM_INTINFO_MASK_VECTOR)
#define	VM_INTINFO_TYPE(val)	((val) & VM_INTINFO_MASK_TYPE)
#define	VM_INTINFO_ERRCODE(val)	((val) >> VM_INTINFO_SHIFT_ERRCODE)
#define	VM_INTINFO_PENDING(val)	(((val) & VM_INTINFO_VALID) != 0)
#define	VM_INTINFO_HAS_ERRCODE(val) (((val) & VM_INTINFO_DEL_ERRCODE) != 0)

#define	VM_INTINFO_VALID	(1UL << 31)
#define	VM_INTINFO_DEL_ERRCODE	(1UL << 11)

#define	VM_INTINFO_HWINTR	(0 << 8)
#define	VM_INTINFO_NMI		(2 << 8)
#define	VM_INTINFO_HWEXCP	(3 << 8)
#define	VM_INTINFO_SWINTR	(4 << 8)
/* Reserved for CPU (read: Intel) specific types */
#define	VM_INTINFO_RESV1	(1 << 8)
#define	VM_INTINFO_RESV5	(5 << 8)
#define	VM_INTINFO_RESV6	(6 << 8)
#define	VM_INTINFO_RESV7	(7 << 8)

/*
 * illumos doesn't have a limitation based on SPECNAMELEN like FreeBSD does.
 * To simplify structure definitions, an arbitrary limit has been chosen.
 * This same limit is used for memory segment names
 */

#define	VM_MAX_NAMELEN		128
#define	VM_MAX_SEG_NAMELEN	128

#ifdef _KERNEL
#define	VM_MAXCPU	32	/* maximum virtual cpus */
#endif

/*
 * Identifiers for optional vmm capabilities
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT,
	VM_CAP_MTRAP_EXIT,
	VM_CAP_PAUSE_EXIT,
	VM_CAP_ENABLE_INVPCID,
	VM_CAP_BPT_EXIT,
	VM_CAP_MAX
};

enum vmx_caps {
	VMX_CAP_NONE		= 0,
	VMX_CAP_TPR_SHADOW	= (1UL << 0),
	VMX_CAP_APICV		= (1UL << 1),
	VMX_CAP_APICV_X2APIC	= (1UL << 2),
	VMX_CAP_APICV_PIR	= (1UL << 3),
};

enum vm_intr_trigger {
	EDGE_TRIGGER,
	LEVEL_TRIGGER
};

/*
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};
#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
#define	SEG_DESC_DPL_MASK		0x3
#define	SEG_DESC_DPL_SHIFT		5
#define	SEG_DESC_DPL(access)		\
	(((access) >> SEG_DESC_DPL_SHIFT) & SEG_DESC_DPL_MASK)
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) ? 1 : 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) ? 1 : 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) ? 1 : 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) ? 1 : 0)

enum vm_cpu_mode {
	CPU_MODE_REAL,
	CPU_MODE_PROTECTED,
	CPU_MODE_COMPATIBILITY,		/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT,			/* IA-32E mode (CS.L = 1) */
};

enum vm_paging_mode {
	PAGING_MODE_FLAT,
	PAGING_MODE_32,
	PAGING_MODE_PAE,
	PAGING_MODE_64,
};

struct vm_guest_paging {
	uint64_t	cr3;
	int		cpl;
	enum vm_cpu_mode cpu_mode;
	enum vm_paging_mode paging_mode;
};

enum vm_exitcode {
	VM_EXITCODE_INOUT,
	VM_EXITCODE_VMX,
	VM_EXITCODE_BOGUS,
	VM_EXITCODE_RDMSR,
	VM_EXITCODE_WRMSR,
	VM_EXITCODE_HLT,
	VM_EXITCODE_MTRAP,
	VM_EXITCODE_PAUSE,
	VM_EXITCODE_PAGING,
	VM_EXITCODE_INST_EMUL,
	VM_EXITCODE_RUN_STATE,
	VM_EXITCODE_MMIO_EMUL,
	VM_EXITCODE_DEPRECATED,	/* formerly RUNBLOCK */
	VM_EXITCODE_IOAPIC_EOI,
	VM_EXITCODE_SUSPENDED,
	VM_EXITCODE_MMIO,
	VM_EXITCODE_TASK_SWITCH,
	VM_EXITCODE_MONITOR,
	VM_EXITCODE_MWAIT,
	VM_EXITCODE_SVM,
	VM_EXITCODE_DEPRECATED2, /* formerly REQIDLE */
	VM_EXITCODE_DEBUG,
	VM_EXITCODE_VMINSN,
	VM_EXITCODE_BPT,
	VM_EXITCODE_HT,
	VM_EXITCODE_MAX
};

enum inout_flags {
	INOUT_IN	= (1U << 0), /* direction: 'in' when set, else 'out' */

	/*
	 * The following flags are used only for in-kernel emulation logic and
	 * are not exposed to userspace.
	 */
	INOUT_STR	= (1U << 1), /* ins/outs operation */
	INOUT_REP	= (1U << 2), /* 'rep' prefix present on instruction */
};

struct vm_inout {
	uint32_t	eax;
	uint16_t	port;
	uint8_t		bytes;		/* 1 or 2 or 4 */
	uint8_t		flags;		/* see: inout_flags */

	/*
	 * The address size and segment are relevant to INS/OUTS operations.
	 * Userspace is not concerned with them since the in-kernel emulation
	 * handles those specific aspects.
	 */
	uint8_t		addrsize;
	uint8_t		segment;
};

struct vm_mmio {
	uint8_t		bytes;		/* 1/2/4/8 bytes */
	uint8_t		read;		/* read: 1, write: 0 */
	uint16_t	_pad[3];
	uint64_t	gpa;
	uint64_t	data;
};

enum task_switch_reason {
	TSR_CALL,
	TSR_IRET,
	TSR_JMP,
	TSR_IDT_GATE,	/* task gate in IDT */
};

struct vm_task_switch {
	uint16_t	tsssel;		/* new TSS selector */
	int		ext;		/* task switch due to external event */
	uint32_t	errcode;
	int		errcode_valid;	/* push 'errcode' on the new stack */
	enum task_switch_reason reason;
	struct vm_guest_paging paging;
};

enum vcpu_run_state {
	VRS_HALT		= 0,
	VRS_INIT		= (1 << 0),
	VRS_RUN			= (1 << 1),

	VRS_PEND_INIT		= (1 << 14),
	VRS_PEND_SIPI		= (1 << 15),
};
/*
 * VRS_MASK_VALID() strips a run-state value down to the bits defined in
 * vcpu_run_state.  Both pending bits (VRS_PEND_INIT and VRS_PEND_SIPI) must be
 * part of the mask, otherwise a state carrying VRS_PEND_INIT would be rejected
 * by VRS_IS_VALID(), which is true only when no undefined bits are set.
 *
 * Note that VRS_IS_VALID() evaluates its argument twice.
 */
#define	VRS_MASK_VALID(v)	\
	((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))
#define	VRS_IS_VALID(v)		((v) == VRS_MASK_VALID(v))

struct vm_exit {
	enum vm_exitcode	exitcode;
	int			inst_length;	/* 0 means unknown */
	uint64_t		rip;
	union {
		struct vm_inout	inout;
		struct vm_mmio	mmio;
		struct {
			uint64_t	gpa;
			int		fault_type;
		} paging;
		/*
		 * Kernel-internal MMIO decoding and emulation.
		 * Userspace should not expect to see this, but rather a
		 * VM_EXITCODE_MMIO with the above 'mmio' context.
		 */
		struct {
			uint64_t	gpa;
			uint64_t	gla;
			uint64_t	cs_base;
			int		cs_d;		/* CS.D */
		} mmio_emul;
		struct {
			uint8_t		inst[15];
			uint8_t		num_valid;
		} inst_emul;
		/*
		 * VMX specific payload. Used when there is no "better"
		 * exitcode to represent the VM-exit.
		 */
		struct {
			int		status;		/* vmx inst status */
			/*
			 * 'exit_reason' and 'exit_qualification' are valid
			 * only if 'status' is zero.
			 */
			uint32_t	exit_reason;
			uint64_t	exit_qualification;
			/*
			 * 'inst_error' and 'inst_type' are valid
			 * only if 'status' is non-zero.
			 */
			int		inst_type;
			int		inst_error;
		} vmx;
		/*
		 * SVM specific payload.
		 */
		struct {
			uint64_t	exitcode;
			uint64_t	exitinfo1;
			uint64_t	exitinfo2;
		} svm;
		struct {
			int		inst_length;
		} bpt;
		struct {
			uint32_t	code;		/* ecx value */
			uint64_t	wval;
		} msr;
		struct {
			uint64_t	rflags;
		} hlt;
		struct {
			int		vector;
		} ioapic_eoi;
		struct {
			enum vm_suspend_how how;
			/*
			 * Source vcpuid for suspend status.  Typically -1,
			 * except for triple-fault events which occur on a
			 * specific faulting vCPU.
			 */
			int		source;
			/*
			 * When suspend status was set on VM, measured in
			 * nanoseconds since VM boot.
			 */
			uint64_t	when;
		} suspended;
		struct vm_task_switch task_switch;
	} u;
};

enum vm_entry_cmds {
	VEC_DEFAULT = 0,
	VEC_DISCARD_INSTR,	/* discard inst emul state */
	VEC_FULFILL_MMIO,	/* entry includes result for mmio emul */
	VEC_FULFILL_INOUT,	/* entry includes result for inout emul */

	/* Below are flags which can be combined with the above commands: */

	/*
	 * Exit to userspace when vCPU is in consistent state: when any pending
	 * instruction emulation tasks have been completed and committed to the
	 * architecturally defined state.
	 */
	VEC_FLAG_EXIT_CONSISTENT	= 1 << 31,
};

struct vm_entry {
	int		cpuid;
	unsigned int	cmd;		/* see: vm_entry_cmds */
	void		*exit_data;
	union {
		struct vm_inout	inout;
		struct vm_mmio	mmio;
	} u;
};

int vm_restart_instruction(void *vm, int vcpuid);

enum vm_create_flags {
	/*
	 * Allocate guest memory segments from existing reservoir capacity,
	 * rather than attempting to create transient allocations.
	 */
	VCF_RESERVOIR_MEM = (1 << 0),

	/*
	 * Enable dirty page tracking for the guest.
	 */
	VCF_TRACK_DIRTY = (1 << 1),
};

/*
 * Describes an entry for `cpuid` emulation.
 * Used internally by bhyve (kernel) in addition to exposed ioctl(2) interface.
 */
struct vcpu_cpuid_entry {
	uint32_t	vce_function;
	uint32_t	vce_index;
	uint32_t	vce_flags;
	uint32_t	vce_eax;
	uint32_t	vce_ebx;
	uint32_t	vce_ecx;
	uint32_t	vce_edx;
	uint32_t	_pad;
};

/*
 * Defined flags for vcpu_cpuid_entry`vce_flags are below.
 */

/* Use index (ecx) input value when matching entry */
#define	VCE_FLAG_MATCH_INDEX	(1 << 0)

/* All valid flags for vcpu_cpuid_entry`vce_flags */
#define	VCE_FLAGS_VALID		VCE_FLAG_MATCH_INDEX

/*
 * Defined flags for vcpu_cpuid configuration are below.
 * These are used by both the ioctl(2) interface via vm_vcpu_cpuid_config and
 * internally in the kernel vmm.
 */

/* Use legacy hard-coded cpuid masking tables applied to the host CPU */
#define	VCC_FLAG_LEGACY_HANDLING	(1 << 0)
/*
 * Emulate Intel-style fallback behavior (emit highest "standard" entry) if the
 * queried function/index do not match.  If not set, emulate AMD-style, where
 * all zeroes are returned in such cases.
 */
#define	VCC_FLAG_INTEL_FALLBACK		(1 << 1)

/* All valid flags for vm_vcpu_cpuid_config`vvcc_flags */
#define	VCC_FLAGS_VALID		\
	(VCC_FLAG_LEGACY_HANDLING | VCC_FLAG_INTEL_FALLBACK)

/* Maximum vcpu_cpuid_entry records per vCPU */
#define	VMM_MAX_CPUID_ENTRIES	256

#endif	/* _VMM_H_ */