1 /* $NetBSD: nvmm_x86_svm.c,v 1.46.4.13 2020/09/13 11:56:44 martin Exp $ */ 2 3 /* 4 * Copyright (c) 2018-2019 The NetBSD Foundation, Inc. 5 * All rights reserved. 6 * 7 * This code is derived from software contributed to The NetBSD Foundation 8 * by Maxime Villard. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 20 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 21 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 22 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 23 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 24 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 25 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 26 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 27 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 28 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 29 * POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 35 #include <sys/globaldata.h> 36 #include <sys/kernel.h> 37 #include <sys/malloc.h> /* contigmalloc, contigfree */ 38 #include <sys/thread2.h> /* lwkt_send_ipiq, lwkt_send_ipiq_mask */ 39 40 #include <vm/vm_map.h> 41 42 #include <machine/cputypes.h> /* CPU_VENDOR_* */ 43 #include <machine/md_var.h> /* cpu_*, amd_feature2 */ 44 #include <machine/specialreg.h> 45 46 #include <dev/virtual/nvmm/nvmm_compat.h> 47 #include <dev/virtual/nvmm/nvmm.h> 48 #include <dev/virtual/nvmm/nvmm_internal.h> 49 #include <dev/virtual/nvmm/x86/nvmm_x86.h> 50 51 int svm_vmrun(paddr_t, uint64_t *); 52 53 #define MSR_VM_HSAVE_PA 0xC0010117 54 55 /* -------------------------------------------------------------------------- */ 56 57 #define VMCB_EXITCODE_CR0_READ 0x0000 58 #define VMCB_EXITCODE_CR1_READ 0x0001 59 #define VMCB_EXITCODE_CR2_READ 0x0002 60 #define VMCB_EXITCODE_CR3_READ 0x0003 61 #define VMCB_EXITCODE_CR4_READ 0x0004 62 #define VMCB_EXITCODE_CR5_READ 0x0005 63 #define VMCB_EXITCODE_CR6_READ 0x0006 64 #define VMCB_EXITCODE_CR7_READ 0x0007 65 #define VMCB_EXITCODE_CR8_READ 0x0008 66 #define VMCB_EXITCODE_CR9_READ 0x0009 67 #define VMCB_EXITCODE_CR10_READ 0x000A 68 #define VMCB_EXITCODE_CR11_READ 0x000B 69 #define VMCB_EXITCODE_CR12_READ 0x000C 70 #define VMCB_EXITCODE_CR13_READ 0x000D 71 #define VMCB_EXITCODE_CR14_READ 0x000E 72 #define VMCB_EXITCODE_CR15_READ 0x000F 73 #define VMCB_EXITCODE_CR0_WRITE 0x0010 74 #define VMCB_EXITCODE_CR1_WRITE 0x0011 75 #define VMCB_EXITCODE_CR2_WRITE 0x0012 76 #define VMCB_EXITCODE_CR3_WRITE 0x0013 77 #define VMCB_EXITCODE_CR4_WRITE 0x0014 78 #define VMCB_EXITCODE_CR5_WRITE 0x0015 79 #define VMCB_EXITCODE_CR6_WRITE 0x0016 80 #define VMCB_EXITCODE_CR7_WRITE 0x0017 81 #define VMCB_EXITCODE_CR8_WRITE 0x0018 82 #define VMCB_EXITCODE_CR9_WRITE 0x0019 83 #define VMCB_EXITCODE_CR10_WRITE 0x001A 84 #define VMCB_EXITCODE_CR11_WRITE 0x001B 85 #define VMCB_EXITCODE_CR12_WRITE 0x001C 86 #define VMCB_EXITCODE_CR13_WRITE 0x001D 87 #define VMCB_EXITCODE_CR14_WRITE 0x001E 88 #define VMCB_EXITCODE_CR15_WRITE 0x001F 89 #define VMCB_EXITCODE_DR0_READ 0x0020 90 #define VMCB_EXITCODE_DR1_READ 0x0021 91 #define VMCB_EXITCODE_DR2_READ 0x0022 92 #define VMCB_EXITCODE_DR3_READ 0x0023 93 #define VMCB_EXITCODE_DR4_READ 0x0024 94 #define VMCB_EXITCODE_DR5_READ 0x0025 95 #define VMCB_EXITCODE_DR6_READ 0x0026 96 #define VMCB_EXITCODE_DR7_READ 0x0027 97 #define VMCB_EXITCODE_DR8_READ 0x0028 98 #define VMCB_EXITCODE_DR9_READ 0x0029 99 #define VMCB_EXITCODE_DR10_READ 0x002A 100 #define VMCB_EXITCODE_DR11_READ 0x002B 101 #define VMCB_EXITCODE_DR12_READ 0x002C 102 #define VMCB_EXITCODE_DR13_READ 0x002D 103 #define VMCB_EXITCODE_DR14_READ 0x002E 104 #define VMCB_EXITCODE_DR15_READ 0x002F 105 #define VMCB_EXITCODE_DR0_WRITE 0x0030 106 #define VMCB_EXITCODE_DR1_WRITE 0x0031 107 #define VMCB_EXITCODE_DR2_WRITE 0x0032 108 #define VMCB_EXITCODE_DR3_WRITE 0x0033 109 #define VMCB_EXITCODE_DR4_WRITE 0x0034 110 #define VMCB_EXITCODE_DR5_WRITE 0x0035 111 #define VMCB_EXITCODE_DR6_WRITE 0x0036 112 #define VMCB_EXITCODE_DR7_WRITE 0x0037 113 #define VMCB_EXITCODE_DR8_WRITE 0x0038 114 #define VMCB_EXITCODE_DR9_WRITE 0x0039 115 #define VMCB_EXITCODE_DR10_WRITE 0x003A 116 #define VMCB_EXITCODE_DR11_WRITE 0x003B 117 #define VMCB_EXITCODE_DR12_WRITE 0x003C 118 #define VMCB_EXITCODE_DR13_WRITE 0x003D 119 #define VMCB_EXITCODE_DR14_WRITE 0x003E 120 #define VMCB_EXITCODE_DR15_WRITE 0x003F 121 #define VMCB_EXITCODE_EXCP0 0x0040 122 #define VMCB_EXITCODE_EXCP1 0x0041 123 #define VMCB_EXITCODE_EXCP2 0x0042 124 #define VMCB_EXITCODE_EXCP3 0x0043 125 #define VMCB_EXITCODE_EXCP4 0x0044 126 #define VMCB_EXITCODE_EXCP5 0x0045 127 #define VMCB_EXITCODE_EXCP6 0x0046 128 #define VMCB_EXITCODE_EXCP7 0x0047 129 #define VMCB_EXITCODE_EXCP8 0x0048 130 #define VMCB_EXITCODE_EXCP9 0x0049 131 #define VMCB_EXITCODE_EXCP10 0x004A 132 #define VMCB_EXITCODE_EXCP11 0x004B 133 #define VMCB_EXITCODE_EXCP12 0x004C 134 #define VMCB_EXITCODE_EXCP13 0x004D 135 #define VMCB_EXITCODE_EXCP14 0x004E 136 #define VMCB_EXITCODE_EXCP15 0x004F 137 #define VMCB_EXITCODE_EXCP16 0x0050 138 #define VMCB_EXITCODE_EXCP17 0x0051 139 #define VMCB_EXITCODE_EXCP18 0x0052 140 #define VMCB_EXITCODE_EXCP19 0x0053 141 #define VMCB_EXITCODE_EXCP20 0x0054 142 #define VMCB_EXITCODE_EXCP21 0x0055 143 #define VMCB_EXITCODE_EXCP22 0x0056 144 #define VMCB_EXITCODE_EXCP23 0x0057 145 #define VMCB_EXITCODE_EXCP24 0x0058 146 #define VMCB_EXITCODE_EXCP25 0x0059 147 #define VMCB_EXITCODE_EXCP26 0x005A 148 #define VMCB_EXITCODE_EXCP27 0x005B 149 #define VMCB_EXITCODE_EXCP28 0x005C 150 #define VMCB_EXITCODE_EXCP29 0x005D 151 #define VMCB_EXITCODE_EXCP30 0x005E 152 #define VMCB_EXITCODE_EXCP31 0x005F 153 #define VMCB_EXITCODE_INTR 0x0060 154 #define VMCB_EXITCODE_NMI 0x0061 155 #define VMCB_EXITCODE_SMI 0x0062 156 #define VMCB_EXITCODE_INIT 0x0063 157 #define VMCB_EXITCODE_VINTR 0x0064 158 #define VMCB_EXITCODE_CR0_SEL_WRITE 0x0065 159 #define VMCB_EXITCODE_IDTR_READ 0x0066 160 #define VMCB_EXITCODE_GDTR_READ 0x0067 161 #define VMCB_EXITCODE_LDTR_READ 0x0068 162 #define VMCB_EXITCODE_TR_READ 0x0069 163 #define VMCB_EXITCODE_IDTR_WRITE 0x006A 164 #define VMCB_EXITCODE_GDTR_WRITE 0x006B 165 #define VMCB_EXITCODE_LDTR_WRITE 0x006C 166 #define VMCB_EXITCODE_TR_WRITE 0x006D 167 #define VMCB_EXITCODE_RDTSC 0x006E 168 #define VMCB_EXITCODE_RDPMC 0x006F 169 #define VMCB_EXITCODE_PUSHF 0x0070 170 #define VMCB_EXITCODE_POPF 0x0071 171 #define VMCB_EXITCODE_CPUID 0x0072 172 #define VMCB_EXITCODE_RSM 0x0073 173 #define VMCB_EXITCODE_IRET 0x0074 174 #define VMCB_EXITCODE_SWINT 0x0075 175 #define VMCB_EXITCODE_INVD 0x0076 176 #define VMCB_EXITCODE_PAUSE 0x0077 177 #define VMCB_EXITCODE_HLT 0x0078 178 #define VMCB_EXITCODE_INVLPG 0x0079 179 #define VMCB_EXITCODE_INVLPGA 0x007A 180 #define VMCB_EXITCODE_IOIO 0x007B 181 #define VMCB_EXITCODE_MSR 0x007C 182 #define VMCB_EXITCODE_TASK_SWITCH 0x007D 183 #define VMCB_EXITCODE_FERR_FREEZE 0x007E 184 #define VMCB_EXITCODE_SHUTDOWN 0x007F 185 #define VMCB_EXITCODE_VMRUN 0x0080 186 #define VMCB_EXITCODE_VMMCALL 0x0081 187 #define VMCB_EXITCODE_VMLOAD 0x0082 188 #define VMCB_EXITCODE_VMSAVE 0x0083 189 #define VMCB_EXITCODE_STGI 0x0084 190 #define VMCB_EXITCODE_CLGI 0x0085 191 #define VMCB_EXITCODE_SKINIT 0x0086 192 #define VMCB_EXITCODE_RDTSCP 0x0087 193 #define VMCB_EXITCODE_ICEBP 0x0088 194 #define VMCB_EXITCODE_WBINVD 0x0089 195 #define VMCB_EXITCODE_MONITOR 0x008A 196 #define VMCB_EXITCODE_MWAIT 0x008B 197 #define VMCB_EXITCODE_MWAIT_CONDITIONAL 0x008C 198 #define VMCB_EXITCODE_XSETBV 0x008D 199 #define VMCB_EXITCODE_RDPRU 0x008E 200 #define VMCB_EXITCODE_EFER_WRITE_TRAP 0x008F 201 #define VMCB_EXITCODE_CR0_WRITE_TRAP 0x0090 202 #define VMCB_EXITCODE_CR1_WRITE_TRAP 0x0091 203 #define VMCB_EXITCODE_CR2_WRITE_TRAP 0x0092 204 #define VMCB_EXITCODE_CR3_WRITE_TRAP 0x0093 205 #define VMCB_EXITCODE_CR4_WRITE_TRAP 0x0094 206 #define VMCB_EXITCODE_CR5_WRITE_TRAP 0x0095 207 #define VMCB_EXITCODE_CR6_WRITE_TRAP 0x0096 208 #define VMCB_EXITCODE_CR7_WRITE_TRAP 0x0097 209 #define VMCB_EXITCODE_CR8_WRITE_TRAP 0x0098 210 #define VMCB_EXITCODE_CR9_WRITE_TRAP 0x0099 211 #define VMCB_EXITCODE_CR10_WRITE_TRAP 0x009A 212 #define VMCB_EXITCODE_CR11_WRITE_TRAP 0x009B 213 #define VMCB_EXITCODE_CR12_WRITE_TRAP 0x009C 214 #define VMCB_EXITCODE_CR13_WRITE_TRAP 0x009D 215 #define VMCB_EXITCODE_CR14_WRITE_TRAP 0x009E 216 #define VMCB_EXITCODE_CR15_WRITE_TRAP 0x009F 217 #define VMCB_EXITCODE_INVLPGB 0x00A0 218 #define VMCB_EXITCODE_INVLPGB_ILLEGAL 0x00A1 219 #define VMCB_EXITCODE_INVPCID 0x00A2 220 #define VMCB_EXITCODE_MCOMMIT 0x00A3 221 #define VMCB_EXITCODE_TLBSYNC 0x00A4 222 #define VMCB_EXITCODE_NPF 0x0400 223 #define VMCB_EXITCODE_AVIC_INCOMP_IPI 0x0401 224 #define VMCB_EXITCODE_AVIC_NOACCEL 0x0402 225 #define VMCB_EXITCODE_VMGEXIT 0x0403 226 #define VMCB_EXITCODE_BUSY -2ULL 227 #define VMCB_EXITCODE_INVALID -1ULL 228 229 /* -------------------------------------------------------------------------- */ 230 231 struct vmcb_ctrl { 232 uint32_t intercept_cr; 233 #define VMCB_CTRL_INTERCEPT_RCR(x) __BIT( 0 + x) 234 #define VMCB_CTRL_INTERCEPT_WCR(x) __BIT(16 + x) 235 236 uint32_t intercept_dr; 237 #define VMCB_CTRL_INTERCEPT_RDR(x) __BIT( 0 + x) 238 #define VMCB_CTRL_INTERCEPT_WDR(x) __BIT(16 + x) 239 240 uint32_t intercept_vec; 241 #define VMCB_CTRL_INTERCEPT_VEC(x) __BIT(x) 242 243 uint32_t intercept_misc1; 244 #define VMCB_CTRL_INTERCEPT_INTR __BIT(0) 245 #define VMCB_CTRL_INTERCEPT_NMI __BIT(1) 246 #define VMCB_CTRL_INTERCEPT_SMI __BIT(2) 247 #define VMCB_CTRL_INTERCEPT_INIT __BIT(3) 248 #define VMCB_CTRL_INTERCEPT_VINTR __BIT(4) 249 #define VMCB_CTRL_INTERCEPT_CR0_SPEC __BIT(5) 250 #define VMCB_CTRL_INTERCEPT_RIDTR __BIT(6) 251 #define VMCB_CTRL_INTERCEPT_RGDTR __BIT(7) 252 #define VMCB_CTRL_INTERCEPT_RLDTR __BIT(8) 253 #define VMCB_CTRL_INTERCEPT_RTR __BIT(9) 254 #define VMCB_CTRL_INTERCEPT_WIDTR __BIT(10) 255 #define VMCB_CTRL_INTERCEPT_WGDTR __BIT(11) 256 #define VMCB_CTRL_INTERCEPT_WLDTR __BIT(12) 257 #define VMCB_CTRL_INTERCEPT_WTR __BIT(13) 258 #define VMCB_CTRL_INTERCEPT_RDTSC __BIT(14) 259 #define VMCB_CTRL_INTERCEPT_RDPMC __BIT(15) 260 #define VMCB_CTRL_INTERCEPT_PUSHF __BIT(16) 261 #define VMCB_CTRL_INTERCEPT_POPF __BIT(17) 262 #define VMCB_CTRL_INTERCEPT_CPUID __BIT(18) 263 #define VMCB_CTRL_INTERCEPT_RSM __BIT(19) 264 #define VMCB_CTRL_INTERCEPT_IRET __BIT(20) 265 #define VMCB_CTRL_INTERCEPT_INTN __BIT(21) 266 #define VMCB_CTRL_INTERCEPT_INVD __BIT(22) 267 #define VMCB_CTRL_INTERCEPT_PAUSE __BIT(23) 268 #define VMCB_CTRL_INTERCEPT_HLT __BIT(24) 269 #define VMCB_CTRL_INTERCEPT_INVLPG __BIT(25) 270 #define VMCB_CTRL_INTERCEPT_INVLPGA __BIT(26) 271 #define VMCB_CTRL_INTERCEPT_IOIO_PROT __BIT(27) 272 #define VMCB_CTRL_INTERCEPT_MSR_PROT __BIT(28) 273 #define VMCB_CTRL_INTERCEPT_TASKSW __BIT(29) 274 #define VMCB_CTRL_INTERCEPT_FERR_FREEZE __BIT(30) 275 #define VMCB_CTRL_INTERCEPT_SHUTDOWN __BIT(31) 276 277 uint32_t intercept_misc2; 278 #define VMCB_CTRL_INTERCEPT_VMRUN __BIT(0) 279 #define VMCB_CTRL_INTERCEPT_VMMCALL __BIT(1) 280 #define VMCB_CTRL_INTERCEPT_VMLOAD __BIT(2) 281 #define VMCB_CTRL_INTERCEPT_VMSAVE __BIT(3) 282 #define VMCB_CTRL_INTERCEPT_STGI __BIT(4) 283 #define VMCB_CTRL_INTERCEPT_CLGI __BIT(5) 284 #define VMCB_CTRL_INTERCEPT_SKINIT __BIT(6) 285 #define VMCB_CTRL_INTERCEPT_RDTSCP __BIT(7) 286 #define VMCB_CTRL_INTERCEPT_ICEBP __BIT(8) 287 #define VMCB_CTRL_INTERCEPT_WBINVD __BIT(9) 288 #define VMCB_CTRL_INTERCEPT_MONITOR __BIT(10) 289 #define VMCB_CTRL_INTERCEPT_MWAIT __BIT(11) 290 #define VMCB_CTRL_INTERCEPT_MWAIT_ARMED __BIT(12) 291 #define VMCB_CTRL_INTERCEPT_XSETBV __BIT(13) 292 #define VMCB_CTRL_INTERCEPT_RDPRU __BIT(14) 293 #define VMCB_CTRL_INTERCEPT_EFER_SPEC __BIT(15) 294 #define VMCB_CTRL_INTERCEPT_WCR_SPEC(x) __BIT(16 + x) 295 296 uint32_t intercept_misc3; 297 #define VMCB_CTRL_INTERCEPT_INVLPGB_ALL __BIT(0) 298 #define VMCB_CTRL_INTERCEPT_INVLPGB_ILL __BIT(1) 299 #define VMCB_CTRL_INTERCEPT_PCID __BIT(2) 300 #define VMCB_CTRL_INTERCEPT_MCOMMIT __BIT(3) 301 #define VMCB_CTRL_INTERCEPT_TLBSYNC __BIT(4) 302 303 uint8_t rsvd1[36]; 304 uint16_t pause_filt_thresh; 305 uint16_t pause_filt_cnt; 306 uint64_t iopm_base_pa; 307 uint64_t msrpm_base_pa; 308 uint64_t tsc_offset; 309 uint32_t guest_asid; 310 311 uint32_t tlb_ctrl; 312 #define VMCB_CTRL_TLB_CTRL_FLUSH_ALL 0x01 313 #define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST 0x03 314 #define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST_NONGLOBAL 0x07 315 316 uint64_t v; 317 #define VMCB_CTRL_V_TPR __BITS(3,0) 318 #define VMCB_CTRL_V_IRQ __BIT(8) 319 #define VMCB_CTRL_V_VGIF __BIT(9) 320 #define VMCB_CTRL_V_INTR_PRIO __BITS(19,16) 321 #define VMCB_CTRL_V_IGN_TPR __BIT(20) 322 #define VMCB_CTRL_V_INTR_MASKING __BIT(24) 323 #define VMCB_CTRL_V_GUEST_VGIF __BIT(25) 324 #define VMCB_CTRL_V_AVIC_EN __BIT(31) 325 #define VMCB_CTRL_V_INTR_VECTOR __BITS(39,32) 326 327 uint64_t intr; 328 #define VMCB_CTRL_INTR_SHADOW __BIT(0) 329 #define VMCB_CTRL_INTR_MASK __BIT(1) 330 331 uint64_t exitcode; 332 uint64_t exitinfo1; 333 uint64_t exitinfo2; 334 335 uint64_t exitintinfo; 336 #define VMCB_CTRL_EXITINTINFO_VECTOR __BITS(7,0) 337 #define VMCB_CTRL_EXITINTINFO_TYPE __BITS(10,8) 338 #define VMCB_CTRL_EXITINTINFO_EV __BIT(11) 339 #define VMCB_CTRL_EXITINTINFO_V __BIT(31) 340 #define VMCB_CTRL_EXITINTINFO_ERRORCODE __BITS(63,32) 341 342 uint64_t enable1; 343 #define VMCB_CTRL_ENABLE_NP __BIT(0) 344 #define VMCB_CTRL_ENABLE_SEV __BIT(1) 345 #define VMCB_CTRL_ENABLE_ES_SEV __BIT(2) 346 #define VMCB_CTRL_ENABLE_GMET __BIT(3) 347 #define VMCB_CTRL_ENABLE_VTE __BIT(5) 348 349 uint64_t avic; 350 #define VMCB_CTRL_AVIC_APIC_BAR __BITS(51,0) 351 352 uint64_t ghcb; 353 354 uint64_t eventinj; 355 #define VMCB_CTRL_EVENTINJ_VECTOR __BITS(7,0) 356 #define VMCB_CTRL_EVENTINJ_TYPE __BITS(10,8) 357 #define VMCB_CTRL_EVENTINJ_EV __BIT(11) 358 #define VMCB_CTRL_EVENTINJ_V __BIT(31) 359 #define VMCB_CTRL_EVENTINJ_ERRORCODE __BITS(63,32) 360 361 uint64_t n_cr3; 362 363 uint64_t enable2; 364 #define VMCB_CTRL_ENABLE_LBR __BIT(0) 365 #define VMCB_CTRL_ENABLE_VVMSAVE __BIT(1) 366 367 uint32_t vmcb_clean; 368 #define VMCB_CTRL_VMCB_CLEAN_I __BIT(0) 369 #define VMCB_CTRL_VMCB_CLEAN_IOPM __BIT(1) 370 #define VMCB_CTRL_VMCB_CLEAN_ASID __BIT(2) 371 #define VMCB_CTRL_VMCB_CLEAN_TPR __BIT(3) 372 #define VMCB_CTRL_VMCB_CLEAN_NP __BIT(4) 373 #define VMCB_CTRL_VMCB_CLEAN_CR __BIT(5) 374 #define VMCB_CTRL_VMCB_CLEAN_DR __BIT(6) 375 #define VMCB_CTRL_VMCB_CLEAN_DT __BIT(7) 376 #define VMCB_CTRL_VMCB_CLEAN_SEG __BIT(8) 377 #define VMCB_CTRL_VMCB_CLEAN_CR2 __BIT(9) 378 #define VMCB_CTRL_VMCB_CLEAN_LBR __BIT(10) 379 #define VMCB_CTRL_VMCB_CLEAN_AVIC __BIT(11) 380 381 uint32_t rsvd2; 382 uint64_t nrip; 383 uint8_t inst_len; 384 uint8_t inst_bytes[15]; 385 uint64_t avic_abpp; 386 uint64_t rsvd3; 387 uint64_t avic_ltp; 388 389 uint64_t avic_phys; 390 #define VMCB_CTRL_AVIC_PHYS_TABLE_PTR __BITS(51,12) 391 #define VMCB_CTRL_AVIC_PHYS_MAX_INDEX __BITS(7,0) 392 393 uint64_t rsvd4; 394 uint64_t vmsa_ptr; 395 396 uint8_t pad[752]; 397 } __packed; 398 399 CTASSERT(sizeof(struct vmcb_ctrl) == 1024); 400 401 struct vmcb_segment { 402 uint16_t selector; 403 uint16_t attrib; /* hidden */ 404 uint32_t limit; /* hidden */ 405 uint64_t base; /* hidden */ 406 } __packed; 407 408 CTASSERT(sizeof(struct vmcb_segment) == 16); 409 410 struct vmcb_state { 411 struct vmcb_segment es; 412 struct vmcb_segment cs; 413 struct vmcb_segment ss; 414 struct vmcb_segment ds; 415 struct vmcb_segment fs; 416 struct vmcb_segment gs; 417 struct vmcb_segment gdt; 418 struct vmcb_segment ldt; 419 struct vmcb_segment idt; 420 struct vmcb_segment tr; 421 uint8_t rsvd1[43]; 422 uint8_t cpl; 423 uint8_t rsvd2[4]; 424 uint64_t efer; 425 uint8_t rsvd3[112]; 426 uint64_t cr4; 427 uint64_t cr3; 428 uint64_t cr0; 429 uint64_t dr7; 430 uint64_t dr6; 431 uint64_t rflags; 432 uint64_t rip; 433 uint8_t rsvd4[88]; 434 uint64_t rsp; 435 uint8_t rsvd5[24]; 436 uint64_t rax; 437 uint64_t star; 438 uint64_t lstar; 439 uint64_t cstar; 440 uint64_t sfmask; 441 uint64_t kernelgsbase; 442 uint64_t sysenter_cs; 443 uint64_t sysenter_esp; 444 uint64_t sysenter_eip; 445 uint64_t cr2; 446 uint8_t rsvd6[32]; 447 uint64_t g_pat; 448 uint64_t dbgctl; 449 uint64_t br_from; 450 uint64_t br_to; 451 uint64_t int_from; 452 uint64_t int_to; 453 uint8_t pad[2408]; 454 } __packed; 455 456 CTASSERT(sizeof(struct vmcb_state) == 0xC00); 457 458 struct vmcb { 459 struct vmcb_ctrl ctrl; 460 struct vmcb_state state; 461 } __packed; 462 463 CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); 464 CTASSERT(offsetof(struct vmcb, state) == 0x400); 465 466 /* -------------------------------------------------------------------------- */ 467 468 static void svm_vcpu_state_provide(struct nvmm_cpu *, uint64_t); 469 static void svm_vcpu_state_commit(struct nvmm_cpu *); 470 471 struct svm_hsave { 472 paddr_t pa; 473 }; 474 475 static struct svm_hsave hsave[MAXCPUS]; 476 477 static uint8_t *svm_asidmap __read_mostly; 478 static uint32_t svm_maxasid __read_mostly; 479 static kmutex_t svm_asidlock __cacheline_aligned; 480 481 static bool svm_decode_assist __read_mostly; 482 static uint32_t svm_ctrl_tlb_flush __read_mostly; 483 484 #define SVM_XCR0_MASK_DEFAULT (XCR0_X87|XCR0_SSE) 485 static uint64_t svm_xcr0_mask __read_mostly; 486 487 #define SVM_NCPUIDS 32 488 489 #define VMCB_NPAGES 1 490 491 #define MSRBM_NPAGES 2 492 #define MSRBM_SIZE (MSRBM_NPAGES * PAGE_SIZE) 493 494 #define IOBM_NPAGES 3 495 #define IOBM_SIZE (IOBM_NPAGES * PAGE_SIZE) 496 497 /* Does not include EFER_LMSLE. */ 498 #define EFER_VALID \ 499 (EFER_SCE|EFER_LME|EFER_LMA|EFER_NXE|EFER_SVME|EFER_FFXSR|EFER_TCE) 500 501 #define EFER_TLB_FLUSH \ 502 (EFER_NXE|EFER_LMA|EFER_LME) 503 #define CR0_TLB_FLUSH \ 504 (CR0_PG|CR0_WP|CR0_CD|CR0_NW) 505 #define CR4_TLB_FLUSH \ 506 (CR4_PSE|CR4_PAE|CR4_PGE|CR4_PCIDE|CR4_SMEP) 507 508 /* -------------------------------------------------------------------------- */ 509 510 struct svm_machdata { 511 volatile uint64_t mach_htlb_gen; 512 }; 513 514 static const size_t svm_vcpu_conf_sizes[NVMM_X86_VCPU_NCONF] = { 515 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID)] = 516 sizeof(struct nvmm_vcpu_conf_cpuid), 517 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_TPR)] = 518 sizeof(struct nvmm_vcpu_conf_tpr) 519 }; 520 521 struct svm_cpudata { 522 /* General */ 523 bool shared_asid; 524 bool gtlb_want_flush; 525 bool gtsc_want_update; 526 uint64_t vcpu_htlb_gen; 527 528 /* VMCB */ 529 struct vmcb *vmcb; 530 paddr_t vmcb_pa; 531 532 /* I/O bitmap */ 533 uint8_t *iobm; 534 paddr_t iobm_pa; 535 536 /* MSR bitmap */ 537 uint8_t *msrbm; 538 paddr_t msrbm_pa; 539 540 /* Host state */ 541 uint64_t hxcr0; 542 uint64_t star; 543 uint64_t lstar; 544 uint64_t cstar; 545 uint64_t sfmask; 546 uint64_t fsbase; 547 uint64_t kernelgsbase; 548 bool ts_set; 549 mcontext_t hmctx; 550 551 /* Intr state */ 552 bool int_window_exit; 553 bool nmi_window_exit; 554 bool evt_pending; 555 556 /* Guest state */ 557 uint64_t gxcr0; 558 uint64_t gprs[NVMM_X64_NGPR]; 559 uint64_t drs[NVMM_X64_NDR]; 560 uint64_t gtsc; 561 union savefpu gfpu __aligned(64); 562 563 /* VCPU configuration. */ 564 bool cpuidpresent[SVM_NCPUIDS]; 565 struct nvmm_vcpu_conf_cpuid cpuid[SVM_NCPUIDS]; 566 }; 567 568 static void 569 svm_vmcb_cache_default(struct vmcb *vmcb) 570 { 571 vmcb->ctrl.vmcb_clean = 572 VMCB_CTRL_VMCB_CLEAN_I | 573 VMCB_CTRL_VMCB_CLEAN_IOPM | 574 VMCB_CTRL_VMCB_CLEAN_ASID | 575 VMCB_CTRL_VMCB_CLEAN_TPR | 576 VMCB_CTRL_VMCB_CLEAN_NP | 577 VMCB_CTRL_VMCB_CLEAN_CR | 578 VMCB_CTRL_VMCB_CLEAN_DR | 579 VMCB_CTRL_VMCB_CLEAN_DT | 580 VMCB_CTRL_VMCB_CLEAN_SEG | 581 VMCB_CTRL_VMCB_CLEAN_CR2 | 582 VMCB_CTRL_VMCB_CLEAN_LBR | 583 VMCB_CTRL_VMCB_CLEAN_AVIC; 584 } 585 586 static void 587 svm_vmcb_cache_update(struct vmcb *vmcb, uint64_t flags) 588 { 589 if (flags & NVMM_X64_STATE_SEGS) { 590 vmcb->ctrl.vmcb_clean &= 591 ~(VMCB_CTRL_VMCB_CLEAN_SEG | VMCB_CTRL_VMCB_CLEAN_DT); 592 } 593 if (flags & NVMM_X64_STATE_CRS) { 594 vmcb->ctrl.vmcb_clean &= 595 ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_CR2 | 596 VMCB_CTRL_VMCB_CLEAN_TPR); 597 } 598 if (flags & NVMM_X64_STATE_DRS) { 599 vmcb->ctrl.vmcb_clean &= ~VMCB_CTRL_VMCB_CLEAN_DR; 600 } 601 if (flags & NVMM_X64_STATE_MSRS) { 602 /* CR for EFER, NP for PAT. */ 603 vmcb->ctrl.vmcb_clean &= 604 ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_NP); 605 } 606 } 607 608 static inline void 609 svm_vmcb_cache_flush(struct vmcb *vmcb, uint64_t flags) 610 { 611 vmcb->ctrl.vmcb_clean &= ~flags; 612 } 613 614 static inline void 615 svm_vmcb_cache_flush_all(struct vmcb *vmcb) 616 { 617 vmcb->ctrl.vmcb_clean = 0; 618 } 619 620 #define SVM_EVENT_TYPE_HW_INT 0 621 #define SVM_EVENT_TYPE_NMI 2 622 #define SVM_EVENT_TYPE_EXC 3 623 #define SVM_EVENT_TYPE_SW_INT 4 624 625 static void 626 svm_event_waitexit_enable(struct nvmm_cpu *vcpu, bool nmi) 627 { 628 struct svm_cpudata *cpudata = vcpu->cpudata; 629 struct vmcb *vmcb = cpudata->vmcb; 630 631 if (nmi) { 632 vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_IRET; 633 cpudata->nmi_window_exit = true; 634 } else { 635 vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_VINTR; 636 vmcb->ctrl.v |= (VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR); 637 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR); 638 cpudata->int_window_exit = true; 639 } 640 641 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 642 } 643 644 static void 645 svm_event_waitexit_disable(struct nvmm_cpu *vcpu, bool nmi) 646 { 647 struct svm_cpudata *cpudata = vcpu->cpudata; 648 struct vmcb *vmcb = cpudata->vmcb; 649 650 if (nmi) { 651 vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_IRET; 652 cpudata->nmi_window_exit = false; 653 } else { 654 vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_VINTR; 655 vmcb->ctrl.v &= ~(VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR); 656 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR); 657 cpudata->int_window_exit = false; 658 } 659 660 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 661 } 662 663 static inline bool 664 svm_excp_has_rf(uint8_t vector) 665 { 666 switch (vector) { 667 case 1: /* #DB */ 668 case 4: /* #OF */ 669 case 8: /* #DF */ 670 case 18: /* #MC */ 671 return false; 672 default: 673 return true; 674 } 675 } 676 677 static inline int 678 svm_excp_has_error(uint8_t vector) 679 { 680 switch (vector) { 681 case 8: /* #DF */ 682 case 10: /* #TS */ 683 case 11: /* #NP */ 684 case 12: /* #SS */ 685 case 13: /* #GP */ 686 case 14: /* #PF */ 687 case 17: /* #AC */ 688 case 30: /* #SX */ 689 return 1; 690 default: 691 return 0; 692 } 693 } 694 695 static int 696 svm_vcpu_inject(struct nvmm_cpu *vcpu) 697 { 698 struct nvmm_comm_page *comm = vcpu->comm; 699 struct svm_cpudata *cpudata = vcpu->cpudata; 700 struct vmcb *vmcb = cpudata->vmcb; 701 u_int evtype; 702 uint8_t vector; 703 uint64_t error; 704 int type = 0, err = 0; 705 706 evtype = comm->event.type; 707 vector = comm->event.vector; 708 error = comm->event.u.excp.error; 709 __insn_barrier(); 710 711 switch (evtype) { 712 case NVMM_VCPU_EVENT_EXCP: 713 type = SVM_EVENT_TYPE_EXC; 714 if (vector == 2 || vector >= 32) 715 return EINVAL; 716 if (vector == 3 || vector == 0) 717 return EINVAL; 718 if (svm_excp_has_rf(vector)) { 719 vmcb->state.rflags |= PSL_RF; 720 } 721 err = svm_excp_has_error(vector); 722 break; 723 case NVMM_VCPU_EVENT_INTR: 724 type = SVM_EVENT_TYPE_HW_INT; 725 if (vector == 2) { 726 type = SVM_EVENT_TYPE_NMI; 727 svm_event_waitexit_enable(vcpu, true); 728 } 729 err = 0; 730 break; 731 default: 732 return EINVAL; 733 } 734 735 vmcb->ctrl.eventinj = 736 __SHIFTIN((uint64_t)vector, VMCB_CTRL_EVENTINJ_VECTOR) | 737 __SHIFTIN((uint64_t)type, VMCB_CTRL_EVENTINJ_TYPE) | 738 __SHIFTIN((uint64_t)err, VMCB_CTRL_EVENTINJ_EV) | 739 __SHIFTIN((uint64_t)1, VMCB_CTRL_EVENTINJ_V) | 740 __SHIFTIN((uint64_t)error, VMCB_CTRL_EVENTINJ_ERRORCODE); 741 742 cpudata->evt_pending = true; 743 744 return 0; 745 } 746 747 static void 748 svm_inject_ud(struct nvmm_cpu *vcpu) 749 { 750 struct nvmm_comm_page *comm = vcpu->comm; 751 int ret __diagused; 752 753 comm->event.type = NVMM_VCPU_EVENT_EXCP; 754 comm->event.vector = 6; 755 comm->event.u.excp.error = 0; 756 757 ret = svm_vcpu_inject(vcpu); 758 KASSERT(ret == 0); 759 } 760 761 static void 762 svm_inject_gp(struct nvmm_cpu *vcpu) 763 { 764 struct nvmm_comm_page *comm = vcpu->comm; 765 int ret __diagused; 766 767 comm->event.type = NVMM_VCPU_EVENT_EXCP; 768 comm->event.vector = 13; 769 comm->event.u.excp.error = 0; 770 771 ret = svm_vcpu_inject(vcpu); 772 KASSERT(ret == 0); 773 } 774 775 static inline int 776 svm_vcpu_event_commit(struct nvmm_cpu *vcpu) 777 { 778 if (__predict_true(!vcpu->comm->event_commit)) { 779 return 0; 780 } 781 vcpu->comm->event_commit = false; 782 return svm_vcpu_inject(vcpu); 783 } 784 785 static inline void 786 svm_inkernel_advance(struct vmcb *vmcb) 787 { 788 /* 789 * Maybe we should also apply single-stepping and debug exceptions. 790 * Matters for guest-ring3, because it can execute 'cpuid' under a 791 * debugger. 792 */ 793 vmcb->state.rip = vmcb->ctrl.nrip; 794 vmcb->state.rflags &= ~PSL_RF; 795 vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW; 796 } 797 798 #define SVM_CPUID_MAX_BASIC 0xD 799 #define SVM_CPUID_MAX_HYPERVISOR 0x40000000 800 #define SVM_CPUID_MAX_EXTENDED 0x8000001F 801 static uint32_t svm_cpuid_max_basic __read_mostly; 802 static uint32_t svm_cpuid_max_extended __read_mostly; 803 804 static void 805 svm_inkernel_exec_cpuid(struct svm_cpudata *cpudata, uint64_t eax, uint64_t ecx) 806 { 807 u_int descs[4]; 808 809 x86_cpuid2(eax, ecx, descs); 810 cpudata->vmcb->state.rax = descs[0]; 811 cpudata->gprs[NVMM_X64_GPR_RBX] = descs[1]; 812 cpudata->gprs[NVMM_X64_GPR_RCX] = descs[2]; 813 cpudata->gprs[NVMM_X64_GPR_RDX] = descs[3]; 814 } 815 816 static void 817 svm_inkernel_handle_cpuid(struct nvmm_cpu *vcpu, uint64_t eax, uint64_t ecx) 818 { 819 struct svm_cpudata *cpudata = vcpu->cpudata; 820 uint64_t cr4; 821 822 if (eax < 0x40000000) { 823 if (__predict_false(eax > svm_cpuid_max_basic)) { 824 eax = svm_cpuid_max_basic; 825 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 826 } 827 } else if (eax < 0x80000000) { 828 if (__predict_false(eax > SVM_CPUID_MAX_HYPERVISOR)) { 829 eax = svm_cpuid_max_basic; 830 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 831 } 832 } else { 833 if (__predict_false(eax > svm_cpuid_max_extended)) { 834 eax = svm_cpuid_max_basic; 835 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 836 } 837 } 838 839 switch (eax) { 840 case 0x00000000: 841 cpudata->vmcb->state.rax = svm_cpuid_max_basic; 842 break; 843 case 0x00000001: 844 cpudata->vmcb->state.rax &= nvmm_cpuid_00000001.eax; 845 846 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~CPUID_LOCAL_APIC_ID; 847 cpudata->gprs[NVMM_X64_GPR_RBX] |= __SHIFTIN(vcpu->cpuid, 848 CPUID_LOCAL_APIC_ID); 849 850 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000001.ecx; 851 cpudata->gprs[NVMM_X64_GPR_RCX] |= CPUID2_RAZ; 852 853 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000001.edx; 854 855 /* CPUID2_OSXSAVE depends on CR4. */ 856 cr4 = cpudata->vmcb->state.cr4; 857 if (!(cr4 & CR4_OSXSAVE)) { 858 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~CPUID2_OSXSAVE; 859 } 860 break; 861 case 0x00000002: /* Empty */ 862 case 0x00000003: /* Empty */ 863 case 0x00000004: /* Empty */ 864 case 0x00000005: /* Monitor/MWait */ 865 case 0x00000006: /* Power Management Related Features */ 866 cpudata->vmcb->state.rax = 0; 867 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 868 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 869 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 870 break; 871 case 0x00000007: /* Structured Extended Features */ 872 switch (ecx) { 873 case 0: 874 cpudata->vmcb->state.rax = 0; 875 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_00000007.ebx; 876 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000007.ecx; 877 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000007.edx; 878 break; 879 default: 880 cpudata->vmcb->state.rax = 0; 881 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 882 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 883 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 884 break; 885 } 886 break; 887 case 0x00000008: /* Empty */ 888 case 0x00000009: /* Empty */ 889 case 0x0000000A: /* Empty */ 890 case 0x0000000B: /* Empty */ 891 case 0x0000000C: /* Empty */ 892 cpudata->vmcb->state.rax = 0; 893 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 894 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 895 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 896 break; 897 case 0x0000000D: /* Processor Extended State Enumeration */ 898 if (svm_xcr0_mask == 0) { 899 break; 900 } 901 switch (ecx) { 902 case 0: 903 cpudata->vmcb->state.rax = svm_xcr0_mask & 0xFFFFFFFF; 904 if (cpudata->gxcr0 & XCR0_SSE) { 905 cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct savexmm64); 906 } else { 907 cpudata->gprs[NVMM_X64_GPR_RBX] = sizeof(struct save87); 908 } 909 cpudata->gprs[NVMM_X64_GPR_RBX] += 64; /* XSAVE header */ 910 cpudata->gprs[NVMM_X64_GPR_RCX] = sizeof(struct savexmm64) + 64; 911 cpudata->gprs[NVMM_X64_GPR_RDX] = svm_xcr0_mask >> 32; 912 break; 913 case 1: 914 cpudata->vmcb->state.rax &= 915 (CPUID_PES1_XSAVEOPT | CPUID_PES1_XSAVEC | 916 CPUID_PES1_XGETBV); 917 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 918 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 919 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 920 break; 921 default: 922 cpudata->vmcb->state.rax = 0; 923 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 924 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 925 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 926 break; 927 } 928 break; 929 930 case 0x40000000: /* Hypervisor Information */ 931 cpudata->vmcb->state.rax = SVM_CPUID_MAX_HYPERVISOR; 932 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 933 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 934 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 935 memcpy(&cpudata->gprs[NVMM_X64_GPR_RBX], "___ ", 4); 936 memcpy(&cpudata->gprs[NVMM_X64_GPR_RCX], "NVMM", 4); 937 memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4); 938 break; 939 940 case 0x80000000: 941 cpudata->vmcb->state.rax = svm_cpuid_max_extended; 942 break; 943 case 0x80000001: 944 cpudata->vmcb->state.rax &= nvmm_cpuid_80000001.eax; 945 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000001.ebx; 946 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000001.ecx; 947 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000001.edx; 948 break; 949 case 0x80000002: /* Extended Processor Name String */ 950 case 0x80000003: /* Extended Processor Name String */ 951 case 0x80000004: /* Extended Processor Name String */ 952 case 0x80000005: /* L1 Cache and TLB Information */ 953 case 0x80000006: /* L2 Cache and TLB and L3 Cache Information */ 954 break; 955 case 0x80000007: /* Processor Power Management and RAS Capabilities */ 956 cpudata->vmcb->state.rax &= nvmm_cpuid_80000007.eax; 957 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000007.ebx; 958 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000007.ecx; 959 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000007.edx; 960 break; 961 case 0x80000008: /* Processor Capacity Parameters and Ext Feat Ident */ 962 cpudata->vmcb->state.rax &= nvmm_cpuid_80000008.eax; 963 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000008.ebx; 964 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000008.ecx; 965 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000008.edx; 966 break; 967 case 0x80000009: /* Empty */ 968 case 0x8000000A: /* SVM Features */ 969 case 0x8000000B: /* Empty */ 970 case 0x8000000C: /* Empty */ 971 case 0x8000000D: /* Empty */ 972 case 0x8000000E: /* Empty */ 973 case 0x8000000F: /* Empty */ 974 case 0x80000010: /* Empty */ 975 case 0x80000011: /* Empty */ 976 case 0x80000012: /* Empty */ 977 case 0x80000013: /* Empty */ 978 case 0x80000014: /* Empty */ 979 case 0x80000015: /* Empty */ 980 case 0x80000016: /* Empty */ 981 case 0x80000017: /* Empty */ 982 case 0x80000018: /* Empty */ 983 cpudata->vmcb->state.rax = 0; 984 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 985 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 986 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 987 break; 988 case 0x80000019: /* TLB Characteristics for 1GB pages */ 989 case 0x8000001A: /* Instruction Optimizations */ 990 break; 991 case 0x8000001B: /* Instruction-Based Sampling Capabilities */ 992 case 0x8000001C: /* Lightweight Profiling Capabilities */ 993 cpudata->vmcb->state.rax = 0; 994 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 995 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 996 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 997 break; 998 case 0x8000001D: /* Cache Topology Information */ 999 case 0x8000001E: /* Processor Topology Information */ 1000 break; /* TODO? */ 1001 case 0x8000001F: /* Encrypted Memory Capabilities */ 1002 cpudata->vmcb->state.rax = 0; 1003 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1004 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1005 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1006 break; 1007 1008 default: 1009 break; 1010 } 1011 } 1012 1013 static void 1014 svm_exit_insn(struct vmcb *vmcb, struct nvmm_vcpu_exit *exit, uint64_t reason) 1015 { 1016 exit->u.insn.npc = vmcb->ctrl.nrip; 1017 exit->reason = reason; 1018 } 1019 1020 static void 1021 svm_exit_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1022 struct nvmm_vcpu_exit *exit) 1023 { 1024 struct svm_cpudata *cpudata = vcpu->cpudata; 1025 struct nvmm_vcpu_conf_cpuid *cpuid; 1026 uint64_t eax, ecx; 1027 size_t i; 1028 1029 eax = cpudata->vmcb->state.rax; 1030 ecx = cpudata->gprs[NVMM_X64_GPR_RCX]; 1031 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 1032 svm_inkernel_handle_cpuid(vcpu, eax, ecx); 1033 1034 for (i = 0; i < SVM_NCPUIDS; i++) { 1035 if (!cpudata->cpuidpresent[i]) { 1036 continue; 1037 } 1038 cpuid = &cpudata->cpuid[i]; 1039 if (cpuid->leaf != eax) { 1040 continue; 1041 } 1042 1043 if (cpuid->exit) { 1044 svm_exit_insn(cpudata->vmcb, exit, NVMM_VCPU_EXIT_CPUID); 1045 return; 1046 } 1047 KASSERT(cpuid->mask); 1048 1049 /* del */ 1050 cpudata->vmcb->state.rax &= ~cpuid->u.mask.del.eax; 1051 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~cpuid->u.mask.del.ebx; 1052 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~cpuid->u.mask.del.ecx; 1053 cpudata->gprs[NVMM_X64_GPR_RDX] &= ~cpuid->u.mask.del.edx; 1054 1055 /* set */ 1056 cpudata->vmcb->state.rax |= cpuid->u.mask.set.eax; 1057 cpudata->gprs[NVMM_X64_GPR_RBX] |= cpuid->u.mask.set.ebx; 1058 cpudata->gprs[NVMM_X64_GPR_RCX] |= cpuid->u.mask.set.ecx; 1059 cpudata->gprs[NVMM_X64_GPR_RDX] |= cpuid->u.mask.set.edx; 1060 1061 break; 1062 } 1063 1064 svm_inkernel_advance(cpudata->vmcb); 1065 exit->reason = NVMM_VCPU_EXIT_NONE; 1066 } 1067 1068 static void 1069 svm_exit_hlt(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1070 struct nvmm_vcpu_exit *exit) 1071 { 1072 struct svm_cpudata *cpudata = vcpu->cpudata; 1073 struct vmcb *vmcb = cpudata->vmcb; 1074 1075 if (cpudata->int_window_exit && (vmcb->state.rflags & PSL_I)) { 1076 svm_event_waitexit_disable(vcpu, false); 1077 } 1078 1079 svm_inkernel_advance(cpudata->vmcb); 1080 exit->reason = NVMM_VCPU_EXIT_HALTED; 1081 } 1082 1083 #define SVM_EXIT_IO_PORT __BITS(31,16) 1084 #define SVM_EXIT_IO_SEG __BITS(12,10) 1085 #define SVM_EXIT_IO_A64 __BIT(9) 1086 #define SVM_EXIT_IO_A32 __BIT(8) 1087 #define SVM_EXIT_IO_A16 __BIT(7) 1088 #define SVM_EXIT_IO_SZ32 __BIT(6) 1089 #define SVM_EXIT_IO_SZ16 __BIT(5) 1090 #define SVM_EXIT_IO_SZ8 __BIT(4) 1091 #define SVM_EXIT_IO_REP __BIT(3) 1092 #define SVM_EXIT_IO_STR __BIT(2) 1093 #define SVM_EXIT_IO_IN __BIT(0) 1094 1095 static void 1096 svm_exit_io(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1097 struct nvmm_vcpu_exit *exit) 1098 { 1099 struct svm_cpudata *cpudata = vcpu->cpudata; 1100 uint64_t info = cpudata->vmcb->ctrl.exitinfo1; 1101 uint64_t nextpc = cpudata->vmcb->ctrl.exitinfo2; 1102 1103 exit->reason = NVMM_VCPU_EXIT_IO; 1104 1105 exit->u.io.in = (info & SVM_EXIT_IO_IN) != 0; 1106 exit->u.io.port = __SHIFTOUT(info, SVM_EXIT_IO_PORT); 1107 1108 if (svm_decode_assist) { 1109 KASSERT(__SHIFTOUT(info, SVM_EXIT_IO_SEG) < 6); 1110 exit->u.io.seg = __SHIFTOUT(info, SVM_EXIT_IO_SEG); 1111 } else { 1112 exit->u.io.seg = -1; 1113 } 1114 1115 if (info & SVM_EXIT_IO_A64) { 1116 exit->u.io.address_size = 8; 1117 } else if (info & SVM_EXIT_IO_A32) { 1118 exit->u.io.address_size = 4; 1119 } else if (info & SVM_EXIT_IO_A16) { 1120 exit->u.io.address_size = 2; 1121 } 1122 1123 if (info & SVM_EXIT_IO_SZ32) { 1124 exit->u.io.operand_size = 4; 1125 } else if (info & SVM_EXIT_IO_SZ16) { 1126 exit->u.io.operand_size = 2; 1127 } else if (info & SVM_EXIT_IO_SZ8) { 1128 exit->u.io.operand_size = 1; 1129 } 1130 1131 exit->u.io.rep = (info & SVM_EXIT_IO_REP) != 0; 1132 exit->u.io.str = (info & SVM_EXIT_IO_STR) != 0; 1133 exit->u.io.npc = nextpc; 1134 1135 svm_vcpu_state_provide(vcpu, 1136 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 1137 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 1138 } 1139 1140 static const uint64_t msr_ignore_list[] = { 1141 0xc0010055, /* MSR_CMPHALT */ 1142 MSR_DE_CFG, 1143 MSR_IC_CFG, 1144 MSR_UCODE_AMD_PATCHLEVEL 1145 }; 1146 1147 static bool 1148 svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1149 struct nvmm_vcpu_exit *exit) 1150 { 1151 struct svm_cpudata *cpudata = vcpu->cpudata; 1152 struct vmcb *vmcb = cpudata->vmcb; 1153 uint64_t val; 1154 size_t i; 1155 1156 if (exit->reason == NVMM_VCPU_EXIT_RDMSR) { 1157 if (exit->u.rdmsr.msr == MSR_EFER) { 1158 val = vmcb->state.efer & ~EFER_SVME; 1159 vmcb->state.rax = (val & 0xFFFFFFFF); 1160 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1161 goto handled; 1162 } 1163 if (exit->u.rdmsr.msr == MSR_NB_CFG) { 1164 val = NB_CFG_INITAPICCPUIDLO; 1165 vmcb->state.rax = (val & 0xFFFFFFFF); 1166 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1167 goto handled; 1168 } 1169 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1170 if (msr_ignore_list[i] != exit->u.rdmsr.msr) 1171 continue; 1172 val = 0; 1173 vmcb->state.rax = (val & 0xFFFFFFFF); 1174 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1175 goto handled; 1176 } 1177 } else { 1178 if (exit->u.wrmsr.msr == MSR_EFER) { 1179 if (__predict_false(exit->u.wrmsr.val & ~EFER_VALID)) { 1180 goto error; 1181 } 1182 if ((vmcb->state.efer ^ exit->u.wrmsr.val) & 1183 EFER_TLB_FLUSH) { 1184 cpudata->gtlb_want_flush = true; 1185 } 1186 vmcb->state.efer = exit->u.wrmsr.val | EFER_SVME; 1187 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_CR); 1188 goto handled; 1189 } 1190 if (exit->u.wrmsr.msr == MSR_TSC) { 1191 cpudata->gtsc = exit->u.wrmsr.val; 1192 cpudata->gtsc_want_update = true; 1193 goto handled; 1194 } 1195 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1196 if (msr_ignore_list[i] != exit->u.wrmsr.msr) 1197 continue; 1198 goto handled; 1199 } 1200 } 1201 1202 return false; 1203 1204 handled: 1205 svm_inkernel_advance(cpudata->vmcb); 1206 return true; 1207 1208 error: 1209 svm_inject_gp(vcpu); 1210 return true; 1211 } 1212 1213 static inline void 1214 svm_exit_rdmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1215 struct nvmm_vcpu_exit *exit) 1216 { 1217 struct svm_cpudata *cpudata = vcpu->cpudata; 1218 1219 exit->reason = NVMM_VCPU_EXIT_RDMSR; 1220 exit->u.rdmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1221 exit->u.rdmsr.npc = cpudata->vmcb->ctrl.nrip; 1222 1223 if (svm_inkernel_handle_msr(mach, vcpu, exit)) { 1224 exit->reason = NVMM_VCPU_EXIT_NONE; 1225 return; 1226 } 1227 1228 svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1229 } 1230 1231 static inline void 1232 svm_exit_wrmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1233 struct nvmm_vcpu_exit *exit) 1234 { 1235 struct svm_cpudata *cpudata = vcpu->cpudata; 1236 uint64_t rdx, rax; 1237 1238 rdx = cpudata->gprs[NVMM_X64_GPR_RDX]; 1239 rax = cpudata->vmcb->state.rax; 1240 1241 exit->reason = NVMM_VCPU_EXIT_WRMSR; 1242 exit->u.wrmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1243 exit->u.wrmsr.val = (rdx << 32) | (rax & 0xFFFFFFFF); 1244 exit->u.wrmsr.npc = cpudata->vmcb->ctrl.nrip; 1245 1246 if (svm_inkernel_handle_msr(mach, vcpu, exit)) { 1247 exit->reason = NVMM_VCPU_EXIT_NONE; 1248 return; 1249 } 1250 1251 svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1252 } 1253 1254 static void 1255 svm_exit_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1256 struct nvmm_vcpu_exit *exit) 1257 { 1258 struct svm_cpudata *cpudata = vcpu->cpudata; 1259 uint64_t info = cpudata->vmcb->ctrl.exitinfo1; 1260 1261 if (info == 0) { 1262 svm_exit_rdmsr(mach, vcpu, exit); 1263 } else { 1264 svm_exit_wrmsr(mach, vcpu, exit); 1265 } 1266 } 1267 1268 static void 1269 svm_exit_npf(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1270 struct nvmm_vcpu_exit *exit) 1271 { 1272 struct svm_cpudata *cpudata = vcpu->cpudata; 1273 gpaddr_t gpa = cpudata->vmcb->ctrl.exitinfo2; 1274 1275 exit->reason = NVMM_VCPU_EXIT_MEMORY; 1276 if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_W) 1277 exit->u.mem.prot = PROT_WRITE; 1278 else if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_I) 1279 exit->u.mem.prot = PROT_EXEC; 1280 else 1281 exit->u.mem.prot = PROT_READ; 1282 exit->u.mem.gpa = gpa; 1283 exit->u.mem.inst_len = cpudata->vmcb->ctrl.inst_len; 1284 memcpy(exit->u.mem.inst_bytes, cpudata->vmcb->ctrl.inst_bytes, 1285 sizeof(exit->u.mem.inst_bytes)); 1286 1287 svm_vcpu_state_provide(vcpu, 1288 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 1289 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 1290 } 1291 1292 static void 1293 svm_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1294 struct nvmm_vcpu_exit *exit) 1295 { 1296 struct svm_cpudata *cpudata = vcpu->cpudata; 1297 struct vmcb *vmcb = cpudata->vmcb; 1298 uint64_t val; 1299 1300 exit->reason = NVMM_VCPU_EXIT_NONE; 1301 1302 val = (cpudata->gprs[NVMM_X64_GPR_RDX] << 32) | 1303 (vmcb->state.rax & 0xFFFFFFFF); 1304 1305 if (__predict_false(cpudata->gprs[NVMM_X64_GPR_RCX] != 0)) { 1306 goto error; 1307 } else if (__predict_false(vmcb->state.cpl != 0)) { 1308 goto error; 1309 } else if (__predict_false((val & ~svm_xcr0_mask) != 0)) { 1310 goto error; 1311 } else if (__predict_false((val & XCR0_X87) == 0)) { 1312 goto error; 1313 } 1314 1315 cpudata->gxcr0 = val; 1316 1317 svm_inkernel_advance(cpudata->vmcb); 1318 return; 1319 1320 error: 1321 svm_inject_gp(vcpu); 1322 } 1323 1324 static void 1325 svm_exit_invalid(struct nvmm_vcpu_exit *exit, uint64_t code) 1326 { 1327 exit->u.inv.hwcode = code; 1328 exit->reason = NVMM_VCPU_EXIT_INVALID; 1329 } 1330 1331 /* -------------------------------------------------------------------------- */ 1332 1333 static void 1334 svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu) 1335 { 1336 struct svm_cpudata *cpudata = vcpu->cpudata; 1337 1338 cpudata->ts_set = (rcr0() & CR0_TS) != 0; 1339 1340 #ifdef __NetBSD__ 1341 fpu_area_save(&cpudata->hfpu, svm_xcr0_mask); 1342 fpu_area_restore(&cpudata->gfpu, svm_xcr0_mask); 1343 #else /* DragonFly */ 1344 /* 1345 * NOTE: Host FPU state depends on whether the user program used the 1346 * FPU or not. Need to use npxpush()/npxpop() to handle this. 1347 */ 1348 npxpush(&cpudata->hmctx); 1349 clts(); 1350 fpurstor(&cpudata->gfpu, svm_xcr0_mask); 1351 #endif 1352 1353 if (svm_xcr0_mask != 0) { 1354 cpudata->hxcr0 = rdxcr(0); 1355 wrxcr(0, cpudata->gxcr0); 1356 } 1357 } 1358 1359 static void 1360 svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu) 1361 { 1362 struct svm_cpudata *cpudata = vcpu->cpudata; 1363 1364 if (svm_xcr0_mask != 0) { 1365 cpudata->gxcr0 = rdxcr(0); 1366 wrxcr(0, cpudata->hxcr0); 1367 } 1368 1369 #ifdef __NetBSD__ 1370 fpu_area_save(&cpudata->gfpu, svm_xcr0_mask); 1371 fpu_area_restore(&cpudata->hfpu, svm_xcr0_mask); 1372 #else /* DragonFly */ 1373 fpusave(&cpudata->gfpu, svm_xcr0_mask); 1374 stts(); 1375 npxpop(&cpudata->hmctx); 1376 #endif 1377 1378 if (cpudata->ts_set) { 1379 stts(); 1380 } 1381 } 1382 1383 static void 1384 svm_vcpu_guest_dbregs_enter(struct nvmm_cpu *vcpu) 1385 { 1386 struct svm_cpudata *cpudata = vcpu->cpudata; 1387 1388 x86_dbregs_save(curlwp); 1389 1390 ldr7(0); 1391 1392 ldr0(cpudata->drs[NVMM_X64_DR_DR0]); 1393 ldr1(cpudata->drs[NVMM_X64_DR_DR1]); 1394 ldr2(cpudata->drs[NVMM_X64_DR_DR2]); 1395 ldr3(cpudata->drs[NVMM_X64_DR_DR3]); 1396 } 1397 1398 static void 1399 svm_vcpu_guest_dbregs_leave(struct nvmm_cpu *vcpu) 1400 { 1401 struct svm_cpudata *cpudata = vcpu->cpudata; 1402 1403 cpudata->drs[NVMM_X64_DR_DR0] = rdr0(); 1404 cpudata->drs[NVMM_X64_DR_DR1] = rdr1(); 1405 cpudata->drs[NVMM_X64_DR_DR2] = rdr2(); 1406 cpudata->drs[NVMM_X64_DR_DR3] = rdr3(); 1407 1408 x86_dbregs_restore(curlwp); 1409 } 1410 1411 static void 1412 svm_vcpu_guest_misc_enter(struct nvmm_cpu *vcpu) 1413 { 1414 struct svm_cpudata *cpudata = vcpu->cpudata; 1415 1416 cpudata->fsbase = rdmsr(MSR_FSBASE); 1417 cpudata->kernelgsbase = rdmsr(MSR_KERNELGSBASE); 1418 } 1419 1420 static void 1421 svm_vcpu_guest_misc_leave(struct nvmm_cpu *vcpu) 1422 { 1423 struct svm_cpudata *cpudata = vcpu->cpudata; 1424 1425 wrmsr(MSR_STAR, cpudata->star); 1426 wrmsr(MSR_LSTAR, cpudata->lstar); 1427 wrmsr(MSR_CSTAR, cpudata->cstar); 1428 wrmsr(MSR_SFMASK, cpudata->sfmask); 1429 wrmsr(MSR_FSBASE, cpudata->fsbase); 1430 wrmsr(MSR_KERNELGSBASE, cpudata->kernelgsbase); 1431 } 1432 1433 /* -------------------------------------------------------------------------- */ 1434 1435 static inline void 1436 svm_gtlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 1437 { 1438 struct svm_cpudata *cpudata = vcpu->cpudata; 1439 1440 if (vcpu->hcpu_last != hcpu || cpudata->shared_asid) { 1441 cpudata->gtlb_want_flush = true; 1442 } 1443 } 1444 1445 static inline void 1446 svm_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 1447 { 1448 /* 1449 * Nothing to do. If an hTLB flush was needed, either the VCPU was 1450 * executing on this hCPU and the hTLB already got flushed, or it 1451 * was executing on another hCPU in which case the catchup is done 1452 * in svm_gtlb_catchup(). 1453 */ 1454 } 1455 1456 static inline uint64_t 1457 svm_htlb_flush(struct svm_machdata *machdata, struct svm_cpudata *cpudata) 1458 { 1459 struct vmcb *vmcb = cpudata->vmcb; 1460 uint64_t machgen; 1461 1462 machgen = machdata->mach_htlb_gen; 1463 if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) { 1464 return machgen; 1465 } 1466 1467 vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush; 1468 return machgen; 1469 } 1470 1471 static inline void 1472 svm_htlb_flush_ack(struct svm_cpudata *cpudata, uint64_t machgen) 1473 { 1474 struct vmcb *vmcb = cpudata->vmcb; 1475 1476 if (__predict_true(vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID)) { 1477 cpudata->vcpu_htlb_gen = machgen; 1478 } 1479 } 1480 1481 static inline void 1482 svm_exit_evt(struct svm_cpudata *cpudata, struct vmcb *vmcb) 1483 { 1484 cpudata->evt_pending = false; 1485 1486 if (__predict_false(vmcb->ctrl.exitintinfo & VMCB_CTRL_EXITINTINFO_V)) { 1487 vmcb->ctrl.eventinj = vmcb->ctrl.exitintinfo; 1488 cpudata->evt_pending = true; 1489 } 1490 } 1491 1492 static int 1493 svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1494 struct nvmm_vcpu_exit *exit) 1495 { 1496 struct nvmm_comm_page *comm = vcpu->comm; 1497 struct svm_machdata *machdata = mach->machdata; 1498 struct svm_cpudata *cpudata = vcpu->cpudata; 1499 struct vmcb *vmcb = cpudata->vmcb; 1500 uint64_t machgen; 1501 int hcpu, s; 1502 1503 svm_vcpu_state_commit(vcpu); 1504 comm->state_cached = 0; 1505 1506 if (__predict_false(svm_vcpu_event_commit(vcpu) != 0)) { 1507 return EINVAL; 1508 } 1509 1510 kpreempt_disable(); 1511 hcpu = mycpuid; 1512 1513 svm_gtlb_catchup(vcpu, hcpu); 1514 svm_htlb_catchup(vcpu, hcpu); 1515 1516 if (vcpu->hcpu_last != hcpu) { 1517 svm_vmcb_cache_flush_all(vmcb); 1518 cpudata->gtsc_want_update = true; 1519 } 1520 1521 svm_vcpu_guest_dbregs_enter(vcpu); 1522 svm_vcpu_guest_misc_enter(vcpu); 1523 1524 while (1) { 1525 if (cpudata->gtlb_want_flush) { 1526 vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush; 1527 } else { 1528 vmcb->ctrl.tlb_ctrl = 0; 1529 } 1530 1531 if (__predict_false(cpudata->gtsc_want_update)) { 1532 vmcb->ctrl.tsc_offset = cpudata->gtsc - rdtsc(); 1533 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 1534 } 1535 1536 s = splhigh(); 1537 machgen = svm_htlb_flush(machdata, cpudata); 1538 svm_vcpu_guest_fpu_enter(vcpu); 1539 svm_vmrun(cpudata->vmcb_pa, cpudata->gprs); 1540 svm_vcpu_guest_fpu_leave(vcpu); 1541 svm_htlb_flush_ack(cpudata, machgen); 1542 splx(s); 1543 1544 svm_vmcb_cache_default(vmcb); 1545 1546 if (vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID) { 1547 cpudata->gtlb_want_flush = false; 1548 cpudata->gtsc_want_update = false; 1549 vcpu->hcpu_last = hcpu; 1550 } 1551 svm_exit_evt(cpudata, vmcb); 1552 1553 switch (vmcb->ctrl.exitcode) { 1554 case VMCB_EXITCODE_INTR: 1555 case VMCB_EXITCODE_NMI: 1556 exit->reason = NVMM_VCPU_EXIT_NONE; 1557 break; 1558 case VMCB_EXITCODE_VINTR: 1559 svm_event_waitexit_disable(vcpu, false); 1560 exit->reason = NVMM_VCPU_EXIT_INT_READY; 1561 break; 1562 case VMCB_EXITCODE_IRET: 1563 svm_event_waitexit_disable(vcpu, true); 1564 exit->reason = NVMM_VCPU_EXIT_NMI_READY; 1565 break; 1566 case VMCB_EXITCODE_CPUID: 1567 svm_exit_cpuid(mach, vcpu, exit); 1568 break; 1569 case VMCB_EXITCODE_HLT: 1570 svm_exit_hlt(mach, vcpu, exit); 1571 break; 1572 case VMCB_EXITCODE_IOIO: 1573 svm_exit_io(mach, vcpu, exit); 1574 break; 1575 case VMCB_EXITCODE_MSR: 1576 svm_exit_msr(mach, vcpu, exit); 1577 break; 1578 case VMCB_EXITCODE_SHUTDOWN: 1579 exit->reason = NVMM_VCPU_EXIT_SHUTDOWN; 1580 break; 1581 case VMCB_EXITCODE_RDPMC: 1582 case VMCB_EXITCODE_RSM: 1583 case VMCB_EXITCODE_INVLPGA: 1584 case VMCB_EXITCODE_VMRUN: 1585 case VMCB_EXITCODE_VMMCALL: 1586 case VMCB_EXITCODE_VMLOAD: 1587 case VMCB_EXITCODE_VMSAVE: 1588 case VMCB_EXITCODE_STGI: 1589 case VMCB_EXITCODE_CLGI: 1590 case VMCB_EXITCODE_SKINIT: 1591 case VMCB_EXITCODE_RDTSCP: 1592 case VMCB_EXITCODE_RDPRU: 1593 case VMCB_EXITCODE_INVLPGB: 1594 case VMCB_EXITCODE_INVPCID: 1595 case VMCB_EXITCODE_MCOMMIT: 1596 case VMCB_EXITCODE_TLBSYNC: 1597 svm_inject_ud(vcpu); 1598 exit->reason = NVMM_VCPU_EXIT_NONE; 1599 break; 1600 case VMCB_EXITCODE_MONITOR: 1601 svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MONITOR); 1602 break; 1603 case VMCB_EXITCODE_MWAIT: 1604 case VMCB_EXITCODE_MWAIT_CONDITIONAL: 1605 svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MWAIT); 1606 break; 1607 case VMCB_EXITCODE_XSETBV: 1608 svm_exit_xsetbv(mach, vcpu, exit); 1609 break; 1610 case VMCB_EXITCODE_NPF: 1611 svm_exit_npf(mach, vcpu, exit); 1612 break; 1613 case VMCB_EXITCODE_FERR_FREEZE: /* ? */ 1614 default: 1615 svm_exit_invalid(exit, vmcb->ctrl.exitcode); 1616 break; 1617 } 1618 1619 /* If no reason to return to userland, keep rolling. */ 1620 if (nvmm_return_needed()) { 1621 break; 1622 } 1623 if (exit->reason != NVMM_VCPU_EXIT_NONE) { 1624 break; 1625 } 1626 } 1627 1628 cpudata->gtsc = rdtsc() + vmcb->ctrl.tsc_offset; 1629 1630 svm_vcpu_guest_misc_leave(vcpu); 1631 svm_vcpu_guest_dbregs_leave(vcpu); 1632 1633 kpreempt_enable(); 1634 1635 exit->exitstate.rflags = vmcb->state.rflags; 1636 exit->exitstate.cr8 = __SHIFTOUT(vmcb->ctrl.v, VMCB_CTRL_V_TPR); 1637 exit->exitstate.int_shadow = 1638 ((vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0); 1639 exit->exitstate.int_window_exiting = cpudata->int_window_exit; 1640 exit->exitstate.nmi_window_exiting = cpudata->nmi_window_exit; 1641 exit->exitstate.evt_pending = cpudata->evt_pending; 1642 1643 return 0; 1644 } 1645 1646 /* -------------------------------------------------------------------------- */ 1647 1648 static int 1649 svm_memalloc(paddr_t *pa, vaddr_t *va, size_t npages) 1650 { 1651 #ifdef __NetBSD__ 1652 struct pglist pglist; 1653 paddr_t _pa; 1654 vaddr_t _va; 1655 size_t i; 1656 int ret; 1657 1658 ret = uvm_pglistalloc(npages * PAGE_SIZE, 0, ~0UL, PAGE_SIZE, 0, 1659 &pglist, 1, 0); 1660 if (ret != 0) 1661 return ENOMEM; 1662 _pa = VM_PAGE_TO_PHYS(TAILQ_FIRST(&pglist)); 1663 _va = uvm_km_alloc(kernel_map, npages * PAGE_SIZE, 0, 1664 UVM_KMF_VAONLY | UVM_KMF_NOWAIT); 1665 if (_va == 0) 1666 goto error; 1667 1668 for (i = 0; i < npages; i++) { 1669 pmap_kenter_pa(_va + i * PAGE_SIZE, _pa + i * PAGE_SIZE, 1670 VM_PROT_READ | VM_PROT_WRITE, PMAP_WRITE_BACK); 1671 } 1672 pmap_update(pmap_kernel()); 1673 1674 memset((void *)_va, 0, npages * PAGE_SIZE); 1675 1676 *pa = _pa; 1677 *va = _va; 1678 return 0; 1679 1680 error: 1681 for (i = 0; i < npages; i++) { 1682 uvm_pagefree(PHYS_TO_VM_PAGE(_pa + i * PAGE_SIZE)); 1683 } 1684 return ENOMEM; 1685 1686 #else /* DragonFly */ 1687 void *addr; 1688 1689 addr = contigmalloc(npages * PAGE_SIZE, M_NVMM, M_WAITOK | M_ZERO, 1690 0, ~0UL, PAGE_SIZE, 0); 1691 if (addr == NULL) 1692 return ENOMEM; 1693 1694 *va = (vaddr_t)addr; 1695 *pa = vtophys(addr); 1696 return 0; 1697 #endif /* __NetBSD__ */ 1698 } 1699 1700 static void 1701 svm_memfree(paddr_t pa __unused, vaddr_t va, size_t npages) 1702 { 1703 #ifdef __NetBSD__ 1704 size_t i; 1705 1706 pmap_kremove(va, npages * PAGE_SIZE); 1707 pmap_update(pmap_kernel()); 1708 uvm_km_free(kernel_map, va, npages * PAGE_SIZE, UVM_KMF_VAONLY); 1709 for (i = 0; i < npages; i++) { 1710 uvm_pagefree(PHYS_TO_VM_PAGE(pa + i * PAGE_SIZE)); 1711 } 1712 #else /* DragonFly */ 1713 contigfree((void *)va, npages * PAGE_SIZE, M_NVMM); 1714 #endif /* __NetBSD__ */ 1715 } 1716 1717 /* -------------------------------------------------------------------------- */ 1718 1719 #define SVM_MSRBM_READ __BIT(0) 1720 #define SVM_MSRBM_WRITE __BIT(1) 1721 1722 static void 1723 svm_vcpu_msr_allow(uint8_t *bitmap, uint64_t msr, bool read, bool write) 1724 { 1725 uint64_t byte; 1726 uint8_t bitoff; 1727 1728 if (msr < 0x00002000) { 1729 /* Range 1 */ 1730 byte = ((msr - 0x00000000) >> 2UL) + 0x0000; 1731 } else if (msr >= 0xC0000000 && msr < 0xC0002000) { 1732 /* Range 2 */ 1733 byte = ((msr - 0xC0000000) >> 2UL) + 0x0800; 1734 } else if (msr >= 0xC0010000 && msr < 0xC0012000) { 1735 /* Range 3 */ 1736 byte = ((msr - 0xC0010000) >> 2UL) + 0x1000; 1737 } else { 1738 panic("%s: wrong range", __func__); 1739 } 1740 1741 bitoff = (msr & 0x3) << 1; 1742 1743 if (read) { 1744 bitmap[byte] &= ~(SVM_MSRBM_READ << bitoff); 1745 } 1746 if (write) { 1747 bitmap[byte] &= ~(SVM_MSRBM_WRITE << bitoff); 1748 } 1749 } 1750 1751 #define SVM_SEG_ATTRIB_TYPE __BITS(3,0) 1752 #define SVM_SEG_ATTRIB_S __BIT(4) 1753 #define SVM_SEG_ATTRIB_DPL __BITS(6,5) 1754 #define SVM_SEG_ATTRIB_P __BIT(7) 1755 #define SVM_SEG_ATTRIB_AVL __BIT(8) 1756 #define SVM_SEG_ATTRIB_L __BIT(9) 1757 #define SVM_SEG_ATTRIB_DEF __BIT(10) 1758 #define SVM_SEG_ATTRIB_G __BIT(11) 1759 1760 static void 1761 svm_vcpu_setstate_seg(const struct nvmm_x64_state_seg *seg, 1762 struct vmcb_segment *vseg) 1763 { 1764 vseg->selector = seg->selector; 1765 vseg->attrib = 1766 __SHIFTIN(seg->attrib.type, SVM_SEG_ATTRIB_TYPE) | 1767 __SHIFTIN(seg->attrib.s, SVM_SEG_ATTRIB_S) | 1768 __SHIFTIN(seg->attrib.dpl, SVM_SEG_ATTRIB_DPL) | 1769 __SHIFTIN(seg->attrib.p, SVM_SEG_ATTRIB_P) | 1770 __SHIFTIN(seg->attrib.avl, SVM_SEG_ATTRIB_AVL) | 1771 __SHIFTIN(seg->attrib.l, SVM_SEG_ATTRIB_L) | 1772 __SHIFTIN(seg->attrib.def, SVM_SEG_ATTRIB_DEF) | 1773 __SHIFTIN(seg->attrib.g, SVM_SEG_ATTRIB_G); 1774 vseg->limit = seg->limit; 1775 vseg->base = seg->base; 1776 } 1777 1778 static void 1779 svm_vcpu_getstate_seg(struct nvmm_x64_state_seg *seg, struct vmcb_segment *vseg) 1780 { 1781 seg->selector = vseg->selector; 1782 seg->attrib.type = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_TYPE); 1783 seg->attrib.s = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_S); 1784 seg->attrib.dpl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DPL); 1785 seg->attrib.p = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_P); 1786 seg->attrib.avl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_AVL); 1787 seg->attrib.l = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_L); 1788 seg->attrib.def = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DEF); 1789 seg->attrib.g = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_G); 1790 seg->limit = vseg->limit; 1791 seg->base = vseg->base; 1792 } 1793 1794 static inline bool 1795 svm_state_tlb_flush(const struct vmcb *vmcb, const struct nvmm_x64_state *state, 1796 uint64_t flags) 1797 { 1798 if (flags & NVMM_X64_STATE_CRS) { 1799 if ((vmcb->state.cr0 ^ 1800 state->crs[NVMM_X64_CR_CR0]) & CR0_TLB_FLUSH) { 1801 return true; 1802 } 1803 if (vmcb->state.cr3 != state->crs[NVMM_X64_CR_CR3]) { 1804 return true; 1805 } 1806 if ((vmcb->state.cr4 ^ 1807 state->crs[NVMM_X64_CR_CR4]) & CR4_TLB_FLUSH) { 1808 return true; 1809 } 1810 } 1811 1812 if (flags & NVMM_X64_STATE_MSRS) { 1813 if ((vmcb->state.efer ^ 1814 state->msrs[NVMM_X64_MSR_EFER]) & EFER_TLB_FLUSH) { 1815 return true; 1816 } 1817 } 1818 1819 return false; 1820 } 1821 1822 static void 1823 svm_vcpu_setstate(struct nvmm_cpu *vcpu) 1824 { 1825 struct nvmm_comm_page *comm = vcpu->comm; 1826 const struct nvmm_x64_state *state = &comm->state; 1827 struct svm_cpudata *cpudata = vcpu->cpudata; 1828 struct vmcb *vmcb = cpudata->vmcb; 1829 struct savexmm64 *fpustate; 1830 uint64_t flags; 1831 1832 flags = comm->state_wanted; 1833 1834 if (svm_state_tlb_flush(vmcb, state, flags)) { 1835 cpudata->gtlb_want_flush = true; 1836 } 1837 1838 if (flags & NVMM_X64_STATE_SEGS) { 1839 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_CS], 1840 &vmcb->state.cs); 1841 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_DS], 1842 &vmcb->state.ds); 1843 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_ES], 1844 &vmcb->state.es); 1845 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_FS], 1846 &vmcb->state.fs); 1847 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GS], 1848 &vmcb->state.gs); 1849 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_SS], 1850 &vmcb->state.ss); 1851 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GDT], 1852 &vmcb->state.gdt); 1853 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_IDT], 1854 &vmcb->state.idt); 1855 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_LDT], 1856 &vmcb->state.ldt); 1857 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_TR], 1858 &vmcb->state.tr); 1859 1860 vmcb->state.cpl = state->segs[NVMM_X64_SEG_SS].attrib.dpl; 1861 } 1862 1863 CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs)); 1864 if (flags & NVMM_X64_STATE_GPRS) { 1865 memcpy(cpudata->gprs, state->gprs, sizeof(state->gprs)); 1866 1867 vmcb->state.rip = state->gprs[NVMM_X64_GPR_RIP]; 1868 vmcb->state.rsp = state->gprs[NVMM_X64_GPR_RSP]; 1869 vmcb->state.rax = state->gprs[NVMM_X64_GPR_RAX]; 1870 vmcb->state.rflags = state->gprs[NVMM_X64_GPR_RFLAGS]; 1871 } 1872 1873 if (flags & NVMM_X64_STATE_CRS) { 1874 vmcb->state.cr0 = state->crs[NVMM_X64_CR_CR0]; 1875 vmcb->state.cr2 = state->crs[NVMM_X64_CR_CR2]; 1876 vmcb->state.cr3 = state->crs[NVMM_X64_CR_CR3]; 1877 vmcb->state.cr4 = state->crs[NVMM_X64_CR_CR4]; 1878 1879 vmcb->ctrl.v &= ~VMCB_CTRL_V_TPR; 1880 vmcb->ctrl.v |= __SHIFTIN(state->crs[NVMM_X64_CR_CR8], 1881 VMCB_CTRL_V_TPR); 1882 1883 if (svm_xcr0_mask != 0) { 1884 /* Clear illegal XCR0 bits, set mandatory X87 bit. */ 1885 cpudata->gxcr0 = state->crs[NVMM_X64_CR_XCR0]; 1886 cpudata->gxcr0 &= svm_xcr0_mask; 1887 cpudata->gxcr0 |= XCR0_X87; 1888 } 1889 } 1890 1891 CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs)); 1892 if (flags & NVMM_X64_STATE_DRS) { 1893 memcpy(cpudata->drs, state->drs, sizeof(state->drs)); 1894 1895 vmcb->state.dr6 = state->drs[NVMM_X64_DR_DR6]; 1896 vmcb->state.dr7 = state->drs[NVMM_X64_DR_DR7]; 1897 } 1898 1899 if (flags & NVMM_X64_STATE_MSRS) { 1900 /* 1901 * EFER_SVME is mandatory. 1902 */ 1903 vmcb->state.efer = state->msrs[NVMM_X64_MSR_EFER] | EFER_SVME; 1904 vmcb->state.star = state->msrs[NVMM_X64_MSR_STAR]; 1905 vmcb->state.lstar = state->msrs[NVMM_X64_MSR_LSTAR]; 1906 vmcb->state.cstar = state->msrs[NVMM_X64_MSR_CSTAR]; 1907 vmcb->state.sfmask = state->msrs[NVMM_X64_MSR_SFMASK]; 1908 vmcb->state.kernelgsbase = 1909 state->msrs[NVMM_X64_MSR_KERNELGSBASE]; 1910 vmcb->state.sysenter_cs = 1911 state->msrs[NVMM_X64_MSR_SYSENTER_CS]; 1912 vmcb->state.sysenter_esp = 1913 state->msrs[NVMM_X64_MSR_SYSENTER_ESP]; 1914 vmcb->state.sysenter_eip = 1915 state->msrs[NVMM_X64_MSR_SYSENTER_EIP]; 1916 vmcb->state.g_pat = state->msrs[NVMM_X64_MSR_PAT]; 1917 1918 cpudata->gtsc = state->msrs[NVMM_X64_MSR_TSC]; 1919 cpudata->gtsc_want_update = true; 1920 } 1921 1922 if (flags & NVMM_X64_STATE_INTR) { 1923 if (state->intr.int_shadow) { 1924 vmcb->ctrl.intr |= VMCB_CTRL_INTR_SHADOW; 1925 } else { 1926 vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW; 1927 } 1928 1929 if (state->intr.int_window_exiting) { 1930 svm_event_waitexit_enable(vcpu, false); 1931 } else { 1932 svm_event_waitexit_disable(vcpu, false); 1933 } 1934 1935 if (state->intr.nmi_window_exiting) { 1936 svm_event_waitexit_enable(vcpu, true); 1937 } else { 1938 svm_event_waitexit_disable(vcpu, true); 1939 } 1940 } 1941 1942 CTASSERT(sizeof(cpudata->gfpu) == sizeof(state->fpu)); 1943 if (flags & NVMM_X64_STATE_FPU) { 1944 memcpy(&cpudata->gfpu, &state->fpu, sizeof(state->fpu)); 1945 1946 fpustate = &cpudata->gfpu.sv_xmm64; 1947 fpustate->sv_env.en_mxcsr_mask &= x86_fpu_mxcsr_mask; 1948 fpustate->sv_env.en_mxcsr &= fpustate->sv_env.en_mxcsr_mask; 1949 1950 #ifdef __NetBSD__ 1951 if (svm_xcr0_mask != 0) { 1952 /* Reset XSTATE_BV, to force a reload. */ 1953 cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask; 1954 } 1955 #endif /* __NetBSD__ */ 1956 } 1957 1958 svm_vmcb_cache_update(vmcb, flags); 1959 1960 comm->state_wanted = 0; 1961 comm->state_cached |= flags; 1962 } 1963 1964 static void 1965 svm_vcpu_getstate(struct nvmm_cpu *vcpu) 1966 { 1967 struct nvmm_comm_page *comm = vcpu->comm; 1968 struct nvmm_x64_state *state = &comm->state; 1969 struct svm_cpudata *cpudata = vcpu->cpudata; 1970 struct vmcb *vmcb = cpudata->vmcb; 1971 uint64_t flags; 1972 1973 flags = comm->state_wanted; 1974 1975 if (flags & NVMM_X64_STATE_SEGS) { 1976 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_CS], 1977 &vmcb->state.cs); 1978 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_DS], 1979 &vmcb->state.ds); 1980 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_ES], 1981 &vmcb->state.es); 1982 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_FS], 1983 &vmcb->state.fs); 1984 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GS], 1985 &vmcb->state.gs); 1986 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_SS], 1987 &vmcb->state.ss); 1988 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GDT], 1989 &vmcb->state.gdt); 1990 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_IDT], 1991 &vmcb->state.idt); 1992 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_LDT], 1993 &vmcb->state.ldt); 1994 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_TR], 1995 &vmcb->state.tr); 1996 1997 state->segs[NVMM_X64_SEG_SS].attrib.dpl = vmcb->state.cpl; 1998 } 1999 2000 CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs)); 2001 if (flags & NVMM_X64_STATE_GPRS) { 2002 memcpy(state->gprs, cpudata->gprs, sizeof(state->gprs)); 2003 2004 state->gprs[NVMM_X64_GPR_RIP] = vmcb->state.rip; 2005 state->gprs[NVMM_X64_GPR_RSP] = vmcb->state.rsp; 2006 state->gprs[NVMM_X64_GPR_RAX] = vmcb->state.rax; 2007 state->gprs[NVMM_X64_GPR_RFLAGS] = vmcb->state.rflags; 2008 } 2009 2010 if (flags & NVMM_X64_STATE_CRS) { 2011 state->crs[NVMM_X64_CR_CR0] = vmcb->state.cr0; 2012 state->crs[NVMM_X64_CR_CR2] = vmcb->state.cr2; 2013 state->crs[NVMM_X64_CR_CR3] = vmcb->state.cr3; 2014 state->crs[NVMM_X64_CR_CR4] = vmcb->state.cr4; 2015 state->crs[NVMM_X64_CR_CR8] = __SHIFTOUT(vmcb->ctrl.v, 2016 VMCB_CTRL_V_TPR); 2017 state->crs[NVMM_X64_CR_XCR0] = cpudata->gxcr0; 2018 } 2019 2020 CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs)); 2021 if (flags & NVMM_X64_STATE_DRS) { 2022 memcpy(state->drs, cpudata->drs, sizeof(state->drs)); 2023 2024 state->drs[NVMM_X64_DR_DR6] = vmcb->state.dr6; 2025 state->drs[NVMM_X64_DR_DR7] = vmcb->state.dr7; 2026 } 2027 2028 if (flags & NVMM_X64_STATE_MSRS) { 2029 state->msrs[NVMM_X64_MSR_EFER] = vmcb->state.efer; 2030 state->msrs[NVMM_X64_MSR_STAR] = vmcb->state.star; 2031 state->msrs[NVMM_X64_MSR_LSTAR] = vmcb->state.lstar; 2032 state->msrs[NVMM_X64_MSR_CSTAR] = vmcb->state.cstar; 2033 state->msrs[NVMM_X64_MSR_SFMASK] = vmcb->state.sfmask; 2034 state->msrs[NVMM_X64_MSR_KERNELGSBASE] = 2035 vmcb->state.kernelgsbase; 2036 state->msrs[NVMM_X64_MSR_SYSENTER_CS] = 2037 vmcb->state.sysenter_cs; 2038 state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = 2039 vmcb->state.sysenter_esp; 2040 state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = 2041 vmcb->state.sysenter_eip; 2042 state->msrs[NVMM_X64_MSR_PAT] = vmcb->state.g_pat; 2043 state->msrs[NVMM_X64_MSR_TSC] = cpudata->gtsc; 2044 2045 /* Hide SVME. */ 2046 state->msrs[NVMM_X64_MSR_EFER] &= ~EFER_SVME; 2047 } 2048 2049 if (flags & NVMM_X64_STATE_INTR) { 2050 state->intr.int_shadow = 2051 (vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0; 2052 state->intr.int_window_exiting = cpudata->int_window_exit; 2053 state->intr.nmi_window_exiting = cpudata->nmi_window_exit; 2054 state->intr.evt_pending = cpudata->evt_pending; 2055 } 2056 2057 CTASSERT(sizeof(cpudata->gfpu) == sizeof(state->fpu)); 2058 if (flags & NVMM_X64_STATE_FPU) { 2059 memcpy(&state->fpu, &cpudata->gfpu, sizeof(state->fpu)); 2060 } 2061 2062 comm->state_wanted = 0; 2063 comm->state_cached |= flags; 2064 } 2065 2066 static void 2067 svm_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags) 2068 { 2069 vcpu->comm->state_wanted = flags; 2070 svm_vcpu_getstate(vcpu); 2071 } 2072 2073 static void 2074 svm_vcpu_state_commit(struct nvmm_cpu *vcpu) 2075 { 2076 vcpu->comm->state_wanted = vcpu->comm->state_commit; 2077 vcpu->comm->state_commit = 0; 2078 svm_vcpu_setstate(vcpu); 2079 } 2080 2081 /* -------------------------------------------------------------------------- */ 2082 2083 static void 2084 svm_asid_alloc(struct nvmm_cpu *vcpu) 2085 { 2086 struct svm_cpudata *cpudata = vcpu->cpudata; 2087 struct vmcb *vmcb = cpudata->vmcb; 2088 size_t i, oct, bit; 2089 2090 mutex_enter(&svm_asidlock); 2091 2092 for (i = 0; i < svm_maxasid; i++) { 2093 oct = i / 8; 2094 bit = i % 8; 2095 2096 if (svm_asidmap[oct] & __BIT(bit)) { 2097 continue; 2098 } 2099 2100 svm_asidmap[oct] |= __BIT(bit); 2101 vmcb->ctrl.guest_asid = i; 2102 mutex_exit(&svm_asidlock); 2103 return; 2104 } 2105 2106 /* 2107 * No free ASID. Use the last one, which is shared and requires 2108 * special TLB handling. 2109 */ 2110 cpudata->shared_asid = true; 2111 vmcb->ctrl.guest_asid = svm_maxasid - 1; 2112 mutex_exit(&svm_asidlock); 2113 } 2114 2115 static void 2116 svm_asid_free(struct nvmm_cpu *vcpu) 2117 { 2118 struct svm_cpudata *cpudata = vcpu->cpudata; 2119 struct vmcb *vmcb = cpudata->vmcb; 2120 size_t oct, bit; 2121 2122 if (cpudata->shared_asid) { 2123 return; 2124 } 2125 2126 oct = vmcb->ctrl.guest_asid / 8; 2127 bit = vmcb->ctrl.guest_asid % 8; 2128 2129 mutex_enter(&svm_asidlock); 2130 svm_asidmap[oct] &= ~__BIT(bit); 2131 mutex_exit(&svm_asidlock); 2132 } 2133 2134 static void 2135 svm_vcpu_init(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2136 { 2137 struct svm_cpudata *cpudata = vcpu->cpudata; 2138 struct vmcb *vmcb = cpudata->vmcb; 2139 2140 /* Allow reads/writes of Control Registers. */ 2141 vmcb->ctrl.intercept_cr = 0; 2142 2143 /* Allow reads/writes of Debug Registers. */ 2144 vmcb->ctrl.intercept_dr = 0; 2145 2146 /* Allow exceptions 0 to 31. */ 2147 vmcb->ctrl.intercept_vec = 0; 2148 2149 /* 2150 * Allow: 2151 * - SMI [smm interrupts] 2152 * - VINTR [virtual interrupts] 2153 * - CR0_SPEC [CR0 writes changing other fields than CR0.TS or CR0.MP] 2154 * - RIDTR [reads of IDTR] 2155 * - RGDTR [reads of GDTR] 2156 * - RLDTR [reads of LDTR] 2157 * - RTR [reads of TR] 2158 * - WIDTR [writes of IDTR] 2159 * - WGDTR [writes of GDTR] 2160 * - WLDTR [writes of LDTR] 2161 * - WTR [writes of TR] 2162 * - RDTSC [rdtsc instruction] 2163 * - PUSHF [pushf instruction] 2164 * - POPF [popf instruction] 2165 * - IRET [iret instruction] 2166 * - INTN [int $n instructions] 2167 * - PAUSE [pause instruction] 2168 * - INVLPG [invplg instruction] 2169 * - TASKSW [task switches] 2170 * 2171 * Intercept the rest below. 2172 */ 2173 vmcb->ctrl.intercept_misc1 = 2174 VMCB_CTRL_INTERCEPT_INTR | 2175 VMCB_CTRL_INTERCEPT_NMI | 2176 VMCB_CTRL_INTERCEPT_INIT | 2177 VMCB_CTRL_INTERCEPT_RDPMC | 2178 VMCB_CTRL_INTERCEPT_CPUID | 2179 VMCB_CTRL_INTERCEPT_RSM | 2180 VMCB_CTRL_INTERCEPT_INVD | 2181 VMCB_CTRL_INTERCEPT_HLT | 2182 VMCB_CTRL_INTERCEPT_INVLPGA | 2183 VMCB_CTRL_INTERCEPT_IOIO_PROT | 2184 VMCB_CTRL_INTERCEPT_MSR_PROT | 2185 VMCB_CTRL_INTERCEPT_FERR_FREEZE | 2186 VMCB_CTRL_INTERCEPT_SHUTDOWN; 2187 2188 /* 2189 * Allow: 2190 * - ICEBP [icebp instruction] 2191 * - WBINVD [wbinvd instruction] 2192 * - WCR_SPEC(0..15) [writes of CR0-15, received after instruction] 2193 * 2194 * Intercept the rest below. 2195 */ 2196 vmcb->ctrl.intercept_misc2 = 2197 VMCB_CTRL_INTERCEPT_VMRUN | 2198 VMCB_CTRL_INTERCEPT_VMMCALL | 2199 VMCB_CTRL_INTERCEPT_VMLOAD | 2200 VMCB_CTRL_INTERCEPT_VMSAVE | 2201 VMCB_CTRL_INTERCEPT_STGI | 2202 VMCB_CTRL_INTERCEPT_CLGI | 2203 VMCB_CTRL_INTERCEPT_SKINIT | 2204 VMCB_CTRL_INTERCEPT_RDTSCP | 2205 VMCB_CTRL_INTERCEPT_MONITOR | 2206 VMCB_CTRL_INTERCEPT_MWAIT | 2207 VMCB_CTRL_INTERCEPT_XSETBV | 2208 VMCB_CTRL_INTERCEPT_RDPRU; 2209 2210 /* 2211 * Intercept everything. 2212 */ 2213 vmcb->ctrl.intercept_misc3 = 2214 VMCB_CTRL_INTERCEPT_INVLPGB_ALL | 2215 VMCB_CTRL_INTERCEPT_PCID | 2216 VMCB_CTRL_INTERCEPT_MCOMMIT | 2217 VMCB_CTRL_INTERCEPT_TLBSYNC; 2218 2219 /* Intercept all I/O accesses. */ 2220 memset(cpudata->iobm, 0xFF, IOBM_SIZE); 2221 vmcb->ctrl.iopm_base_pa = cpudata->iobm_pa; 2222 2223 /* Allow direct access to certain MSRs. */ 2224 memset(cpudata->msrbm, 0xFF, MSRBM_SIZE); 2225 svm_vcpu_msr_allow(cpudata->msrbm, MSR_STAR, true, true); 2226 svm_vcpu_msr_allow(cpudata->msrbm, MSR_LSTAR, true, true); 2227 svm_vcpu_msr_allow(cpudata->msrbm, MSR_CSTAR, true, true); 2228 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SFMASK, true, true); 2229 svm_vcpu_msr_allow(cpudata->msrbm, MSR_KERNELGSBASE, true, true); 2230 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_CS, true, true); 2231 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_ESP, true, true); 2232 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_EIP, true, true); 2233 svm_vcpu_msr_allow(cpudata->msrbm, MSR_FSBASE, true, true); 2234 svm_vcpu_msr_allow(cpudata->msrbm, MSR_GSBASE, true, true); 2235 svm_vcpu_msr_allow(cpudata->msrbm, MSR_CR_PAT, true, true); 2236 svm_vcpu_msr_allow(cpudata->msrbm, MSR_TSC, true, false); 2237 vmcb->ctrl.msrpm_base_pa = cpudata->msrbm_pa; 2238 2239 /* Generate ASID. */ 2240 svm_asid_alloc(vcpu); 2241 2242 /* Virtual TPR. */ 2243 vmcb->ctrl.v = VMCB_CTRL_V_INTR_MASKING; 2244 2245 /* Enable Nested Paging. */ 2246 vmcb->ctrl.enable1 = VMCB_CTRL_ENABLE_NP; 2247 vmcb->ctrl.n_cr3 = vtophys(vmspace_pmap(mach->vm)->pm_pml4); 2248 2249 #ifdef __NetBSD__ 2250 /* Init XSAVE header. */ 2251 cpudata->gfpu.xsh_xstate_bv = svm_xcr0_mask; 2252 cpudata->gfpu.xsh_xcomp_bv = 0; 2253 #endif /* __NetBSD__ */ 2254 2255 /* These MSRs are static. */ 2256 cpudata->star = rdmsr(MSR_STAR); 2257 cpudata->lstar = rdmsr(MSR_LSTAR); 2258 cpudata->cstar = rdmsr(MSR_CSTAR); 2259 cpudata->sfmask = rdmsr(MSR_SFMASK); 2260 2261 /* Install the RESET state. */ 2262 memcpy(&vcpu->comm->state, &nvmm_x86_reset_state, 2263 sizeof(nvmm_x86_reset_state)); 2264 vcpu->comm->state_wanted = NVMM_X64_STATE_ALL; 2265 vcpu->comm->state_cached = 0; 2266 svm_vcpu_setstate(vcpu); 2267 } 2268 2269 static int 2270 svm_vcpu_create(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2271 { 2272 struct svm_cpudata *cpudata; 2273 int error; 2274 2275 /* Allocate the SVM cpudata. */ 2276 cpudata = (struct svm_cpudata *)uvm_km_alloc(kernel_map, 2277 roundup(sizeof(*cpudata), PAGE_SIZE), 0, 2278 UVM_KMF_WIRED|UVM_KMF_ZERO); 2279 if (cpudata == NULL) 2280 return ENOMEM; 2281 2282 vcpu->cpudata = cpudata; 2283 2284 /* VMCB */ 2285 error = svm_memalloc(&cpudata->vmcb_pa, (vaddr_t *)&cpudata->vmcb, 2286 VMCB_NPAGES); 2287 if (error) 2288 goto error; 2289 2290 /* I/O Bitmap */ 2291 error = svm_memalloc(&cpudata->iobm_pa, (vaddr_t *)&cpudata->iobm, 2292 IOBM_NPAGES); 2293 if (error) 2294 goto error; 2295 2296 /* MSR Bitmap */ 2297 error = svm_memalloc(&cpudata->msrbm_pa, (vaddr_t *)&cpudata->msrbm, 2298 MSRBM_NPAGES); 2299 if (error) 2300 goto error; 2301 2302 /* Init the VCPU info. */ 2303 svm_vcpu_init(mach, vcpu); 2304 2305 return 0; 2306 2307 error: 2308 if (cpudata->vmcb_pa) { 2309 svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, 2310 VMCB_NPAGES); 2311 } 2312 if (cpudata->iobm_pa) { 2313 svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, 2314 IOBM_NPAGES); 2315 } 2316 if (cpudata->msrbm_pa) { 2317 svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, 2318 MSRBM_NPAGES); 2319 } 2320 uvm_km_free(kernel_map, (vaddr_t)cpudata, 2321 roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED); 2322 return error; 2323 } 2324 2325 static void 2326 svm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2327 { 2328 struct svm_cpudata *cpudata = vcpu->cpudata; 2329 2330 svm_asid_free(vcpu); 2331 2332 svm_memfree(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, VMCB_NPAGES); 2333 svm_memfree(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, IOBM_NPAGES); 2334 svm_memfree(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, MSRBM_NPAGES); 2335 2336 uvm_km_free(kernel_map, (vaddr_t)cpudata, 2337 roundup(sizeof(*cpudata), PAGE_SIZE), UVM_KMF_WIRED); 2338 } 2339 2340 /* -------------------------------------------------------------------------- */ 2341 2342 static int 2343 svm_vcpu_configure_cpuid(struct svm_cpudata *cpudata, void *data) 2344 { 2345 struct nvmm_vcpu_conf_cpuid *cpuid = data; 2346 size_t i; 2347 2348 if (__predict_false(cpuid->mask && cpuid->exit)) { 2349 return EINVAL; 2350 } 2351 if (__predict_false(cpuid->mask && 2352 ((cpuid->u.mask.set.eax & cpuid->u.mask.del.eax) || 2353 (cpuid->u.mask.set.ebx & cpuid->u.mask.del.ebx) || 2354 (cpuid->u.mask.set.ecx & cpuid->u.mask.del.ecx) || 2355 (cpuid->u.mask.set.edx & cpuid->u.mask.del.edx)))) { 2356 return EINVAL; 2357 } 2358 2359 /* If unset, delete, to restore the default behavior. */ 2360 if (!cpuid->mask && !cpuid->exit) { 2361 for (i = 0; i < SVM_NCPUIDS; i++) { 2362 if (!cpudata->cpuidpresent[i]) { 2363 continue; 2364 } 2365 if (cpudata->cpuid[i].leaf == cpuid->leaf) { 2366 cpudata->cpuidpresent[i] = false; 2367 } 2368 } 2369 return 0; 2370 } 2371 2372 /* If already here, replace. */ 2373 for (i = 0; i < SVM_NCPUIDS; i++) { 2374 if (!cpudata->cpuidpresent[i]) { 2375 continue; 2376 } 2377 if (cpudata->cpuid[i].leaf == cpuid->leaf) { 2378 memcpy(&cpudata->cpuid[i], cpuid, 2379 sizeof(struct nvmm_vcpu_conf_cpuid)); 2380 return 0; 2381 } 2382 } 2383 2384 /* Not here, insert. */ 2385 for (i = 0; i < SVM_NCPUIDS; i++) { 2386 if (!cpudata->cpuidpresent[i]) { 2387 cpudata->cpuidpresent[i] = true; 2388 memcpy(&cpudata->cpuid[i], cpuid, 2389 sizeof(struct nvmm_vcpu_conf_cpuid)); 2390 return 0; 2391 } 2392 } 2393 2394 return ENOBUFS; 2395 } 2396 2397 static int 2398 svm_vcpu_configure(struct nvmm_cpu *vcpu, uint64_t op, void *data) 2399 { 2400 struct svm_cpudata *cpudata = vcpu->cpudata; 2401 2402 switch (op) { 2403 case NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID): 2404 return svm_vcpu_configure_cpuid(cpudata, data); 2405 default: 2406 return EINVAL; 2407 } 2408 } 2409 2410 /* -------------------------------------------------------------------------- */ 2411 2412 static void 2413 svm_tlb_flush(struct pmap *pm) 2414 { 2415 struct nvmm_machine *mach = pm->pm_data; 2416 struct svm_machdata *machdata = mach->machdata; 2417 2418 atomic_inc_64(&machdata->mach_htlb_gen); 2419 2420 /* Generates IPIs, which cause #VMEXITs. */ 2421 pmap_tlb_shootdown(pmap_kernel(), -1, PTE_G, TLBSHOOT_UPDATE); 2422 } 2423 2424 static void 2425 svm_machine_create(struct nvmm_machine *mach) 2426 { 2427 struct pmap *pmap = vmspace_pmap(mach->vm); 2428 struct svm_machdata *machdata; 2429 2430 /* Fill in pmap info. */ 2431 pmap->pm_data = (void *)mach; 2432 pmap->pm_tlb_flush = svm_tlb_flush; 2433 2434 machdata = kmem_zalloc(sizeof(struct svm_machdata), KM_SLEEP); 2435 mach->machdata = machdata; 2436 2437 /* Start with an hTLB flush everywhere. */ 2438 machdata->mach_htlb_gen = 1; 2439 } 2440 2441 static void 2442 svm_machine_destroy(struct nvmm_machine *mach) 2443 { 2444 kmem_free(mach->machdata, sizeof(struct svm_machdata)); 2445 } 2446 2447 static int 2448 svm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *data) 2449 { 2450 panic("%s: impossible", __func__); 2451 } 2452 2453 /* -------------------------------------------------------------------------- */ 2454 2455 static bool 2456 svm_ident(void) 2457 { 2458 u_int descs[4]; 2459 uint64_t msr; 2460 2461 if (cpu_vendor_id != CPU_VENDOR_AMD) { 2462 return false; 2463 } 2464 if (!(amd_feature2 & CPUID_SVM)) { 2465 printf("NVMM: SVM not supported\n"); 2466 return false; 2467 } 2468 2469 if (cpu_exthigh < 0x8000000a) { 2470 printf("NVMM: CPUID leaf not available\n"); 2471 return false; 2472 } 2473 x86_cpuid(0x8000000a, descs); 2474 2475 /* Expect revision 1. */ 2476 if (__SHIFTOUT(descs[0], CPUID_AMD_SVM_REV) != 1) { 2477 printf("NVMM: SVM revision not supported\n"); 2478 return false; 2479 } 2480 2481 /* Want Nested Paging. */ 2482 if (!(descs[3] & CPUID_AMD_SVM_NP)) { 2483 printf("NVMM: SVM-NP not supported\n"); 2484 return false; 2485 } 2486 2487 /* Want nRIP. */ 2488 if (!(descs[3] & CPUID_AMD_SVM_NRIPS)) { 2489 printf("NVMM: SVM-NRIPS not supported\n"); 2490 return false; 2491 } 2492 2493 svm_decode_assist = (descs[3] & CPUID_AMD_SVM_DecodeAssist) != 0; 2494 2495 msr = rdmsr(MSR_VMCR); 2496 if ((msr & VMCR_SVMED) && (msr & VMCR_LOCK)) { 2497 printf("NVMM: SVM disabled in BIOS\n"); 2498 return false; 2499 } 2500 2501 return true; 2502 } 2503 2504 static void 2505 svm_init_asid(uint32_t maxasid) 2506 { 2507 size_t i, j, allocsz; 2508 2509 mutex_init(&svm_asidlock, MUTEX_DEFAULT, IPL_NONE); 2510 2511 /* Arbitrarily limit. */ 2512 maxasid = uimin(maxasid, 8192); 2513 2514 svm_maxasid = maxasid; 2515 allocsz = roundup(maxasid, 8) / 8; 2516 svm_asidmap = kmem_zalloc(allocsz, KM_SLEEP); 2517 2518 /* ASID 0 is reserved for the host. */ 2519 svm_asidmap[0] |= __BIT(0); 2520 2521 /* ASID n-1 is special, we share it. */ 2522 i = (maxasid - 1) / 8; 2523 j = (maxasid - 1) % 8; 2524 svm_asidmap[i] |= __BIT(j); 2525 } 2526 2527 static void 2528 svm_change_cpu(void *arg1) 2529 { 2530 bool enable = arg1 != NULL; 2531 uint64_t msr; 2532 2533 msr = rdmsr(MSR_VMCR); 2534 if (msr & VMCR_SVMED) { 2535 wrmsr(MSR_VMCR, msr & ~VMCR_SVMED); 2536 } 2537 2538 if (!enable) { 2539 wrmsr(MSR_VM_HSAVE_PA, 0); 2540 } 2541 2542 msr = rdmsr(MSR_EFER); 2543 if (enable) { 2544 msr |= EFER_SVME; 2545 } else { 2546 msr &= ~EFER_SVME; 2547 } 2548 wrmsr(MSR_EFER, msr); 2549 2550 if (enable) { 2551 wrmsr(MSR_VM_HSAVE_PA, hsave[mycpuid].pa); 2552 } 2553 } 2554 2555 static void 2556 svm_init(void) 2557 { 2558 struct vm_page *pg; 2559 u_int descs[4]; 2560 int i; 2561 2562 x86_cpuid(0x8000000a, descs); 2563 2564 /* The guest TLB flush command. */ 2565 if (descs[3] & CPUID_AMD_SVM_FlushByASID) { 2566 svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_GUEST; 2567 } else { 2568 svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_ALL; 2569 } 2570 2571 /* Init the ASID. */ 2572 svm_init_asid(descs[1]); 2573 2574 /* Init the XCR0 mask. */ 2575 svm_xcr0_mask = SVM_XCR0_MASK_DEFAULT & x86_xsave_features; 2576 2577 /* Init the max basic CPUID leaf. */ 2578 svm_cpuid_max_basic = uimin(cpuid_level, SVM_CPUID_MAX_BASIC); 2579 2580 /* Init the max extended CPUID leaf. */ 2581 x86_cpuid(0x80000000, descs); 2582 svm_cpuid_max_extended = uimin(descs[0], SVM_CPUID_MAX_EXTENDED); 2583 2584 memset(hsave, 0, sizeof(hsave)); 2585 for (i = 0; i < ncpus; i++) { 2586 pg = uvm_pagealloc(NULL, 0, NULL, UVM_PGA_ZERO); 2587 hsave[i].pa = VM_PAGE_TO_PHYS(pg); 2588 } 2589 2590 #ifdef __NetBSD__ 2591 uint64_t xc; 2592 xc = xc_broadcast(0, svm_change_cpu, (void *)true, NULL); 2593 xc_wait(xc); 2594 #endif /* __NetBSD__ */ 2595 2596 lwkt_send_ipiq_mask(smp_active_mask, svm_change_cpu, (void *)true); 2597 /* XXX: need any cpu fence ?? */ 2598 } 2599 2600 static void 2601 svm_fini_asid(void) 2602 { 2603 size_t allocsz; 2604 2605 allocsz = roundup(svm_maxasid, 8) / 8; 2606 kmem_free(svm_asidmap, allocsz); 2607 2608 mutex_destroy(&svm_asidlock); 2609 } 2610 2611 static void 2612 svm_fini(void) 2613 { 2614 size_t i; 2615 2616 #ifdef __NetBSD__ 2617 uint64_t xc; 2618 xc = xc_broadcast(0, svm_change_cpu, (void *)false, NULL); 2619 xc_wait(xc); 2620 #endif /* __NetBSD__ */ 2621 2622 lwkt_send_ipiq_mask(smp_active_mask, svm_change_cpu, (void *)false); 2623 /* XXX: need any cpu fence ?? */ 2624 2625 for (i = 0; i < MAXCPUS; i++) { 2626 if (hsave[i].pa != 0) 2627 uvm_pagefree(PHYS_TO_VM_PAGE(hsave[i].pa)); 2628 } 2629 2630 svm_fini_asid(); 2631 } 2632 2633 static void 2634 svm_capability(struct nvmm_capability *cap) 2635 { 2636 cap->arch.mach_conf_support = 0; 2637 cap->arch.vcpu_conf_support = 2638 NVMM_CAP_ARCH_VCPU_CONF_CPUID; 2639 cap->arch.xcr0_mask = svm_xcr0_mask; 2640 cap->arch.mxcsr_mask = x86_fpu_mxcsr_mask; 2641 cap->arch.conf_cpuid_maxops = SVM_NCPUIDS; 2642 } 2643 2644 const struct nvmm_impl nvmm_x86_svm = { 2645 .name = "x86-svm", 2646 .ident = svm_ident, 2647 .init = svm_init, 2648 .fini = svm_fini, 2649 .capability = svm_capability, 2650 .mach_conf_max = NVMM_X86_MACH_NCONF, 2651 .mach_conf_sizes = NULL, 2652 .vcpu_conf_max = NVMM_X86_VCPU_NCONF, 2653 .vcpu_conf_sizes = svm_vcpu_conf_sizes, 2654 .state_size = sizeof(struct nvmm_x64_state), 2655 .machine_create = svm_machine_create, 2656 .machine_destroy = svm_machine_destroy, 2657 .machine_configure = svm_machine_configure, 2658 .vcpu_create = svm_vcpu_create, 2659 .vcpu_destroy = svm_vcpu_destroy, 2660 .vcpu_configure = svm_vcpu_configure, 2661 .vcpu_setstate = svm_vcpu_setstate, 2662 .vcpu_getstate = svm_vcpu_getstate, 2663 .vcpu_inject = svm_vcpu_inject, 2664 .vcpu_run = svm_vcpu_run 2665 }; 2666