1 /* 2 * Copyright (c) 2018-2021 Maxime Villard, m00nbsd.net 3 * All rights reserved. 4 * 5 * This code is part of the NVMM hypervisor. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 17 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 19 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, 21 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 22 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 23 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 24 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 */ 28 29 #include <sys/param.h> 30 #include <sys/systm.h> 31 #include <sys/kernel.h> 32 #include <sys/mman.h> 33 34 #include "../nvmm.h" 35 #include "../nvmm_internal.h" 36 #include "nvmm_x86.h" 37 38 void svm_vmrun(paddr_t, uint64_t *); 39 40 static inline void 41 svm_clgi(void) 42 { 43 __asm volatile ("clgi" ::: "memory"); 44 } 45 46 static inline void 47 svm_stgi(void) 48 { 49 __asm volatile ("stgi" ::: "memory"); 50 } 51 52 #define MSR_VM_HSAVE_PA 0xC0010117 53 54 #define MSR_VM_CR 0xc0010114 /* Virtual Machine Control Register */ 55 #define VM_CR_DPD 0x00000001 /* Debug port disable */ 56 #define VM_CR_RINIT 0x00000002 /* Intercept init */ 57 #define VM_CR_DISA20 0x00000004 /* Disable A20 masking */ 58 #define VM_CR_LOCK 0x00000008 /* SVM Lock */ 59 #define VM_CR_SVMED 0x00000010 /* SVME Disable */ 60 61 /* -------------------------------------------------------------------------- */ 62 63 #define VMCB_EXITCODE_CR0_READ 0x0000 64 #define VMCB_EXITCODE_CR1_READ 0x0001 65 #define VMCB_EXITCODE_CR2_READ 0x0002 66 #define VMCB_EXITCODE_CR3_READ 0x0003 67 #define VMCB_EXITCODE_CR4_READ 0x0004 68 #define VMCB_EXITCODE_CR5_READ 0x0005 69 #define VMCB_EXITCODE_CR6_READ 0x0006 70 #define VMCB_EXITCODE_CR7_READ 0x0007 71 #define VMCB_EXITCODE_CR8_READ 0x0008 72 #define VMCB_EXITCODE_CR9_READ 0x0009 73 #define VMCB_EXITCODE_CR10_READ 0x000A 74 #define VMCB_EXITCODE_CR11_READ 0x000B 75 #define VMCB_EXITCODE_CR12_READ 0x000C 76 #define VMCB_EXITCODE_CR13_READ 0x000D 77 #define VMCB_EXITCODE_CR14_READ 0x000E 78 #define VMCB_EXITCODE_CR15_READ 0x000F 79 #define VMCB_EXITCODE_CR0_WRITE 0x0010 80 #define VMCB_EXITCODE_CR1_WRITE 0x0011 81 #define VMCB_EXITCODE_CR2_WRITE 0x0012 82 #define VMCB_EXITCODE_CR3_WRITE 0x0013 83 #define VMCB_EXITCODE_CR4_WRITE 0x0014 84 #define VMCB_EXITCODE_CR5_WRITE 0x0015 85 #define VMCB_EXITCODE_CR6_WRITE 0x0016 86 #define VMCB_EXITCODE_CR7_WRITE 0x0017 87 #define VMCB_EXITCODE_CR8_WRITE 0x0018 88 #define VMCB_EXITCODE_CR9_WRITE 0x0019 89 #define VMCB_EXITCODE_CR10_WRITE 0x001A 90 #define VMCB_EXITCODE_CR11_WRITE 0x001B 91 #define VMCB_EXITCODE_CR12_WRITE 0x001C 92 #define VMCB_EXITCODE_CR13_WRITE 0x001D 93 #define VMCB_EXITCODE_CR14_WRITE 0x001E 94 #define VMCB_EXITCODE_CR15_WRITE 0x001F 95 #define VMCB_EXITCODE_DR0_READ 0x0020 96 #define VMCB_EXITCODE_DR1_READ 0x0021 97 #define VMCB_EXITCODE_DR2_READ 0x0022 98 #define VMCB_EXITCODE_DR3_READ 0x0023 99 #define VMCB_EXITCODE_DR4_READ 0x0024 100 #define VMCB_EXITCODE_DR5_READ 0x0025 101 #define VMCB_EXITCODE_DR6_READ 0x0026 102 #define VMCB_EXITCODE_DR7_READ 0x0027 103 #define VMCB_EXITCODE_DR8_READ 0x0028 104 #define VMCB_EXITCODE_DR9_READ 0x0029 105 #define VMCB_EXITCODE_DR10_READ 0x002A 106 #define VMCB_EXITCODE_DR11_READ 0x002B 107 #define VMCB_EXITCODE_DR12_READ 0x002C 108 #define VMCB_EXITCODE_DR13_READ 0x002D 109 #define VMCB_EXITCODE_DR14_READ 0x002E 110 #define VMCB_EXITCODE_DR15_READ 0x002F 111 #define VMCB_EXITCODE_DR0_WRITE 0x0030 112 #define VMCB_EXITCODE_DR1_WRITE 0x0031 113 #define VMCB_EXITCODE_DR2_WRITE 0x0032 114 #define VMCB_EXITCODE_DR3_WRITE 0x0033 115 #define VMCB_EXITCODE_DR4_WRITE 0x0034 116 #define VMCB_EXITCODE_DR5_WRITE 0x0035 117 #define VMCB_EXITCODE_DR6_WRITE 0x0036 118 #define VMCB_EXITCODE_DR7_WRITE 0x0037 119 #define VMCB_EXITCODE_DR8_WRITE 0x0038 120 #define VMCB_EXITCODE_DR9_WRITE 0x0039 121 #define VMCB_EXITCODE_DR10_WRITE 0x003A 122 #define VMCB_EXITCODE_DR11_WRITE 0x003B 123 #define VMCB_EXITCODE_DR12_WRITE 0x003C 124 #define VMCB_EXITCODE_DR13_WRITE 0x003D 125 #define VMCB_EXITCODE_DR14_WRITE 0x003E 126 #define VMCB_EXITCODE_DR15_WRITE 0x003F 127 #define VMCB_EXITCODE_EXCP0 0x0040 128 #define VMCB_EXITCODE_EXCP1 0x0041 129 #define VMCB_EXITCODE_EXCP2 0x0042 130 #define VMCB_EXITCODE_EXCP3 0x0043 131 #define VMCB_EXITCODE_EXCP4 0x0044 132 #define VMCB_EXITCODE_EXCP5 0x0045 133 #define VMCB_EXITCODE_EXCP6 0x0046 134 #define VMCB_EXITCODE_EXCP7 0x0047 135 #define VMCB_EXITCODE_EXCP8 0x0048 136 #define VMCB_EXITCODE_EXCP9 0x0049 137 #define VMCB_EXITCODE_EXCP10 0x004A 138 #define VMCB_EXITCODE_EXCP11 0x004B 139 #define VMCB_EXITCODE_EXCP12 0x004C 140 #define VMCB_EXITCODE_EXCP13 0x004D 141 #define VMCB_EXITCODE_EXCP14 0x004E 142 #define VMCB_EXITCODE_EXCP15 0x004F 143 #define VMCB_EXITCODE_EXCP16 0x0050 144 #define VMCB_EXITCODE_EXCP17 0x0051 145 #define VMCB_EXITCODE_EXCP18 0x0052 146 #define VMCB_EXITCODE_EXCP19 0x0053 147 #define VMCB_EXITCODE_EXCP20 0x0054 148 #define VMCB_EXITCODE_EXCP21 0x0055 149 #define VMCB_EXITCODE_EXCP22 0x0056 150 #define VMCB_EXITCODE_EXCP23 0x0057 151 #define VMCB_EXITCODE_EXCP24 0x0058 152 #define VMCB_EXITCODE_EXCP25 0x0059 153 #define VMCB_EXITCODE_EXCP26 0x005A 154 #define VMCB_EXITCODE_EXCP27 0x005B 155 #define VMCB_EXITCODE_EXCP28 0x005C 156 #define VMCB_EXITCODE_EXCP29 0x005D 157 #define VMCB_EXITCODE_EXCP30 0x005E 158 #define VMCB_EXITCODE_EXCP31 0x005F 159 #define VMCB_EXITCODE_INTR 0x0060 160 #define VMCB_EXITCODE_NMI 0x0061 161 #define VMCB_EXITCODE_SMI 0x0062 162 #define VMCB_EXITCODE_INIT 0x0063 163 #define VMCB_EXITCODE_VINTR 0x0064 164 #define VMCB_EXITCODE_CR0_SEL_WRITE 0x0065 165 #define VMCB_EXITCODE_IDTR_READ 0x0066 166 #define VMCB_EXITCODE_GDTR_READ 0x0067 167 #define VMCB_EXITCODE_LDTR_READ 0x0068 168 #define VMCB_EXITCODE_TR_READ 0x0069 169 #define VMCB_EXITCODE_IDTR_WRITE 0x006A 170 #define VMCB_EXITCODE_GDTR_WRITE 0x006B 171 #define VMCB_EXITCODE_LDTR_WRITE 0x006C 172 #define VMCB_EXITCODE_TR_WRITE 0x006D 173 #define VMCB_EXITCODE_RDTSC 0x006E 174 #define VMCB_EXITCODE_RDPMC 0x006F 175 #define VMCB_EXITCODE_PUSHF 0x0070 176 #define VMCB_EXITCODE_POPF 0x0071 177 #define VMCB_EXITCODE_CPUID 0x0072 178 #define VMCB_EXITCODE_RSM 0x0073 179 #define VMCB_EXITCODE_IRET 0x0074 180 #define VMCB_EXITCODE_SWINT 0x0075 181 #define VMCB_EXITCODE_INVD 0x0076 182 #define VMCB_EXITCODE_PAUSE 0x0077 183 #define VMCB_EXITCODE_HLT 0x0078 184 #define VMCB_EXITCODE_INVLPG 0x0079 185 #define VMCB_EXITCODE_INVLPGA 0x007A 186 #define VMCB_EXITCODE_IOIO 0x007B 187 #define VMCB_EXITCODE_MSR 0x007C 188 #define VMCB_EXITCODE_TASK_SWITCH 0x007D 189 #define VMCB_EXITCODE_FERR_FREEZE 0x007E 190 #define VMCB_EXITCODE_SHUTDOWN 0x007F 191 #define VMCB_EXITCODE_VMRUN 0x0080 192 #define VMCB_EXITCODE_VMMCALL 0x0081 193 #define VMCB_EXITCODE_VMLOAD 0x0082 194 #define VMCB_EXITCODE_VMSAVE 0x0083 195 #define VMCB_EXITCODE_STGI 0x0084 196 #define VMCB_EXITCODE_CLGI 0x0085 197 #define VMCB_EXITCODE_SKINIT 0x0086 198 #define VMCB_EXITCODE_RDTSCP 0x0087 199 #define VMCB_EXITCODE_ICEBP 0x0088 200 #define VMCB_EXITCODE_WBINVD 0x0089 201 #define VMCB_EXITCODE_MONITOR 0x008A 202 #define VMCB_EXITCODE_MWAIT 0x008B 203 #define VMCB_EXITCODE_MWAIT_CONDITIONAL 0x008C 204 #define VMCB_EXITCODE_XSETBV 0x008D 205 #define VMCB_EXITCODE_RDPRU 0x008E 206 #define VMCB_EXITCODE_EFER_WRITE_TRAP 0x008F 207 #define VMCB_EXITCODE_CR0_WRITE_TRAP 0x0090 208 #define VMCB_EXITCODE_CR1_WRITE_TRAP 0x0091 209 #define VMCB_EXITCODE_CR2_WRITE_TRAP 0x0092 210 #define VMCB_EXITCODE_CR3_WRITE_TRAP 0x0093 211 #define VMCB_EXITCODE_CR4_WRITE_TRAP 0x0094 212 #define VMCB_EXITCODE_CR5_WRITE_TRAP 0x0095 213 #define VMCB_EXITCODE_CR6_WRITE_TRAP 0x0096 214 #define VMCB_EXITCODE_CR7_WRITE_TRAP 0x0097 215 #define VMCB_EXITCODE_CR8_WRITE_TRAP 0x0098 216 #define VMCB_EXITCODE_CR9_WRITE_TRAP 0x0099 217 #define VMCB_EXITCODE_CR10_WRITE_TRAP 0x009A 218 #define VMCB_EXITCODE_CR11_WRITE_TRAP 0x009B 219 #define VMCB_EXITCODE_CR12_WRITE_TRAP 0x009C 220 #define VMCB_EXITCODE_CR13_WRITE_TRAP 0x009D 221 #define VMCB_EXITCODE_CR14_WRITE_TRAP 0x009E 222 #define VMCB_EXITCODE_CR15_WRITE_TRAP 0x009F 223 #define VMCB_EXITCODE_INVLPGB 0x00A0 224 #define VMCB_EXITCODE_INVLPGB_ILLEGAL 0x00A1 225 #define VMCB_EXITCODE_INVPCID 0x00A2 226 #define VMCB_EXITCODE_MCOMMIT 0x00A3 227 #define VMCB_EXITCODE_TLBSYNC 0x00A4 228 #define VMCB_EXITCODE_NPF 0x0400 229 #define VMCB_EXITCODE_AVIC_INCOMP_IPI 0x0401 230 #define VMCB_EXITCODE_AVIC_NOACCEL 0x0402 231 #define VMCB_EXITCODE_VMGEXIT 0x0403 232 #define VMCB_EXITCODE_BUSY -2ULL 233 #define VMCB_EXITCODE_INVALID -1ULL 234 235 /* -------------------------------------------------------------------------- */ 236 237 struct vmcb_ctrl { 238 uint32_t intercept_cr; 239 #define VMCB_CTRL_INTERCEPT_RCR(x) __BIT( 0 + x) 240 #define VMCB_CTRL_INTERCEPT_WCR(x) __BIT(16 + x) 241 242 uint32_t intercept_dr; 243 #define VMCB_CTRL_INTERCEPT_RDR(x) __BIT( 0 + x) 244 #define VMCB_CTRL_INTERCEPT_WDR(x) __BIT(16 + x) 245 246 uint32_t intercept_vec; 247 #define VMCB_CTRL_INTERCEPT_VEC(x) __BIT(x) 248 249 uint32_t intercept_misc1; 250 #define VMCB_CTRL_INTERCEPT_INTR __BIT(0) 251 #define VMCB_CTRL_INTERCEPT_NMI __BIT(1) 252 #define VMCB_CTRL_INTERCEPT_SMI __BIT(2) 253 #define VMCB_CTRL_INTERCEPT_INIT __BIT(3) 254 #define VMCB_CTRL_INTERCEPT_VINTR __BIT(4) 255 #define VMCB_CTRL_INTERCEPT_CR0_SPEC __BIT(5) 256 #define VMCB_CTRL_INTERCEPT_RIDTR __BIT(6) 257 #define VMCB_CTRL_INTERCEPT_RGDTR __BIT(7) 258 #define VMCB_CTRL_INTERCEPT_RLDTR __BIT(8) 259 #define VMCB_CTRL_INTERCEPT_RTR __BIT(9) 260 #define VMCB_CTRL_INTERCEPT_WIDTR __BIT(10) 261 #define VMCB_CTRL_INTERCEPT_WGDTR __BIT(11) 262 #define VMCB_CTRL_INTERCEPT_WLDTR __BIT(12) 263 #define VMCB_CTRL_INTERCEPT_WTR __BIT(13) 264 #define VMCB_CTRL_INTERCEPT_RDTSC __BIT(14) 265 #define VMCB_CTRL_INTERCEPT_RDPMC __BIT(15) 266 #define VMCB_CTRL_INTERCEPT_PUSHF __BIT(16) 267 #define VMCB_CTRL_INTERCEPT_POPF __BIT(17) 268 #define VMCB_CTRL_INTERCEPT_CPUID __BIT(18) 269 #define VMCB_CTRL_INTERCEPT_RSM __BIT(19) 270 #define VMCB_CTRL_INTERCEPT_IRET __BIT(20) 271 #define VMCB_CTRL_INTERCEPT_INTN __BIT(21) 272 #define VMCB_CTRL_INTERCEPT_INVD __BIT(22) 273 #define VMCB_CTRL_INTERCEPT_PAUSE __BIT(23) 274 #define VMCB_CTRL_INTERCEPT_HLT __BIT(24) 275 #define VMCB_CTRL_INTERCEPT_INVLPG __BIT(25) 276 #define VMCB_CTRL_INTERCEPT_INVLPGA __BIT(26) 277 #define VMCB_CTRL_INTERCEPT_IOIO_PROT __BIT(27) 278 #define VMCB_CTRL_INTERCEPT_MSR_PROT __BIT(28) 279 #define VMCB_CTRL_INTERCEPT_TASKSW __BIT(29) 280 #define VMCB_CTRL_INTERCEPT_FERR_FREEZE __BIT(30) 281 #define VMCB_CTRL_INTERCEPT_SHUTDOWN __BIT(31) 282 283 uint32_t intercept_misc2; 284 #define VMCB_CTRL_INTERCEPT_VMRUN __BIT(0) 285 #define VMCB_CTRL_INTERCEPT_VMMCALL __BIT(1) 286 #define VMCB_CTRL_INTERCEPT_VMLOAD __BIT(2) 287 #define VMCB_CTRL_INTERCEPT_VMSAVE __BIT(3) 288 #define VMCB_CTRL_INTERCEPT_STGI __BIT(4) 289 #define VMCB_CTRL_INTERCEPT_CLGI __BIT(5) 290 #define VMCB_CTRL_INTERCEPT_SKINIT __BIT(6) 291 #define VMCB_CTRL_INTERCEPT_RDTSCP __BIT(7) 292 #define VMCB_CTRL_INTERCEPT_ICEBP __BIT(8) 293 #define VMCB_CTRL_INTERCEPT_WBINVD __BIT(9) 294 #define VMCB_CTRL_INTERCEPT_MONITOR __BIT(10) 295 #define VMCB_CTRL_INTERCEPT_MWAIT __BIT(11) 296 #define VMCB_CTRL_INTERCEPT_MWAIT_ARMED __BIT(12) 297 #define VMCB_CTRL_INTERCEPT_XSETBV __BIT(13) 298 #define VMCB_CTRL_INTERCEPT_RDPRU __BIT(14) 299 #define VMCB_CTRL_INTERCEPT_EFER_SPEC __BIT(15) 300 #define VMCB_CTRL_INTERCEPT_WCR_SPEC(x) __BIT(16 + x) 301 302 uint32_t intercept_misc3; 303 #define VMCB_CTRL_INTERCEPT_INVLPGB_ALL __BIT(0) 304 #define VMCB_CTRL_INTERCEPT_INVLPGB_ILL __BIT(1) 305 #define VMCB_CTRL_INTERCEPT_PCID __BIT(2) 306 #define VMCB_CTRL_INTERCEPT_MCOMMIT __BIT(3) 307 #define VMCB_CTRL_INTERCEPT_TLBSYNC __BIT(4) 308 309 uint8_t rsvd1[36]; 310 uint16_t pause_filt_thresh; 311 uint16_t pause_filt_cnt; 312 uint64_t iopm_base_pa; 313 uint64_t msrpm_base_pa; 314 uint64_t tsc_offset; 315 uint32_t guest_asid; 316 317 uint32_t tlb_ctrl; 318 #define VMCB_CTRL_TLB_CTRL_FLUSH_ALL 0x01 319 #define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST 0x03 320 #define VMCB_CTRL_TLB_CTRL_FLUSH_GUEST_NONGLOBAL 0x07 321 322 uint64_t v; 323 #define VMCB_CTRL_V_TPR __BITS(3,0) 324 #define VMCB_CTRL_V_IRQ __BIT(8) 325 #define VMCB_CTRL_V_VGIF __BIT(9) 326 #define VMCB_CTRL_V_INTR_PRIO __BITS(19,16) 327 #define VMCB_CTRL_V_IGN_TPR __BIT(20) 328 #define VMCB_CTRL_V_INTR_MASKING __BIT(24) 329 #define VMCB_CTRL_V_GUEST_VGIF __BIT(25) 330 #define VMCB_CTRL_V_AVIC_EN __BIT(31) 331 #define VMCB_CTRL_V_INTR_VECTOR __BITS(39,32) 332 333 uint64_t intr; 334 #define VMCB_CTRL_INTR_SHADOW __BIT(0) 335 #define VMCB_CTRL_INTR_MASK __BIT(1) 336 337 uint64_t exitcode; 338 uint64_t exitinfo1; 339 uint64_t exitinfo2; 340 341 uint64_t exitintinfo; 342 #define VMCB_CTRL_EXITINTINFO_VECTOR __BITS(7,0) 343 #define VMCB_CTRL_EXITINTINFO_TYPE __BITS(10,8) 344 #define VMCB_CTRL_EXITINTINFO_EV __BIT(11) 345 #define VMCB_CTRL_EXITINTINFO_V __BIT(31) 346 #define VMCB_CTRL_EXITINTINFO_ERRORCODE __BITS(63,32) 347 348 uint64_t enable1; 349 #define VMCB_CTRL_ENABLE_NP __BIT(0) 350 #define VMCB_CTRL_ENABLE_SEV __BIT(1) 351 #define VMCB_CTRL_ENABLE_ES_SEV __BIT(2) 352 #define VMCB_CTRL_ENABLE_GMET __BIT(3) 353 #define VMCB_CTRL_ENABLE_SSS __BIT(4) 354 #define VMCB_CTRL_ENABLE_VTE __BIT(5) 355 356 uint64_t avic; 357 #define VMCB_CTRL_AVIC_APIC_BAR __BITS(51,0) 358 359 uint64_t ghcb; 360 361 uint64_t eventinj; 362 #define VMCB_CTRL_EVENTINJ_VECTOR __BITS(7,0) 363 #define VMCB_CTRL_EVENTINJ_TYPE __BITS(10,8) 364 #define VMCB_CTRL_EVENTINJ_EV __BIT(11) 365 #define VMCB_CTRL_EVENTINJ_V __BIT(31) 366 #define VMCB_CTRL_EVENTINJ_ERRORCODE __BITS(63,32) 367 368 uint64_t n_cr3; 369 370 uint64_t enable2; 371 #define VMCB_CTRL_ENABLE_LBR __BIT(0) 372 #define VMCB_CTRL_ENABLE_VVMSAVE __BIT(1) 373 374 uint32_t vmcb_clean; 375 #define VMCB_CTRL_VMCB_CLEAN_I __BIT(0) 376 #define VMCB_CTRL_VMCB_CLEAN_IOPM __BIT(1) 377 #define VMCB_CTRL_VMCB_CLEAN_ASID __BIT(2) 378 #define VMCB_CTRL_VMCB_CLEAN_TPR __BIT(3) 379 #define VMCB_CTRL_VMCB_CLEAN_NP __BIT(4) 380 #define VMCB_CTRL_VMCB_CLEAN_CR __BIT(5) 381 #define VMCB_CTRL_VMCB_CLEAN_DR __BIT(6) 382 #define VMCB_CTRL_VMCB_CLEAN_DT __BIT(7) 383 #define VMCB_CTRL_VMCB_CLEAN_SEG __BIT(8) 384 #define VMCB_CTRL_VMCB_CLEAN_CR2 __BIT(9) 385 #define VMCB_CTRL_VMCB_CLEAN_LBR __BIT(10) 386 #define VMCB_CTRL_VMCB_CLEAN_AVIC __BIT(11) 387 #define VMCB_CTRL_VMCB_CLEAN_CET __BIT(12) 388 389 uint32_t rsvd2; 390 uint64_t nrip; 391 uint8_t inst_len; 392 uint8_t inst_bytes[15]; 393 uint64_t avic_abpp; 394 uint64_t rsvd3; 395 uint64_t avic_ltp; 396 397 uint64_t avic_phys; 398 #define VMCB_CTRL_AVIC_PHYS_TABLE_PTR __BITS(51,12) 399 #define VMCB_CTRL_AVIC_PHYS_MAX_INDEX __BITS(7,0) 400 401 uint64_t rsvd4; 402 uint64_t vmsa_ptr; 403 404 uint8_t pad[752]; 405 } __packed; 406 407 CTASSERT(sizeof(struct vmcb_ctrl) == 1024); 408 409 struct vmcb_segment { 410 uint16_t selector; 411 uint16_t attrib; /* hidden */ 412 uint32_t limit; /* hidden */ 413 uint64_t base; /* hidden */ 414 } __packed; 415 416 CTASSERT(sizeof(struct vmcb_segment) == 16); 417 418 struct vmcb_state { 419 struct vmcb_segment es; 420 struct vmcb_segment cs; 421 struct vmcb_segment ss; 422 struct vmcb_segment ds; 423 struct vmcb_segment fs; 424 struct vmcb_segment gs; 425 struct vmcb_segment gdt; 426 struct vmcb_segment ldt; 427 struct vmcb_segment idt; 428 struct vmcb_segment tr; 429 uint8_t rsvd1[43]; 430 uint8_t cpl; 431 uint8_t rsvd2[4]; 432 uint64_t efer; 433 uint8_t rsvd3[112]; 434 uint64_t cr4; 435 uint64_t cr3; 436 uint64_t cr0; 437 uint64_t dr7; 438 uint64_t dr6; 439 uint64_t rflags; 440 uint64_t rip; 441 uint8_t rsvd4[88]; 442 uint64_t rsp; 443 uint64_t s_cet; 444 uint64_t ssp; 445 uint64_t isst_addr; 446 uint64_t rax; 447 uint64_t star; 448 uint64_t lstar; 449 uint64_t cstar; 450 uint64_t sfmask; 451 uint64_t kernelgsbase; 452 uint64_t sysenter_cs; 453 uint64_t sysenter_esp; 454 uint64_t sysenter_eip; 455 uint64_t cr2; 456 uint8_t rsvd6[32]; 457 uint64_t g_pat; 458 uint64_t dbgctl; 459 uint64_t br_from; 460 uint64_t br_to; 461 uint64_t int_from; 462 uint64_t int_to; 463 uint8_t pad[2408]; 464 } __packed; 465 466 CTASSERT(sizeof(struct vmcb_state) == 0xC00); 467 468 struct vmcb { 469 struct vmcb_ctrl ctrl; 470 struct vmcb_state state; 471 } __packed; 472 473 CTASSERT(sizeof(struct vmcb) == PAGE_SIZE); 474 CTASSERT(offsetof(struct vmcb, state) == 0x400); 475 476 /* -------------------------------------------------------------------------- */ 477 478 static void svm_vcpu_state_provide(struct nvmm_cpu *, uint64_t); 479 static void svm_vcpu_state_commit(struct nvmm_cpu *); 480 481 /* 482 * These host values are static, they do not change at runtime and are the same 483 * on all CPUs. We save them here because they are not saved in the VMCB. 484 */ 485 static struct { 486 uint64_t xcr0; 487 uint64_t star; 488 uint64_t lstar; 489 uint64_t cstar; 490 uint64_t sfmask; 491 } svm_global_hstate __cacheline_aligned; 492 493 struct svm_hsave { 494 paddr_t pa; 495 }; 496 497 static struct svm_hsave hsave[OS_MAXCPUS]; 498 499 static uint8_t *svm_asidmap __read_mostly; 500 static uint32_t svm_maxasid __read_mostly; 501 static os_mtx_t svm_asidlock __cacheline_aligned; 502 503 static bool svm_decode_assist __read_mostly; 504 static uint32_t svm_ctrl_tlb_flush __read_mostly; 505 506 #define SVM_XCR0_MASK_DEFAULT (XCR0_X87|XCR0_SSE) 507 static uint64_t svm_xcr0_mask __read_mostly; 508 509 #define SVM_NCPUIDS 32 510 511 #define VMCB_NPAGES 1 512 513 #define MSRBM_NPAGES 2 514 #define MSRBM_SIZE (MSRBM_NPAGES * PAGE_SIZE) 515 516 #define IOBM_NPAGES 3 517 #define IOBM_SIZE (IOBM_NPAGES * PAGE_SIZE) 518 519 /* Does not include EFER_LMSLE. */ 520 #define EFER_VALID \ 521 (EFER_SCE|EFER_LME|EFER_LMA|EFER_NXE|EFER_SVME|EFER_FFXSR|EFER_TCE) 522 523 #define EFER_TLB_FLUSH \ 524 (EFER_NXE|EFER_LMA|EFER_LME) 525 #define CR0_TLB_FLUSH \ 526 (CR0_PG|CR0_WP|CR0_CD|CR0_NW) 527 #define CR4_TLB_FLUSH \ 528 (CR4_PSE|CR4_PAE|CR4_PGE|CR4_PCIDE|CR4_SMEP) 529 530 /* -------------------------------------------------------------------------- */ 531 532 struct svm_machdata { 533 volatile uint64_t mach_htlb_gen; 534 }; 535 536 static const size_t svm_vcpu_conf_sizes[NVMM_X86_VCPU_NCONF] = { 537 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID)] = 538 sizeof(struct nvmm_vcpu_conf_cpuid), 539 [NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_TPR)] = 540 sizeof(struct nvmm_vcpu_conf_tpr) 541 }; 542 543 struct svm_cpudata { 544 /* General. */ 545 bool shared_asid; 546 bool gtlb_want_flush; 547 bool htlb_want_flush; 548 bool gtsc_want_update; 549 uint64_t vcpu_htlb_gen; 550 551 /* VMCB. */ 552 struct vmcb *vmcb; 553 paddr_t vmcb_pa; 554 555 /* I/O bitmap. */ 556 uint8_t *iobm; 557 paddr_t iobm_pa; 558 559 /* MSR bitmap. */ 560 uint8_t *msrbm; 561 paddr_t msrbm_pa; 562 563 /* Percpu host state, absent from VMCB. */ 564 struct { 565 uint64_t fsbase; 566 uint64_t kernelgsbase; 567 uint64_t drs[NVMM_X64_NDR]; 568 #ifdef __DragonFly__ 569 mcontext_t hmctx; /* TODO: remove this like NetBSD */ 570 #endif 571 } hstate; 572 573 /* Intr state. */ 574 bool int_window_exit; 575 bool nmi_window_exit; 576 bool evt_pending; 577 578 /* Guest state. */ 579 uint64_t gxcr0; 580 uint64_t gprs[NVMM_X64_NGPR]; 581 uint64_t drs[NVMM_X64_NDR]; 582 uint64_t gtsc_offset; 583 uint64_t gtsc_match; 584 struct nvmm_x86_xsave gxsave __aligned(64); 585 586 /* VCPU configuration. */ 587 bool cpuidpresent[SVM_NCPUIDS]; 588 struct nvmm_vcpu_conf_cpuid cpuid[SVM_NCPUIDS]; 589 }; 590 591 static void 592 svm_vmcb_cache_default(struct vmcb *vmcb) 593 { 594 vmcb->ctrl.vmcb_clean = 595 VMCB_CTRL_VMCB_CLEAN_I | 596 VMCB_CTRL_VMCB_CLEAN_IOPM | 597 VMCB_CTRL_VMCB_CLEAN_ASID | 598 VMCB_CTRL_VMCB_CLEAN_TPR | 599 VMCB_CTRL_VMCB_CLEAN_NP | 600 VMCB_CTRL_VMCB_CLEAN_CR | 601 VMCB_CTRL_VMCB_CLEAN_DR | 602 VMCB_CTRL_VMCB_CLEAN_DT | 603 VMCB_CTRL_VMCB_CLEAN_SEG | 604 VMCB_CTRL_VMCB_CLEAN_CR2 | 605 VMCB_CTRL_VMCB_CLEAN_LBR | 606 VMCB_CTRL_VMCB_CLEAN_AVIC; 607 } 608 609 static void 610 svm_vmcb_cache_update(struct vmcb *vmcb, uint64_t flags) 611 { 612 if (flags & NVMM_X64_STATE_SEGS) { 613 vmcb->ctrl.vmcb_clean &= 614 ~(VMCB_CTRL_VMCB_CLEAN_SEG | VMCB_CTRL_VMCB_CLEAN_DT); 615 } 616 if (flags & NVMM_X64_STATE_CRS) { 617 vmcb->ctrl.vmcb_clean &= 618 ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_CR2 | 619 VMCB_CTRL_VMCB_CLEAN_TPR); 620 } 621 if (flags & NVMM_X64_STATE_DRS) { 622 vmcb->ctrl.vmcb_clean &= ~VMCB_CTRL_VMCB_CLEAN_DR; 623 } 624 if (flags & NVMM_X64_STATE_MSRS) { 625 /* CR for EFER, NP for PAT. */ 626 vmcb->ctrl.vmcb_clean &= 627 ~(VMCB_CTRL_VMCB_CLEAN_CR | VMCB_CTRL_VMCB_CLEAN_NP); 628 } 629 } 630 631 static inline void 632 svm_vmcb_cache_flush(struct vmcb *vmcb, uint64_t flags) 633 { 634 vmcb->ctrl.vmcb_clean &= ~flags; 635 } 636 637 static inline void 638 svm_vmcb_cache_flush_all(struct vmcb *vmcb) 639 { 640 vmcb->ctrl.vmcb_clean = 0; 641 } 642 643 #define SVM_EVENT_TYPE_HW_INT 0 644 #define SVM_EVENT_TYPE_NMI 2 645 #define SVM_EVENT_TYPE_EXC 3 646 #define SVM_EVENT_TYPE_SW_INT 4 647 648 static void 649 svm_event_waitexit_enable(struct nvmm_cpu *vcpu, bool nmi) 650 { 651 struct svm_cpudata *cpudata = vcpu->cpudata; 652 struct vmcb *vmcb = cpudata->vmcb; 653 654 if (nmi) { 655 vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_IRET; 656 cpudata->nmi_window_exit = true; 657 } else { 658 vmcb->ctrl.intercept_misc1 |= VMCB_CTRL_INTERCEPT_VINTR; 659 vmcb->ctrl.v |= (VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR); 660 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR); 661 cpudata->int_window_exit = true; 662 } 663 664 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 665 } 666 667 static void 668 svm_event_waitexit_disable(struct nvmm_cpu *vcpu, bool nmi) 669 { 670 struct svm_cpudata *cpudata = vcpu->cpudata; 671 struct vmcb *vmcb = cpudata->vmcb; 672 673 if (nmi) { 674 vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_IRET; 675 cpudata->nmi_window_exit = false; 676 } else { 677 vmcb->ctrl.intercept_misc1 &= ~VMCB_CTRL_INTERCEPT_VINTR; 678 vmcb->ctrl.v &= ~(VMCB_CTRL_V_IRQ | VMCB_CTRL_V_IGN_TPR); 679 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_TPR); 680 cpudata->int_window_exit = false; 681 } 682 683 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 684 } 685 686 static inline bool 687 svm_excp_has_rf(uint8_t vector) 688 { 689 switch (vector) { 690 case 1: /* #DB */ 691 case 4: /* #OF */ 692 case 8: /* #DF */ 693 case 18: /* #MC */ 694 return false; 695 default: 696 return true; 697 } 698 } 699 700 static inline int 701 svm_excp_has_error(uint8_t vector) 702 { 703 switch (vector) { 704 case 8: /* #DF */ 705 case 10: /* #TS */ 706 case 11: /* #NP */ 707 case 12: /* #SS */ 708 case 13: /* #GP */ 709 case 14: /* #PF */ 710 case 17: /* #AC */ 711 case 21: /* #CP */ 712 case 30: /* #SX */ 713 return 1; 714 default: 715 return 0; 716 } 717 } 718 719 static int 720 svm_vcpu_inject(struct nvmm_cpu *vcpu) 721 { 722 struct nvmm_comm_page *comm = vcpu->comm; 723 struct svm_cpudata *cpudata = vcpu->cpudata; 724 struct vmcb *vmcb = cpudata->vmcb; 725 u_int evtype; 726 uint8_t vector; 727 uint64_t error; 728 int type = 0, err = 0; 729 730 evtype = comm->event.type; 731 vector = comm->event.vector; 732 error = comm->event.u.excp.error; 733 __insn_barrier(); 734 735 switch (evtype) { 736 case NVMM_VCPU_EVENT_EXCP: 737 type = SVM_EVENT_TYPE_EXC; 738 if (vector == 2 || vector >= 32) 739 return EINVAL; 740 if (vector == 3 || vector == 0) 741 return EINVAL; 742 if (svm_excp_has_rf(vector)) { 743 vmcb->state.rflags |= PSL_RF; 744 } 745 err = svm_excp_has_error(vector); 746 break; 747 case NVMM_VCPU_EVENT_INTR: 748 type = SVM_EVENT_TYPE_HW_INT; 749 if (vector == 2) { 750 type = SVM_EVENT_TYPE_NMI; 751 svm_event_waitexit_enable(vcpu, true); 752 } 753 err = 0; 754 break; 755 default: 756 return EINVAL; 757 } 758 759 vmcb->ctrl.eventinj = 760 __SHIFTIN((uint64_t)vector, VMCB_CTRL_EVENTINJ_VECTOR) | 761 __SHIFTIN((uint64_t)type, VMCB_CTRL_EVENTINJ_TYPE) | 762 __SHIFTIN((uint64_t)err, VMCB_CTRL_EVENTINJ_EV) | 763 __SHIFTIN((uint64_t)1, VMCB_CTRL_EVENTINJ_V) | 764 __SHIFTIN((uint64_t)error, VMCB_CTRL_EVENTINJ_ERRORCODE); 765 766 cpudata->evt_pending = true; 767 768 return 0; 769 } 770 771 static void 772 svm_inject_ud(struct nvmm_cpu *vcpu) 773 { 774 struct nvmm_comm_page *comm = vcpu->comm; 775 int ret __diagused; 776 777 comm->event.type = NVMM_VCPU_EVENT_EXCP; 778 comm->event.vector = 6; 779 comm->event.u.excp.error = 0; 780 781 ret = svm_vcpu_inject(vcpu); 782 OS_ASSERT(ret == 0); 783 } 784 785 static void 786 svm_inject_gp(struct nvmm_cpu *vcpu) 787 { 788 struct nvmm_comm_page *comm = vcpu->comm; 789 int ret __diagused; 790 791 comm->event.type = NVMM_VCPU_EVENT_EXCP; 792 comm->event.vector = 13; 793 comm->event.u.excp.error = 0; 794 795 ret = svm_vcpu_inject(vcpu); 796 OS_ASSERT(ret == 0); 797 } 798 799 static inline int 800 svm_vcpu_event_commit(struct nvmm_cpu *vcpu) 801 { 802 if (__predict_true(!vcpu->comm->event_commit)) { 803 return 0; 804 } 805 vcpu->comm->event_commit = false; 806 return svm_vcpu_inject(vcpu); 807 } 808 809 static inline void 810 svm_inkernel_advance(struct vmcb *vmcb) 811 { 812 /* 813 * Maybe we should also apply single-stepping and debug exceptions. 814 * Matters for guest-ring3, because it can execute 'cpuid' under a 815 * debugger. 816 */ 817 vmcb->state.rip = vmcb->ctrl.nrip; 818 vmcb->state.rflags &= ~PSL_RF; 819 vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW; 820 } 821 822 #define SVM_CPUID_MAX_BASIC 0xD 823 #define SVM_CPUID_MAX_HYPERVISOR 0x40000000 824 #define SVM_CPUID_MAX_EXTENDED 0x8000001F 825 static uint32_t svm_cpuid_max_basic __read_mostly; 826 static uint32_t svm_cpuid_max_extended __read_mostly; 827 828 static void 829 svm_inkernel_exec_cpuid(struct svm_cpudata *cpudata, uint32_t eax, uint32_t ecx) 830 { 831 cpuid_desc_t descs; 832 833 x86_get_cpuid2(eax, ecx, &descs); 834 cpudata->vmcb->state.rax = descs.eax; 835 cpudata->gprs[NVMM_X64_GPR_RBX] = descs.ebx; 836 cpudata->gprs[NVMM_X64_GPR_RCX] = descs.ecx; 837 cpudata->gprs[NVMM_X64_GPR_RDX] = descs.edx; 838 } 839 840 static void 841 svm_inkernel_handle_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 842 uint32_t eax, uint32_t ecx) 843 { 844 struct svm_cpudata *cpudata = vcpu->cpudata; 845 unsigned int ncpus; 846 uint64_t cr4; 847 848 if (eax < 0x40000000) { 849 if (__predict_false(eax > svm_cpuid_max_basic)) { 850 eax = svm_cpuid_max_basic; 851 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 852 } 853 } else if (eax < 0x80000000) { 854 if (__predict_false(eax > SVM_CPUID_MAX_HYPERVISOR)) { 855 eax = svm_cpuid_max_basic; 856 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 857 } 858 } else { 859 if (__predict_false(eax > svm_cpuid_max_extended)) { 860 eax = svm_cpuid_max_basic; 861 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 862 } 863 } 864 865 switch (eax) { 866 case 0x00000000: 867 cpudata->vmcb->state.rax = svm_cpuid_max_basic; 868 break; 869 case 0x00000001: 870 cpudata->vmcb->state.rax &= nvmm_cpuid_00000001.eax; 871 872 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~CPUID_0_01_EBX_LOCAL_APIC_ID; 873 cpudata->gprs[NVMM_X64_GPR_RBX] |= __SHIFTIN(vcpu->cpuid, 874 CPUID_0_01_EBX_LOCAL_APIC_ID); 875 876 ncpus = os_atomic_load_uint(&mach->ncpus); 877 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~CPUID_0_01_EBX_HTT_CORES; 878 cpudata->gprs[NVMM_X64_GPR_RBX] |= __SHIFTIN(ncpus, 879 CPUID_0_01_EBX_HTT_CORES); 880 881 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000001.ecx; 882 cpudata->gprs[NVMM_X64_GPR_RCX] |= CPUID_0_01_ECX_RAZ; 883 884 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000001.edx; 885 886 /* CPUID_0_01_ECX_OSXSAVE depends on CR4. */ 887 cr4 = cpudata->vmcb->state.cr4; 888 if (!(cr4 & CR4_OSXSAVE)) { 889 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~CPUID_0_01_ECX_OSXSAVE; 890 } 891 break; 892 case 0x00000002: /* Empty */ 893 case 0x00000003: /* Empty */ 894 case 0x00000004: /* Empty */ 895 case 0x00000005: /* Monitor/MWait */ 896 case 0x00000006: /* Power Management Related Features */ 897 cpudata->vmcb->state.rax = 0; 898 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 899 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 900 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 901 break; 902 case 0x00000007: /* Structured Extended Features */ 903 switch (ecx) { 904 case 0: 905 cpudata->vmcb->state.rax = 0; 906 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_00000007.ebx; 907 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_00000007.ecx; 908 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_00000007.edx; 909 break; 910 default: 911 cpudata->vmcb->state.rax = 0; 912 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 913 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 914 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 915 break; 916 } 917 break; 918 case 0x00000008: /* Empty */ 919 case 0x00000009: /* Empty */ 920 case 0x0000000A: /* Empty */ 921 case 0x0000000B: /* Empty */ 922 case 0x0000000C: /* Empty */ 923 cpudata->vmcb->state.rax = 0; 924 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 925 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 926 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 927 break; 928 case 0x0000000D: /* Processor Extended State Enumeration */ 929 if (svm_xcr0_mask == 0) { 930 break; 931 } 932 switch (ecx) { 933 case 0: 934 /* Supported XCR0 bits. */ 935 cpudata->vmcb->state.rax = svm_xcr0_mask & 0xFFFFFFFF; 936 cpudata->gprs[NVMM_X64_GPR_RDX] = svm_xcr0_mask >> 32; 937 /* XSAVE size for currently enabled XCR0 features. */ 938 cpudata->gprs[NVMM_X64_GPR_RBX] = nvmm_x86_xsave_size(cpudata->gxcr0); 939 /* XSAVE size for all supported XCR0 features. */ 940 cpudata->gprs[NVMM_X64_GPR_RCX] = nvmm_x86_xsave_size(svm_xcr0_mask); 941 break; 942 case 1: 943 cpudata->vmcb->state.rax &= 944 (CPUID_0_0D_ECX1_EAX_XSAVEOPT | 945 CPUID_0_0D_ECX1_EAX_XSAVEC | 946 CPUID_0_0D_ECX1_EAX_XGETBV); 947 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 948 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 949 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 950 break; 951 default: 952 cpudata->vmcb->state.rax = 0; 953 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 954 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 955 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 956 break; 957 } 958 break; 959 960 case 0x40000000: /* Hypervisor Information */ 961 cpudata->vmcb->state.rax = SVM_CPUID_MAX_HYPERVISOR; 962 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 963 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 964 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 965 memcpy(&cpudata->gprs[NVMM_X64_GPR_RBX], "___ ", 4); 966 memcpy(&cpudata->gprs[NVMM_X64_GPR_RCX], "NVMM", 4); 967 memcpy(&cpudata->gprs[NVMM_X64_GPR_RDX], " ___", 4); 968 break; 969 970 case 0x80000000: 971 cpudata->vmcb->state.rax = svm_cpuid_max_extended; 972 break; 973 case 0x80000001: 974 cpudata->vmcb->state.rax &= nvmm_cpuid_80000001.eax; 975 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000001.ebx; 976 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000001.ecx; 977 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000001.edx; 978 break; 979 case 0x80000002: /* Extended Processor Name String */ 980 case 0x80000003: /* Extended Processor Name String */ 981 case 0x80000004: /* Extended Processor Name String */ 982 case 0x80000005: /* L1 Cache and TLB Information */ 983 case 0x80000006: /* L2 Cache and TLB and L3 Cache Information */ 984 break; 985 case 0x80000007: /* Processor Power Management and RAS Capabilities */ 986 cpudata->vmcb->state.rax &= nvmm_cpuid_80000007.eax; 987 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000007.ebx; 988 cpudata->gprs[NVMM_X64_GPR_RCX] &= nvmm_cpuid_80000007.ecx; 989 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000007.edx; 990 break; 991 case 0x80000008: /* Processor Capacity Parameters and Ext Feat Ident */ 992 ncpus = os_atomic_load_uint(&mach->ncpus); 993 cpudata->vmcb->state.rax &= nvmm_cpuid_80000008.eax; 994 cpudata->gprs[NVMM_X64_GPR_RBX] &= nvmm_cpuid_80000008.ebx; 995 cpudata->gprs[NVMM_X64_GPR_RCX] = 996 __SHIFTIN(ncpus - 1, CPUID_8_08_ECX_NC) | 997 __SHIFTIN(ilog2(NVMM_MAX_VCPUS), CPUID_8_08_ECX_ApicIdSize); 998 cpudata->gprs[NVMM_X64_GPR_RDX] &= nvmm_cpuid_80000008.edx; 999 break; 1000 case 0x80000009: /* Empty */ 1001 case 0x8000000A: /* SVM Features */ 1002 case 0x8000000B: /* Empty */ 1003 case 0x8000000C: /* Empty */ 1004 case 0x8000000D: /* Empty */ 1005 case 0x8000000E: /* Empty */ 1006 case 0x8000000F: /* Empty */ 1007 case 0x80000010: /* Empty */ 1008 case 0x80000011: /* Empty */ 1009 case 0x80000012: /* Empty */ 1010 case 0x80000013: /* Empty */ 1011 case 0x80000014: /* Empty */ 1012 case 0x80000015: /* Empty */ 1013 case 0x80000016: /* Empty */ 1014 case 0x80000017: /* Empty */ 1015 case 0x80000018: /* Empty */ 1016 cpudata->vmcb->state.rax = 0; 1017 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1018 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1019 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1020 break; 1021 case 0x80000019: /* TLB Characteristics for 1GB pages */ 1022 case 0x8000001A: /* Instruction Optimizations */ 1023 break; 1024 case 0x8000001B: /* Instruction-Based Sampling Capabilities */ 1025 case 0x8000001C: /* Lightweight Profiling Capabilities */ 1026 cpudata->vmcb->state.rax = 0; 1027 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1028 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1029 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1030 break; 1031 case 0x8000001D: /* Cache Topology Information */ 1032 case 0x8000001E: /* Processor Topology Information */ 1033 break; /* TODO? */ 1034 case 0x8000001F: /* Encrypted Memory Capabilities */ 1035 cpudata->vmcb->state.rax = 0; 1036 cpudata->gprs[NVMM_X64_GPR_RBX] = 0; 1037 cpudata->gprs[NVMM_X64_GPR_RCX] = 0; 1038 cpudata->gprs[NVMM_X64_GPR_RDX] = 0; 1039 break; 1040 1041 default: 1042 break; 1043 } 1044 } 1045 1046 static void 1047 svm_exit_insn(struct vmcb *vmcb, struct nvmm_vcpu_exit *exit, uint64_t reason) 1048 { 1049 exit->u.insn.npc = vmcb->ctrl.nrip; 1050 exit->reason = reason; 1051 } 1052 1053 static void 1054 svm_exit_cpuid(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1055 struct nvmm_vcpu_exit *exit) 1056 { 1057 struct svm_cpudata *cpudata = vcpu->cpudata; 1058 struct nvmm_vcpu_conf_cpuid *cpuid; 1059 uint32_t eax, ecx; 1060 size_t i; 1061 1062 eax = (cpudata->vmcb->state.rax & 0xFFFFFFFF); 1063 ecx = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1064 svm_inkernel_exec_cpuid(cpudata, eax, ecx); 1065 svm_inkernel_handle_cpuid(mach, vcpu, eax, ecx); 1066 1067 for (i = 0; i < SVM_NCPUIDS; i++) { 1068 if (!cpudata->cpuidpresent[i]) { 1069 continue; 1070 } 1071 cpuid = &cpudata->cpuid[i]; 1072 if (cpuid->leaf != eax) { 1073 continue; 1074 } 1075 1076 if (cpuid->exit) { 1077 svm_exit_insn(cpudata->vmcb, exit, NVMM_VCPU_EXIT_CPUID); 1078 return; 1079 } 1080 OS_ASSERT(cpuid->mask); 1081 1082 /* del */ 1083 cpudata->vmcb->state.rax &= ~cpuid->u.mask.del.eax; 1084 cpudata->gprs[NVMM_X64_GPR_RBX] &= ~cpuid->u.mask.del.ebx; 1085 cpudata->gprs[NVMM_X64_GPR_RCX] &= ~cpuid->u.mask.del.ecx; 1086 cpudata->gprs[NVMM_X64_GPR_RDX] &= ~cpuid->u.mask.del.edx; 1087 1088 /* set */ 1089 cpudata->vmcb->state.rax |= cpuid->u.mask.set.eax; 1090 cpudata->gprs[NVMM_X64_GPR_RBX] |= cpuid->u.mask.set.ebx; 1091 cpudata->gprs[NVMM_X64_GPR_RCX] |= cpuid->u.mask.set.ecx; 1092 cpudata->gprs[NVMM_X64_GPR_RDX] |= cpuid->u.mask.set.edx; 1093 1094 break; 1095 } 1096 1097 svm_inkernel_advance(cpudata->vmcb); 1098 exit->reason = NVMM_VCPU_EXIT_NONE; 1099 } 1100 1101 static void 1102 svm_exit_hlt(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1103 struct nvmm_vcpu_exit *exit) 1104 { 1105 struct svm_cpudata *cpudata = vcpu->cpudata; 1106 struct vmcb *vmcb = cpudata->vmcb; 1107 1108 if (cpudata->int_window_exit && (vmcb->state.rflags & PSL_I)) { 1109 svm_event_waitexit_disable(vcpu, false); 1110 } 1111 1112 svm_inkernel_advance(cpudata->vmcb); 1113 exit->reason = NVMM_VCPU_EXIT_HALTED; 1114 } 1115 1116 #define SVM_EXIT_IO_PORT __BITS(31,16) 1117 #define SVM_EXIT_IO_SEG __BITS(12,10) 1118 #define SVM_EXIT_IO_A64 __BIT(9) 1119 #define SVM_EXIT_IO_A32 __BIT(8) 1120 #define SVM_EXIT_IO_A16 __BIT(7) 1121 #define SVM_EXIT_IO_SZ32 __BIT(6) 1122 #define SVM_EXIT_IO_SZ16 __BIT(5) 1123 #define SVM_EXIT_IO_SZ8 __BIT(4) 1124 #define SVM_EXIT_IO_REP __BIT(3) 1125 #define SVM_EXIT_IO_STR __BIT(2) 1126 #define SVM_EXIT_IO_IN __BIT(0) 1127 1128 static void 1129 svm_exit_io(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1130 struct nvmm_vcpu_exit *exit) 1131 { 1132 struct svm_cpudata *cpudata = vcpu->cpudata; 1133 uint64_t info = cpudata->vmcb->ctrl.exitinfo1; 1134 uint64_t nextpc = cpudata->vmcb->ctrl.exitinfo2; 1135 1136 exit->reason = NVMM_VCPU_EXIT_IO; 1137 1138 exit->u.io.in = (info & SVM_EXIT_IO_IN) != 0; 1139 exit->u.io.port = __SHIFTOUT(info, SVM_EXIT_IO_PORT); 1140 1141 if (svm_decode_assist) { 1142 OS_ASSERT(__SHIFTOUT(info, SVM_EXIT_IO_SEG) < 6); 1143 exit->u.io.seg = __SHIFTOUT(info, SVM_EXIT_IO_SEG); 1144 } else { 1145 exit->u.io.seg = -1; 1146 } 1147 1148 if (info & SVM_EXIT_IO_A64) { 1149 exit->u.io.address_size = 8; 1150 } else if (info & SVM_EXIT_IO_A32) { 1151 exit->u.io.address_size = 4; 1152 } else if (info & SVM_EXIT_IO_A16) { 1153 exit->u.io.address_size = 2; 1154 } 1155 1156 if (info & SVM_EXIT_IO_SZ32) { 1157 exit->u.io.operand_size = 4; 1158 } else if (info & SVM_EXIT_IO_SZ16) { 1159 exit->u.io.operand_size = 2; 1160 } else if (info & SVM_EXIT_IO_SZ8) { 1161 exit->u.io.operand_size = 1; 1162 } 1163 1164 exit->u.io.rep = (info & SVM_EXIT_IO_REP) != 0; 1165 exit->u.io.str = (info & SVM_EXIT_IO_STR) != 0; 1166 exit->u.io.npc = nextpc; 1167 1168 svm_vcpu_state_provide(vcpu, 1169 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 1170 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 1171 } 1172 1173 static const uint64_t msr_ignore_list[] = { 1174 0xc0010055, /* MSR_CMPHALT */ 1175 MSR_DE_CFG, 1176 MSR_IC_CFG, 1177 MSR_UCODE_AMD_PATCHLEVEL 1178 }; 1179 1180 static bool 1181 svm_inkernel_handle_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1182 struct nvmm_vcpu_exit *exit) 1183 { 1184 struct svm_cpudata *cpudata = vcpu->cpudata; 1185 struct vmcb *vmcb = cpudata->vmcb; 1186 uint64_t val; 1187 size_t i; 1188 1189 if (exit->reason == NVMM_VCPU_EXIT_RDMSR) { 1190 if (exit->u.rdmsr.msr == MSR_EFER) { 1191 val = vmcb->state.efer & ~EFER_SVME; 1192 vmcb->state.rax = (val & 0xFFFFFFFF); 1193 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1194 goto handled; 1195 } 1196 if (exit->u.rdmsr.msr == MSR_NB_CFG) { 1197 val = NB_CFG_INITAPICCPUIDLO; 1198 vmcb->state.rax = (val & 0xFFFFFFFF); 1199 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1200 goto handled; 1201 } 1202 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1203 if (msr_ignore_list[i] != exit->u.rdmsr.msr) 1204 continue; 1205 val = 0; 1206 vmcb->state.rax = (val & 0xFFFFFFFF); 1207 cpudata->gprs[NVMM_X64_GPR_RDX] = (val >> 32); 1208 goto handled; 1209 } 1210 } else { 1211 if (exit->u.wrmsr.msr == MSR_EFER) { 1212 if (__predict_false(exit->u.wrmsr.val & ~EFER_VALID)) { 1213 goto error; 1214 } 1215 if ((vmcb->state.efer ^ exit->u.wrmsr.val) & 1216 EFER_TLB_FLUSH) { 1217 cpudata->gtlb_want_flush = true; 1218 } 1219 vmcb->state.efer = exit->u.wrmsr.val | EFER_SVME; 1220 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_CR); 1221 goto handled; 1222 } 1223 if (exit->u.wrmsr.msr == MSR_TSC) { 1224 cpudata->gtsc_offset = exit->u.wrmsr.val - rdtsc(); 1225 cpudata->gtsc_want_update = true; 1226 goto handled; 1227 } 1228 for (i = 0; i < __arraycount(msr_ignore_list); i++) { 1229 if (msr_ignore_list[i] != exit->u.wrmsr.msr) 1230 continue; 1231 goto handled; 1232 } 1233 } 1234 1235 return false; 1236 1237 handled: 1238 svm_inkernel_advance(cpudata->vmcb); 1239 return true; 1240 1241 error: 1242 svm_inject_gp(vcpu); 1243 return true; 1244 } 1245 1246 static inline void 1247 svm_exit_rdmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1248 struct nvmm_vcpu_exit *exit) 1249 { 1250 struct svm_cpudata *cpudata = vcpu->cpudata; 1251 1252 exit->reason = NVMM_VCPU_EXIT_RDMSR; 1253 exit->u.rdmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1254 exit->u.rdmsr.npc = cpudata->vmcb->ctrl.nrip; 1255 1256 if (svm_inkernel_handle_msr(mach, vcpu, exit)) { 1257 exit->reason = NVMM_VCPU_EXIT_NONE; 1258 return; 1259 } 1260 1261 svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1262 } 1263 1264 static inline void 1265 svm_exit_wrmsr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1266 struct nvmm_vcpu_exit *exit) 1267 { 1268 struct svm_cpudata *cpudata = vcpu->cpudata; 1269 uint64_t rdx, rax; 1270 1271 rdx = cpudata->gprs[NVMM_X64_GPR_RDX]; 1272 rax = cpudata->vmcb->state.rax; 1273 1274 exit->reason = NVMM_VCPU_EXIT_WRMSR; 1275 exit->u.wrmsr.msr = (cpudata->gprs[NVMM_X64_GPR_RCX] & 0xFFFFFFFF); 1276 exit->u.wrmsr.val = (rdx << 32) | (rax & 0xFFFFFFFF); 1277 exit->u.wrmsr.npc = cpudata->vmcb->ctrl.nrip; 1278 1279 if (svm_inkernel_handle_msr(mach, vcpu, exit)) { 1280 exit->reason = NVMM_VCPU_EXIT_NONE; 1281 return; 1282 } 1283 1284 svm_vcpu_state_provide(vcpu, NVMM_X64_STATE_GPRS); 1285 } 1286 1287 static void 1288 svm_exit_msr(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1289 struct nvmm_vcpu_exit *exit) 1290 { 1291 struct svm_cpudata *cpudata = vcpu->cpudata; 1292 uint64_t info = cpudata->vmcb->ctrl.exitinfo1; 1293 1294 if (info == 0) { 1295 svm_exit_rdmsr(mach, vcpu, exit); 1296 } else { 1297 svm_exit_wrmsr(mach, vcpu, exit); 1298 } 1299 } 1300 1301 static void 1302 svm_exit_npf(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1303 struct nvmm_vcpu_exit *exit) 1304 { 1305 struct svm_cpudata *cpudata = vcpu->cpudata; 1306 gpaddr_t gpa = cpudata->vmcb->ctrl.exitinfo2; 1307 1308 exit->reason = NVMM_VCPU_EXIT_MEMORY; 1309 if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_W) 1310 exit->u.mem.prot = PROT_WRITE; 1311 else if (cpudata->vmcb->ctrl.exitinfo1 & PGEX_I) 1312 exit->u.mem.prot = PROT_EXEC; 1313 else 1314 exit->u.mem.prot = PROT_READ; 1315 exit->u.mem.gpa = gpa; 1316 exit->u.mem.inst_len = cpudata->vmcb->ctrl.inst_len; 1317 memcpy(exit->u.mem.inst_bytes, cpudata->vmcb->ctrl.inst_bytes, 1318 sizeof(exit->u.mem.inst_bytes)); 1319 1320 svm_vcpu_state_provide(vcpu, 1321 NVMM_X64_STATE_GPRS | NVMM_X64_STATE_SEGS | 1322 NVMM_X64_STATE_CRS | NVMM_X64_STATE_MSRS); 1323 } 1324 1325 static void 1326 svm_exit_xsetbv(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1327 struct nvmm_vcpu_exit *exit) 1328 { 1329 struct svm_cpudata *cpudata = vcpu->cpudata; 1330 struct vmcb *vmcb = cpudata->vmcb; 1331 uint64_t val; 1332 1333 exit->reason = NVMM_VCPU_EXIT_NONE; 1334 1335 val = (cpudata->gprs[NVMM_X64_GPR_RDX] << 32) | 1336 (vmcb->state.rax & 0xFFFFFFFF); 1337 1338 if (__predict_false(cpudata->gprs[NVMM_X64_GPR_RCX] != 0)) { 1339 goto error; 1340 } else if (__predict_false(vmcb->state.cpl != 0)) { 1341 goto error; 1342 } else if (__predict_false((val & ~svm_xcr0_mask) != 0)) { 1343 goto error; 1344 } else if (__predict_false((val & XCR0_X87) == 0)) { 1345 goto error; 1346 } 1347 1348 cpudata->gxcr0 = val; 1349 1350 svm_inkernel_advance(cpudata->vmcb); 1351 return; 1352 1353 error: 1354 svm_inject_gp(vcpu); 1355 } 1356 1357 static void 1358 svm_exit_invalid(struct nvmm_vcpu_exit *exit, uint64_t code) 1359 { 1360 exit->u.inv.hwcode = code; 1361 exit->reason = NVMM_VCPU_EXIT_INVALID; 1362 } 1363 1364 /* -------------------------------------------------------------------------- */ 1365 1366 static void 1367 svm_vcpu_guest_fpu_enter(struct nvmm_cpu *vcpu) 1368 { 1369 struct svm_cpudata *cpudata = vcpu->cpudata; 1370 1371 #if defined(__NetBSD__) 1372 x86_curthread_save_fpu(); 1373 #elif defined(__DragonFly__) 1374 /* 1375 * NOTE: Host FPU state depends on whether the user program used the 1376 * FPU or not. Need to use npxpush()/npxpop() to handle this. 1377 */ 1378 npxpush(&cpudata->hstate.hmctx); 1379 #endif 1380 1381 x86_restore_fpu(&cpudata->gxsave, svm_xcr0_mask); 1382 if (svm_xcr0_mask != 0) { 1383 x86_set_xcr(0, cpudata->gxcr0); 1384 } 1385 } 1386 1387 static void 1388 svm_vcpu_guest_fpu_leave(struct nvmm_cpu *vcpu) 1389 { 1390 struct svm_cpudata *cpudata = vcpu->cpudata; 1391 1392 if (svm_xcr0_mask != 0) { 1393 x86_set_xcr(0, svm_global_hstate.xcr0); 1394 } 1395 x86_save_fpu(&cpudata->gxsave, svm_xcr0_mask); 1396 1397 #if defined(__NetBSD__) 1398 x86_curthread_restore_fpu(); 1399 #elif defined(__DragonFly__) 1400 npxpop(&cpudata->hstate.hmctx); 1401 #endif 1402 } 1403 1404 static void 1405 svm_vcpu_guest_dbregs_enter(struct nvmm_cpu *vcpu) 1406 { 1407 struct svm_cpudata *cpudata = vcpu->cpudata; 1408 1409 x86_curthread_save_dbregs(cpudata->hstate.drs); 1410 1411 x86_set_dr7(0); 1412 1413 x86_set_dr0(cpudata->drs[NVMM_X64_DR_DR0]); 1414 x86_set_dr1(cpudata->drs[NVMM_X64_DR_DR1]); 1415 x86_set_dr2(cpudata->drs[NVMM_X64_DR_DR2]); 1416 x86_set_dr3(cpudata->drs[NVMM_X64_DR_DR3]); 1417 } 1418 1419 static void 1420 svm_vcpu_guest_dbregs_leave(struct nvmm_cpu *vcpu) 1421 { 1422 struct svm_cpudata *cpudata = vcpu->cpudata; 1423 1424 cpudata->drs[NVMM_X64_DR_DR0] = x86_get_dr0(); 1425 cpudata->drs[NVMM_X64_DR_DR1] = x86_get_dr1(); 1426 cpudata->drs[NVMM_X64_DR_DR2] = x86_get_dr2(); 1427 cpudata->drs[NVMM_X64_DR_DR3] = x86_get_dr3(); 1428 1429 x86_curthread_restore_dbregs(cpudata->hstate.drs); 1430 } 1431 1432 static void 1433 svm_vcpu_guest_misc_enter(struct nvmm_cpu *vcpu) 1434 { 1435 struct svm_cpudata *cpudata = vcpu->cpudata; 1436 1437 /* Save the percpu host state. */ 1438 cpudata->hstate.fsbase = rdmsr(MSR_FSBASE); 1439 cpudata->hstate.kernelgsbase = rdmsr(MSR_KERNELGSBASE); 1440 } 1441 1442 static void 1443 svm_vcpu_guest_misc_leave(struct nvmm_cpu *vcpu) 1444 { 1445 struct svm_cpudata *cpudata = vcpu->cpudata; 1446 1447 /* Restore the global host state. */ 1448 wrmsr(MSR_STAR, svm_global_hstate.star); 1449 wrmsr(MSR_LSTAR, svm_global_hstate.lstar); 1450 wrmsr(MSR_CSTAR, svm_global_hstate.cstar); 1451 wrmsr(MSR_SFMASK, svm_global_hstate.sfmask); 1452 1453 /* Restore the percpu host state. */ 1454 wrmsr(MSR_FSBASE, cpudata->hstate.fsbase); 1455 wrmsr(MSR_KERNELGSBASE, cpudata->hstate.kernelgsbase); 1456 } 1457 1458 /* -------------------------------------------------------------------------- */ 1459 1460 static inline void 1461 svm_gtlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 1462 { 1463 struct svm_cpudata *cpudata = vcpu->cpudata; 1464 1465 if (vcpu->hcpu_last != hcpu || cpudata->shared_asid) { 1466 cpudata->gtlb_want_flush = true; 1467 } 1468 } 1469 1470 static inline void 1471 svm_htlb_catchup(struct nvmm_cpu *vcpu, int hcpu) 1472 { 1473 /* 1474 * Nothing to do. If an hTLB flush was needed, either the VCPU was 1475 * executing on this hCPU and the hTLB already got flushed, or it 1476 * was executing on another hCPU in which case the catchup is done 1477 * indirectly when svm_gtlb_catchup() sets gtlb_want_flush. 1478 */ 1479 } 1480 1481 static inline uint64_t 1482 svm_htlb_flush(struct nvmm_machine *mach, struct svm_cpudata *cpudata) 1483 { 1484 struct vmcb *vmcb = cpudata->vmcb; 1485 uint64_t machgen; 1486 1487 #if defined(__NetBSD__) 1488 machgen = ((struct svm_machdata *)mach->machdata)->mach_htlb_gen; 1489 #elif defined(__DragonFly__) 1490 clear_xinvltlb(); 1491 machgen = vmspace_pmap(mach->vm)->pm_invgen; 1492 #endif 1493 if (__predict_true(machgen == cpudata->vcpu_htlb_gen)) { 1494 return machgen; 1495 } 1496 1497 cpudata->htlb_want_flush = true; 1498 vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush; 1499 return machgen; 1500 } 1501 1502 static inline void 1503 svm_htlb_flush_ack(struct svm_cpudata *cpudata, uint64_t machgen) 1504 { 1505 struct vmcb *vmcb = cpudata->vmcb; 1506 1507 if (__predict_true(vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID)) { 1508 cpudata->vcpu_htlb_gen = machgen; 1509 cpudata->htlb_want_flush = false; 1510 } 1511 } 1512 1513 static inline void 1514 svm_exit_evt(struct svm_cpudata *cpudata, struct vmcb *vmcb) 1515 { 1516 cpudata->evt_pending = false; 1517 1518 if (__predict_false(vmcb->ctrl.exitintinfo & VMCB_CTRL_EXITINTINFO_V)) { 1519 vmcb->ctrl.eventinj = vmcb->ctrl.exitintinfo; 1520 cpudata->evt_pending = true; 1521 } 1522 } 1523 1524 static int 1525 svm_vcpu_run(struct nvmm_machine *mach, struct nvmm_cpu *vcpu, 1526 struct nvmm_vcpu_exit *exit) 1527 { 1528 struct nvmm_comm_page *comm = vcpu->comm; 1529 struct svm_cpudata *cpudata = vcpu->cpudata; 1530 struct vmcb *vmcb = cpudata->vmcb; 1531 uint64_t machgen; 1532 int hcpu; 1533 int error = 0; 1534 1535 svm_vcpu_state_commit(vcpu); 1536 comm->state_cached = 0; 1537 1538 #ifndef __DragonFly__ 1539 if (__predict_false(svm_vcpu_event_commit(vcpu) != 0)) { 1540 return EINVAL; 1541 } 1542 #endif 1543 1544 os_preempt_disable(); 1545 hcpu = os_curcpu_number(); 1546 1547 svm_gtlb_catchup(vcpu, hcpu); 1548 svm_htlb_catchup(vcpu, hcpu); 1549 1550 if (vcpu->hcpu_last != hcpu) { 1551 svm_vmcb_cache_flush_all(vmcb); 1552 cpudata->gtsc_want_update = true; 1553 1554 #ifdef __DragonFly__ 1555 /* 1556 * XXX: We aren't tracking overloaded CPUs (multiple vCPUs 1557 * scheduled on the same physical CPU) yet so there are 1558 * currently no calls to pmap_del_cpu(). 1559 */ 1560 pmap_add_cpu(mach->vm, hcpu); 1561 #endif 1562 } 1563 1564 svm_vcpu_guest_dbregs_enter(vcpu); 1565 svm_vcpu_guest_misc_enter(vcpu); 1566 1567 while (1) { 1568 if (__predict_false(cpudata->gtlb_want_flush || 1569 cpudata->htlb_want_flush)) 1570 { 1571 vmcb->ctrl.tlb_ctrl = svm_ctrl_tlb_flush; 1572 } else { 1573 vmcb->ctrl.tlb_ctrl = 0; 1574 } 1575 1576 if (__predict_false(cpudata->gtsc_want_update)) { 1577 vmcb->ctrl.tsc_offset = cpudata->gtsc_offset; 1578 svm_vmcb_cache_flush(vmcb, VMCB_CTRL_VMCB_CLEAN_I); 1579 } 1580 1581 svm_clgi(); 1582 svm_vcpu_guest_fpu_enter(vcpu); 1583 machgen = svm_htlb_flush(mach, cpudata); 1584 1585 #ifdef __DragonFly__ 1586 /* 1587 * Check for pending host events (e.g., interrupt, AST) 1588 * to make the state safe to VM Entry. This check must 1589 * be done after the clgi to avoid gd_reqflags pending 1590 * races. 1591 * 1592 * Emulators may assume that event injection succeeds, but 1593 * we have to return to process these events. To deal with 1594 * this, use ERESTART mechanics. 1595 */ 1596 if (__predict_false(mycpu->gd_reqflags & RQF_HVM_MASK)) { 1597 /* No hTLB flush ack, because it's not executed. */ 1598 svm_vcpu_guest_fpu_leave(vcpu); 1599 svm_stgi(); 1600 exit->reason = NVMM_VCPU_EXIT_NONE; 1601 error = ERESTART; 1602 break; 1603 } 1604 1605 /* 1606 * Don't try to inject an event until we are absolutely 1607 * sure that the vmrun will be executed, otherwise we 1608 * might overwrite/miss an event. 1609 */ 1610 if (__predict_false(svm_vcpu_event_commit(vcpu) != 0)) { 1611 /* No hTLB flush ack, because it's not executed. */ 1612 svm_vcpu_guest_fpu_leave(vcpu); 1613 svm_stgi(); 1614 exit->reason = NVMM_VCPU_EXIT_NONE; 1615 error = EINVAL; 1616 break; 1617 } 1618 #endif 1619 1620 svm_vmrun(cpudata->vmcb_pa, cpudata->gprs); 1621 svm_htlb_flush_ack(cpudata, machgen); 1622 svm_vcpu_guest_fpu_leave(vcpu); 1623 svm_stgi(); 1624 1625 svm_vmcb_cache_default(vmcb); 1626 1627 if (vmcb->ctrl.exitcode != VMCB_EXITCODE_INVALID) { 1628 cpudata->gtlb_want_flush = false; 1629 cpudata->gtsc_want_update = false; 1630 vcpu->hcpu_last = hcpu; 1631 } 1632 svm_exit_evt(cpudata, vmcb); 1633 1634 switch (vmcb->ctrl.exitcode) { 1635 case VMCB_EXITCODE_INTR: 1636 case VMCB_EXITCODE_NMI: 1637 exit->reason = NVMM_VCPU_EXIT_NONE; 1638 break; 1639 case VMCB_EXITCODE_VINTR: 1640 svm_event_waitexit_disable(vcpu, false); 1641 exit->reason = NVMM_VCPU_EXIT_INT_READY; 1642 break; 1643 case VMCB_EXITCODE_IRET: 1644 svm_event_waitexit_disable(vcpu, true); 1645 exit->reason = NVMM_VCPU_EXIT_NMI_READY; 1646 break; 1647 case VMCB_EXITCODE_CPUID: 1648 svm_exit_cpuid(mach, vcpu, exit); 1649 break; 1650 case VMCB_EXITCODE_HLT: 1651 svm_exit_hlt(mach, vcpu, exit); 1652 break; 1653 case VMCB_EXITCODE_IOIO: 1654 svm_exit_io(mach, vcpu, exit); 1655 break; 1656 case VMCB_EXITCODE_MSR: 1657 svm_exit_msr(mach, vcpu, exit); 1658 break; 1659 case VMCB_EXITCODE_SHUTDOWN: 1660 exit->reason = NVMM_VCPU_EXIT_SHUTDOWN; 1661 break; 1662 case VMCB_EXITCODE_RDPMC: 1663 case VMCB_EXITCODE_RSM: 1664 case VMCB_EXITCODE_INVLPGA: 1665 case VMCB_EXITCODE_VMRUN: 1666 case VMCB_EXITCODE_VMMCALL: 1667 case VMCB_EXITCODE_VMLOAD: 1668 case VMCB_EXITCODE_VMSAVE: 1669 case VMCB_EXITCODE_STGI: 1670 case VMCB_EXITCODE_CLGI: 1671 case VMCB_EXITCODE_SKINIT: 1672 case VMCB_EXITCODE_RDTSCP: 1673 case VMCB_EXITCODE_RDPRU: 1674 case VMCB_EXITCODE_INVLPGB: 1675 case VMCB_EXITCODE_INVPCID: 1676 case VMCB_EXITCODE_MCOMMIT: 1677 case VMCB_EXITCODE_TLBSYNC: 1678 svm_inject_ud(vcpu); 1679 exit->reason = NVMM_VCPU_EXIT_NONE; 1680 break; 1681 case VMCB_EXITCODE_MONITOR: 1682 svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MONITOR); 1683 break; 1684 case VMCB_EXITCODE_MWAIT: 1685 case VMCB_EXITCODE_MWAIT_CONDITIONAL: 1686 svm_exit_insn(vmcb, exit, NVMM_VCPU_EXIT_MWAIT); 1687 break; 1688 case VMCB_EXITCODE_XSETBV: 1689 svm_exit_xsetbv(mach, vcpu, exit); 1690 break; 1691 case VMCB_EXITCODE_NPF: 1692 svm_exit_npf(mach, vcpu, exit); 1693 break; 1694 case VMCB_EXITCODE_FERR_FREEZE: /* ? */ 1695 default: 1696 svm_exit_invalid(exit, vmcb->ctrl.exitcode); 1697 break; 1698 } 1699 1700 /* If no reason to return to userland, keep rolling. */ 1701 if (os_return_needed()) { 1702 break; 1703 } 1704 if (exit->reason != NVMM_VCPU_EXIT_NONE) { 1705 break; 1706 } 1707 } 1708 1709 svm_vcpu_guest_misc_leave(vcpu); 1710 svm_vcpu_guest_dbregs_leave(vcpu); 1711 1712 os_preempt_enable(); 1713 1714 exit->exitstate.rflags = vmcb->state.rflags; 1715 exit->exitstate.cr8 = __SHIFTOUT(vmcb->ctrl.v, VMCB_CTRL_V_TPR); 1716 exit->exitstate.int_shadow = 1717 ((vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0); 1718 exit->exitstate.int_window_exiting = cpudata->int_window_exit; 1719 exit->exitstate.nmi_window_exiting = cpudata->nmi_window_exit; 1720 exit->exitstate.evt_pending = cpudata->evt_pending; 1721 1722 return error; 1723 } 1724 1725 /* -------------------------------------------------------------------------- */ 1726 1727 #define SVM_MSRBM_READ __BIT(0) 1728 #define SVM_MSRBM_WRITE __BIT(1) 1729 1730 static void 1731 svm_vcpu_msr_allow(uint8_t *bitmap, uint64_t msr, bool read, bool write) 1732 { 1733 uint64_t byte; 1734 uint8_t bitoff; 1735 1736 if (msr < 0x00002000) { 1737 /* Range 1 */ 1738 byte = ((msr - 0x00000000) >> 2UL) + 0x0000; 1739 } else if (msr >= 0xC0000000 && msr < 0xC0002000) { 1740 /* Range 2 */ 1741 byte = ((msr - 0xC0000000) >> 2UL) + 0x0800; 1742 } else if (msr >= 0xC0010000 && msr < 0xC0012000) { 1743 /* Range 3 */ 1744 byte = ((msr - 0xC0010000) >> 2UL) + 0x1000; 1745 } else { 1746 panic("%s: wrong range", __func__); 1747 } 1748 1749 bitoff = (msr & 0x3) << 1; 1750 1751 if (read) { 1752 bitmap[byte] &= ~(SVM_MSRBM_READ << bitoff); 1753 } 1754 if (write) { 1755 bitmap[byte] &= ~(SVM_MSRBM_WRITE << bitoff); 1756 } 1757 } 1758 1759 #define SVM_SEG_ATTRIB_TYPE __BITS(3,0) 1760 #define SVM_SEG_ATTRIB_S __BIT(4) 1761 #define SVM_SEG_ATTRIB_DPL __BITS(6,5) 1762 #define SVM_SEG_ATTRIB_P __BIT(7) 1763 #define SVM_SEG_ATTRIB_AVL __BIT(8) 1764 #define SVM_SEG_ATTRIB_L __BIT(9) 1765 #define SVM_SEG_ATTRIB_DEF __BIT(10) 1766 #define SVM_SEG_ATTRIB_G __BIT(11) 1767 1768 static void 1769 svm_vcpu_setstate_seg(const struct nvmm_x64_state_seg *seg, 1770 struct vmcb_segment *vseg) 1771 { 1772 vseg->selector = seg->selector; 1773 vseg->attrib = 1774 __SHIFTIN(seg->attrib.type, SVM_SEG_ATTRIB_TYPE) | 1775 __SHIFTIN(seg->attrib.s, SVM_SEG_ATTRIB_S) | 1776 __SHIFTIN(seg->attrib.dpl, SVM_SEG_ATTRIB_DPL) | 1777 __SHIFTIN(seg->attrib.p, SVM_SEG_ATTRIB_P) | 1778 __SHIFTIN(seg->attrib.avl, SVM_SEG_ATTRIB_AVL) | 1779 __SHIFTIN(seg->attrib.l, SVM_SEG_ATTRIB_L) | 1780 __SHIFTIN(seg->attrib.def, SVM_SEG_ATTRIB_DEF) | 1781 __SHIFTIN(seg->attrib.g, SVM_SEG_ATTRIB_G); 1782 vseg->limit = seg->limit; 1783 vseg->base = seg->base; 1784 } 1785 1786 static void 1787 svm_vcpu_getstate_seg(struct nvmm_x64_state_seg *seg, struct vmcb_segment *vseg) 1788 { 1789 seg->selector = vseg->selector; 1790 seg->attrib.type = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_TYPE); 1791 seg->attrib.s = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_S); 1792 seg->attrib.dpl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DPL); 1793 seg->attrib.p = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_P); 1794 seg->attrib.avl = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_AVL); 1795 seg->attrib.l = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_L); 1796 seg->attrib.def = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_DEF); 1797 seg->attrib.g = __SHIFTOUT(vseg->attrib, SVM_SEG_ATTRIB_G); 1798 seg->limit = vseg->limit; 1799 seg->base = vseg->base; 1800 } 1801 1802 static inline bool 1803 svm_state_gtlb_flush(const struct vmcb *vmcb, const struct nvmm_x64_state *state, 1804 uint64_t flags) 1805 { 1806 if (flags & NVMM_X64_STATE_CRS) { 1807 if ((vmcb->state.cr0 ^ 1808 state->crs[NVMM_X64_CR_CR0]) & CR0_TLB_FLUSH) { 1809 return true; 1810 } 1811 if (vmcb->state.cr3 != state->crs[NVMM_X64_CR_CR3]) { 1812 return true; 1813 } 1814 if ((vmcb->state.cr4 ^ 1815 state->crs[NVMM_X64_CR_CR4]) & CR4_TLB_FLUSH) { 1816 return true; 1817 } 1818 } 1819 1820 if (flags & NVMM_X64_STATE_MSRS) { 1821 if ((vmcb->state.efer ^ 1822 state->msrs[NVMM_X64_MSR_EFER]) & EFER_TLB_FLUSH) { 1823 return true; 1824 } 1825 } 1826 1827 return false; 1828 } 1829 1830 static void 1831 svm_vcpu_setstate(struct nvmm_cpu *vcpu) 1832 { 1833 struct nvmm_comm_page *comm = vcpu->comm; 1834 const struct nvmm_x64_state *state = &comm->state; 1835 struct svm_cpudata *cpudata = vcpu->cpudata; 1836 struct vmcb *vmcb = cpudata->vmcb; 1837 struct nvmm_x64_state_fpu *fpustate; 1838 uint64_t flags; 1839 1840 flags = comm->state_wanted; 1841 1842 if (svm_state_gtlb_flush(vmcb, state, flags)) { 1843 cpudata->gtlb_want_flush = true; 1844 } 1845 1846 if (flags & NVMM_X64_STATE_SEGS) { 1847 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_CS], 1848 &vmcb->state.cs); 1849 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_DS], 1850 &vmcb->state.ds); 1851 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_ES], 1852 &vmcb->state.es); 1853 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_FS], 1854 &vmcb->state.fs); 1855 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GS], 1856 &vmcb->state.gs); 1857 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_SS], 1858 &vmcb->state.ss); 1859 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_GDT], 1860 &vmcb->state.gdt); 1861 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_IDT], 1862 &vmcb->state.idt); 1863 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_LDT], 1864 &vmcb->state.ldt); 1865 svm_vcpu_setstate_seg(&state->segs[NVMM_X64_SEG_TR], 1866 &vmcb->state.tr); 1867 1868 vmcb->state.cpl = state->segs[NVMM_X64_SEG_SS].attrib.dpl; 1869 } 1870 1871 CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs)); 1872 if (flags & NVMM_X64_STATE_GPRS) { 1873 memcpy(cpudata->gprs, state->gprs, sizeof(state->gprs)); 1874 1875 vmcb->state.rip = state->gprs[NVMM_X64_GPR_RIP]; 1876 vmcb->state.rsp = state->gprs[NVMM_X64_GPR_RSP]; 1877 vmcb->state.rax = state->gprs[NVMM_X64_GPR_RAX]; 1878 vmcb->state.rflags = state->gprs[NVMM_X64_GPR_RFLAGS]; 1879 } 1880 1881 if (flags & NVMM_X64_STATE_CRS) { 1882 vmcb->state.cr0 = state->crs[NVMM_X64_CR_CR0]; 1883 vmcb->state.cr2 = state->crs[NVMM_X64_CR_CR2]; 1884 vmcb->state.cr3 = state->crs[NVMM_X64_CR_CR3]; 1885 vmcb->state.cr4 = state->crs[NVMM_X64_CR_CR4]; 1886 1887 vmcb->ctrl.v &= ~VMCB_CTRL_V_TPR; 1888 vmcb->ctrl.v |= __SHIFTIN(state->crs[NVMM_X64_CR_CR8], 1889 VMCB_CTRL_V_TPR); 1890 1891 if (svm_xcr0_mask != 0) { 1892 /* Clear illegal XCR0 bits, set mandatory X87 bit. */ 1893 cpudata->gxcr0 = state->crs[NVMM_X64_CR_XCR0]; 1894 cpudata->gxcr0 &= svm_xcr0_mask; 1895 cpudata->gxcr0 |= XCR0_X87; 1896 } 1897 } 1898 1899 CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs)); 1900 if (flags & NVMM_X64_STATE_DRS) { 1901 memcpy(cpudata->drs, state->drs, sizeof(state->drs)); 1902 1903 vmcb->state.dr6 = state->drs[NVMM_X64_DR_DR6]; 1904 vmcb->state.dr7 = state->drs[NVMM_X64_DR_DR7]; 1905 } 1906 1907 if (flags & NVMM_X64_STATE_MSRS) { 1908 /* 1909 * EFER_SVME is mandatory. 1910 */ 1911 vmcb->state.efer = state->msrs[NVMM_X64_MSR_EFER] | EFER_SVME; 1912 vmcb->state.star = state->msrs[NVMM_X64_MSR_STAR]; 1913 vmcb->state.lstar = state->msrs[NVMM_X64_MSR_LSTAR]; 1914 vmcb->state.cstar = state->msrs[NVMM_X64_MSR_CSTAR]; 1915 vmcb->state.sfmask = state->msrs[NVMM_X64_MSR_SFMASK]; 1916 vmcb->state.kernelgsbase = 1917 state->msrs[NVMM_X64_MSR_KERNELGSBASE]; 1918 vmcb->state.sysenter_cs = 1919 state->msrs[NVMM_X64_MSR_SYSENTER_CS]; 1920 vmcb->state.sysenter_esp = 1921 state->msrs[NVMM_X64_MSR_SYSENTER_ESP]; 1922 vmcb->state.sysenter_eip = 1923 state->msrs[NVMM_X64_MSR_SYSENTER_EIP]; 1924 vmcb->state.g_pat = state->msrs[NVMM_X64_MSR_PAT]; 1925 1926 /* 1927 * The emulator might NOT want to set the TSC, because doing 1928 * so would destroy TSC MP-synchronization across CPUs. Try 1929 * to figure out what the emulator meant to do. 1930 * 1931 * If writing the last TSC value we reported via getstate or 1932 * a zero value, assume that the emulator does not want to 1933 * write to the TSC. 1934 */ 1935 if (state->msrs[NVMM_X64_MSR_TSC] != cpudata->gtsc_match && 1936 state->msrs[NVMM_X64_MSR_TSC] != 0) { 1937 cpudata->gtsc_offset = 1938 state->msrs[NVMM_X64_MSR_TSC] - rdtsc(); 1939 cpudata->gtsc_want_update = true; 1940 } 1941 } 1942 1943 if (flags & NVMM_X64_STATE_INTR) { 1944 if (state->intr.int_shadow) { 1945 vmcb->ctrl.intr |= VMCB_CTRL_INTR_SHADOW; 1946 } else { 1947 vmcb->ctrl.intr &= ~VMCB_CTRL_INTR_SHADOW; 1948 } 1949 1950 if (state->intr.int_window_exiting) { 1951 svm_event_waitexit_enable(vcpu, false); 1952 } else { 1953 svm_event_waitexit_disable(vcpu, false); 1954 } 1955 1956 if (state->intr.nmi_window_exiting) { 1957 svm_event_waitexit_enable(vcpu, true); 1958 } else { 1959 svm_event_waitexit_disable(vcpu, true); 1960 } 1961 } 1962 1963 CTASSERT(sizeof(cpudata->gxsave.fpu) == sizeof(state->fpu)); 1964 if (flags & NVMM_X64_STATE_FPU) { 1965 memcpy(&cpudata->gxsave.fpu, &state->fpu, sizeof(state->fpu)); 1966 1967 fpustate = (struct nvmm_x64_state_fpu *)&cpudata->gxsave.fpu; 1968 fpustate->fx_mxcsr_mask &= x86_fpu_mxcsr_mask; 1969 fpustate->fx_mxcsr &= fpustate->fx_mxcsr_mask; 1970 1971 if (svm_xcr0_mask != 0) { 1972 /* Reset XSTATE_BV, to force a reload. */ 1973 cpudata->gxsave.xstate_bv = svm_xcr0_mask; 1974 } 1975 } 1976 1977 svm_vmcb_cache_update(vmcb, flags); 1978 1979 comm->state_wanted = 0; 1980 comm->state_cached |= flags; 1981 } 1982 1983 static void 1984 svm_vcpu_getstate(struct nvmm_cpu *vcpu) 1985 { 1986 struct nvmm_comm_page *comm = vcpu->comm; 1987 struct nvmm_x64_state *state = &comm->state; 1988 struct svm_cpudata *cpudata = vcpu->cpudata; 1989 struct vmcb *vmcb = cpudata->vmcb; 1990 uint64_t flags; 1991 1992 flags = comm->state_wanted; 1993 1994 if (flags & NVMM_X64_STATE_SEGS) { 1995 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_CS], 1996 &vmcb->state.cs); 1997 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_DS], 1998 &vmcb->state.ds); 1999 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_ES], 2000 &vmcb->state.es); 2001 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_FS], 2002 &vmcb->state.fs); 2003 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GS], 2004 &vmcb->state.gs); 2005 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_SS], 2006 &vmcb->state.ss); 2007 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_GDT], 2008 &vmcb->state.gdt); 2009 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_IDT], 2010 &vmcb->state.idt); 2011 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_LDT], 2012 &vmcb->state.ldt); 2013 svm_vcpu_getstate_seg(&state->segs[NVMM_X64_SEG_TR], 2014 &vmcb->state.tr); 2015 2016 state->segs[NVMM_X64_SEG_SS].attrib.dpl = vmcb->state.cpl; 2017 } 2018 2019 CTASSERT(sizeof(cpudata->gprs) == sizeof(state->gprs)); 2020 if (flags & NVMM_X64_STATE_GPRS) { 2021 memcpy(state->gprs, cpudata->gprs, sizeof(state->gprs)); 2022 2023 state->gprs[NVMM_X64_GPR_RIP] = vmcb->state.rip; 2024 state->gprs[NVMM_X64_GPR_RSP] = vmcb->state.rsp; 2025 state->gprs[NVMM_X64_GPR_RAX] = vmcb->state.rax; 2026 state->gprs[NVMM_X64_GPR_RFLAGS] = vmcb->state.rflags; 2027 } 2028 2029 if (flags & NVMM_X64_STATE_CRS) { 2030 state->crs[NVMM_X64_CR_CR0] = vmcb->state.cr0; 2031 state->crs[NVMM_X64_CR_CR2] = vmcb->state.cr2; 2032 state->crs[NVMM_X64_CR_CR3] = vmcb->state.cr3; 2033 state->crs[NVMM_X64_CR_CR4] = vmcb->state.cr4; 2034 state->crs[NVMM_X64_CR_CR8] = __SHIFTOUT(vmcb->ctrl.v, 2035 VMCB_CTRL_V_TPR); 2036 state->crs[NVMM_X64_CR_XCR0] = cpudata->gxcr0; 2037 } 2038 2039 CTASSERT(sizeof(cpudata->drs) == sizeof(state->drs)); 2040 if (flags & NVMM_X64_STATE_DRS) { 2041 memcpy(state->drs, cpudata->drs, sizeof(state->drs)); 2042 2043 state->drs[NVMM_X64_DR_DR6] = vmcb->state.dr6; 2044 state->drs[NVMM_X64_DR_DR7] = vmcb->state.dr7; 2045 } 2046 2047 if (flags & NVMM_X64_STATE_MSRS) { 2048 state->msrs[NVMM_X64_MSR_EFER] = vmcb->state.efer; 2049 state->msrs[NVMM_X64_MSR_STAR] = vmcb->state.star; 2050 state->msrs[NVMM_X64_MSR_LSTAR] = vmcb->state.lstar; 2051 state->msrs[NVMM_X64_MSR_CSTAR] = vmcb->state.cstar; 2052 state->msrs[NVMM_X64_MSR_SFMASK] = vmcb->state.sfmask; 2053 state->msrs[NVMM_X64_MSR_KERNELGSBASE] = 2054 vmcb->state.kernelgsbase; 2055 state->msrs[NVMM_X64_MSR_SYSENTER_CS] = 2056 vmcb->state.sysenter_cs; 2057 state->msrs[NVMM_X64_MSR_SYSENTER_ESP] = 2058 vmcb->state.sysenter_esp; 2059 state->msrs[NVMM_X64_MSR_SYSENTER_EIP] = 2060 vmcb->state.sysenter_eip; 2061 state->msrs[NVMM_X64_MSR_PAT] = vmcb->state.g_pat; 2062 state->msrs[NVMM_X64_MSR_TSC] = rdtsc() + cpudata->gtsc_offset; 2063 2064 /* Hide SVME. */ 2065 state->msrs[NVMM_X64_MSR_EFER] &= ~EFER_SVME; 2066 2067 /* Save reported TSC value for later setstate check. */ 2068 cpudata->gtsc_match = state->msrs[NVMM_X64_MSR_TSC]; 2069 } 2070 2071 if (flags & NVMM_X64_STATE_INTR) { 2072 state->intr.int_shadow = 2073 (vmcb->ctrl.intr & VMCB_CTRL_INTR_SHADOW) != 0; 2074 state->intr.int_window_exiting = cpudata->int_window_exit; 2075 state->intr.nmi_window_exiting = cpudata->nmi_window_exit; 2076 state->intr.evt_pending = cpudata->evt_pending; 2077 } 2078 2079 CTASSERT(sizeof(cpudata->gxsave.fpu) == sizeof(state->fpu)); 2080 if (flags & NVMM_X64_STATE_FPU) { 2081 memcpy(&state->fpu, &cpudata->gxsave.fpu, sizeof(state->fpu)); 2082 } 2083 2084 comm->state_wanted = 0; 2085 comm->state_cached |= flags; 2086 } 2087 2088 static void 2089 svm_vcpu_state_provide(struct nvmm_cpu *vcpu, uint64_t flags) 2090 { 2091 vcpu->comm->state_wanted = flags; 2092 svm_vcpu_getstate(vcpu); 2093 } 2094 2095 static void 2096 svm_vcpu_state_commit(struct nvmm_cpu *vcpu) 2097 { 2098 vcpu->comm->state_wanted = vcpu->comm->state_commit; 2099 vcpu->comm->state_commit = 0; 2100 svm_vcpu_setstate(vcpu); 2101 } 2102 2103 /* -------------------------------------------------------------------------- */ 2104 2105 static void 2106 svm_asid_alloc(struct nvmm_cpu *vcpu) 2107 { 2108 struct svm_cpudata *cpudata = vcpu->cpudata; 2109 struct vmcb *vmcb = cpudata->vmcb; 2110 size_t i, oct, bit; 2111 2112 os_mtx_lock(&svm_asidlock); 2113 2114 for (i = 0; i < svm_maxasid; i++) { 2115 oct = i / 8; 2116 bit = i % 8; 2117 2118 if (svm_asidmap[oct] & __BIT(bit)) { 2119 continue; 2120 } 2121 2122 svm_asidmap[oct] |= __BIT(bit); 2123 vmcb->ctrl.guest_asid = i; 2124 os_mtx_unlock(&svm_asidlock); 2125 return; 2126 } 2127 2128 /* 2129 * No free ASID. Use the last one, which is shared and requires 2130 * special TLB handling. 2131 */ 2132 cpudata->shared_asid = true; 2133 vmcb->ctrl.guest_asid = svm_maxasid - 1; 2134 os_mtx_unlock(&svm_asidlock); 2135 } 2136 2137 static void 2138 svm_asid_free(struct nvmm_cpu *vcpu) 2139 { 2140 struct svm_cpudata *cpudata = vcpu->cpudata; 2141 struct vmcb *vmcb = cpudata->vmcb; 2142 size_t oct, bit; 2143 2144 if (cpudata->shared_asid) { 2145 return; 2146 } 2147 2148 oct = vmcb->ctrl.guest_asid / 8; 2149 bit = vmcb->ctrl.guest_asid % 8; 2150 2151 os_mtx_lock(&svm_asidlock); 2152 svm_asidmap[oct] &= ~__BIT(bit); 2153 os_mtx_unlock(&svm_asidlock); 2154 } 2155 2156 static void 2157 svm_vcpu_init(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2158 { 2159 struct svm_cpudata *cpudata = vcpu->cpudata; 2160 struct vmcb *vmcb = cpudata->vmcb; 2161 2162 /* Allow reads/writes of Control Registers. */ 2163 vmcb->ctrl.intercept_cr = 0; 2164 2165 /* Allow reads/writes of Debug Registers. */ 2166 vmcb->ctrl.intercept_dr = 0; 2167 2168 /* Allow exceptions 0 to 31. */ 2169 vmcb->ctrl.intercept_vec = 0; 2170 2171 /* 2172 * Allow: 2173 * - SMI [smm interrupts] 2174 * - VINTR [virtual interrupts] 2175 * - CR0_SPEC [CR0 writes changing other fields than CR0.TS or CR0.MP] 2176 * - RIDTR [reads of IDTR] 2177 * - RGDTR [reads of GDTR] 2178 * - RLDTR [reads of LDTR] 2179 * - RTR [reads of TR] 2180 * - WIDTR [writes of IDTR] 2181 * - WGDTR [writes of GDTR] 2182 * - WLDTR [writes of LDTR] 2183 * - WTR [writes of TR] 2184 * - RDTSC [rdtsc instruction] 2185 * - PUSHF [pushf instruction] 2186 * - POPF [popf instruction] 2187 * - IRET [iret instruction] 2188 * - INTN [int $n instructions] 2189 * - PAUSE [pause instruction] 2190 * - INVLPG [invplg instruction] 2191 * - TASKSW [task switches] 2192 * 2193 * Intercept the rest below. 2194 */ 2195 vmcb->ctrl.intercept_misc1 = 2196 VMCB_CTRL_INTERCEPT_INTR | 2197 VMCB_CTRL_INTERCEPT_NMI | 2198 VMCB_CTRL_INTERCEPT_INIT | 2199 VMCB_CTRL_INTERCEPT_RDPMC | 2200 VMCB_CTRL_INTERCEPT_CPUID | 2201 VMCB_CTRL_INTERCEPT_RSM | 2202 VMCB_CTRL_INTERCEPT_INVD | 2203 VMCB_CTRL_INTERCEPT_HLT | 2204 VMCB_CTRL_INTERCEPT_INVLPGA | 2205 VMCB_CTRL_INTERCEPT_IOIO_PROT | 2206 VMCB_CTRL_INTERCEPT_MSR_PROT | 2207 VMCB_CTRL_INTERCEPT_FERR_FREEZE | 2208 VMCB_CTRL_INTERCEPT_SHUTDOWN; 2209 2210 /* 2211 * Allow: 2212 * - ICEBP [icebp instruction] 2213 * - WBINVD [wbinvd instruction] 2214 * - WCR_SPEC(0..15) [writes of CR0-15, received after instruction] 2215 * 2216 * Intercept the rest below. 2217 */ 2218 vmcb->ctrl.intercept_misc2 = 2219 VMCB_CTRL_INTERCEPT_VMRUN | 2220 VMCB_CTRL_INTERCEPT_VMMCALL | 2221 VMCB_CTRL_INTERCEPT_VMLOAD | 2222 VMCB_CTRL_INTERCEPT_VMSAVE | 2223 VMCB_CTRL_INTERCEPT_STGI | 2224 VMCB_CTRL_INTERCEPT_CLGI | 2225 VMCB_CTRL_INTERCEPT_SKINIT | 2226 VMCB_CTRL_INTERCEPT_RDTSCP | 2227 VMCB_CTRL_INTERCEPT_MONITOR | 2228 VMCB_CTRL_INTERCEPT_MWAIT | 2229 VMCB_CTRL_INTERCEPT_XSETBV | 2230 VMCB_CTRL_INTERCEPT_RDPRU; 2231 2232 /* 2233 * Intercept everything. 2234 */ 2235 vmcb->ctrl.intercept_misc3 = 2236 VMCB_CTRL_INTERCEPT_INVLPGB_ALL | 2237 VMCB_CTRL_INTERCEPT_PCID | 2238 VMCB_CTRL_INTERCEPT_MCOMMIT | 2239 VMCB_CTRL_INTERCEPT_TLBSYNC; 2240 2241 /* Intercept all I/O accesses. */ 2242 memset(cpudata->iobm, 0xFF, IOBM_SIZE); 2243 vmcb->ctrl.iopm_base_pa = cpudata->iobm_pa; 2244 2245 /* Allow direct access to certain MSRs. */ 2246 memset(cpudata->msrbm, 0xFF, MSRBM_SIZE); 2247 svm_vcpu_msr_allow(cpudata->msrbm, MSR_STAR, true, true); 2248 svm_vcpu_msr_allow(cpudata->msrbm, MSR_LSTAR, true, true); 2249 svm_vcpu_msr_allow(cpudata->msrbm, MSR_CSTAR, true, true); 2250 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SFMASK, true, true); 2251 svm_vcpu_msr_allow(cpudata->msrbm, MSR_KERNELGSBASE, true, true); 2252 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_CS, true, true); 2253 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_ESP, true, true); 2254 svm_vcpu_msr_allow(cpudata->msrbm, MSR_SYSENTER_EIP, true, true); 2255 svm_vcpu_msr_allow(cpudata->msrbm, MSR_FSBASE, true, true); 2256 svm_vcpu_msr_allow(cpudata->msrbm, MSR_GSBASE, true, true); 2257 svm_vcpu_msr_allow(cpudata->msrbm, MSR_CR_PAT, true, true); 2258 svm_vcpu_msr_allow(cpudata->msrbm, MSR_TSC, true, false); 2259 vmcb->ctrl.msrpm_base_pa = cpudata->msrbm_pa; 2260 2261 /* Generate ASID. */ 2262 svm_asid_alloc(vcpu); 2263 2264 /* Virtual TPR. */ 2265 vmcb->ctrl.v = VMCB_CTRL_V_INTR_MASKING; 2266 2267 /* Enable Nested Paging. */ 2268 vmcb->ctrl.enable1 = VMCB_CTRL_ENABLE_NP; 2269 vmcb->ctrl.n_cr3 = os_vmspace_pdirpa(mach->vm); 2270 2271 /* Init XSAVE header. */ 2272 cpudata->gxsave.xstate_bv = svm_xcr0_mask; 2273 cpudata->gxsave.xcomp_bv = 0; 2274 2275 /* Install the RESET state. */ 2276 memcpy(&vcpu->comm->state, &nvmm_x86_reset_state, 2277 sizeof(nvmm_x86_reset_state)); 2278 vcpu->comm->state_wanted = NVMM_X64_STATE_ALL; 2279 vcpu->comm->state_cached = 0; 2280 svm_vcpu_setstate(vcpu); 2281 } 2282 2283 static int 2284 svm_vcpu_create(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2285 { 2286 struct svm_cpudata *cpudata; 2287 int error; 2288 2289 /* Allocate the SVM cpudata. */ 2290 cpudata = (struct svm_cpudata *)os_pagemem_zalloc(sizeof(*cpudata)); 2291 if (cpudata == NULL) 2292 return ENOMEM; 2293 2294 vcpu->cpudata = cpudata; 2295 2296 /* VMCB */ 2297 error = os_contigpa_zalloc(&cpudata->vmcb_pa, 2298 (vaddr_t *)&cpudata->vmcb, VMCB_NPAGES); 2299 if (error) 2300 goto error; 2301 2302 /* I/O Bitmap */ 2303 error = os_contigpa_zalloc(&cpudata->iobm_pa, 2304 (vaddr_t *)&cpudata->iobm, IOBM_NPAGES); 2305 if (error) 2306 goto error; 2307 2308 /* MSR Bitmap */ 2309 error = os_contigpa_zalloc(&cpudata->msrbm_pa, 2310 (vaddr_t *)&cpudata->msrbm, MSRBM_NPAGES); 2311 if (error) 2312 goto error; 2313 2314 /* Init the VCPU info. */ 2315 svm_vcpu_init(mach, vcpu); 2316 2317 return 0; 2318 2319 error: 2320 if (cpudata->vmcb_pa) { 2321 os_contigpa_free(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, 2322 VMCB_NPAGES); 2323 } 2324 if (cpudata->iobm_pa) { 2325 os_contigpa_free(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, 2326 IOBM_NPAGES); 2327 } 2328 if (cpudata->msrbm_pa) { 2329 os_contigpa_free(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, 2330 MSRBM_NPAGES); 2331 } 2332 os_pagemem_free(cpudata, sizeof(*cpudata)); 2333 return error; 2334 } 2335 2336 static void 2337 svm_vcpu_destroy(struct nvmm_machine *mach, struct nvmm_cpu *vcpu) 2338 { 2339 struct svm_cpudata *cpudata = vcpu->cpudata; 2340 2341 svm_asid_free(vcpu); 2342 2343 os_contigpa_free(cpudata->vmcb_pa, (vaddr_t)cpudata->vmcb, 2344 VMCB_NPAGES); 2345 os_contigpa_free(cpudata->iobm_pa, (vaddr_t)cpudata->iobm, 2346 IOBM_NPAGES); 2347 os_contigpa_free(cpudata->msrbm_pa, (vaddr_t)cpudata->msrbm, 2348 MSRBM_NPAGES); 2349 2350 os_pagemem_free(cpudata, sizeof(*cpudata)); 2351 } 2352 2353 /* -------------------------------------------------------------------------- */ 2354 2355 static int 2356 svm_vcpu_configure_cpuid(struct svm_cpudata *cpudata, void *data) 2357 { 2358 struct nvmm_vcpu_conf_cpuid *cpuid = data; 2359 size_t i; 2360 2361 if (__predict_false(cpuid->mask && cpuid->exit)) { 2362 return EINVAL; 2363 } 2364 if (__predict_false(cpuid->mask && 2365 ((cpuid->u.mask.set.eax & cpuid->u.mask.del.eax) || 2366 (cpuid->u.mask.set.ebx & cpuid->u.mask.del.ebx) || 2367 (cpuid->u.mask.set.ecx & cpuid->u.mask.del.ecx) || 2368 (cpuid->u.mask.set.edx & cpuid->u.mask.del.edx)))) { 2369 return EINVAL; 2370 } 2371 2372 /* If unset, delete, to restore the default behavior. */ 2373 if (!cpuid->mask && !cpuid->exit) { 2374 for (i = 0; i < SVM_NCPUIDS; i++) { 2375 if (!cpudata->cpuidpresent[i]) { 2376 continue; 2377 } 2378 if (cpudata->cpuid[i].leaf == cpuid->leaf) { 2379 cpudata->cpuidpresent[i] = false; 2380 } 2381 } 2382 return 0; 2383 } 2384 2385 /* If already here, replace. */ 2386 for (i = 0; i < SVM_NCPUIDS; i++) { 2387 if (!cpudata->cpuidpresent[i]) { 2388 continue; 2389 } 2390 if (cpudata->cpuid[i].leaf == cpuid->leaf) { 2391 memcpy(&cpudata->cpuid[i], cpuid, 2392 sizeof(struct nvmm_vcpu_conf_cpuid)); 2393 return 0; 2394 } 2395 } 2396 2397 /* Not here, insert. */ 2398 for (i = 0; i < SVM_NCPUIDS; i++) { 2399 if (!cpudata->cpuidpresent[i]) { 2400 cpudata->cpuidpresent[i] = true; 2401 memcpy(&cpudata->cpuid[i], cpuid, 2402 sizeof(struct nvmm_vcpu_conf_cpuid)); 2403 return 0; 2404 } 2405 } 2406 2407 return ENOBUFS; 2408 } 2409 2410 static int 2411 svm_vcpu_configure(struct nvmm_cpu *vcpu, uint64_t op, void *data) 2412 { 2413 struct svm_cpudata *cpudata = vcpu->cpudata; 2414 2415 switch (op) { 2416 case NVMM_VCPU_CONF_MD(NVMM_VCPU_CONF_CPUID): 2417 return svm_vcpu_configure_cpuid(cpudata, data); 2418 default: 2419 return EINVAL; 2420 } 2421 } 2422 2423 /* -------------------------------------------------------------------------- */ 2424 2425 #ifdef __NetBSD__ 2426 static void 2427 svm_tlb_flush(struct pmap *pm) 2428 { 2429 struct nvmm_machine *mach = os_pmap_mach(pm); 2430 struct svm_machdata *machdata = mach->machdata; 2431 2432 os_atomic_inc_64(&machdata->mach_htlb_gen); 2433 2434 /* 2435 * Send a dummy IPI to each CPU. The IPIs cause #VMEXITs. Afterwards the 2436 * VCPU loops will see that their 'vcpu_htlb_gen' is out of sync, and 2437 * will each flush their own TLB. 2438 */ 2439 os_ipi_kickall(); 2440 } 2441 #endif 2442 2443 static void 2444 svm_machine_create(struct nvmm_machine *mach) 2445 { 2446 struct pmap *pmap = os_vmspace_pmap(mach->vm); 2447 struct svm_machdata *machdata; 2448 2449 /* Transform pmap. */ 2450 #if defined(__NetBSD__) 2451 os_pmap_mach(pmap) = (void *)mach; 2452 pmap->pm_tlb_flush = svm_tlb_flush; 2453 #elif defined(__DragonFly__) 2454 pmap_npt_transform(pmap, 0); 2455 #endif 2456 2457 machdata = os_mem_zalloc(sizeof(struct svm_machdata)); 2458 mach->machdata = machdata; 2459 2460 /* Start with an hTLB flush everywhere. */ 2461 machdata->mach_htlb_gen = 1; 2462 } 2463 2464 static void 2465 svm_machine_destroy(struct nvmm_machine *mach) 2466 { 2467 os_mem_free(mach->machdata, sizeof(struct svm_machdata)); 2468 } 2469 2470 static int 2471 svm_machine_configure(struct nvmm_machine *mach, uint64_t op, void *data) 2472 { 2473 panic("%s: impossible", __func__); 2474 } 2475 2476 /* -------------------------------------------------------------------------- */ 2477 2478 static bool 2479 svm_ident(void) 2480 { 2481 cpuid_desc_t descs; 2482 uint64_t msr; 2483 2484 /* Must be AMD CPU. */ 2485 x86_get_cpuid(0x00000000, &descs); 2486 if (memcmp(&descs.ebx, "Auth", 4) || 2487 memcmp(&descs.edx, "enti", 4) || 2488 memcmp(&descs.ecx, "cAMD", 4)) { 2489 return false; 2490 } 2491 2492 /* Want leaf Fn8000_000A. */ 2493 x86_get_cpuid(0x80000000, &descs); 2494 if (descs.eax < 0x8000000a) { 2495 os_printf("nvmm: CPUID leaf not available\n"); 2496 return false; 2497 } 2498 2499 /* Want SVM support. */ 2500 x86_get_cpuid(0x80000001, &descs); 2501 if (!(descs.ecx & CPUID_8_01_ECX_SVM)) { 2502 os_printf("nvmm: SVM not supported\n"); 2503 return false; 2504 } 2505 2506 /* Want SVM revision 1. */ 2507 x86_get_cpuid(0x8000000a, &descs); 2508 if (__SHIFTOUT(descs.eax, CPUID_8_0A_EAX_SvmRev) != 1) { 2509 os_printf("nvmm: SVM revision not supported\n"); 2510 return false; 2511 } 2512 2513 /* Want Nested Paging. */ 2514 if (!(descs.edx & CPUID_8_0A_EDX_NP)) { 2515 os_printf("nvmm: SVM-NP not supported\n"); 2516 return false; 2517 } 2518 2519 /* Want nRIP. */ 2520 if (!(descs.edx & CPUID_8_0A_EDX_NRIPS)) { 2521 os_printf("nvmm: SVM-NRIPS not supported\n"); 2522 return false; 2523 } 2524 2525 svm_decode_assist = (descs.edx & CPUID_8_0A_EDX_DecodeAssists) != 0; 2526 2527 msr = rdmsr(MSR_VM_CR); 2528 if ((msr & VM_CR_SVMED) && (msr & VM_CR_LOCK)) { 2529 os_printf("nvmm: SVM disabled in BIOS\n"); 2530 return false; 2531 } 2532 2533 return true; 2534 } 2535 2536 static void 2537 svm_init_asid(uint32_t maxasid) 2538 { 2539 size_t i, j, allocsz; 2540 2541 os_mtx_init(&svm_asidlock); 2542 2543 /* Arbitrarily limit. */ 2544 maxasid = uimin(maxasid, 8192); 2545 2546 svm_maxasid = maxasid; 2547 allocsz = roundup(maxasid, 8) / 8; 2548 svm_asidmap = os_mem_zalloc(allocsz); 2549 2550 /* ASID 0 is reserved for the host. */ 2551 svm_asidmap[0] |= __BIT(0); 2552 2553 /* ASID n-1 is special, we share it. */ 2554 i = (maxasid - 1) / 8; 2555 j = (maxasid - 1) % 8; 2556 svm_asidmap[i] |= __BIT(j); 2557 } 2558 2559 static 2560 OS_IPI_FUNC(svm_change_cpu) 2561 { 2562 bool enable = arg != NULL; 2563 uint64_t msr; 2564 2565 msr = rdmsr(MSR_VM_CR); 2566 if (msr & VM_CR_SVMED) { 2567 wrmsr(MSR_VM_CR, msr & ~VM_CR_SVMED); 2568 } 2569 2570 if (!enable) { 2571 wrmsr(MSR_VM_HSAVE_PA, 0); 2572 } 2573 2574 msr = rdmsr(MSR_EFER); 2575 if (enable) { 2576 msr |= EFER_SVME; 2577 } else { 2578 msr &= ~EFER_SVME; 2579 } 2580 wrmsr(MSR_EFER, msr); 2581 2582 if (enable) { 2583 wrmsr(MSR_VM_HSAVE_PA, hsave[os_curcpu_number()].pa); 2584 } 2585 } 2586 2587 static void 2588 svm_init(void) 2589 { 2590 cpuid_desc_t descs; 2591 os_cpu_t *cpu; 2592 2593 x86_get_cpuid(0x8000000a, &descs); 2594 2595 /* The guest TLB flush command. */ 2596 if (descs.edx & CPUID_8_0A_EDX_FlushByASID) { 2597 svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_GUEST; 2598 } else { 2599 svm_ctrl_tlb_flush = VMCB_CTRL_TLB_CTRL_FLUSH_ALL; 2600 } 2601 2602 /* Init the ASID. */ 2603 svm_init_asid(descs.ebx); 2604 2605 /* Init the XCR0 mask. */ 2606 svm_xcr0_mask = SVM_XCR0_MASK_DEFAULT & x86_xsave_features; 2607 2608 /* Init the max basic CPUID leaf. */ 2609 x86_get_cpuid(0x00000000, &descs); 2610 svm_cpuid_max_basic = uimin(descs.eax, SVM_CPUID_MAX_BASIC); 2611 2612 /* Init the max extended CPUID leaf. */ 2613 x86_get_cpuid(0x80000000, &descs); 2614 svm_cpuid_max_extended = uimin(descs.eax, SVM_CPUID_MAX_EXTENDED); 2615 2616 /* Init the global host state. */ 2617 if (svm_xcr0_mask != 0) { 2618 svm_global_hstate.xcr0 = x86_get_xcr(0); 2619 } 2620 svm_global_hstate.star = rdmsr(MSR_STAR); 2621 svm_global_hstate.lstar = rdmsr(MSR_LSTAR); 2622 svm_global_hstate.cstar = rdmsr(MSR_CSTAR); 2623 svm_global_hstate.sfmask = rdmsr(MSR_SFMASK); 2624 2625 memset(hsave, 0, sizeof(hsave)); 2626 OS_CPU_FOREACH(cpu) { 2627 hsave[os_cpu_number(cpu)].pa = os_pa_zalloc(); 2628 } 2629 2630 os_ipi_broadcast(svm_change_cpu, (void *)true); 2631 } 2632 2633 static void 2634 svm_fini_asid(void) 2635 { 2636 size_t allocsz; 2637 2638 allocsz = roundup(svm_maxasid, 8) / 8; 2639 os_mem_free(svm_asidmap, allocsz); 2640 2641 os_mtx_destroy(&svm_asidlock); 2642 } 2643 2644 static void 2645 svm_fini(void) 2646 { 2647 size_t i; 2648 2649 os_ipi_broadcast(svm_change_cpu, (void *)false); 2650 2651 for (i = 0; i < OS_MAXCPUS; i++) { 2652 if (hsave[i].pa != 0) 2653 os_pa_free(hsave[i].pa); 2654 } 2655 2656 svm_fini_asid(); 2657 } 2658 2659 static void 2660 svm_capability(struct nvmm_capability *cap) 2661 { 2662 cap->arch.mach_conf_support = 0; 2663 cap->arch.vcpu_conf_support = 2664 NVMM_CAP_ARCH_VCPU_CONF_CPUID; 2665 cap->arch.xcr0_mask = svm_xcr0_mask; 2666 cap->arch.mxcsr_mask = x86_fpu_mxcsr_mask; 2667 cap->arch.conf_cpuid_maxops = SVM_NCPUIDS; 2668 } 2669 2670 const struct nvmm_impl nvmm_x86_svm = { 2671 .name = "x86-svm", 2672 .ident = svm_ident, 2673 .init = svm_init, 2674 .fini = svm_fini, 2675 .capability = svm_capability, 2676 .mach_conf_max = NVMM_X86_MACH_NCONF, 2677 .mach_conf_sizes = NULL, 2678 .vcpu_conf_max = NVMM_X86_VCPU_NCONF, 2679 .vcpu_conf_sizes = svm_vcpu_conf_sizes, 2680 .state_size = sizeof(struct nvmm_x64_state), 2681 .machine_create = svm_machine_create, 2682 .machine_destroy = svm_machine_destroy, 2683 .machine_configure = svm_machine_configure, 2684 .vcpu_create = svm_vcpu_create, 2685 .vcpu_destroy = svm_vcpu_destroy, 2686 .vcpu_configure = svm_vcpu_configure, 2687 .vcpu_setstate = svm_vcpu_setstate, 2688 .vcpu_getstate = svm_vcpu_getstate, 2689 .vcpu_inject = svm_vcpu_inject, 2690 .vcpu_run = svm_vcpu_run 2691 }; 2692