xref: /freebsd/sys/amd64/vmm/intel/vmx_msr.c (revision 38069501)
1 /*-
2  * Copyright (c) 2011 NetApp, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/proc.h>
35 
36 #include <machine/clock.h>
37 #include <machine/cpufunc.h>
38 #include <machine/md_var.h>
39 #include <machine/pcb.h>
40 #include <machine/specialreg.h>
41 #include <machine/vmm.h>
42 
43 #include "vmx.h"
44 #include "vmx_msr.h"
45 
46 static boolean_t
47 vmx_ctl_allows_one_setting(uint64_t msr_val, int bitpos)
48 {
49 
50 	if (msr_val & (1UL << (bitpos + 32)))
51 		return (TRUE);
52 	else
53 		return (FALSE);
54 }
55 
56 static boolean_t
57 vmx_ctl_allows_zero_setting(uint64_t msr_val, int bitpos)
58 {
59 
60 	if ((msr_val & (1UL << bitpos)) == 0)
61 		return (TRUE);
62 	else
63 		return (FALSE);
64 }
65 
66 uint32_t
67 vmx_revision(void)
68 {
69 
70 	return (rdmsr(MSR_VMX_BASIC) & 0xffffffff);
71 }
72 
73 /*
74  * Generate a bitmask to be used for the VMCS execution control fields.
75  *
76  * The caller specifies what bits should be set to one in 'ones_mask'
77  * and what bits should be set to zero in 'zeros_mask'. The don't-care
78  * bits are set to the default value. The default values are obtained
79  * based on "Algorithm 3" in Section 27.5.1 "Algorithms for Determining
80  * VMX Capabilities".
81  *
82  * Returns zero on success and non-zero on error.
83  */
84 int
85 vmx_set_ctlreg(int ctl_reg, int true_ctl_reg, uint32_t ones_mask,
86 	       uint32_t zeros_mask, uint32_t *retval)
87 {
88 	int i;
89 	uint64_t val, trueval;
90 	boolean_t true_ctls_avail, one_allowed, zero_allowed;
91 
92 	/* We cannot ask the same bit to be set to both '1' and '0' */
93 	if ((ones_mask ^ zeros_mask) != (ones_mask | zeros_mask))
94 		return (EINVAL);
95 
96 	if (rdmsr(MSR_VMX_BASIC) & (1UL << 55))
97 		true_ctls_avail = TRUE;
98 	else
99 		true_ctls_avail = FALSE;
100 
101 	val = rdmsr(ctl_reg);
102 	if (true_ctls_avail)
103 		trueval = rdmsr(true_ctl_reg);		/* step c */
104 	else
105 		trueval = val;				/* step a */
106 
107 	for (i = 0; i < 32; i++) {
108 		one_allowed = vmx_ctl_allows_one_setting(trueval, i);
109 		zero_allowed = vmx_ctl_allows_zero_setting(trueval, i);
110 
111 		KASSERT(one_allowed || zero_allowed,
112 			("invalid zero/one setting for bit %d of ctl 0x%0x, "
113 			 "truectl 0x%0x\n", i, ctl_reg, true_ctl_reg));
114 
115 		if (zero_allowed && !one_allowed) {		/* b(i),c(i) */
116 			if (ones_mask & (1 << i))
117 				return (EINVAL);
118 			*retval &= ~(1 << i);
119 		} else if (one_allowed && !zero_allowed) {	/* b(i),c(i) */
120 			if (zeros_mask & (1 << i))
121 				return (EINVAL);
122 			*retval |= 1 << i;
123 		} else {
124 			if (zeros_mask & (1 << i))	/* b(ii),c(ii) */
125 				*retval &= ~(1 << i);
126 			else if (ones_mask & (1 << i)) /* b(ii), c(ii) */
127 				*retval |= 1 << i;
128 			else if (!true_ctls_avail)
129 				*retval &= ~(1 << i);	/* b(iii) */
130 			else if (vmx_ctl_allows_zero_setting(val, i))/* c(iii)*/
131 				*retval &= ~(1 << i);
132 			else if (vmx_ctl_allows_one_setting(val, i)) /* c(iv) */
133 				*retval |= 1 << i;
134 			else {
135 				panic("vmx_set_ctlreg: unable to determine "
136 				      "correct value of ctl bit %d for msr "
137 				      "0x%0x and true msr 0x%0x", i, ctl_reg,
138 				      true_ctl_reg);
139 			}
140 		}
141 	}
142 
143 	return (0);
144 }
145 
/*
 * Initialize the 4KB MSR permission bitmap with all bits set, which
 * causes every MSR access to be intercepted by the hypervisor until
 * msr_bitmap_change_access() explicitly opens individual MSRs.
 */
void
msr_bitmap_initialize(char *bitmap)
{

	memset(bitmap, 0xff, PAGE_SIZE);
}
152 
153 int
154 msr_bitmap_change_access(char *bitmap, u_int msr, int access)
155 {
156 	int byte, bit;
157 
158 	if (msr <= 0x00001FFF)
159 		byte = msr / 8;
160 	else if (msr >= 0xC0000000 && msr <= 0xC0001FFF)
161 		byte = 1024 + (msr - 0xC0000000) / 8;
162 	else
163 		return (EINVAL);
164 
165 	bit = msr & 0x7;
166 
167 	if (access & MSR_BITMAP_ACCESS_READ)
168 		bitmap[byte] &= ~(1 << bit);
169 	else
170 		bitmap[byte] |= 1 << bit;
171 
172 	byte += 2048;
173 	if (access & MSR_BITMAP_ACCESS_WRITE)
174 		bitmap[byte] &= ~(1 << bit);
175 	else
176 		bitmap[byte] |= 1 << bit;
177 
178 	return (0);
179 }
180 
/* Emulated IA32_MISC_ENABLE value presented to guests (see vmx_msr_init). */
static uint64_t misc_enable;
/* Emulated MSR_PLATFORM_INFO value derived from the host TSC ratio. */
static uint64_t platform_info;
/* Emulated MSR_TURBO_RATIO_LIMIT/LIMIT1 value (same ratio in every byte). */
static uint64_t turbo_ratio_limit;
/* Host syscall-related MSRs captured once at init and restored on VM exit. */
static uint64_t host_msrs[GUEST_MSR_NUM];
185 
186 static bool
187 nehalem_cpu(void)
188 {
189 	u_int family, model;
190 
191 	/*
192 	 * The family:model numbers belonging to the Nehalem microarchitecture
193 	 * are documented in Section 35.5, Intel SDM dated Feb 2014.
194 	 */
195 	family = CPUID_TO_FAMILY(cpu_id);
196 	model = CPUID_TO_MODEL(cpu_id);
197 	if (family == 0x6) {
198 		switch (model) {
199 		case 0x1A:
200 		case 0x1E:
201 		case 0x1F:
202 		case 0x2E:
203 			return (true);
204 		default:
205 			break;
206 		}
207 	}
208 	return (false);
209 }
210 
211 static bool
212 westmere_cpu(void)
213 {
214 	u_int family, model;
215 
216 	/*
217 	 * The family:model numbers belonging to the Westmere microarchitecture
218 	 * are documented in Section 35.6, Intel SDM dated Feb 2014.
219 	 */
220 	family = CPUID_TO_FAMILY(cpu_id);
221 	model = CPUID_TO_MODEL(cpu_id);
222 	if (family == 0x6) {
223 		switch (model) {
224 		case 0x25:
225 		case 0x2C:
226 			return (true);
227 		default:
228 			break;
229 		}
230 	}
231 	return (false);
232 }
233 
/*
 * Validate a value destined for the guest's IA32_PAT MSR.
 *
 * From Intel SDM: Table "Memory Types That Can Be Encoded With PAT".
 * Each of the eight PAT entries (PA0..PA7) occupies one byte and must
 * hold a defined memory type; encodings 2, 3 and anything >= 8 are
 * reserved, so such a value must not be written to the MSR.
 */
static bool
pat_valid(uint64_t val)
{
	int idx, type;

	for (idx = 0; idx < 8; idx++) {
		type = (val >> (idx * 8)) & 0xff;
		if (type == 2 || type == 3 || type >= 8)
			return (false);
	}
	return (true);
}
252 
253 void
254 vmx_msr_init(void)
255 {
256 	uint64_t bus_freq, ratio;
257 	int i;
258 
259 	/*
260 	 * It is safe to cache the values of the following MSRs because
261 	 * they don't change based on curcpu, curproc or curthread.
262 	 */
263 	host_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
264 	host_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
265 	host_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
266 	host_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
267 
268 	/*
269 	 * Initialize emulated MSRs
270 	 */
271 	misc_enable = rdmsr(MSR_IA32_MISC_ENABLE);
272 	/*
273 	 * Set mandatory bits
274 	 *  11:   branch trace disabled
275 	 *  12:   PEBS unavailable
276 	 * Clear unsupported features
277 	 *  16:   SpeedStep enable
278 	 *  18:   enable MONITOR FSM
279 	 */
280 	misc_enable |= (1 << 12) | (1 << 11);
281 	misc_enable &= ~((1 << 18) | (1 << 16));
282 
283 	if (nehalem_cpu() || westmere_cpu())
284 		bus_freq = 133330000;		/* 133Mhz */
285 	else
286 		bus_freq = 100000000;		/* 100Mhz */
287 
288 	/*
289 	 * XXXtime
290 	 * The ratio should really be based on the virtual TSC frequency as
291 	 * opposed to the host TSC.
292 	 */
293 	ratio = (tsc_freq / bus_freq) & 0xff;
294 
295 	/*
296 	 * The register definition is based on the micro-architecture
297 	 * but the following bits are always the same:
298 	 * [15:8]  Maximum Non-Turbo Ratio
299 	 * [28]    Programmable Ratio Limit for Turbo Mode
300 	 * [29]    Programmable TDC-TDP Limit for Turbo Mode
301 	 * [47:40] Maximum Efficiency Ratio
302 	 *
303 	 * The other bits can be safely set to 0 on all
304 	 * micro-architectures up to Haswell.
305 	 */
306 	platform_info = (ratio << 8) | (ratio << 40);
307 
308 	/*
309 	 * The number of valid bits in the MSR_TURBO_RATIO_LIMITx register is
310 	 * dependent on the maximum cores per package supported by the micro-
311 	 * architecture. For e.g., Westmere supports 6 cores per package and
312 	 * uses the low 48 bits. Sandybridge support 8 cores per package and
313 	 * uses up all 64 bits.
314 	 *
315 	 * However, the unused bits are reserved so we pretend that all bits
316 	 * in this MSR are valid.
317 	 */
318 	for (i = 0; i < 8; i++)
319 		turbo_ratio_limit = (turbo_ratio_limit << 8) | ratio;
320 }
321 
322 void
323 vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
324 {
325 	uint64_t *guest_msrs;
326 
327 	guest_msrs = vmx->guest_msrs[vcpuid];
328 
329 	/*
330 	 * The permissions bitmap is shared between all vcpus so initialize it
331 	 * once when initializing the vBSP.
332 	 */
333 	if (vcpuid == 0) {
334 		guest_msr_rw(vmx, MSR_LSTAR);
335 		guest_msr_rw(vmx, MSR_CSTAR);
336 		guest_msr_rw(vmx, MSR_STAR);
337 		guest_msr_rw(vmx, MSR_SF_MASK);
338 		guest_msr_rw(vmx, MSR_KGSBASE);
339 	}
340 
341 	/*
342 	 * Initialize guest IA32_PAT MSR with default value after reset.
343 	 */
344 	guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
345 	    PAT_VALUE(1, PAT_WRITE_THROUGH)	|
346 	    PAT_VALUE(2, PAT_UNCACHED)		|
347 	    PAT_VALUE(3, PAT_UNCACHEABLE)	|
348 	    PAT_VALUE(4, PAT_WRITE_BACK)	|
349 	    PAT_VALUE(5, PAT_WRITE_THROUGH)	|
350 	    PAT_VALUE(6, PAT_UNCACHED)		|
351 	    PAT_VALUE(7, PAT_UNCACHEABLE);
352 
353 	return;
354 }
355 
/*
 * Load the guest's syscall-related MSRs just before entering the guest.
 *
 * Statement order matters: update_pcb_bases() is called before KGSBASE
 * is overwritten below — NOTE(review): presumably it records the host's
 * segment-base state in the pcb so it can be restored on the way back
 * to userspace (see vmx_msr_guest_exit); confirm against pcb code.
 */
void
vmx_msr_guest_enter(struct vmx *vmx, int vcpuid)
{
	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];

	/* Save host MSRs (in particular, KGSBASE) and restore guest MSRs */
	update_pcb_bases(curpcb);
	wrmsr(MSR_LSTAR, guest_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, guest_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, guest_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, guest_msrs[IDX_MSR_SF_MASK]);
	wrmsr(MSR_KGSBASE, guest_msrs[IDX_MSR_KGSBASE]);
}
369 
/*
 * Save the guest's syscall-related MSRs and restore the host values
 * (cached in host_msrs[] by vmx_msr_init) after a VM exit.
 *
 * Statement order matters: the guest values must be read out before
 * the host values are written back over the same registers.
 */
void
vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
{
	uint64_t *guest_msrs = vmx->guest_msrs[vcpuid];

	/* Save guest MSRs */
	guest_msrs[IDX_MSR_LSTAR] = rdmsr(MSR_LSTAR);
	guest_msrs[IDX_MSR_CSTAR] = rdmsr(MSR_CSTAR);
	guest_msrs[IDX_MSR_STAR] = rdmsr(MSR_STAR);
	guest_msrs[IDX_MSR_SF_MASK] = rdmsr(MSR_SF_MASK);
	guest_msrs[IDX_MSR_KGSBASE] = rdmsr(MSR_KGSBASE);

	/* Restore host MSRs */
	wrmsr(MSR_LSTAR, host_msrs[IDX_MSR_LSTAR]);
	wrmsr(MSR_CSTAR, host_msrs[IDX_MSR_CSTAR]);
	wrmsr(MSR_STAR, host_msrs[IDX_MSR_STAR]);
	wrmsr(MSR_SF_MASK, host_msrs[IDX_MSR_SF_MASK]);

	/* MSR_KGSBASE will be restored on the way back to userspace */
}
390 
391 int
392 vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
393 {
394 	const uint64_t *guest_msrs;
395 	int error;
396 
397 	guest_msrs = vmx->guest_msrs[vcpuid];
398 	error = 0;
399 
400 	switch (num) {
401 	case MSR_MCG_CAP:
402 	case MSR_MCG_STATUS:
403 		*val = 0;
404 		break;
405 	case MSR_MTRRcap:
406 	case MSR_MTRRdefType:
407 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
408 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
409 	case MSR_MTRR64kBase:
410 		*val = 0;
411 		break;
412 	case MSR_IA32_MISC_ENABLE:
413 		*val = misc_enable;
414 		break;
415 	case MSR_PLATFORM_INFO:
416 		*val = platform_info;
417 		break;
418 	case MSR_TURBO_RATIO_LIMIT:
419 	case MSR_TURBO_RATIO_LIMIT1:
420 		*val = turbo_ratio_limit;
421 		break;
422 	case MSR_PAT:
423 		*val = guest_msrs[IDX_MSR_PAT];
424 		break;
425 	default:
426 		error = EINVAL;
427 		break;
428 	}
429 	return (error);
430 }
431 
432 int
433 vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
434 {
435 	uint64_t *guest_msrs;
436 	uint64_t changed;
437 	int error;
438 
439 	guest_msrs = vmx->guest_msrs[vcpuid];
440 	error = 0;
441 
442 	switch (num) {
443 	case MSR_MCG_CAP:
444 	case MSR_MCG_STATUS:
445 		break;		/* ignore writes */
446 	case MSR_MTRRcap:
447 		vm_inject_gp(vmx->vm, vcpuid);
448 		break;
449 	case MSR_MTRRdefType:
450 	case MSR_MTRR4kBase ... MSR_MTRR4kBase + 8:
451 	case MSR_MTRR16kBase ... MSR_MTRR16kBase + 1:
452 	case MSR_MTRR64kBase:
453 		break;		/* Ignore writes */
454 	case MSR_IA32_MISC_ENABLE:
455 		changed = val ^ misc_enable;
456 		/*
457 		 * If the host has disabled the NX feature then the guest
458 		 * also cannot use it. However, a Linux guest will try to
459 		 * enable the NX feature by writing to the MISC_ENABLE MSR.
460 		 *
461 		 * This can be safely ignored because the memory management
462 		 * code looks at CPUID.80000001H:EDX.NX to check if the
463 		 * functionality is actually enabled.
464 		 */
465 		changed &= ~(1UL << 34);
466 
467 		/*
468 		 * Punt to userspace if any other bits are being modified.
469 		 */
470 		if (changed)
471 			error = EINVAL;
472 
473 		break;
474 	case MSR_PAT:
475 		if (pat_valid(val))
476 			guest_msrs[IDX_MSR_PAT] = val;
477 		else
478 			vm_inject_gp(vmx->vm, vcpuid);
479 		break;
480 	case MSR_TSC:
481 		error = vmx_set_tsc_offset(vmx, vcpuid, val - rdtsc());
482 		break;
483 	default:
484 		error = EINVAL;
485 		break;
486 	}
487 
488 	return (error);
489 }
490