xref: /illumos-gate/usr/src/uts/intel/sys/vmm.h (revision 02b17e23)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 /*
31  * This file and its contents are supplied under the terms of the
32  * Common Development and Distribution License ("CDDL"), version 1.0.
33  * You may only use this file in accordance with the terms of version
34  * 1.0 of the CDDL.
35  *
36  * A full copy of the text of the CDDL should have accompanied this
37  * source.  A copy of the CDDL is also available via the Internet at
38  * http://www.illumos.org/license/CDDL.
39  *
40  * Copyright 2015 Pluribus Networks Inc.
41  * Copyright 2019 Joyent, Inc.
42  * Copyright 2021 Oxide Computer Company
43  */
44 
45 #ifndef _VMM_H_
46 #define	_VMM_H_
47 
/*
 * Values describing how a VM was suspended (carried in the 'suspended'
 * payload of struct vm_exit).  VM_SUSPEND_LAST is the list terminator and
 * must remain the final entry.
 */
enum vm_suspend_how {
	VM_SUSPEND_NONE		= 0,
	VM_SUSPEND_RESET	= 1,
	VM_SUSPEND_POWEROFF	= 2,
	VM_SUSPEND_HALT		= 3,
	VM_SUSPEND_TRIPLEFAULT	= 4,
	VM_SUSPEND_LAST		= 5
};
56 
/*
 * Identifiers for architecturally defined registers.
 *
 * The numeric value of each identifier is fixed by its position in this
 * list; the explicit values at the head of each group are there to make
 * that numbering visible.  VM_REG_LAST terminates the list.
 */
enum vm_reg_name {
	/* General purpose registers (RSP is listed further down) */
	VM_REG_GUEST_RAX = 0,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,

	/* Control registers, DR7, stack/instruction pointers and flags */
	VM_REG_GUEST_CR0 = 15,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,

	/* Segment and descriptor-table registers */
	VM_REG_GUEST_ES = 22,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,

	/* EFER, CR2 and the four PDPTE entries */
	VM_REG_GUEST_EFER = 32,
	VM_REG_GUEST_CR2,
	VM_REG_GUEST_PDPTE0,
	VM_REG_GUEST_PDPTE1,
	VM_REG_GUEST_PDPTE2,
	VM_REG_GUEST_PDPTE3,

	VM_REG_GUEST_INTR_SHADOW = 38,

	/* Remaining debug registers */
	VM_REG_GUEST_DR0 = 39,
	VM_REG_GUEST_DR1,
	VM_REG_GUEST_DR2,
	VM_REG_GUEST_DR3,
	VM_REG_GUEST_DR6,

	VM_REG_GUEST_ENTRY_INST_LENGTH = 44,

	VM_REG_LAST = 45
};
108 
/*
 * x2APIC state selector.  X2APIC_STATE_LAST terminates the list and is not
 * itself a state.
 */
enum x2apic_state {
	X2APIC_DISABLED		= 0,
	X2APIC_ENABLED		= 1,
	X2APIC_STATE_LAST	= 2
};
114 
/*
 * Field layout of the low 32 bits of an 'intinfo' value, as encoded by the
 * masks below:
 *
 *	bits  7:0	vector (extract with VM_INTINFO_VECTOR())
 *	bits 10:8	type (VM_INTINFO_TYPE), one of the type values below
 *	bit  11		VM_INTINFO_DEL_ERRCODE
 *	bits 30:12	reserved (VM_INTINFO_RSVD)
 *	bit  31		VM_INTINFO_VALID
 */
#define	VM_INTINFO_VECTOR(info)	((info) & 0xff)
#define	VM_INTINFO_TYPE		0x700
#define	VM_INTINFO_DEL_ERRCODE	0x800
#define	VM_INTINFO_RSVD		0x7ffff000
#define	VM_INTINFO_VALID	0x80000000

/* Values carried in the VM_INTINFO_TYPE field (already shifted into place) */
#define	VM_INTINFO_HWINTR	0x000
#define	VM_INTINFO_NMI		0x200
#define	VM_INTINFO_HWEXCEPTION	0x300
#define	VM_INTINFO_SWINTR	0x400
124 
/*
 * Unlike FreeBSD, illumos has no SPECNAMELEN-derived limit on names.  An
 * arbitrary limit has been chosen instead so that structure definitions
 * stay simple.  Memory segment names are subject to the same limit.
 */
#define	VM_MAX_NAMELEN		128
#define	VM_MAX_SEG_NAMELEN	128

/* Maximum number of virtual CPUs */
#define	VM_MAXCPU		32
135 
/*
 * Identifiers for optional vmm capabilities.  VM_CAP_MAX terminates the
 * list and is not itself a capability.
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT	= 0,
	VM_CAP_MTRAP_EXIT	= 1,
	VM_CAP_PAUSE_EXIT	= 2,
	VM_CAP_ENABLE_INVPCID	= 3,
	VM_CAP_BPT_EXIT		= 4,
	VM_CAP_MAX		= 5
};
147 
/*
 * VMX capability flags.  Each capability occupies a distinct bit;
 * VMX_CAP_NONE is the value with no bits set.
 */
enum vmx_caps {
	VMX_CAP_NONE		= 0x0,
	VMX_CAP_TPR_SHADOW	= 0x1,
	VMX_CAP_APICV		= 0x2,
	VMX_CAP_APICV_X2APIC	= 0x4,
	VMX_CAP_APICV_PIR	= 0x8,
};
155 
/* Interrupt trigger mode: edge or level. */
enum vm_intr_trigger {
	EDGE_TRIGGER	= 0,
	LEVEL_TRIGGER	= 1
};
160 
/*
 * Segment descriptor: base, limit and access rights.
 *
 * The 'access' field has the format specified in Table 21-2 of the Intel
 * Architecture Manual vol 3b.
 *
 * XXX The contents of the 'access' field are architecturally defined except
 * bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;		/* decode with SEG_DESC_*() below */
};

/*
 * Accessors for the 'access' field:
 *	bits 4:0	type
 *	bits 6:5	DPL
 *	bit  7		present
 *	bit  14		default operation size (DEF32)
 *	bit  15		granularity
 *	bit  16		unusable
 * The single-bit accessors evaluate to exactly 0 or 1.
 */
#define	SEG_DESC_TYPE(access)		((access) & 0x001f)
#define	SEG_DESC_DPL(access)		(((access) >> 5) & 0x3)
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) != 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) != 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) != 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) != 0)
179 
/*
 * CPU operating mode.  The two IA-32E sub-modes are distinguished by the
 * CS.L bit.
 */
enum vm_cpu_mode {
	CPU_MODE_REAL		= 0,
	CPU_MODE_PROTECTED	= 1,
	CPU_MODE_COMPATIBILITY	= 2,	/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT		= 3,	/* IA-32E mode (CS.L = 1) */
};
186 
/* Paging mode identifiers. */
enum vm_paging_mode {
	PAGING_MODE_FLAT	= 0,
	PAGING_MODE_32		= 1,
	PAGING_MODE_PAE		= 2,
	PAGING_MODE_64		= 3,
};
193 
194 struct vm_guest_paging {
195 	uint64_t	cr3;
196 	int		cpl;
197 	enum vm_cpu_mode cpu_mode;
198 	enum vm_paging_mode paging_mode;
199 };
200 
/*
 * Exit codes reported in struct vm_exit.  The numeric values are fixed by
 * position and are spelled out here for reference; VM_EXITCODE_MAX
 * terminates the list and is not itself an exit code.
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT	= 0,
	VM_EXITCODE_VMX		= 1,
	VM_EXITCODE_BOGUS	= 2,
	VM_EXITCODE_RDMSR	= 3,
	VM_EXITCODE_WRMSR	= 4,
	VM_EXITCODE_HLT		= 5,
	VM_EXITCODE_MTRAP	= 6,
	VM_EXITCODE_PAUSE	= 7,
	VM_EXITCODE_PAGING	= 8,
	VM_EXITCODE_INST_EMUL	= 9,
	VM_EXITCODE_RUN_STATE	= 10,
	VM_EXITCODE_MMIO_EMUL	= 11,
	VM_EXITCODE_DEPRECATED	= 12,	/* formerly RUNBLOCK */
	VM_EXITCODE_IOAPIC_EOI	= 13,
	VM_EXITCODE_SUSPENDED	= 14,
	VM_EXITCODE_MMIO	= 15,
	VM_EXITCODE_TASK_SWITCH	= 16,
	VM_EXITCODE_MONITOR	= 17,
	VM_EXITCODE_MWAIT	= 18,
	VM_EXITCODE_SVM		= 19,
	VM_EXITCODE_REQIDLE	= 20,
	VM_EXITCODE_DEBUG	= 21,
	VM_EXITCODE_VMINSN	= 22,
	VM_EXITCODE_BPT		= 23,
	VM_EXITCODE_HT		= 24,
	VM_EXITCODE_MAX		= 25
};
229 
/*
 * Flag bits stored in the 'flags' member of struct vm_inout.
 */
enum inout_flags {
	/* Direction: 'in' when set, else 'out' */
	INOUT_IN	= 0x01,

	/*
	 * The following flags are used only for in-kernel emulation logic and
	 * are not exposed to userspace.
	 */
	INOUT_STR	= 0x02,	/* ins/outs operation */
	INOUT_REP	= 0x04,	/* 'rep' prefix present on instruction */
};
240 
/*
 * Description of a port I/O (in/out) access.
 */
struct vm_inout {
	uint32_t	eax;
	uint16_t	port;
	uint8_t		bytes;		/* access width: 1 or 2 or 4 */
	uint8_t		flags;		/* see: inout_flags */

	/*
	 * Address size and segment matter only for INS/OUTS operations.
	 * The in-kernel emulation handles those aspects, so userspace does
	 * not need to be concerned with these two fields.
	 */
	uint8_t		addrsize;
	uint8_t		segment;
};
255 
/*
 * Description of an MMIO access: its width, direction, guest-physical
 * address and data.
 */
struct vm_mmio {
	uint8_t		bytes;		/* access width: 1/2/4/8 bytes */
	uint8_t		read;		/* read: 1, write: 0 */
	uint16_t	_pad[3];	/* explicit padding up to 'gpa' */
	uint64_t	gpa;
	uint64_t	data;
};
263 
/* What initiated a task switch (see: struct vm_task_switch). */
enum task_switch_reason {
	TSR_CALL	= 0,
	TSR_IRET	= 1,
	TSR_JMP		= 2,
	TSR_IDT_GATE	= 3,	/* task gate in IDT */
};
270 
271 struct vm_task_switch {
272 	uint16_t	tsssel;		/* new TSS selector */
273 	int		ext;		/* task switch due to external event */
274 	uint32_t	errcode;
275 	int		errcode_valid;	/* push 'errcode' on the new stack */
276 	enum task_switch_reason reason;
277 	struct vm_guest_paging paging;
278 };
279 
/*
 * vCPU run-state flags.  VRS_HALT is the value with no flag set; the
 * VRS_PEND_* flags occupy high bits, separate from VRS_INIT and VRS_RUN.
 */
enum vcpu_run_state {
	VRS_HALT		= 0,
	VRS_INIT		= (1 << 0),
	VRS_RUN			= (1 << 1),

	VRS_PEND_INIT		= (1 << 14),
	VRS_PEND_SIPI		= (1 << 15),
};

/*
 * VRS_MASK_VALID(v) yields 'v' with every bit that is not a defined
 * run-state flag cleared.  All four flags (VRS_INIT, VRS_RUN, VRS_PEND_INIT
 * and VRS_PEND_SIPI) must appear in the mask, otherwise states carrying the
 * missing flag are treated as invalid.
 *
 * VRS_IS_VALID(v) is non-zero when 'v' consists solely of defined flags
 * (which includes VRS_HALT, i.e. zero).
 */
#define	VRS_MASK_VALID(v)	\
	((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))
#define	VRS_IS_VALID(v)		((v) == VRS_MASK_VALID(v))
291 
292 struct vm_exit {
293 	enum vm_exitcode	exitcode;
294 	int			inst_length;	/* 0 means unknown */
295 	uint64_t		rip;
296 	union {
297 		struct vm_inout	inout;
298 		struct vm_mmio	mmio;
299 		struct {
300 			uint64_t	gpa;
301 			int		fault_type;
302 		} paging;
303 		/*
304 		 * Kernel-internal MMIO decoding and emulation.
305 		 * Userspace should not expect to see this, but rather a
306 		 * VM_EXITCODE_MMIO with the above 'mmio' context.
307 		 */
308 		struct {
309 			uint64_t	gpa;
310 			uint64_t	gla;
311 			uint64_t	cs_base;
312 			int		cs_d;		/* CS.D */
313 		} mmio_emul;
314 		struct {
315 			uint8_t		inst[15];
316 			uint8_t		num_valid;
317 		} inst_emul;
318 		/*
319 		 * VMX specific payload. Used when there is no "better"
320 		 * exitcode to represent the VM-exit.
321 		 */
322 		struct {
323 			int		status;		/* vmx inst status */
324 			/*
325 			 * 'exit_reason' and 'exit_qualification' are valid
326 			 * only if 'status' is zero.
327 			 */
328 			uint32_t	exit_reason;
329 			uint64_t	exit_qualification;
330 			/*
331 			 * 'inst_error' and 'inst_type' are valid
332 			 * only if 'status' is non-zero.
333 			 */
334 			int		inst_type;
335 			int		inst_error;
336 		} vmx;
337 		/*
338 		 * SVM specific payload.
339 		 */
340 		struct {
341 			uint64_t	exitcode;
342 			uint64_t	exitinfo1;
343 			uint64_t	exitinfo2;
344 		} svm;
345 		struct {
346 			int		inst_length;
347 		} bpt;
348 		struct {
349 			uint32_t	code;		/* ecx value */
350 			uint64_t	wval;
351 		} msr;
352 		struct {
353 			uint64_t	rflags;
354 		} hlt;
355 		struct {
356 			int		vector;
357 		} ioapic_eoi;
358 		struct {
359 			enum vm_suspend_how how;
360 		} suspended;
361 		struct vm_task_switch task_switch;
362 	} u;
363 };
364 
/* Commands accepted in the 'cmd' member of struct vm_entry. */
enum vm_entry_cmds {
	VEC_DEFAULT		= 0,
	VEC_DISCARD_INSTR	= 1,	/* discard inst emul state */
	VEC_FULFILL_MMIO	= 2,	/* entry includes result for mmio emul */
	VEC_FULFILL_INOUT	= 3,	/* entry includes result for inout emul */
};
371 
372 struct vm_entry {
373 	int cpuid;
374 	uint_t cmd;		/* see: vm_entry_cmds */
375 	void *exit_data;
376 	union {
377 		struct vm_inout inout;
378 		struct vm_mmio mmio;
379 	} u;
380 };
381 
/*
 * Declaration only; the definition lives outside this header.
 * Takes an opaque VM pointer and a vCPU id, and returns an int.
 */
extern int vm_restart_instruction(void *vm, int vcpuid);
383 
/* Flag bits accepted at VM creation. */
enum vm_create_flags {
	/*
	 * Guest memory segments are to be allocated out of existing
	 * reservoir capacity, instead of attempting to create transient
	 * allocations.
	 */
	VCF_RESERVOIR_MEM = 0x1,
};
391 
392 #endif	/* _VMM_H_ */
393