xref: /illumos-gate/usr/src/uts/intel/sys/vmm.h (revision 3ee59242)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 NetApp, Inc.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer.
12  * 2. Redistributions in binary form must reproduce the above copyright
13  *    notice, this list of conditions and the following disclaimer in the
14  *    documentation and/or other materials provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19  * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26  * SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 /*
31  * This file and its contents are supplied under the terms of the
32  * Common Development and Distribution License ("CDDL"), version 1.0.
33  * You may only use this file in accordance with the terms of version
34  * 1.0 of the CDDL.
35  *
36  * A full copy of the text of the CDDL should have accompanied this
37  * source.  A copy of the CDDL is also available via the Internet at
38  * http://www.illumos.org/license/CDDL.
39  *
40  * Copyright 2015 Pluribus Networks Inc.
41  * Copyright 2019 Joyent, Inc.
42  * Copyright 2021 Oxide Computer Company
43  */
44 
45 #ifndef _VMM_H_
46 #define	_VMM_H_
47 
/*
 * Ways in which a VM may be suspended.  VM_SUSPEND_LAST follows the final
 * real entry, so its value equals the number of entries preceding it.
 */
enum vm_suspend_how {
	VM_SUSPEND_NONE		= 0,
	VM_SUSPEND_RESET	= 1,
	VM_SUSPEND_POWEROFF	= 2,
	VM_SUSPEND_HALT		= 3,
	VM_SUSPEND_TRIPLEFAULT	= 4,
	VM_SUSPEND_LAST		= 5
};
56 
/*
 * Identifiers for architecturally defined registers.
 *
 * Values are assigned sequentially from zero in the order listed, so new
 * entries must only be added immediately before VM_REG_LAST.
 */
enum vm_reg_name {
	/* general-purpose registers (RSP is listed further below) */
	VM_REG_GUEST_RAX = 0,
	VM_REG_GUEST_RBX,
	VM_REG_GUEST_RCX,
	VM_REG_GUEST_RDX,
	VM_REG_GUEST_RSI,
	VM_REG_GUEST_RDI,
	VM_REG_GUEST_RBP,
	VM_REG_GUEST_R8,
	VM_REG_GUEST_R9,
	VM_REG_GUEST_R10,
	VM_REG_GUEST_R11,
	VM_REG_GUEST_R12,
	VM_REG_GUEST_R13,
	VM_REG_GUEST_R14,
	VM_REG_GUEST_R15,

	/* control and debug registers */
	VM_REG_GUEST_CR0,
	VM_REG_GUEST_CR3,
	VM_REG_GUEST_CR4,
	VM_REG_GUEST_DR7,

	/* stack pointer, instruction pointer and flags */
	VM_REG_GUEST_RSP,
	VM_REG_GUEST_RIP,
	VM_REG_GUEST_RFLAGS,

	/* segment registers */
	VM_REG_GUEST_ES,
	VM_REG_GUEST_CS,
	VM_REG_GUEST_SS,
	VM_REG_GUEST_DS,
	VM_REG_GUEST_FS,
	VM_REG_GUEST_GS,

	/* descriptor-table and task registers */
	VM_REG_GUEST_LDTR,
	VM_REG_GUEST_TR,
	VM_REG_GUEST_IDTR,
	VM_REG_GUEST_GDTR,

	VM_REG_GUEST_EFER,
	VM_REG_GUEST_CR2,

	VM_REG_GUEST_PDPTE0,
	VM_REG_GUEST_PDPTE1,
	VM_REG_GUEST_PDPTE2,
	VM_REG_GUEST_PDPTE3,

	VM_REG_GUEST_INTR_SHADOW,

	/* remaining debug registers */
	VM_REG_GUEST_DR0,
	VM_REG_GUEST_DR1,
	VM_REG_GUEST_DR2,
	VM_REG_GUEST_DR3,
	VM_REG_GUEST_DR6,

	VM_REG_GUEST_ENTRY_INST_LENGTH,

	/* follows the final register identifier */
	VM_REG_LAST
};
108 
/*
 * x2APIC states.  X2APIC_STATE_LAST follows the final real state, so its
 * value equals the number of states defined.
 */
enum x2apic_state {
	X2APIC_DISABLED		= 0,
	X2APIC_ENABLED		= 1,
	X2APIC_STATE_LAST	= 2
};
114 
/*
 * Fields of an interrupt-info value, listed from the lowest bit upward:
 *   bits  7:0	vector
 *   bits 10:8	event type (one of the VM_INTINFO_* type values below)
 *   bit  11	VM_INTINFO_DEL_ERRCODE
 *   bits 30:12	reserved
 *   bit  31	valid
 */
#define	VM_INTINFO_VECTOR(info)	((info) & 0xff)

#define	VM_INTINFO_TYPE		0x700
#define	VM_INTINFO_HWINTR	(0 << 8)
#define	VM_INTINFO_NMI		(2 << 8)
#define	VM_INTINFO_HWEXCEPTION	(3 << 8)
#define	VM_INTINFO_SWINTR	(4 << 8)

#define	VM_INTINFO_DEL_ERRCODE	0x800
#define	VM_INTINFO_RSVD		0x7ffff000
#define	VM_INTINFO_VALID	0x80000000
124 
/*
 * Unlike FreeBSD, illumos has no limitation derived from SPECNAMELEN.
 * An arbitrary limit has been chosen instead, which keeps the structure
 * definitions simple.  Memory segment names are subject to the same limit.
 */
#define	VM_MAX_NAMELEN		128
#define	VM_MAX_SEG_NAMELEN	128

#if defined(_KERNEL)
#define	VM_MAXCPU	32			/* maximum virtual cpus */
#endif	/* _KERNEL */
137 
/*
 * Identifiers for optional vmm capabilities.
 * VM_CAP_MAX follows the final capability, so its value equals the number
 * of capabilities defined.
 */
enum vm_cap_type {
	VM_CAP_HALT_EXIT	= 0,
	VM_CAP_MTRAP_EXIT	= 1,
	VM_CAP_PAUSE_EXIT	= 2,
	VM_CAP_ENABLE_INVPCID	= 3,
	VM_CAP_BPT_EXIT		= 4,
	VM_CAP_MAX		= 5
};
149 
/*
 * VMX capability flags.  Each non-zero entry occupies its own bit.
 */
enum vmx_caps {
	VMX_CAP_NONE		= 0,
	VMX_CAP_TPR_SHADOW	= 0x01UL,
	VMX_CAP_APICV		= 0x02UL,
	VMX_CAP_APICV_X2APIC	= 0x04UL,
	VMX_CAP_APICV_PIR	= 0x08UL,
};
157 
/* Interrupt trigger modes. */
enum vm_intr_trigger {
	EDGE_TRIGGER	= 0,
	LEVEL_TRIGGER	= 1
};
162 
/*
 * Segment descriptor state.
 *
 * The format of the 'access' field is the one specified in Table 21-2 of
 * the Intel Architecture Manual vol 3b.
 *
 * XXX Every part of 'access' is architecturally defined, with the exception
 * of bit 16 - Segment Unusable.
 */
struct seg_desc {
	uint64_t	base;
	uint32_t	limit;
	uint32_t	access;
};

/* Multi-bit fields of 'access': type in bits 4:0, DPL in bits 6:5. */
#define	SEG_DESC_TYPE(access)		((access) & 0x1f)
#define	SEG_DESC_DPL(access)		(((access) & 0x0060) >> 5)

/* Single-bit flags of 'access'; each evaluates to exactly 0 or 1. */
#define	SEG_DESC_PRESENT(access)	(((access) & 0x0080) != 0)
#define	SEG_DESC_DEF32(access)		(((access) & 0x4000) != 0)
#define	SEG_DESC_GRANULARITY(access)	(((access) & 0x8000) != 0)
#define	SEG_DESC_UNUSABLE(access)	(((access) & 0x10000) != 0)
181 
/* CPU operating modes. */
enum vm_cpu_mode {
	CPU_MODE_REAL		= 0,
	CPU_MODE_PROTECTED	= 1,
	CPU_MODE_COMPATIBILITY	= 2,	/* IA-32E mode (CS.L = 0) */
	CPU_MODE_64BIT		= 3,	/* IA-32E mode (CS.L = 1) */
};
188 
/* Paging modes. */
enum vm_paging_mode {
	PAGING_MODE_FLAT	= 0,
	PAGING_MODE_32		= 1,
	PAGING_MODE_PAE		= 2,
	PAGING_MODE_64		= 3,
};
195 
196 struct vm_guest_paging {
197 	uint64_t	cr3;
198 	int		cpl;
199 	enum vm_cpu_mode cpu_mode;
200 	enum vm_paging_mode paging_mode;
201 };
202 
/*
 * Codes identifying the reason for a VM exit.
 *
 * Values are assigned sequentially from zero in the order listed; the
 * slot once used by RUNBLOCK is kept as VM_EXITCODE_DEPRECATED so that later
 * codes retain their values.  VM_EXITCODE_MAX follows the final code.
 */
enum vm_exitcode {
	VM_EXITCODE_INOUT = 0,
	VM_EXITCODE_VMX,
	VM_EXITCODE_BOGUS,
	VM_EXITCODE_RDMSR,
	VM_EXITCODE_WRMSR,

	VM_EXITCODE_HLT,
	VM_EXITCODE_MTRAP,
	VM_EXITCODE_PAUSE,
	VM_EXITCODE_PAGING,
	VM_EXITCODE_INST_EMUL,

	VM_EXITCODE_RUN_STATE,
	VM_EXITCODE_MMIO_EMUL,
	VM_EXITCODE_DEPRECATED,		/* formerly RUNBLOCK */
	VM_EXITCODE_IOAPIC_EOI,
	VM_EXITCODE_SUSPENDED,

	VM_EXITCODE_MMIO,
	VM_EXITCODE_TASK_SWITCH,
	VM_EXITCODE_MONITOR,
	VM_EXITCODE_MWAIT,
	VM_EXITCODE_SVM,

	VM_EXITCODE_REQIDLE,
	VM_EXITCODE_DEBUG,
	VM_EXITCODE_VMINSN,
	VM_EXITCODE_BPT,
	VM_EXITCODE_HT,

	VM_EXITCODE_MAX
};
231 
/*
 * Bit flags stored in the 'flags' member of struct vm_inout.
 */
enum inout_flags {
	/* direction: 'in' when set, else 'out' */
	INOUT_IN	= 0x1U,

	/*
	 * The remaining flags exist solely for the in-kernel emulation logic;
	 * they are not exposed to userspace.
	 */

	/* ins/outs operation */
	INOUT_STR	= 0x2U,
	/* 'rep' prefix present on instruction */
	INOUT_REP	= 0x4U,
};
242 
/*
 * Description of a port I/O (in/out) operation.
 */
struct vm_inout {
	uint32_t	eax;
	uint16_t	port;
	uint8_t		bytes;		/* 1 or 2 or 4 */
	uint8_t		flags;		/* see: inout_flags */

	/*
	 * Address size and segment only matter for INS/OUTS operations.
	 * The in-kernel emulation deals with those aspects, so userspace has
	 * no need to be concerned with these two fields.
	 */
	uint8_t		addrsize;
	uint8_t		segment;
};
257 
/*
 * Description of a memory-mapped I/O access.  The explicit padding places
 * 'gpa' at an 8-byte offset.
 */
struct vm_mmio {
	uint8_t		bytes;		/* access size: 1/2/4/8 bytes */
	uint8_t		read;		/* 1 for a read, 0 for a write */
	uint16_t	_pad[3];
	uint64_t	gpa;
	uint64_t	data;
};
265 
/* What caused a task switch. */
enum task_switch_reason {
	TSR_CALL	= 0,
	TSR_IRET	= 1,
	TSR_JMP		= 2,
	TSR_IDT_GATE	= 3,	/* task gate in IDT */
};
272 
273 struct vm_task_switch {
274 	uint16_t	tsssel;		/* new TSS selector */
275 	int		ext;		/* task switch due to external event */
276 	uint32_t	errcode;
277 	int		errcode_valid;	/* push 'errcode' on the new stack */
278 	enum task_switch_reason reason;
279 	struct vm_guest_paging paging;
280 };
281 
/*
 * vCPU run-state, expressed as a set of bit flags.  VRS_HALT is the state in
 * which no flag is set.  The VRS_PEND_* flags sit in higher bits, apart from
 * the low-order VRS_INIT and VRS_RUN flags.
 */
enum vcpu_run_state {
	VRS_HALT		= 0,
	VRS_INIT		= (1 << 0),
	VRS_RUN			= (1 << 1),

	VRS_PEND_INIT		= (1 << 14),
	VRS_PEND_SIPI		= (1 << 15),
};

/*
 * Reduce 'v' to only those bits which correspond to a flag defined in
 * enum vcpu_run_state.  Every defined flag must appear in this mask,
 * otherwise VRS_IS_VALID() would reject states which carry that flag.
 */
#define	VRS_MASK_VALID(v)	\
	((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))

/*
 * Evaluates to non-zero when 'v' consists solely of flags defined in
 * enum vcpu_run_state.  Note that 'v' is evaluated twice.
 */
#define	VRS_IS_VALID(v)		((v) == VRS_MASK_VALID(v))
293 
294 struct vm_exit {
295 	enum vm_exitcode	exitcode;
296 	int			inst_length;	/* 0 means unknown */
297 	uint64_t		rip;
298 	union {
299 		struct vm_inout	inout;
300 		struct vm_mmio	mmio;
301 		struct {
302 			uint64_t	gpa;
303 			int		fault_type;
304 		} paging;
305 		/*
306 		 * Kernel-internal MMIO decoding and emulation.
307 		 * Userspace should not expect to see this, but rather a
308 		 * VM_EXITCODE_MMIO with the above 'mmio' context.
309 		 */
310 		struct {
311 			uint64_t	gpa;
312 			uint64_t	gla;
313 			uint64_t	cs_base;
314 			int		cs_d;		/* CS.D */
315 		} mmio_emul;
316 		struct {
317 			uint8_t		inst[15];
318 			uint8_t		num_valid;
319 		} inst_emul;
320 		/*
321 		 * VMX specific payload. Used when there is no "better"
322 		 * exitcode to represent the VM-exit.
323 		 */
324 		struct {
325 			int		status;		/* vmx inst status */
326 			/*
327 			 * 'exit_reason' and 'exit_qualification' are valid
328 			 * only if 'status' is zero.
329 			 */
330 			uint32_t	exit_reason;
331 			uint64_t	exit_qualification;
332 			/*
333 			 * 'inst_error' and 'inst_type' are valid
334 			 * only if 'status' is non-zero.
335 			 */
336 			int		inst_type;
337 			int		inst_error;
338 		} vmx;
339 		/*
340 		 * SVM specific payload.
341 		 */
342 		struct {
343 			uint64_t	exitcode;
344 			uint64_t	exitinfo1;
345 			uint64_t	exitinfo2;
346 		} svm;
347 		struct {
348 			int		inst_length;
349 		} bpt;
350 		struct {
351 			uint32_t	code;		/* ecx value */
352 			uint64_t	wval;
353 		} msr;
354 		struct {
355 			uint64_t	rflags;
356 		} hlt;
357 		struct {
358 			int		vector;
359 		} ioapic_eoi;
360 		struct {
361 			enum vm_suspend_how how;
362 		} suspended;
363 		struct vm_task_switch task_switch;
364 	} u;
365 };
366 
/*
 * Commands which may be placed in the 'cmd' member of struct vm_entry.
 */
enum vm_entry_cmds {
	VEC_DEFAULT		= 0,
	/* discard inst emul state */
	VEC_DISCARD_INSTR	= 1,
	/* entry includes result for mmio emul */
	VEC_FULFILL_MMIO	= 2,
	/* entry includes result for inout emul */
	VEC_FULFILL_INOUT	= 3,
};
373 
374 struct vm_entry {
375 	int cpuid;
376 	uint_t cmd;		/* see: vm_entry_cmds */
377 	void *exit_data;
378 	union {
379 		struct vm_inout inout;
380 		struct vm_mmio mmio;
381 	} u;
382 };
383 
/*
 * Declaration only; the definition lives outside this header.  Takes an
 * opaque VM handle and a vCPU id, and returns an int.
 * NOTE(review): presumably arranges for the vCPU's current instruction to be
 * executed again, with 0 indicating success - confirm against the definition.
 */
extern int vm_restart_instruction(void *vm, int vcpuid);
385 
/* Bit flags which modify VM creation. */
enum vm_create_flags {
	/*
	 * Guest memory segments are allocated out of existing reservoir
	 * capacity, instead of attempting to create transient allocations.
	 */
	VCF_RESERVOIR_MEM = 0x1,
};
393 
394 #endif	/* _VMM_H_ */
395