xref: /dragonfly/lib/libnvmm/libnvmm.3 (revision 634ba020)
1.\"	$NetBSD: libnvmm.3,v 1.19.4.2 2020/02/10 19:05:05 martin Exp $
2.\"
3.\" Copyright (c) 2018-2020 The NetBSD Foundation, Inc.
4.\" All rights reserved.
5.\"
6.\" This code is derived from software contributed to The NetBSD Foundation
7.\" by Maxime Villard.
8.\"
9.\" Redistribution and use in source and binary forms, with or without
10.\" modification, are permitted provided that the following conditions
11.\" are met:
12.\" 1. Redistributions of source code must retain the above copyright
13.\"    notice, this list of conditions and the following disclaimer.
14.\" 2. Redistributions in binary form must reproduce the above copyright
15.\"    notice, this list of conditions and the following disclaimer in the
16.\"    documentation and/or other materials provided with the distribution.
17.\"
18.\" THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
19.\" ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
20.\" TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
21.\" PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
22.\" BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23.\" CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24.\" SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25.\" INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26.\" CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27.\" ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28.\" POSSIBILITY OF SUCH DAMAGE.
29.\"
30.Dd February 9, 2020
31.Dt LIBNVMM 3
32.Os
33.Sh NAME
34.Nm libnvmm
35.Nd NetBSD Virtualization API
36.Sh LIBRARY
37.Lb libnvmm
38.Sh SYNOPSIS
39.In nvmm.h
40.Ft int
41.Fn nvmm_init "void"
42.Ft int
43.Fn nvmm_capability "struct nvmm_capability *cap"
44.Ft int
45.Fn nvmm_machine_create "struct nvmm_machine *mach"
46.Ft int
47.Fn nvmm_machine_destroy "struct nvmm_machine *mach"
48.Ft int
49.Fn nvmm_machine_configure "struct nvmm_machine *mach" "uint64_t op" \
50    "void *conf"
51.Ft int
52.Fn nvmm_vcpu_create "struct nvmm_machine *mach" "nvmm_cpuid_t cpuid" \
53    "struct nvmm_vcpu *vcpu"
54.Ft int
55.Fn nvmm_vcpu_destroy "struct nvmm_machine *mach" "struct nvmm_vcpu *vcpu"
56.Ft int
57.Fn nvmm_vcpu_configure "struct nvmm_machine *mach" "struct nvmm_vcpu *vcpu" \
58    "uint64_t op" "void *conf"
59.Ft int
60.Fn nvmm_vcpu_getstate "struct nvmm_machine *mach" "struct nvmm_vcpu *vcpu" \
61    "uint64_t flags"
62.Ft int
63.Fn nvmm_vcpu_setstate "struct nvmm_machine *mach" "struct nvmm_vcpu *vcpu" \
64    "uint64_t flags"
65.Ft int
66.Fn nvmm_vcpu_inject "struct nvmm_machine *mach" "struct nvmm_vcpu *vcpu"
67.Ft int
68.Fn nvmm_vcpu_run "struct nvmm_machine *mach" "struct nvmm_vcpu *vcpu"
69.Ft int
70.Fn nvmm_hva_map "struct nvmm_machine *mach" "uintptr_t hva" "size_t size"
71.Ft int
72.Fn nvmm_hva_unmap "struct nvmm_machine *mach" "uintptr_t hva" "size_t size"
73.Ft int
74.Fn nvmm_gpa_map "struct nvmm_machine *mach" "uintptr_t hva" "gpaddr_t gpa" \
75    "size_t size" "int prot"
76.Ft int
77.Fn nvmm_gpa_unmap "struct nvmm_machine *mach" "uintptr_t hva" "gpaddr_t gpa" \
78    "size_t size"
79.Ft int
80.Fn nvmm_gva_to_gpa "struct nvmm_machine *mach" "struct nvmm_vcpu *vcpu" \
81    "gvaddr_t gva" "gpaddr_t *gpa" "nvmm_prot_t *prot"
82.Ft int
83.Fn nvmm_gpa_to_hva "struct nvmm_machine *mach" "gpaddr_t gpa" \
84    "uintptr_t *hva" "nvmm_prot_t *prot"
85.Ft int
86.Fn nvmm_assist_io "struct nvmm_machine *mach" "struct nvmm_vcpu *vcpu"
87.Ft int
88.Fn nvmm_assist_mem "struct nvmm_machine *mach" "struct nvmm_vcpu *vcpu"
89.Sh DESCRIPTION
90.Nm
91provides a library for emulator software to handle hardware-accelerated virtual
92machines in
93.Nx .
94A virtual machine is described by an opaque structure,
95.Cd nvmm_machine .
96Emulator software should not attempt to modify this structure directly, and
97should use the API provided by
98.Nm
99to manage virtual machines.
100A virtual CPU is described by a public structure,
101.Cd nvmm_vcpu .
102.Pp
103.Fn nvmm_init
104initializes NVMM.
105See
106.Sx NVMM Initialization
107below for details.
108.Pp
109.Fn nvmm_capability
110gets the capabilities of NVMM.
111See
112.Sx NVMM Capability
113below for details.
114.Pp
115.Fn nvmm_machine_create
116creates a virtual machine in the kernel.
117The
118.Fa mach
119structure is initialized, and describes the machine.
120.Pp
121.Fn nvmm_machine_destroy
122destroys the virtual machine described in
123.Fa mach .
124.Pp
125.Fn nvmm_machine_configure
126configures, on the machine
127.Fa mach ,
128the parameter indicated in
129.Fa op .
130.Fa conf
131describes the value of the parameter.
132.Pp
133.Fn nvmm_vcpu_create
134creates a virtual CPU in the machine
135.Fa mach ,
136giving it the CPU id
137.Fa cpuid ,
138and initializes
139.Fa vcpu .
140.Pp
141.Fn nvmm_vcpu_destroy
142destroys the virtual CPU identified by
143.Fa vcpu
144in the machine
145.Fa mach .
146.Pp
147.Fn nvmm_vcpu_configure
148configures, on the VCPU
149.Fa vcpu
150of machine
151.Fa mach ,
152the parameter indicated in
153.Fa op .
154.Fa conf
155describes the value of the parameter.
156.Pp
157.Fn nvmm_vcpu_getstate
158gets the state of the virtual CPU identified by
159.Fa vcpu
160in the machine
161.Fa mach .
162.Fa flags
163is the bitmap of the components that are to be retrieved.
164The components are located in
165.Fa vcpu->state .
166See
167.Sx VCPU State Area
168below for details.
169.Pp
170.Fn nvmm_vcpu_setstate
171sets the state of the virtual CPU identified by
172.Fa vcpu
173in the machine
174.Fa mach .
175.Fa flags
176is the bitmap of the components that are to be set.
177The components are located in
178.Fa vcpu->state .
179See
180.Sx VCPU State Area
181below for details.
182.Pp
183.Fn nvmm_vcpu_inject
184injects into the CPU identified by
185.Fa vcpu
186of the machine
187.Fa mach
188an event described by
189.Fa vcpu->event .
190See
191.Sx Event Injection
192below for details.
193.Pp
194.Fn nvmm_vcpu_run
195runs the CPU identified by
196.Fa vcpu
197in the machine
198.Fa mach ,
199until a VM exit is triggered.
200The
201.Fa vcpu->exit
202structure is filled to indicate the exit reason, and the associated parameters
203if any.
204.Pp
205.Fn nvmm_hva_map
206maps at address
207.Fa hva
208a buffer of size
209.Fa size
210in the calling process' virtual address space.
211This buffer is allowed to be subsequently mapped in a virtual machine.
212.Pp
213.Fn nvmm_hva_unmap
214unmaps the buffer of size
215.Fa size
216at address
217.Fa hva
218from the calling process' virtual address space.
219.Pp
220.Fn nvmm_gpa_map
221maps into the guest physical memory beginning on address
222.Fa gpa
223the buffer of size
224.Fa size
225located at address
226.Fa hva
227of the calling process' virtual address space.
228The
229.Fa hva
230parameter must point to a buffer that was previously mapped with
231.Fn nvmm_hva_map .
232.Pp
233.Fn nvmm_gpa_unmap
234removes the guest physical memory area beginning on address
235.Fa gpa
236and of size
237.Fa size
238from the machine
239.Fa mach .
240.Pp
241.Fn nvmm_gva_to_gpa
242translates, on the CPU
243.Fa vcpu
244from the machine
245.Fa mach ,
246the guest virtual address given in
247.Fa gva
248into a guest physical address returned in
249.Fa gpa .
250The associated page permissions are returned in
251.Fa prot .
252.Fa gva
253must be page-aligned.
254.Pp
255.Fn nvmm_gpa_to_hva
256translates, on the machine
257.Fa mach ,
258the guest physical address indicated in
259.Fa gpa
260into a host virtual address returned in
261.Fa hva .
262The associated page permissions are returned in
263.Fa prot .
264.Fa gpa
265must be page-aligned.
266.Pp
267.Fn nvmm_assist_io
268emulates the I/O operation described in
269.Fa vcpu->exit
270on CPU
271.Fa vcpu
272from machine
273.Fa mach .
274See
275.Sx I/O Assist
276below for details.
277.Pp
278.Fn nvmm_assist_mem
279emulates the Mem operation described in
280.Fa vcpu->exit
281on CPU
282.Fa vcpu
283from machine
284.Fa mach .
285See
286.Sx Mem Assist
287below for details.
288.Ss NVMM Initialization
289NVMM initialization is performed by the
290.Fn nvmm_init
291function, which must be invoked by emulator software before any other NVMM
292function.
293.Pp
294.Fn nvmm_init
295opens the NVMM device, and expects to have the proper permissions to do so.
296In a default configuration, this implies being part of the "nvmm" group.
297If using a special configuration, emulator software should arrange to have the
298proper permissions before invoking
299.Fn nvmm_init ,
300and can drop them after the call has completed.
301.Pp
302It is to be noted that
303.Fn nvmm_init
304may perform non-re-entrant operations, and should be called only once.
305.Ss NVMM Capability
306The
307.Cd nvmm_capability
308structure helps emulator software identify the capabilities offered by NVMM on
309the host:
310.Bd -literal
311struct nvmm_capability {
312	uint64_t version;
313	uint64_t state_size;
314	uint64_t max_machines;
315	uint64_t max_vcpus;
316	uint64_t max_ram;
317	struct {
318		...
319	} arch;
320};
321.Ed
322.Pp
323For example, the
324.Cd max_machines
325field indicates the maximum number of virtual machines supported, while
326.Cd max_vcpus
327indicates the maximum number of VCPUs supported per virtual machine.
328.Ss Machine Ownership
329When a process creates a virtual machine via
330.Fn nvmm_machine_create ,
331it is considered the owner of this machine.
332No process other than the owner can operate a virtual machine.
333.Pp
334When an owner exits, all the virtual machines associated with it are destroyed,
335if they were not already destroyed by the owner itself via
336.Fn nvmm_machine_destroy .
337.Pp
338Virtual machines are not inherited across
339.Xr fork 2
340operations.
341.Ss Machine Configuration
342Emulator software can configure several parameters of a virtual machine by using
343.Fn nvmm_machine_configure .
344Currently, no parameters are implemented.
345.Ss VCPU Configuration
346Emulator software can configure several parameters of a VCPU by using
347.Fn nvmm_vcpu_configure ,
348which can take the following operations:
349.Bd -literal
350#define NVMM_VCPU_CONF_CALLBACKS	0
351	...
352.Ed
353.Pp
354The higher operation values depend on the architecture.
355.Ss Guest-Host Mappings
356Each virtual machine has an associated guest physical memory.
357Emulator software is allowed to modify this guest physical memory by mapping
358it into some parts of its virtual address space.
359.Pp
360Emulator software should perform the following steps to achieve that:
361.Pp
362.Bl -bullet -offset indent -compact
363.It
364Call
365.Fn nvmm_hva_map
366to create in the host's virtual address space an area of memory that can
367be shared with a guest.
368Typically, the
369.Fa hva
370parameter will be a pointer to an area that was previously mapped via
371.Fn mmap .
372.Fn nvmm_hva_map
373will replace the content of the area, and will make it read-write (but not
374executable).
375.It
376Make available in the guest an area of guest physical memory, by calling
377.Fn nvmm_gpa_map
378and passing in the
379.Fa hva
380parameter the value that was previously given to
381.Fn nvmm_hva_map .
382.Fn nvmm_gpa_map
383does not replace the content of any memory, it only creates a direct link
384from
385.Fa gpa
386into
387.Fa hva .
388.Fn nvmm_gpa_unmap
389removes this link without modifying
390.Fa hva .
391.El
392.Pp
393The guest will then be able to use the guest physical address passed in the
394.Fa gpa
395parameter of
396.Fn nvmm_gpa_map .
397Each change the guest makes in
398.Fa gpa
399will be reflected in the host's
400.Fa hva ,
401and vice versa.
402.Pp
403It is illegal for emulator software to use
404.Fn munmap
405on an area that was mapped via
406.Fn nvmm_hva_map .
407.Ss VCPU State Area
408A VCPU state area is a structure that entirely defines the content of the
409registers of a VCPU.
410Only one such structure exists, for x86:
411.Bd -literal
412struct nvmm_x64_state {
413	struct nvmm_x64_state_seg segs[NVMM_X64_NSEG];
414	uint64_t gprs[NVMM_X64_NGPR];
415	uint64_t crs[NVMM_X64_NCR];
416	uint64_t drs[NVMM_X64_NDR];
417	uint64_t msrs[NVMM_X64_NMSR];
418	struct nvmm_x64_state_intr intr;
419	struct fxsave fpu;
420};
421#define nvmm_vcpu_state nvmm_x64_state
422.Ed
423.Pp
424Refer to functional examples to see precisely how to use this structure.
425.Pp
426A VCPU state area is divided into sub-states.
427A
428.Fa flags
429parameter is used to set and get the VCPU state; it acts as a bitmap which
430indicates which sub-states to set or get.
431.Pp
432During VM exits, a partial VCPU state area is provided in
433.Va exitstate ,
434see
435.Sx Exit Reasons
436below for details.
437.Ss VCPU Programming Model
438A VCPU is described by a public structure,
439.Cd nvmm_vcpu :
440.Bd -literal
441struct nvmm_vcpu {
442	nvmm_cpuid_t cpuid;
443	struct nvmm_vcpu_state *state;
444	struct nvmm_vcpu_event *event;
445	struct nvmm_vcpu_exit *exit;
446};
447.Ed
448.Pp
449This structure is used both publicly by emulator software and internally by
450.Nm .
451Emulator software should not modify the pointers of this structure, because
452they are initialized to special values by
453.Nm .
454.Pp
455A call to
456.Fn nvmm_vcpu_getstate
457will fetch the desired parts of the VCPU state and put them in
458.Fa vcpu->state .
459A call to
460.Fn nvmm_vcpu_setstate
461will install in the VCPU the desired parts of
462.Fa vcpu->state .
463A call to
464.Fn nvmm_vcpu_inject
465will inject in the VCPU the event in
466.Fa vcpu->event .
467A call to
468.Fn nvmm_vcpu_run
469will fill
470.Fa vcpu->exit
471with the VCPU exit information.
472.Pp
473If emulator software uses several threads, a VCPU should be associated with
474only one thread, and only this thread should perform VCPU modifications.
475Emulator software should not modify the state of a VCPU with several
476different threads.
477.Ss Exit Reasons
478The
479.Cd nvmm_vcpu_exit
480structure is used to handle VM exits:
481.Bd -literal
482/* Generic. */
483#define NVMM_VCPU_EXIT_NONE		0x0000000000000000ULL
484#define NVMM_VCPU_EXIT_INVALID		0xFFFFFFFFFFFFFFFFULL
485/* x86: operations. */
486#define NVMM_VCPU_EXIT_MEMORY		0x0000000000000001ULL
487#define NVMM_VCPU_EXIT_IO		0x0000000000000002ULL
488/* x86: changes in VCPU state. */
489#define NVMM_VCPU_EXIT_SHUTDOWN		0x0000000000001000ULL
490#define NVMM_VCPU_EXIT_INT_READY	0x0000000000001001ULL
491#define NVMM_VCPU_EXIT_NMI_READY	0x0000000000001002ULL
492#define NVMM_VCPU_EXIT_HALTED		0x0000000000001003ULL
493#define NVMM_VCPU_EXIT_TPR_CHANGED	0x0000000000001004ULL
494/* x86: instructions. */
495#define NVMM_VCPU_EXIT_RDMSR		0x0000000000002000ULL
496#define NVMM_VCPU_EXIT_WRMSR		0x0000000000002001ULL
497#define NVMM_VCPU_EXIT_MONITOR		0x0000000000002002ULL
498#define NVMM_VCPU_EXIT_MWAIT		0x0000000000002003ULL
499#define NVMM_VCPU_EXIT_CPUID		0x0000000000002004ULL
500
501struct nvmm_vcpu_exit {
502	uint64_t reason;
503	union {
504		...
505	} u;
506	struct {
507		...
508	} exitstate;
509};
510.Ed
511.Pp
512The
513.Va reason
514field indicates the reason for the VM exit.
515Additional parameters describing the exit can be present in
516.Va u .
517.Va exitstate
518contains a partial, implementation-specific VCPU state, usable as a fast-path
519to retrieve certain state values.
520.Pp
521It is possible that a VM exit was caused by a reason internal to the host
522kernel, and that emulator software should not be concerned with.
523In this case, the exit reason is set to
524.Cd NVMM_VCPU_EXIT_NONE .
525This gives a chance for emulator software to halt the VM in its tracks.
526.Pp
527Refer to functional examples to see precisely how to handle VM exits.
528.Ss Event Injection
529It is possible to inject an event into a VCPU.
530An event can be a hardware interrupt, a software interrupt, or a software
531exception, defined by:
532.Bd -literal
533#define NVMM_VCPU_EVENT_EXCP	0
534#define NVMM_VCPU_EVENT_INTR	1
535
536struct nvmm_vcpu_event {
537	u_int type;
538	uint8_t vector;
539	union {
540		struct {
541			uint64_t error;
542		} excp;
543	} u;
544};
545.Ed
546.Pp
547This describes an event of type
548.Va type ,
549to be sent to vector number
550.Va vector ,
551with a possible additional
552.Va error
553code that is implementation-specific.
554.Pp
555It is possible that the VCPU is in a state where it cannot receive this
556event, if:
557.Pp
558.Bl -bullet -offset indent -compact
559.It
560the event is a hardware interrupt, and the VCPU runs with interrupts disabled,
561or
562.It
563the event is a non-maskable interrupt (NMI), and the VCPU is already in an
564in-NMI context.
565.El
566.Pp
567Emulator software can manage interrupt and NMI window-exiting via the
568.Va intr
569component of the VCPU state.
570When such window-exiting is enabled, NVMM will cause a VM exit with reason
571.Cd NVMM_VCPU_EXIT_INT_READY
572or
573.Cd NVMM_VCPU_EXIT_NMI_READY
574to indicate that the guest is now able to handle the corresponding class
575of interrupts.
576.Ss Assist Callbacks
577In order to assist emulation of certain operations,
578.Nm
579requires emulator software to register, via
580.Fn nvmm_vcpu_configure ,
581a set of callbacks described in the following structure:
582.Bd -literal
583struct nvmm_assist_callbacks {
584	void (*io)(struct nvmm_io *);
585	void (*mem)(struct nvmm_mem *);
586};
587.Ed
588.Pp
589These callbacks are used by
590.Nm
591each time
592.Fn nvmm_assist_io
593or
594.Fn nvmm_assist_mem
595is invoked.
596Emulator software that does not intend to use either of these assists can put
597.Dv NULL
598in the callbacks.
599.Ss I/O Assist
600When a VM exit occurs with reason
601.Cd NVMM_VCPU_EXIT_IO ,
602it is necessary for emulator software to emulate the associated I/O operation.
603.Nm
604provides an easy way for emulator software to perform that.
605.Pp
606.Fn nvmm_assist_io
607will call the registered
608.Fa io
609callback function and give it a
610.Cd nvmm_io
611structure as argument.
612This structure describes an I/O transaction:
613.Bd -literal
614struct nvmm_io {
615	struct nvmm_machine *mach;
616	struct nvmm_vcpu *vcpu;
617	uint16_t port;
618	bool in;
619	size_t size;
620	uint8_t *data;
621};
622.Ed
623.Pp
624The callback can emulate the operation using this descriptor, following two
625distinct cases:
626.Pp
627.Bl -bullet -offset indent -compact
628.It
629The operation is an input.
630In this case, the callback should fill
631.Va data
632with the desired value.
633.It
634The operation is an output.
635In this case, the callback should read
636.Va data
637to retrieve the desired value.
638.El
639.Pp
640In either case,
641.Va port
642will indicate the I/O port,
643.Va in
644will indicate if the operation is an input, and
645.Va size
646will indicate the size of the access.
647.Ss Mem Assist
648When a VM exit occurs with reason
649.Cd NVMM_VCPU_EXIT_MEMORY ,
650it is necessary for emulator software to emulate the associated memory
651operation.
652.Nm
653provides an easy way for emulator software to perform that, similar to the I/O
654Assist.
655.Pp
656.Fn nvmm_assist_mem
657will call the registered
658.Fa mem
659callback function and give it a
660.Cd nvmm_mem
661structure as argument.
662This structure describes a Mem transaction:
663.Bd -literal
664struct nvmm_mem {
665	struct nvmm_machine *mach;
666	struct nvmm_vcpu *vcpu;
667	gpaddr_t gpa;
668	bool write;
669	size_t size;
670	uint8_t *data;
671};
672.Ed
673.Pp
674The callback can emulate the operation using this descriptor, following two
675distinct cases:
676.Pp
677.Bl -bullet -offset indent -compact
678.It
679The operation is a read.
680In this case, the callback should fill
681.Va data
682with the desired value.
683.It
684The operation is a write.
685In this case, the callback should read
686.Va data
687to retrieve the desired value.
688.El
689.Pp
690In either case,
691.Va gpa
692will indicate the guest physical address,
693.Va write
694will indicate if the access is a write, and
695.Va size
696will indicate the size of the access.
697.Sh RETURN VALUES
698Upon successful completion, each of these functions returns zero.
699Otherwise, a value of \-1 is returned and the global
700variable
701.Va errno
702is set to indicate the error.
703.Sh FILES
704.Bl -tag -width XXXX -compact
705.It Lk https://www.netbsd.org/~maxv/nvmm/nvmm-demo.zip
706Functional example (demonstrator).
707Contains an emulator that uses the
708.Nm
709API, and a small kernel that exercises this emulator.
710.It Pa src/sys/dev/nvmm/
711Source code of the kernel NVMM driver.
712.It Pa src/lib/libnvmm/
713Source code of the
714.Nm
715library.
716.El
717.Sh ERRORS
718These functions will fail if:
719.Bl -tag -width [ENOBUFS]
720.It Bq Er EEXIST
721An attempt was made to create a machine or a VCPU that already exists.
722.It Bq Er EFAULT
723An attempt was made to emulate a memory-based operation in a guest, and the
724guest page tables did not have the permissions necessary for the operation
725to complete successfully.
726.It Bq Er EINVAL
727An inappropriate parameter was used.
728.It Bq Er ENOBUFS
729The maximum number of machines or VCPUs was reached.
730.It Bq Er ENOENT
731A query was made on a machine or a VCPU that does not exist.
732.It Bq Er EPERM
733An attempt was made to access a machine that does not belong to the process.
734.El
735.Sh SEE ALSO
736.Xr nvmm 4 ,
737.Xr nvmmctl 8
738.Sh AUTHORS
739NVMM was designed and implemented by
740.An Maxime Villard .
741