1 /*-
2  * SPDX-License-Identifier: BSD-3-Clause
3  *
4  * Copyright (c) 2021 The DragonFly Project.  All rights reserved.
5  *
6  * This code is derived from software contributed to The DragonFly Project
7  * by Aaron LI <aly@aaronly.me>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  *
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in
17  *    the documentation and/or other materials provided with the
18  *    distribution.
19  * 3. Neither the name of The DragonFly Project nor the names of its
20  *    contributors may be used to endorse or promote products derived
21  *    from this software without specific, prior written permission.
22  *
23  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
24  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
25  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
26  * FOR A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE
27  * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
28  * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
29  * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
30  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
31  * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
32  * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
33  * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34  * SUCH DAMAGE.
35  */
36 
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/mman.h>
41 
42 #include "nvmm.h"
43 #include "nvmm_os.h"
44 #include "nvmm_internal.h"
45 
/*
 * Malloc type for NVMM; in this file it is used by os_contigpa_zalloc()
 * and os_contigpa_free().
 */
MALLOC_DEFINE(M_NVMM, "nvmm", "NVMM data");

/*
 * NVMM expects VM functions to return 0 on success, whereas DragonFly's
 * VM functions return KERN_SUCCESS.  KERN_SUCCESS is currently defined
 * as 0, but assert that at compile time to guard against future changes.
 */
CTASSERT(KERN_SUCCESS == 0);
54 
55 os_vmspace_t *
56 os_vmspace_create(vaddr_t vmin, vaddr_t vmax)
57 {
58 	struct vmspace *vm;
59 
60 	vm = vmspace_alloc(vmin, vmax);
61 
62 	/*
63 	 * Set PMAP_MULTI on the backing pmap for the machine.  Only
64 	 * pmap changes to the backing pmap for the machine affect the
65 	 * guest.  Changes to the host's pmap do not affect the guest's
66 	 * backing pmap.
67 	 */
68 	pmap_maybethreaded(&vm->vm_pmap);
69 
70 	return vm;
71 }
72 
73 void
74 os_vmspace_destroy(os_vmspace_t *vm)
75 {
76 	pmap_del_all_cpus(vm);
77 	vmspace_rel(vm);
78 }
79 
80 int
81 os_vmspace_fault(os_vmspace_t *vm, vaddr_t va, vm_prot_t prot)
82 {
83 	int fault_flags;
84 
85 	if (prot & VM_PROT_WRITE)
86 		fault_flags = VM_FAULT_DIRTY;
87 	else
88 		fault_flags = VM_FAULT_NORMAL;
89 
90 	return vm_fault(&vm->vm_map, trunc_page(va), prot, fault_flags);
91 }
92 
93 os_vmobj_t *
94 os_vmobj_create(voff_t size)
95 {
96 	struct vm_object *object;
97 
98 	object = default_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0);
99 	vm_object_set_flag(object, OBJ_NOSPLIT);
100 
101 	return object;
102 }
103 
104 void
105 os_vmobj_ref(os_vmobj_t *vmobj)
106 {
107 	vm_object_hold(vmobj);
108 	vm_object_reference_locked(vmobj);
109 	vm_object_drop(vmobj);
110 }
111 
112 void
113 os_vmobj_rel(os_vmobj_t *vmobj)
114 {
115 	vm_object_deallocate(vmobj);
116 }
117 
118 int
119 os_vmobj_map(struct vm_map *map, vaddr_t *addr, vsize_t size, os_vmobj_t *vmobj,
120     voff_t offset, bool wired, bool fixed, bool shared, int prot, int maxprot)
121 {
122 	vm_prot_t vmprot, vmmaxprot;
123 	vm_inherit_t inherit;
124 	vm_offset_t start = *addr;
125 	int rv = KERN_SUCCESS;
126 	int count;
127 
128 	/* Convert prot. */
129 	vmprot = 0;
130 	if (prot & PROT_READ)
131 		vmprot |= VM_PROT_READ;
132 	if (prot & PROT_WRITE)
133 		vmprot |= VM_PROT_WRITE;
134 	if (prot & PROT_EXEC)
135 		vmprot |= VM_PROT_EXECUTE;
136 
137 	/* Convert maxprot. */
138 	vmmaxprot = 0;
139 	if (maxprot & PROT_READ)
140 		vmmaxprot |= VM_PROT_READ;
141 	if (maxprot & PROT_WRITE)
142 		vmmaxprot |= VM_PROT_WRITE;
143 	if (maxprot & PROT_EXEC)
144 		vmmaxprot |= VM_PROT_EXECUTE;
145 
146 	count = vm_map_entry_reserve(MAP_RESERVE_COUNT);
147 	vm_map_lock(map);
148 
149 	if (fixed) {
150 		/*
151 		 * Remove any existing entries in the range, so the new
152 		 * mapping can be created at the requested address.
153 		 */
154 		rv = vm_map_delete(map, start, start + size, &count);
155 	} else {
156 		if (vm_map_findspace(map, start, size, 1, 0, &start))
157 			rv = KERN_NO_SPACE;
158 	}
159 	if (rv != KERN_SUCCESS) {
160 		vm_map_unlock(map);
161 		vm_map_entry_release(count);
162 		return rv;
163 	}
164 
165 	/* Get a reference to the object. */
166 	os_vmobj_ref(vmobj);
167 
168 	/*
169 	 * Map the object. This consumes the reference on success only. On
170 	 * failure we must drop the reference manually.
171 	 */
172 	vm_object_hold(vmobj);
173 	rv = vm_map_insert(map, &count, vmobj, NULL, offset, NULL,
174 	    start, start + size, VM_MAPTYPE_NORMAL, VM_SUBSYS_NVMM,
175 	    vmprot, vmmaxprot, 0);
176 	vm_object_drop(vmobj);
177 	vm_map_unlock(map);
178 	vm_map_entry_release(count);
179 	if (rv != KERN_SUCCESS) {
180 		/* Drop the ref. */
181 		os_vmobj_rel(vmobj);
182 		return rv;
183 	}
184 
185 	inherit = shared ? VM_INHERIT_SHARE : VM_INHERIT_NONE;
186 	rv = vm_map_inherit(map, start, start + size, inherit);
187 	if (rv != KERN_SUCCESS) {
188 		os_vmobj_unmap(map, start, start + size, false);
189 		return rv;
190 	}
191 
192 	if (wired) {
193 		rv = vm_map_kernel_wiring(map, start, start + size, 0);
194 		if (rv != KERN_SUCCESS) {
195 			os_vmobj_unmap(map, start, start + size, false);
196 			return rv;
197 		}
198 	}
199 
200 	*addr = start;
201 	return 0;
202 }
203 
204 void
205 os_vmobj_unmap(struct vm_map *map, vaddr_t start, vaddr_t end, bool wired)
206 {
207 	if (wired) {
208 		/* Unwire kernel mappings before removing. */
209 		vm_map_kernel_wiring(map, start, end, KM_PAGEABLE);
210 	}
211 	vm_map_remove(map, start, end);
212 }
213 
214 void *
215 os_pagemem_zalloc(size_t size)
216 {
217 	void *ret;
218 
219 	/* NOTE: kmem_alloc() may return 0 ! */
220 	ret = (void *)kmem_alloc(kernel_map, roundup(size, PAGE_SIZE),
221 	    VM_SUBSYS_NVMM);
222 
223 	OS_ASSERT((uintptr_t)ret % PAGE_SIZE == 0);
224 
225 	return ret;
226 }
227 
228 void
229 os_pagemem_free(void *ptr, size_t size)
230 {
231 	kmem_free(kernel_map, (vaddr_t)ptr, roundup(size, PAGE_SIZE));
232 }
233 
234 paddr_t
235 os_pa_zalloc(void)
236 {
237 	struct vm_page *pg;
238 
239 	pg = vm_page_alloczwq(0,
240 	    VM_ALLOC_SYSTEM | VM_ALLOC_ZERO | VM_ALLOC_RETRY);
241 
242 	return VM_PAGE_TO_PHYS(pg);
243 }
244 
245 void
246 os_pa_free(paddr_t pa)
247 {
248 	vm_page_freezwq(PHYS_TO_VM_PAGE(pa));
249 }
250 
251 int
252 os_contigpa_zalloc(paddr_t *pa, vaddr_t *va, size_t npages)
253 {
254 	void *addr;
255 
256 	addr = contigmalloc(npages * PAGE_SIZE, M_NVMM, M_WAITOK | M_ZERO,
257 	    0, ~0UL, PAGE_SIZE, 0);
258 	if (addr == NULL)
259 		return ENOMEM;
260 
261 	*va = (vaddr_t)addr;
262 	*pa = vtophys(addr);
263 	return 0;
264 }
265 
266 void
267 os_contigpa_free(paddr_t pa __unused, vaddr_t va, size_t npages)
268 {
269 	contigfree((void *)va, npages * PAGE_SIZE, M_NVMM);
270 }
271 
272 /* -------------------------------------------------------------------------- */
273 
274 #include <sys/conf.h>
275 #include <sys/devfs.h>
276 #include <sys/device.h>
277 #include <sys/fcntl.h>
278 #include <sys/module.h>
279 
/* Device entry points; the definitions follow further down in this file. */
static d_open_t dfbsd_nvmm_open;
static d_ioctl_t dfbsd_nvmm_ioctl;
static d_priv_dtor_t dfbsd_nvmm_dtor;

/*
 * Device operations for the "nvmm" device.  Only the open and ioctl
 * handlers are set here.  dfbsd_nvmm_dtor is not part of this table; it is
 * registered per open file by dfbsd_nvmm_open() via devfs_set_cdevpriv().
 */
static struct dev_ops nvmm_ops = {
	{ "nvmm", 0, D_MPSAFE },
	.d_open = dfbsd_nvmm_open,
	.d_ioctl = dfbsd_nvmm_ioctl,
};
289 
290 static int
291 dfbsd_nvmm_open(struct dev_open_args *ap)
292 {
293 	int flags = ap->a_oflags;
294 	struct nvmm_owner *owner;
295 	struct file *fp;
296 	int error;
297 
298 	if (__predict_false(nvmm_impl == NULL))
299 		return ENXIO;
300 	if (!(flags & O_CLOEXEC))
301 		return EINVAL;
302 
303 	if (OFLAGS(flags) & O_WRONLY) {
304 		owner = &nvmm_root_owner;
305 	} else {
306 		owner = os_mem_alloc(sizeof(*owner));
307 		owner->pid = curthread->td_proc->p_pid;
308 	}
309 
310 	fp = ap->a_fpp ? *ap->a_fpp : NULL;
311 	error = devfs_set_cdevpriv(fp, owner, dfbsd_nvmm_dtor);
312 	if (error) {
313 		dfbsd_nvmm_dtor(owner);
314 		return error;
315 	}
316 
317 	return 0;
318 }
319 
320 static void
321 dfbsd_nvmm_dtor(void *arg)
322 {
323 	struct nvmm_owner *owner = arg;
324 
325 	OS_ASSERT(owner != NULL);
326 	nvmm_kill_machines(owner);
327 	if (owner != &nvmm_root_owner) {
328 		os_mem_free(owner, sizeof(*owner));
329 	}
330 }
331 
332 static int
333 dfbsd_nvmm_ioctl(struct dev_ioctl_args *ap)
334 {
335 	unsigned long cmd = ap->a_cmd;
336 	void *data = ap->a_data;
337 	struct file *fp = ap->a_fp;
338 	struct nvmm_owner *owner = NULL;
339 
340 	devfs_get_cdevpriv(fp, (void **)&owner);
341 	OS_ASSERT(owner != NULL);
342 
343 	return nvmm_ioctl(owner, cmd, data);
344 }
345 
346 /* -------------------------------------------------------------------------- */
347 
348 static int
349 nvmm_attach(void)
350 {
351 	int error;
352 
353 	error = nvmm_init();
354 	if (error)
355 		panic("%s: impossible", __func__);
356 	os_printf("nvmm: attached, using backend %s\n", nvmm_impl->name);
357 
358 	return 0;
359 }
360 
361 static int
362 nvmm_detach(void)
363 {
364 	if (os_atomic_load_uint(&nmachines) > 0)
365 		return EBUSY;
366 
367 	nvmm_fini();
368 	return 0;
369 }
370 
371 static int
372 nvmm_modevent(module_t mod __unused, int type, void *data __unused)
373 {
374 	static cdev_t dev = NULL;
375 	int error;
376 
377 	switch (type) {
378 	case MOD_LOAD:
379 		if (nvmm_ident() == NULL) {
380 			os_printf("nvmm: cpu not supported\n");
381 			return ENOTSUP;
382 		}
383 		error = nvmm_attach();
384 		if (error)
385 			return error;
386 
387 		dev = make_dev(&nvmm_ops, 0, UID_ROOT, GID_NVMM, 0640, "nvmm");
388 		if (dev == NULL) {
389 			os_printf("nvmm: unable to create device\n");
390 			error = ENOMEM;
391 		}
392 		break;
393 
394 	case MOD_UNLOAD:
395 		if (dev == NULL)
396 			return 0;
397 		error = nvmm_detach();
398 		if (error == 0)
399 			destroy_dev(dev);
400 		break;
401 
402 	case MOD_SHUTDOWN:
403 		error = 0;
404 		break;
405 
406 	default:
407 		error = EOPNOTSUPP;
408 		break;
409 	}
410 
411 	return error;
412 }
413 
/* Module description: module events are routed to nvmm_modevent(). */
static moduledata_t nvmm_moddata = {
	.name = "nvmm",
	.evhand = nvmm_modevent,
	.priv = NULL,
};

/* Register the module and declare its version as NVMM_KERN_VERSION. */
DECLARE_MODULE(nvmm, nvmm_moddata, SI_SUB_PSEUDO, SI_ORDER_ANY);
MODULE_VERSION(nvmm, NVMM_KERN_VERSION);
422