1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2021 The DragonFly Project. All rights reserved. 5 * 6 * This code is derived from software contributed to The DragonFly Project 7 * by Aaron LI <aly@aaronly.me> 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in 17 * the documentation and/or other materials provided with the 18 * distribution. 19 * 3. Neither the name of The DragonFly Project nor the names of its 20 * contributors may be used to endorse or promote products derived 21 * from this software without specific, prior written permission. 22 * 23 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS 26 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE 27 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, 28 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING, 29 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 30 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED 31 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 32 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT 33 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include <sys/param.h> 38 #include <sys/systm.h> 39 #include <sys/kernel.h> 40 #include <sys/mman.h> 41 42 #include "nvmm.h" 43 #include "nvmm_os.h" 44 #include "nvmm_internal.h" 45 46 MALLOC_DEFINE(M_NVMM, "nvmm", "NVMM data"); 47 48 /* 49 * NVMM expects VM functions to return 0 on success, but DragonFly's VM 50 * functions return KERN_SUCCESS. Although it's also defined to be 0, 51 * assert it to be future-proofing. 52 */ 53 CTASSERT(KERN_SUCCESS == 0); 54 55 os_vmspace_t * 56 os_vmspace_create(vaddr_t vmin, vaddr_t vmax) 57 { 58 struct vmspace *vm; 59 60 vm = vmspace_alloc(vmin, vmax); 61 62 /* 63 * Set PMAP_MULTI on the backing pmap for the machine. Only 64 * pmap changes to the backing pmap for the machine affect the 65 * guest. Changes to the host's pmap do not affect the guest's 66 * backing pmap. 67 */ 68 pmap_maybethreaded(&vm->vm_pmap); 69 70 return vm; 71 } 72 73 void 74 os_vmspace_destroy(os_vmspace_t *vm) 75 { 76 pmap_del_all_cpus(vm); 77 vmspace_rel(vm); 78 } 79 80 int 81 os_vmspace_fault(os_vmspace_t *vm, vaddr_t va, vm_prot_t prot) 82 { 83 int fault_flags; 84 85 if (prot & VM_PROT_WRITE) 86 fault_flags = VM_FAULT_DIRTY; 87 else 88 fault_flags = VM_FAULT_NORMAL; 89 90 return vm_fault(&vm->vm_map, trunc_page(va), prot, fault_flags); 91 } 92 93 os_vmobj_t * 94 os_vmobj_create(voff_t size) 95 { 96 struct vm_object *object; 97 98 object = default_pager_alloc(NULL, size, VM_PROT_DEFAULT, 0); 99 vm_object_set_flag(object, OBJ_NOSPLIT); 100 101 return object; 102 } 103 104 void 105 os_vmobj_ref(os_vmobj_t *vmobj) 106 { 107 vm_object_hold(vmobj); 108 vm_object_reference_locked(vmobj); 109 vm_object_drop(vmobj); 110 } 111 112 void 113 os_vmobj_rel(os_vmobj_t *vmobj) 114 { 115 vm_object_deallocate(vmobj); 116 } 117 118 int 119 os_vmobj_map(struct vm_map *map, vaddr_t *addr, vsize_t size, os_vmobj_t *vmobj, 120 voff_t offset, bool wired, bool fixed, bool shared, int prot, int maxprot) 121 { 122 vm_prot_t vmprot, vmmaxprot; 123 vm_inherit_t inherit; 124 vm_offset_t start = *addr; 125 int rv = KERN_SUCCESS; 126 int count; 127 128 /* Convert prot. */ 129 vmprot = 0; 130 if (prot & PROT_READ) 131 vmprot |= VM_PROT_READ; 132 if (prot & PROT_WRITE) 133 vmprot |= VM_PROT_WRITE; 134 if (prot & PROT_EXEC) 135 vmprot |= VM_PROT_EXECUTE; 136 137 /* Convert maxprot. */ 138 vmmaxprot = 0; 139 if (maxprot & PROT_READ) 140 vmmaxprot |= VM_PROT_READ; 141 if (maxprot & PROT_WRITE) 142 vmmaxprot |= VM_PROT_WRITE; 143 if (maxprot & PROT_EXEC) 144 vmmaxprot |= VM_PROT_EXECUTE; 145 146 count = vm_map_entry_reserve(MAP_RESERVE_COUNT); 147 vm_map_lock(map); 148 149 if (fixed) { 150 /* 151 * Remove any existing entries in the range, so the new 152 * mapping can be created at the requested address. 153 */ 154 rv = vm_map_delete(map, start, start + size, &count); 155 } else { 156 if (vm_map_findspace(map, start, size, 1, 0, &start)) 157 rv = KERN_NO_SPACE; 158 } 159 if (rv != KERN_SUCCESS) { 160 vm_map_unlock(map); 161 vm_map_entry_release(count); 162 return rv; 163 } 164 165 /* Get a reference to the object. */ 166 os_vmobj_ref(vmobj); 167 168 /* 169 * Map the object. This consumes the reference on success only. On 170 * failure we must drop the reference manually. 171 */ 172 vm_object_hold(vmobj); 173 rv = vm_map_insert(map, &count, vmobj, NULL, offset, NULL, 174 start, start + size, VM_MAPTYPE_NORMAL, VM_SUBSYS_NVMM, 175 vmprot, vmmaxprot, 0); 176 vm_object_drop(vmobj); 177 vm_map_unlock(map); 178 vm_map_entry_release(count); 179 if (rv != KERN_SUCCESS) { 180 /* Drop the ref. */ 181 os_vmobj_rel(vmobj); 182 return rv; 183 } 184 185 inherit = shared ? VM_INHERIT_SHARE : VM_INHERIT_NONE; 186 rv = vm_map_inherit(map, start, start + size, inherit); 187 if (rv != KERN_SUCCESS) { 188 os_vmobj_unmap(map, start, start + size, false); 189 return rv; 190 } 191 192 if (wired) { 193 rv = vm_map_kernel_wiring(map, start, start + size, 0); 194 if (rv != KERN_SUCCESS) { 195 os_vmobj_unmap(map, start, start + size, false); 196 return rv; 197 } 198 } 199 200 *addr = start; 201 return 0; 202 } 203 204 void 205 os_vmobj_unmap(struct vm_map *map, vaddr_t start, vaddr_t end, bool wired) 206 { 207 if (wired) { 208 /* Unwire kernel mappings before removing. */ 209 vm_map_kernel_wiring(map, start, end, KM_PAGEABLE); 210 } 211 vm_map_remove(map, start, end); 212 } 213 214 void * 215 os_pagemem_zalloc(size_t size) 216 { 217 void *ret; 218 219 /* NOTE: kmem_alloc() may return 0 ! */ 220 ret = (void *)kmem_alloc(kernel_map, roundup(size, PAGE_SIZE), 221 VM_SUBSYS_NVMM); 222 223 OS_ASSERT((uintptr_t)ret % PAGE_SIZE == 0); 224 225 return ret; 226 } 227 228 void 229 os_pagemem_free(void *ptr, size_t size) 230 { 231 kmem_free(kernel_map, (vaddr_t)ptr, roundup(size, PAGE_SIZE)); 232 } 233 234 paddr_t 235 os_pa_zalloc(void) 236 { 237 struct vm_page *pg; 238 239 pg = vm_page_alloczwq(0, 240 VM_ALLOC_SYSTEM | VM_ALLOC_ZERO | VM_ALLOC_RETRY); 241 242 return VM_PAGE_TO_PHYS(pg); 243 } 244 245 void 246 os_pa_free(paddr_t pa) 247 { 248 vm_page_freezwq(PHYS_TO_VM_PAGE(pa)); 249 } 250 251 int 252 os_contigpa_zalloc(paddr_t *pa, vaddr_t *va, size_t npages) 253 { 254 void *addr; 255 256 addr = contigmalloc(npages * PAGE_SIZE, M_NVMM, M_WAITOK | M_ZERO, 257 0, ~0UL, PAGE_SIZE, 0); 258 if (addr == NULL) 259 return ENOMEM; 260 261 *va = (vaddr_t)addr; 262 *pa = vtophys(addr); 263 return 0; 264 } 265 266 void 267 os_contigpa_free(paddr_t pa __unused, vaddr_t va, size_t npages) 268 { 269 contigfree((void *)va, npages * PAGE_SIZE, M_NVMM); 270 } 271 272 /* -------------------------------------------------------------------------- */ 273 274 #include <sys/conf.h> 275 #include <sys/devfs.h> 276 #include <sys/device.h> 277 #include <sys/fcntl.h> 278 #include <sys/module.h> 279 280 static d_open_t dfbsd_nvmm_open; 281 static d_ioctl_t dfbsd_nvmm_ioctl; 282 static d_priv_dtor_t dfbsd_nvmm_dtor; 283 284 static struct dev_ops nvmm_ops = { 285 { "nvmm", 0, D_MPSAFE }, 286 .d_open = dfbsd_nvmm_open, 287 .d_ioctl = dfbsd_nvmm_ioctl, 288 }; 289 290 static int 291 dfbsd_nvmm_open(struct dev_open_args *ap) 292 { 293 int flags = ap->a_oflags; 294 struct nvmm_owner *owner; 295 struct file *fp; 296 int error; 297 298 if (__predict_false(nvmm_impl == NULL)) 299 return ENXIO; 300 if (!(flags & O_CLOEXEC)) 301 return EINVAL; 302 303 if (OFLAGS(flags) & O_WRONLY) { 304 owner = &nvmm_root_owner; 305 } else { 306 owner = os_mem_alloc(sizeof(*owner)); 307 owner->pid = curthread->td_proc->p_pid; 308 } 309 310 fp = ap->a_fpp ? *ap->a_fpp : NULL; 311 error = devfs_set_cdevpriv(fp, owner, dfbsd_nvmm_dtor); 312 if (error) { 313 dfbsd_nvmm_dtor(owner); 314 return error; 315 } 316 317 return 0; 318 } 319 320 static void 321 dfbsd_nvmm_dtor(void *arg) 322 { 323 struct nvmm_owner *owner = arg; 324 325 OS_ASSERT(owner != NULL); 326 nvmm_kill_machines(owner); 327 if (owner != &nvmm_root_owner) { 328 os_mem_free(owner, sizeof(*owner)); 329 } 330 } 331 332 static int 333 dfbsd_nvmm_ioctl(struct dev_ioctl_args *ap) 334 { 335 unsigned long cmd = ap->a_cmd; 336 void *data = ap->a_data; 337 struct file *fp = ap->a_fp; 338 struct nvmm_owner *owner = NULL; 339 340 devfs_get_cdevpriv(fp, (void **)&owner); 341 OS_ASSERT(owner != NULL); 342 343 return nvmm_ioctl(owner, cmd, data); 344 } 345 346 /* -------------------------------------------------------------------------- */ 347 348 static int 349 nvmm_attach(void) 350 { 351 int error; 352 353 error = nvmm_init(); 354 if (error) 355 panic("%s: impossible", __func__); 356 os_printf("nvmm: attached, using backend %s\n", nvmm_impl->name); 357 358 return 0; 359 } 360 361 static int 362 nvmm_detach(void) 363 { 364 if (os_atomic_load_uint(&nmachines) > 0) 365 return EBUSY; 366 367 nvmm_fini(); 368 return 0; 369 } 370 371 static int 372 nvmm_modevent(module_t mod __unused, int type, void *data __unused) 373 { 374 static cdev_t dev = NULL; 375 int error; 376 377 switch (type) { 378 case MOD_LOAD: 379 if (nvmm_ident() == NULL) { 380 os_printf("nvmm: cpu not supported\n"); 381 return ENOTSUP; 382 } 383 error = nvmm_attach(); 384 if (error) 385 return error; 386 387 dev = make_dev(&nvmm_ops, 0, UID_ROOT, GID_NVMM, 0640, "nvmm"); 388 if (dev == NULL) { 389 os_printf("nvmm: unable to create device\n"); 390 error = ENOMEM; 391 } 392 break; 393 394 case MOD_UNLOAD: 395 if (dev == NULL) 396 return 0; 397 error = nvmm_detach(); 398 if (error == 0) 399 destroy_dev(dev); 400 break; 401 402 case MOD_SHUTDOWN: 403 error = 0; 404 break; 405 406 default: 407 error = EOPNOTSUPP; 408 break; 409 } 410 411 return error; 412 } 413 414 static moduledata_t nvmm_moddata = { 415 .name = "nvmm", 416 .evhand = nvmm_modevent, 417 .priv = NULL, 418 }; 419 420 DECLARE_MODULE(nvmm, nvmm_moddata, SI_SUB_PSEUDO, SI_ORDER_ANY); 421 MODULE_VERSION(nvmm, NVMM_KERN_VERSION); 422