1 /* 2 * This file and its contents are supplied under the terms of the 3 * Common Development and Distribution License ("CDDL"), version 1.0. 4 * You may only use this file in accordance with the terms of version 5 * 1.0 of the CDDL. 6 * 7 * A full copy of the text of the CDDL should have accompanied this 8 * source. A copy of the CDDL is also available via the Internet at 9 * http://www.illumos.org/license/CDDL. 10 */ 11 12 /* 13 * Copyright 2016 Joyent, Inc. 14 */ 15 16 /* 17 * VM - Kernel-to-user mapping segment 18 * 19 * The umap segment driver was primarily designed to facilitate the comm page: 20 * a portion of kernel memory shared with userspace so that certain (namely 21 * clock-related) actions could operate without making an expensive trip into 22 * the kernel. 23 * 24 * Since the initial requirements for the comm page are slim, advanced features 25 * of the segment driver such as per-page protection have been left 26 * unimplemented at this time. 27 */ 28 29 30 #include <sys/types.h> 31 #include <sys/param.h> 32 #include <sys/errno.h> 33 #include <sys/cred.h> 34 #include <sys/kmem.h> 35 #include <sys/lgrp.h> 36 #include <sys/mman.h> 37 38 #include <vm/hat.h> 39 #include <vm/as.h> 40 #include <vm/seg.h> 41 #include <vm/seg_kmem.h> 42 #include <vm/seg_umap.h> 43 44 45 static boolean_t segumap_verify_safe(caddr_t, size_t); 46 static int segumap_dup(struct seg *, struct seg *); 47 static int segumap_unmap(struct seg *, caddr_t, size_t); 48 static void segumap_free(struct seg *); 49 static faultcode_t segumap_fault(struct hat *, struct seg *, caddr_t, size_t, 50 enum fault_type, enum seg_rw); 51 static faultcode_t segumap_faulta(struct seg *, caddr_t); 52 static int segumap_setprot(struct seg *, caddr_t, size_t, uint_t); 53 static int segumap_checkprot(struct seg *, caddr_t, size_t, uint_t); 54 static int segumap_sync(struct seg *, caddr_t, size_t, int, uint_t); 55 static size_t segumap_incore(struct seg *, caddr_t, size_t, char *); 56 static int segumap_lockop(struct seg *, caddr_t, size_t, int, int, ulong_t *, 57 size_t); 58 static int segumap_getprot(struct seg *, caddr_t, size_t, uint_t *); 59 static u_offset_t segumap_getoffset(struct seg *, caddr_t); 60 static int segumap_gettype(struct seg *, caddr_t); 61 static int segumap_getvp(struct seg *, caddr_t, struct vnode **); 62 static int segumap_advise(struct seg *, caddr_t, size_t, uint_t); 63 static void segumap_dump(struct seg *); 64 static int segumap_pagelock(struct seg *, caddr_t, size_t, struct page ***, 65 enum lock_type, enum seg_rw); 66 static int segumap_setpagesize(struct seg *, caddr_t, size_t, uint_t); 67 static int segumap_getmemid(struct seg *, caddr_t, memid_t *); 68 static int segumap_capable(struct seg *, segcapability_t); 69 70 static struct seg_ops segumap_ops = { 71 segumap_dup, 72 segumap_unmap, 73 segumap_free, 74 segumap_fault, 75 segumap_faulta, 76 segumap_setprot, 77 segumap_checkprot, 78 NULL, /* kluster: disabled */ 79 NULL, /* swapout: disabled */ 80 segumap_sync, 81 segumap_incore, 82 segumap_lockop, 83 segumap_getprot, 84 segumap_getoffset, 85 segumap_gettype, 86 segumap_getvp, 87 segumap_advise, 88 segumap_dump, 89 segumap_pagelock, 90 segumap_setpagesize, 91 segumap_getmemid, 92 NULL, /* getpolicy: disabled */ 93 segumap_capable, 94 seg_inherit_notsup 95 }; 96 97 98 /* 99 * Create a kernel/user-mapped segment. 100 */ 101 int 102 segumap_create(struct seg *seg, void *argsp) 103 { 104 segumap_crargs_t *a = (struct segumap_crargs *)argsp; 105 segumap_data_t *data; 106 107 ASSERT((uintptr_t)a->kaddr > _userlimit); 108 109 /* 110 * Check several aspects of the mapping request to ensure validity: 111 * - kernel pages must reside entirely in kernel space 112 * - target protection must be user-accessible 113 * - kernel address must be page-aligned 114 * - kernel address must reside inside a "safe" segment 115 */ 116 if ((uintptr_t)a->kaddr <= _userlimit || 117 ((uintptr_t)a->kaddr + seg->s_size) < (uintptr_t)a->kaddr || 118 (a->prot & PROT_USER) == 0 || 119 ((uintptr_t)a->kaddr & PAGEOFFSET) != 0 || 120 !segumap_verify_safe(a->kaddr, seg->s_size)) { 121 return (EINVAL); 122 } 123 124 data = kmem_zalloc(sizeof (*data), KM_SLEEP); 125 rw_init(&data->sud_lock, NULL, RW_DEFAULT, NULL); 126 data->sud_kaddr = a->kaddr; 127 data->sud_prot = a->prot; 128 129 seg->s_ops = &segumap_ops; 130 seg->s_data = data; 131 return (0); 132 } 133 134 static boolean_t 135 segumap_verify_safe(caddr_t kaddr, size_t len) 136 { 137 struct seg *seg; 138 139 /* 140 * Presently, only pages which are backed by segkmem are allowed to be 141 * shared with userspace. This prevents nasty paging behavior with 142 * other drivers such as seg_kp. Furthermore, the backing kernel 143 * segment must completely contain the region to be mapped. 144 * 145 * Failing these checks is fatal for now since such mappings are done 146 * in a very limited context from the kernel. 147 */ 148 AS_LOCK_ENTER(&kas, RW_READER); 149 seg = as_segat(&kas, kaddr); 150 VERIFY(seg != NULL); 151 VERIFY(seg->s_base + seg->s_size >= kaddr + len); 152 VERIFY(seg->s_ops == &segkmem_ops); 153 AS_LOCK_EXIT(&kas); 154 155 return (B_TRUE); 156 } 157 158 static int 159 segumap_dup(struct seg *seg, struct seg *newseg) 160 { 161 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 162 segumap_data_t *newsud; 163 164 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 165 166 newsud = kmem_zalloc(sizeof (segumap_data_t), KM_SLEEP); 167 rw_init(&newsud->sud_lock, NULL, RW_DEFAULT, NULL); 168 newsud->sud_kaddr = sud->sud_kaddr; 169 newsud->sud_prot = sud->sud_prot; 170 171 newseg->s_ops = seg->s_ops; 172 newseg->s_data = newsud; 173 return (0); 174 } 175 176 static int 177 segumap_unmap(struct seg *seg, caddr_t addr, size_t len) 178 { 179 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 180 181 ASSERT(seg->s_as && AS_WRITE_HELD(seg->s_as)); 182 183 /* Only allow unmap of entire segment */ 184 if (addr != seg->s_base || len != seg->s_size) { 185 return (EINVAL); 186 } 187 if (sud->sud_softlockcnt != 0) { 188 return (EAGAIN); 189 } 190 191 /* 192 * Unconditionally unload the entire segment range. 193 */ 194 hat_unload(seg->s_as->a_hat, addr, len, HAT_UNLOAD_UNMAP); 195 196 seg_free(seg); 197 return (0); 198 } 199 200 static void 201 segumap_free(struct seg *seg) 202 { 203 segumap_data_t *data = (segumap_data_t *)seg->s_data; 204 205 ASSERT(data != NULL); 206 207 rw_destroy(&data->sud_lock); 208 VERIFY(data->sud_softlockcnt == 0); 209 kmem_free(data, sizeof (*data)); 210 seg->s_data = NULL; 211 } 212 213 /* ARGSUSED */ 214 static faultcode_t 215 segumap_fault(struct hat *hat, struct seg *seg, caddr_t addr, size_t len, 216 enum fault_type type, enum seg_rw tw) 217 { 218 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 219 220 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 221 222 if (type == F_PROT) { 223 /* 224 * Since protection on the segment is fixed, there is nothing 225 * to do but report an error for protection faults. 226 */ 227 return (FC_PROT); 228 } else if (type == F_SOFTUNLOCK) { 229 size_t plen = btop(len); 230 231 rw_enter(&sud->sud_lock, RW_WRITER); 232 VERIFY(sud->sud_softlockcnt >= plen); 233 sud->sud_softlockcnt -= plen; 234 rw_exit(&sud->sud_lock); 235 return (0); 236 } 237 238 ASSERT(type == F_INVAL || type == F_SOFTLOCK); 239 rw_enter(&sud->sud_lock, RW_WRITER); 240 241 if (type == F_INVAL || 242 (type == F_SOFTLOCK && sud->sud_softlockcnt == 0)) { 243 /* 244 * Load the (entire) segment into the HAT. 245 * 246 * It's possible that threads racing into as_fault will cause 247 * seg_umap to load the same range multiple times in quick 248 * succession. Redundant hat_devload operations are safe. 249 */ 250 for (uintptr_t i = 0; i < seg->s_size; i += PAGESIZE) { 251 pfn_t pfn; 252 253 pfn = hat_getpfnum(kas.a_hat, sud->sud_kaddr + i); 254 VERIFY(pfn != PFN_INVALID); 255 hat_devload(seg->s_as->a_hat, seg->s_base + i, 256 PAGESIZE, pfn, sud->sud_prot, HAT_LOAD); 257 } 258 } 259 if (type == F_SOFTLOCK) { 260 size_t nval = sud->sud_softlockcnt + btop(len); 261 262 if (sud->sud_softlockcnt >= nval) { 263 rw_exit(&sud->sud_lock); 264 return (FC_MAKE_ERR(EOVERFLOW)); 265 } 266 sud->sud_softlockcnt = nval; 267 } 268 269 rw_exit(&sud->sud_lock); 270 return (0); 271 } 272 273 /* ARGSUSED */ 274 static faultcode_t 275 segumap_faulta(struct seg *seg, caddr_t addr) 276 { 277 /* Do nothing since asynch pagefault should not load translation. */ 278 return (0); 279 } 280 281 /* ARGSUSED */ 282 static int 283 segumap_setprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 284 { 285 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 286 287 /* 288 * The seg_umap driver does not yet allow protection to be changed. 289 */ 290 return (EACCES); 291 } 292 293 /* ARGSUSED */ 294 static int 295 segumap_checkprot(struct seg *seg, caddr_t addr, size_t len, uint_t prot) 296 { 297 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 298 int error = 0; 299 300 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 301 302 rw_enter(&sud->sud_lock, RW_READER); 303 if ((sud->sud_prot & prot) != prot) { 304 error = EACCES; 305 } 306 rw_exit(&sud->sud_lock); 307 return (error); 308 } 309 310 /* ARGSUSED */ 311 static int 312 segumap_sync(struct seg *seg, caddr_t addr, size_t len, int attr, uint_t flags) 313 { 314 /* Always succeed since there are no backing store to sync */ 315 return (0); 316 } 317 318 /* ARGSUSED */ 319 static size_t 320 segumap_incore(struct seg *seg, caddr_t addr, size_t len, char *vec) 321 { 322 size_t sz = 0; 323 324 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 325 326 len = (len + PAGEOFFSET) & PAGEMASK; 327 while (len > 0) { 328 *vec = 1; 329 sz += PAGESIZE; 330 vec++; 331 len -= PAGESIZE; 332 } 333 return (sz); 334 } 335 336 /* ARGSUSED */ 337 static int 338 segumap_lockop(struct seg *seg, caddr_t addr, size_t len, int attr, int op, 339 ulong_t *lockmap, size_t pos) 340 { 341 /* Report success since kernel pages are always in memory. */ 342 return (0); 343 } 344 345 static int 346 segumap_getprot(struct seg *seg, caddr_t addr, size_t len, uint_t *protv) 347 { 348 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 349 size_t pgno; 350 uint_t prot; 351 352 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 353 354 rw_enter(&sud->sud_lock, RW_READER); 355 prot = sud->sud_prot; 356 rw_exit(&sud->sud_lock); 357 358 /* 359 * Reporting protection is simple since it is not tracked per-page. 360 */ 361 pgno = seg_page(seg, addr + len) - seg_page(seg, addr) + 1; 362 while (pgno > 0) { 363 protv[--pgno] = prot; 364 } 365 return (0); 366 } 367 368 /* ARGSUSED */ 369 static u_offset_t 370 segumap_getoffset(struct seg *seg, caddr_t addr) 371 { 372 /* 373 * To avoid leaking information about the layout of the kernel address 374 * space, always report '0' as the offset. 375 */ 376 return (0); 377 } 378 379 /* ARGSUSED */ 380 static int 381 segumap_gettype(struct seg *seg, caddr_t addr) 382 { 383 /* 384 * Since already-existing kernel pages are being mapped into userspace, 385 * always report the segment type as shared. 386 */ 387 return (MAP_SHARED); 388 } 389 390 /* ARGSUSED */ 391 static int 392 segumap_getvp(struct seg *seg, caddr_t addr, struct vnode **vpp) 393 { 394 ASSERT(seg->s_as && AS_LOCK_HELD(seg->s_as)); 395 396 *vpp = NULL; 397 return (0); 398 } 399 400 /* ARGSUSED */ 401 static int 402 segumap_advise(struct seg *seg, caddr_t addr, size_t len, uint_t behav) 403 { 404 if (behav == MADV_PURGE) { 405 /* Purge does not make sense for this mapping */ 406 return (EINVAL); 407 } 408 /* Indicate success for everything else. */ 409 return (0); 410 } 411 412 /* ARGSUSED */ 413 static void 414 segumap_dump(struct seg *seg) 415 { 416 /* 417 * Since this is a mapping to share kernel data with userspace, nothing 418 * additional should be dumped. 419 */ 420 } 421 422 /* ARGSUSED */ 423 static int 424 segumap_pagelock(struct seg *seg, caddr_t addr, size_t len, struct page ***ppp, 425 enum lock_type type, enum seg_rw rw) 426 { 427 return (ENOTSUP); 428 } 429 430 /* ARGSUSED */ 431 static int 432 segumap_setpagesize(struct seg *seg, caddr_t addr, size_t len, uint_t szc) 433 { 434 return (ENOTSUP); 435 } 436 437 static int 438 segumap_getmemid(struct seg *seg, caddr_t addr, memid_t *memidp) 439 { 440 segumap_data_t *sud = (segumap_data_t *)seg->s_data; 441 442 memidp->val[0] = (uintptr_t)sud->sud_kaddr; 443 memidp->val[1] = (uintptr_t)(addr - seg->s_base); 444 return (0); 445 } 446 447 /* ARGSUSED */ 448 static int 449 segumap_capable(struct seg *seg, segcapability_t capability) 450 { 451 /* no special capablities */ 452 return (0); 453 } 454