xref: /openbsd/sys/uvm/uvm_mmap.c (revision 3e142e7f)
1 /*	$OpenBSD: uvm_mmap.c,v 1.193 2024/12/14 12:07:38 mvs Exp $	*/
2 /*	$NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $	*/
3 
4 /*
5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
6  * Copyright (c) 1991, 1993 The Regents of the University of California.
7  * Copyright (c) 1988 University of Utah.
8  *
9  * All rights reserved.
10  *
11  * This code is derived from software contributed to Berkeley by
12  * the Systems Programming Group of the University of Utah Computer
13  * Science Department.
14  *
15  * Redistribution and use in source and binary forms, with or without
16  * modification, are permitted provided that the following conditions
17  * are met:
18  * 1. Redistributions of source code must retain the above copyright
19  *    notice, this list of conditions and the following disclaimer.
20  * 2. Redistributions in binary form must reproduce the above copyright
21  *    notice, this list of conditions and the following disclaimer in the
22  *    documentation and/or other materials provided with the distribution.
23  * 3. Neither the name of the University nor the names of its contributors
24  *    may be used to endorse or promote products derived from this software
25  *    without specific prior written permission.
26  *
27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37  * SUCH DAMAGE.
38  *
39  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
40  *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
41  * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
42  */
43 
44 /*
45  * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
46  * function.
47  */
48 #include <sys/param.h>
49 #include <sys/systm.h>
50 #include <sys/fcntl.h>
51 #include <sys/file.h>
52 #include <sys/filedesc.h>
53 #include <sys/resourcevar.h>
54 #include <sys/mman.h>
55 #include <sys/mount.h>
56 #include <sys/proc.h>
57 #include <sys/malloc.h>
58 #include <sys/vnode.h>
59 #include <sys/conf.h>
60 #include <sys/signalvar.h>
61 #include <sys/syslog.h>
62 #include <sys/stat.h>
63 #include <sys/specdev.h>
64 #include <sys/stdint.h>
65 #include <sys/pledge.h>
66 #include <sys/unistd.h>		/* for KBIND* */
67 #include <sys/user.h>
68 
69 #include <machine/exec.h>	/* for __LDPGSZ */
70 
71 #include <sys/syscall.h>
72 #include <sys/syscallargs.h>
73 
74 #include <uvm/uvm.h>
75 #include <uvm/uvm_device.h>
76 #include <uvm/uvm_vnode.h>
77 
78 /*
79  * Locks used to protect data:
80  *	a	atomic
81  */
82 
83 int uvm_mmapanon(vm_map_t, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t, int,
84     vsize_t, struct proc *);
85 int uvm_mmapfile(vm_map_t, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t, int,
86     struct vnode *, voff_t, vsize_t, struct proc *);
87 
88 
89 /*
90  * Page align addr and size, returning EINVAL on wraparound.
91  */
92 #define ALIGN_ADDR(addr, size, pageoff)	do {				\
93 	pageoff = (addr & PAGE_MASK);					\
94 	if (pageoff != 0) {						\
95 		if (size > SIZE_MAX - pageoff)				\
96 			return EINVAL;	/* wraparound */	\
97 		addr -= pageoff;					\
98 		size += pageoff;					\
99 	}								\
100 	if (size != 0) {						\
101 		size = (vsize_t)round_page(size);			\
102 		if (size == 0)						\
103 			return EINVAL;	/* wraparound */	\
104 	}								\
105 } while (0)
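
/*
 * Worked example (editorial sketch, assuming a 4 KiB PAGE_SIZE of 0x1000):
 * for addr = 0x1003 and size = 0x2000, ALIGN_ADDR computes pageoff = 0x3,
 * rewinds addr to 0x1000, grows size to 0x2003, and round_page() then
 * yields size = 0x3000, i.e. the three full pages covering the original
 * [0x1003, 0x3003) range.
 */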
106 
107 /*
108  * sys_mquery: provide mapping hints to applications that do fixed mappings
109  *
110  * flags: 0 or MAP_FIXED (MAP_FIXED - means that we insist on this addr and
111  *	don't care about PMAP_PREFER or such)
112  * addr: hint where we'd like to place the mapping.
113  * size: size of the mapping
114  * fd: fd of the file we want to map
115  * off: offset within the file
116  */
117 int
118 sys_mquery(struct proc *p, void *v, register_t *retval)
119 {
120 	struct sys_mquery_args /* {
121 		syscallarg(void *) addr;
122 		syscallarg(size_t) len;
123 		syscallarg(int) prot;
124 		syscallarg(int) flags;
125 		syscallarg(int) fd;
126 		syscallarg(off_t) pos;
127 	} */ *uap = v;
128 	struct file *fp;
129 	voff_t uoff;
130 	int error;
131 	vaddr_t vaddr;
132 	int flags = 0;
133 	vsize_t size;
134 	vm_prot_t prot;
135 	int fd;
136 
137 	vaddr = (vaddr_t) SCARG(uap, addr);
138 	prot = SCARG(uap, prot);
139 	size = (vsize_t) SCARG(uap, len);
140 	fd = SCARG(uap, fd);
141 
142 	if ((prot & PROT_MASK) != prot)
143 		return EINVAL;
144 
145 	if (SCARG(uap, flags) & MAP_FIXED)
146 		flags |= UVM_FLAG_FIXED;
147 
148 	if (fd >= 0) {
149 		if ((error = getvnode(p, fd, &fp)) != 0)
150 			return error;
151 		uoff = SCARG(uap, pos);
152 	} else {
153 		fp = NULL;
154 		uoff = UVM_UNKNOWN_OFFSET;
155 	}
156 
157 	if (vaddr == 0)
158 		vaddr = uvm_map_hint(p->p_vmspace, prot, VM_MIN_ADDRESS,
159 		    VM_MAXUSER_ADDRESS);
160 
161 	error = uvm_map_mquery(&p->p_vmspace->vm_map, &vaddr, size, uoff,
162 	    flags);
163 	if (error == 0)
164 		*retval = (register_t)(vaddr);
165 
166 	if (fp != NULL)
167 		FRELE(fp, p);
168 	return error;
169 }
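
/*
 * Userland usage sketch (editorial, not part of this file; assumes
 * <sys/mman.h>, an open descriptor fd, and a hint/len pair chosen by the
 * caller): ask where a mapping of fd could be placed near the hint before
 * committing to a fixed mapping there:
 *
 *	void *where = mquery(hint, len, PROT_READ, 0, fd, 0);
 *	if (where == MAP_FAILED)
 *		err(1, "mquery");
 *	void *p = mmap(where, len, PROT_READ, MAP_FIXED | MAP_PRIVATE, fd, 0);
 */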
170 
171 int	uvm_wxabort;	/* [a] */
172 
173 /*
174  * W^X violations are only allowed for wxneeded executables on wxallowed filesystems.
175  */
176 static inline int
177 uvm_wxcheck(struct proc *p, char *call)
178 {
179 	struct process *pr = p->p_p;
180 	int wxallowed = (pr->ps_textvp->v_mount &&
181 	    (pr->ps_textvp->v_mount->mnt_flag & MNT_WXALLOWED));
182 
183 	if (wxallowed && (pr->ps_flags & PS_WXNEEDED))
184 		return 0;
185 
186 	if (atomic_load_int(&uvm_wxabort)) {
187 		KERNEL_LOCK();
188 		/* Report W^X failures */
189 		if (pr->ps_wxcounter++ == 0)
190 			log(LOG_NOTICE, "%s(%d): %s W^X violation\n",
191 			    pr->ps_comm, pr->ps_pid, call);
192 		/* Send uncatchable SIGABRT for coredump */
193 		sigexit(p, SIGABRT);
194 		KERNEL_UNLOCK();
195 	}
196 
197 	return ENOTSUP;
198 }
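
/*
 * Illustration (editorial sketch): a request such as
 *
 *	mmap(NULL, len, PROT_READ | PROT_WRITE | PROT_EXEC,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *
 * reaches this check and fails with ENOTSUP unless the executable was
 * linked wxneeded and lives on a filesystem mounted with wxallowed; with
 * the kern.wxabort sysctl set, the process is additionally killed with an
 * uncatchable SIGABRT so a core dump is produced.
 */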
199 
200 /*
201  * sys_mmap: mmap system call.
202  *
203  * => file offset and address may not be page aligned
204  *    - if MAP_FIXED, offset and address must have the same remainder mod PAGE_SIZE
205  *    - if address isn't page aligned the mapping starts at trunc_page(addr)
206  *      and the return value is adjusted up by the page offset.
207  */
208 int
209 sys_mmap(struct proc *p, void *v, register_t *retval)
210 {
211 	struct sys_mmap_args /* {
212 		syscallarg(void *) addr;
213 		syscallarg(size_t) len;
214 		syscallarg(int) prot;
215 		syscallarg(int) flags;
216 		syscallarg(int) fd;
217 		syscallarg(off_t) pos;
218 	} */ *uap = v;
219 	vaddr_t addr;
220 	struct vattr va;
221 	off_t pos;
222 	vsize_t limit, pageoff, size;
223 	vm_prot_t prot, maxprot;
224 	int flags, fd;
225 	vaddr_t vm_min_address = VM_MIN_ADDRESS;
226 	struct filedesc *fdp = p->p_fd;
227 	struct file *fp = NULL;
228 	struct vnode *vp;
229 	int error;
230 
231 	/* first, extract syscall args from the uap. */
232 	addr = (vaddr_t) SCARG(uap, addr);
233 	size = (vsize_t) SCARG(uap, len);
234 	prot = SCARG(uap, prot);
235 	flags = SCARG(uap, flags);
236 	fd = SCARG(uap, fd);
237 	pos = SCARG(uap, pos);
238 
239 	/*
240 	 * Validate the flags.
241 	 */
242 	if ((prot & PROT_MASK) != prot)
243 		return EINVAL;
244 	if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC) &&
245 	    (error = uvm_wxcheck(p, "mmap")))
246 		return error;
247 
248 	if ((flags & MAP_FLAGMASK) != flags)
249 		return EINVAL;
250 	if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
251 		return EINVAL;
252 	if ((flags & (MAP_FIXED|__MAP_NOREPLACE)) == __MAP_NOREPLACE)
253 		return EINVAL;
254 	if (flags & MAP_STACK) {
255 		if ((flags & (MAP_ANON|MAP_PRIVATE)) != (MAP_ANON|MAP_PRIVATE))
256 			return EINVAL;
257 		if (flags & ~(MAP_STACK|MAP_FIXED|MAP_ANON|MAP_PRIVATE))
258 			return EINVAL;
259 		if (pos != 0)
260 			return EINVAL;
261 		if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
262 			return EINVAL;
263 	}
264 	if (size == 0)
265 		return EINVAL;
266 
267 	error = pledge_protexec(p, prot);
268 	if (error)
269 		return error;
270 
271 	/* align file position and save offset.  adjust size. */
272 	ALIGN_ADDR(pos, size, pageoff);
273 
274 	/* now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" */
275 	if (flags & MAP_FIXED) {
276 		/* adjust address by the same amount as we did the offset */
277 		addr -= pageoff;
278 		if (addr & PAGE_MASK)
279 			return EINVAL;		/* not page aligned */
280 
281 		if (addr > SIZE_MAX - size)
282 			return EINVAL;		/* no wrapping! */
283 		if (VM_MAXUSER_ADDRESS > 0 &&
284 		    (addr + size) > VM_MAXUSER_ADDRESS)
285 			return EINVAL;
286 		if (vm_min_address > 0 && addr < vm_min_address)
287 			return EINVAL;
288 	}
289 
290 	/* check for file mappings (i.e. not anonymous) and verify file. */
291 	if ((flags & MAP_ANON) == 0) {
292 		KERNEL_LOCK();
293 		if ((fp = fd_getfile(fdp, fd)) == NULL) {
294 			error = EBADF;
295 			goto out;
296 		}
297 
298 		if (fp->f_type != DTYPE_VNODE) {
299 			error = ENODEV;		/* only mmap vnodes! */
300 			goto out;
301 		}
302 		vp = (struct vnode *)fp->f_data;	/* convert to vnode */
303 
304 		if (vp->v_type != VREG && vp->v_type != VCHR &&
305 		    vp->v_type != VBLK) {
306 			error = ENODEV; /* only REG/CHR/BLK support mmap */
307 			goto out;
308 		}
309 
310 		if (vp->v_type == VREG && (pos + size) < pos) {
311 			error = EINVAL;		/* no offset wrapping */
312 			goto out;
313 		}
314 
315 		/* special case: catch SunOS style /dev/zero */
316 		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
317 			flags |= MAP_ANON;
318 			FRELE(fp, p);
319 			fp = NULL;
320 			KERNEL_UNLOCK();
321 			goto is_anon;
322 		}
323 
324 		/*
325 		 * Old programs may not select a specific sharing type, so
326 		 * default to an appropriate one.
327 		 */
328 		if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
329 #if defined(DEBUG)
330 			printf("WARNING: defaulted mmap() share type to"
331 			    " %s (pid %d comm %s)\n",
332 			    vp->v_type == VCHR ? "MAP_SHARED" : "MAP_PRIVATE",
333 			    p->p_p->ps_pid, p->p_p->ps_comm);
334 #endif
335 			if (vp->v_type == VCHR)
336 				flags |= MAP_SHARED;	/* for a device */
337 			else
338 				flags |= MAP_PRIVATE;	/* for a file */
339 		}
340 
341 		/*
342 		 * MAP_PRIVATE device mappings don't make sense (and aren't
343 		 * supported anyway).  However, some programs rely on this,
344 		 * so just change it to MAP_SHARED.
345 		 */
346 		if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
347 			flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
348 		}
349 
350 		/* now check protection */
351 		maxprot = PROT_EXEC;
352 
353 		/* check read access */
354 		if (fp->f_flag & FREAD)
355 			maxprot |= PROT_READ;
356 		else if (prot & PROT_READ) {
357 			error = EACCES;
358 			goto out;
359 		}
360 
361 		/* check write access, shared case first */
362 		if (flags & MAP_SHARED) {
363 			/*
364 			 * if the file is writable, only add PROT_WRITE to
365 			 * maxprot if the file is not immutable or append-only.
366 			 * otherwise, if we have asked for PROT_WRITE, return
367 			 * EPERM.
368 			 */
369 			if (fp->f_flag & FWRITE) {
370 				error = VOP_GETATTR(vp, &va, p->p_ucred, p);
371 				if (error)
372 					goto out;
373 				if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
374 					maxprot |= PROT_WRITE;
375 				else if (prot & PROT_WRITE) {
376 					error = EPERM;
377 					goto out;
378 				}
379 			} else if (prot & PROT_WRITE) {
380 				error = EACCES;
381 				goto out;
382 			}
383 		} else {
384 			/* MAP_PRIVATE mappings can always be written to (copy-on-write) */
385 			maxprot |= PROT_WRITE;
386 		}
387 		if ((flags & __MAP_NOFAULT) != 0 ||
388 		    ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
389 			limit = lim_cur(RLIMIT_DATA);
390 			if (limit < size ||
391 			    limit - size < ptoa(p->p_vmspace->vm_dused)) {
392 				error = ENOMEM;
393 				goto out;
394 			}
395 		}
396 		error = uvm_mmapfile(&p->p_vmspace->vm_map, &addr, size, prot,
397 		    maxprot, flags, vp, pos, lim_cur(RLIMIT_MEMLOCK), p);
398 		FRELE(fp, p);
399 		KERNEL_UNLOCK();
400 	} else {		/* MAP_ANON case */
401 		if (fd != -1)
402 			return EINVAL;
403 
404 is_anon:	/* label for SunOS style /dev/zero */
405 
406 		/* __MAP_NOFAULT only makes sense with a backing object */
407 		if ((flags & __MAP_NOFAULT) != 0)
408 			return EINVAL;
409 
410 		if (prot != PROT_NONE || (flags & MAP_SHARED)) {
411 			limit = lim_cur(RLIMIT_DATA);
412 			if (limit < size ||
413 			    limit - size < ptoa(p->p_vmspace->vm_dused)) {
414 				return ENOMEM;
415 			}
416 		}
417 
418 		/*
419 		 * We've been treating (MAP_SHARED|MAP_PRIVATE) == 0 as
420 		 * MAP_PRIVATE, so make that clear.
421 		 */
422 		if ((flags & MAP_SHARED) == 0)
423 			flags |= MAP_PRIVATE;
424 
425 		maxprot = PROT_MASK;
426 		error = uvm_mmapanon(&p->p_vmspace->vm_map, &addr, size, prot,
427 		    maxprot, flags, lim_cur(RLIMIT_MEMLOCK), p);
428 	}
429 
430 	if (error == 0)
431 		/* remember to add offset */
432 		*retval = (register_t)(addr + pageoff);
433 
434 	return error;
435 
436 out:
437 	KERNEL_UNLOCK();
438 	if (fp)
439 		FRELE(fp, p);
440 	return error;
441 }
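
/*
 * Userland usage sketch (editorial, not part of this file; fd is an open
 * regular file and sb its stat(2) result): a read-only private file
 * mapping, the common case handled by the vnode branch above:
 *
 *	void *p = mmap(NULL, sb.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
 *	if (p == MAP_FAILED)
 *		err(1, "mmap");
 *
 * A MAP_STACK mapping, by contrast, must be MAP_ANON | MAP_PRIVATE with
 * exactly PROT_READ | PROT_WRITE and a zero offset, as enforced above.
 */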
442 
443 /*
444  * sys_msync: the msync system call (a front-end for flush)
445  */
446 
447 int
448 sys_msync(struct proc *p, void *v, register_t *retval)
449 {
450 	struct sys_msync_args /* {
451 		syscallarg(void *) addr;
452 		syscallarg(size_t) len;
453 		syscallarg(int) flags;
454 	} */ *uap = v;
455 	vaddr_t addr;
456 	vsize_t size, pageoff;
457 	int flags, uvmflags;
458 
459 	/* extract syscall args from the uap */
460 	addr = (vaddr_t)SCARG(uap, addr);
461 	size = (vsize_t)SCARG(uap, len);
462 	flags = SCARG(uap, flags);
463 
464 	/* sanity check flags */
465 	if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
466 			(flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
467 			(flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
468 		return EINVAL;
469 	if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
470 		flags |= MS_SYNC;
471 
472 	/* align the address to a page boundary, and adjust the size accordingly */
473 	ALIGN_ADDR(addr, size, pageoff);
474 	if (addr > SIZE_MAX - size)
475 		return EINVAL;		/* disallow wrap-around. */
476 
477 	/* translate MS_ flags into PGO_ flags */
478 	uvmflags = PGO_CLEANIT;
479 	if (flags & MS_INVALIDATE)
480 		uvmflags |= PGO_FREE;
481 	if (flags & MS_SYNC)
482 		uvmflags |= PGO_SYNCIO;
483 	else
484 		uvmflags |= PGO_SYNCIO;	 /* XXXCDC: force sync for now! */
485 
486 	return uvm_map_clean(&p->p_vmspace->vm_map, addr, addr+size, uvmflags);
487 }
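
/*
 * Userland usage sketch (editorial, not part of this file): flush modified
 * pages of a shared file mapping back to the file and wait for completion:
 *
 *	if (msync(p, len, MS_SYNC) == -1)
 *		err(1, "msync");
 *
 * Note that, as the XXXCDC comment above records, MS_ASYNC is currently
 * treated the same as MS_SYNC.
 */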
488 
489 /*
490  * sys_munmap: unmap a user's memory
491  */
492 int
493 sys_munmap(struct proc *p, void *v, register_t *retval)
494 {
495 	struct sys_munmap_args /* {
496 		syscallarg(void *) addr;
497 		syscallarg(size_t) len;
498 	} */ *uap = v;
499 	vaddr_t addr;
500 	vsize_t size, pageoff;
501 	vm_map_t map;
502 	vaddr_t vm_min_address = VM_MIN_ADDRESS;
503 	struct uvm_map_deadq dead_entries;
504 
505 	/* get syscall args... */
506 	addr = (vaddr_t) SCARG(uap, addr);
507 	size = (vsize_t) SCARG(uap, len);
508 
509 	/* align address to a page boundary, and adjust size accordingly */
510 	ALIGN_ADDR(addr, size, pageoff);
511 
512 	/*
513 	 * Check for illegal addresses.  Watch out for address wrap...
514 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
515 	 */
516 	if (addr > SIZE_MAX - size)
517 		return EINVAL;
518 	if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
519 		return EINVAL;
520 	if (vm_min_address > 0 && addr < vm_min_address)
521 		return EINVAL;
522 	map = &p->p_vmspace->vm_map;
523 
524 
525 	vm_map_lock(map);	/* lock map so we can checkprot */
526 
527 	/*
528 	 * interesting system call semantic: make sure entire range is
529 	 * allocated before allowing an unmap.
530 	 */
531 	if (!uvm_map_checkprot(map, addr, addr + size, PROT_NONE)) {
532 		vm_map_unlock(map);
533 		return EINVAL;
534 	}
535 
536 	TAILQ_INIT(&dead_entries);
537 	if (uvm_unmap_remove(map, addr, addr + size, &dead_entries,
538 	    FALSE, TRUE, TRUE) != 0) {
539 		vm_map_unlock(map);
540 		return EPERM;	/* immutable entries found */
541 	}
542 	vm_map_unlock(map);	/* and unlock */
543 
544 	uvm_unmap_detach(&dead_entries, 0);
545 
546 	return 0;
547 }
548 
549 /*
550  * sys_mprotect: the mprotect system call
551  */
552 int
553 sys_mprotect(struct proc *p, void *v, register_t *retval)
554 {
555 	struct sys_mprotect_args /* {
556 		syscallarg(void *) addr;
557 		syscallarg(size_t) len;
558 		syscallarg(int) prot;
559 	} */ *uap = v;
560 	vaddr_t addr;
561 	vsize_t size, pageoff;
562 	vm_prot_t prot;
563 	int error;
564 
565 	/*
566 	 * extract syscall args from uap
567 	 */
568 
569 	addr = (vaddr_t)SCARG(uap, addr);
570 	size = (vsize_t)SCARG(uap, len);
571 	prot = SCARG(uap, prot);
572 
573 	if ((prot & PROT_MASK) != prot)
574 		return EINVAL;
575 	if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC) &&
576 	    (error = uvm_wxcheck(p, "mprotect")))
577 		return error;
578 
579 	error = pledge_protexec(p, prot);
580 	if (error)
581 		return error;
582 
583 	/*
584 	 * align the address to a page boundary, and adjust the size accordingly
585 	 */
586 	ALIGN_ADDR(addr, size, pageoff);
587 	if (addr > SIZE_MAX - size)
588 		return EINVAL;		/* disallow wrap-around. */
589 
590 	return (uvm_map_protect(&p->p_vmspace->vm_map, addr, addr+size,
591 	    prot, 0, FALSE, TRUE));
592 }
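
/*
 * Userland usage sketch (editorial, not part of this file): a JIT-style
 * sequence that stays within W^X by never holding PROT_WRITE and PROT_EXEC
 * at the same time:
 *
 *	void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	... emit code into p ...
 *	if (mprotect(p, len, PROT_READ | PROT_EXEC) == -1)
 *		err(1, "mprotect");
 *
 * Requesting PROT_WRITE | PROT_EXEC together would instead hit
 * uvm_wxcheck() above.
 */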
593 
594 /*
595  * sys_pinsyscalls.  The caller is required to normalize base,len
596  * to the minimum .text region, and adjust pintable offsets relative
597  * to that base.
598  */
599 int
600 sys_pinsyscalls(struct proc *p, void *v, register_t *retval)
601 {
602 	struct sys_pinsyscalls_args /* {
603 		syscallarg(void *) base;
604 		syscallarg(size_t) len;
605 		syscallarg(u_int *) pins;
606 		syscallarg(int) npins;
607 	} */ *uap = v;
608 	struct process *pr = p->p_p;
609 	struct vm_map *map = &p->p_vmspace->vm_map;
610 	int npins, error = 0, i;
611 	vaddr_t base;
612 	size_t len;
613 	u_int *pins;
614 
615 	if (pr->ps_libcpin.pn_start ||
616 	    (pr->ps_vmspace->vm_map.flags & VM_MAP_PINSYSCALL_ONCE))
617 		return (EPERM);
618 	base = (vaddr_t)SCARG(uap, base);
619 	len = (vsize_t)SCARG(uap, len);
620 	if (base > SIZE_MAX - len)
621 		return (EINVAL);	/* disallow wrap-around. */
622 	if (base < map->min_offset || base+len > map->max_offset)
623 		return (EINVAL);
624 
625 	/* XXX MP unlock */
626 
627 	npins = SCARG(uap, npins);
628 	if (npins < 1 || npins > SYS_MAXSYSCALL)
629 		return (E2BIG);
630 	pins = malloc(npins * sizeof(u_int), M_PINSYSCALL, M_WAITOK|M_ZERO);
631 	if (pins == NULL)
632 		return (ENOMEM);
633 	error = copyin(SCARG(uap, pins), pins, npins * sizeof(u_int));
634 	if (error)
635 		goto err;
636 
637 	/* Range-check pintable offsets */
638 	for (i = 0; i < npins; i++) {
639 		if (pins[i] == (u_int)-1 || pins[i] == 0)
640 			continue;
641 		if (pins[i] > SCARG(uap, len)) {
642 			error = ERANGE;
643 			break;
644 		}
645 	}
646 	if (error) {
647 err:
648 		free(pins, M_PINSYSCALL, npins * sizeof(u_int));
649 		return (error);
650 	}
651 	pr->ps_libcpin.pn_start = base;
652 	pr->ps_libcpin.pn_end = base + len;
653 	pr->ps_libcpin.pn_pins = pins;
654 	pr->ps_libcpin.pn_npins = npins;
655 
656 #ifdef PMAP_CHECK_COPYIN
657 	/* Assume (and insist) on libc.so text being execute-only */
658 	if (PMAP_CHECK_COPYIN)
659 		uvm_map_check_copyin_add(map, base, base+len);
660 #endif
661 	return (0);
662 }
663 
664 /*
665  * sys_mimmutable: the mimmutable system call
666  */
667 int
668 sys_mimmutable(struct proc *p, void *v, register_t *retval)
669 {
670 	struct sys_mimmutable_args /* {
671 		immutablearg(void *) addr;
672 		immutablearg(size_t) len;
673 	} */ *uap = v;
674 	vaddr_t addr;
675 	vsize_t size, pageoff;
676 
677 	addr = (vaddr_t)SCARG(uap, addr);
678 	size = (vsize_t)SCARG(uap, len);
679 
680 	/*
681 	 * align the address to a page boundary, and adjust the size accordingly
682 	 */
683 	ALIGN_ADDR(addr, size, pageoff);
684 	if (addr > SIZE_MAX - size)
685 		return EINVAL;		/* disallow wrap-around. */
686 
687 	return uvm_map_immutable(&p->p_vmspace->vm_map, addr, addr+size, 1);
688 }
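
/*
 * Userland usage sketch (editorial, not part of this file): after setting
 * up a mapping that must never change for the lifetime of the process,
 * seal it so that later mmap/mprotect/munmap calls on the range fail with
 * EPERM:
 *
 *	if (mimmutable(p, len) == -1)
 *		err(1, "mimmutable");
 */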
689 
690 /*
691  * sys_minherit: the minherit system call
692  */
693 int
694 sys_minherit(struct proc *p, void *v, register_t *retval)
695 {
696 	struct sys_minherit_args /* {
697 		syscallarg(void *) addr;
698 		syscallarg(size_t) len;
699 		syscallarg(int) inherit;
700 	} */ *uap = v;
701 	vaddr_t addr;
702 	vsize_t size, pageoff;
703 	vm_inherit_t inherit;
704 
705 	addr = (vaddr_t)SCARG(uap, addr);
706 	size = (vsize_t)SCARG(uap, len);
707 	inherit = SCARG(uap, inherit);
708 
709 	/*
710 	 * align the address to a page boundary, and adjust the size accordingly
711 	 */
712 	ALIGN_ADDR(addr, size, pageoff);
713 	if (addr > SIZE_MAX - size)
714 		return EINVAL;		/* disallow wrap-around. */
715 
716 	return (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
717 	    inherit));
718 }
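
/*
 * Userland usage sketch (editorial, not part of this file): keep a buffer
 * holding secrets out of children by having it replaced with zero-filled
 * memory in processes created by fork(2):
 *
 *	if (minherit(p, len, MAP_INHERIT_ZERO) == -1)
 *		err(1, "minherit");
 */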
719 
720 /*
721  * sys_madvise: give advice about memory usage.
722  */
723 int
724 sys_madvise(struct proc *p, void *v, register_t *retval)
725 {
726 	struct sys_madvise_args /* {
727 		syscallarg(void *) addr;
728 		syscallarg(size_t) len;
729 		syscallarg(int) behav;
730 	} */ *uap = v;
731 	vaddr_t addr;
732 	vsize_t size, pageoff;
733 	int advice, error;
734 
735 	addr = (vaddr_t)SCARG(uap, addr);
736 	size = (vsize_t)SCARG(uap, len);
737 	advice = SCARG(uap, behav);
738 
739 	/*
740 	 * align the address to a page boundary, and adjust the size accordingly
741 	 */
742 	ALIGN_ADDR(addr, size, pageoff);
743 	if (addr > SIZE_MAX - size)
744 		return EINVAL;		/* disallow wrap-around. */
745 
746 	switch (advice) {
747 	case MADV_NORMAL:
748 	case MADV_RANDOM:
749 	case MADV_SEQUENTIAL:
750 		error = uvm_map_advice(&p->p_vmspace->vm_map, addr,
751 		    addr + size, advice);
752 		break;
753 
754 	case MADV_WILLNEED:
755 		/*
756 		 * Activate all these pages, pre-faulting them in if
757 		 * necessary.
758 		 */
759 		/*
760 		 * XXX IMPLEMENT ME.
761 		 * Should invent a "weak" mode for uvm_fault()
762 		 * which would only do the PGO_LOCKED pgo_get().
763 		 */
764 		return 0;
765 
766 	case MADV_DONTNEED:
767 		/*
768 		 * Deactivate all these pages.  We don't need them
769 		 * any more.  We don't, however, toss the data in
770 		 * the pages.
771 		 */
772 		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
773 		    PGO_DEACTIVATE);
774 		break;
775 
776 	case MADV_FREE:
777 		/*
778 		 * These pages contain no valid data, and may be
779 		 * garbage-collected.  Toss all resources, including
780 		 * any swap space in use.
781 		 */
782 		error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
783 		    PGO_FREE);
784 		break;
785 
786 	case MADV_SPACEAVAIL:
787 		/*
788 		 * XXXMRG What is this?  I think it's:
789 		 *
790 		 *	Ensure that we have allocated backing-store
791 		 *	for these pages.
792 		 *
793 		 * This is going to require changes to the page daemon,
794 		 * as it will free swap space allocated to pages in core.
795 		 * There's also what to do for device/file/anonymous memory.
796 		 */
797 		return EINVAL;
798 
799 	default:
800 		return EINVAL;
801 	}
802 
803 	return error;
804 }
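
/*
 * Userland usage sketch (editorial, not part of this file): tell the kernel
 * a scratch buffer's contents are no longer needed so its pages and any
 * swap may be reclaimed (the mapping itself stays valid):
 *
 *	if (madvise(p, len, MADV_FREE) == -1)
 *		err(1, "madvise");
 *
 * MADV_DONTNEED, by contrast, only deactivates the pages and keeps their
 * contents, matching the two cases above.
 */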
805 
806 /*
807  * sys_mlock: memory lock
808  */
809 
810 int
811 sys_mlock(struct proc *p, void *v, register_t *retval)
812 {
813 	struct sys_mlock_args /* {
814 		syscallarg(const void *) addr;
815 		syscallarg(size_t) len;
816 	} */ *uap = v;
817 	vaddr_t addr;
818 	vsize_t size, pageoff;
819 	int error;
820 
821 	/* extract syscall args from uap */
822 	addr = (vaddr_t)SCARG(uap, addr);
823 	size = (vsize_t)SCARG(uap, len);
824 
825 	/* align address to a page boundary and adjust size accordingly */
826 	ALIGN_ADDR(addr, size, pageoff);
827 	if (addr > SIZE_MAX - size)
828 		return EINVAL;		/* disallow wrap-around. */
829 
830 	if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
831 		return EAGAIN;
832 
833 #ifdef pmap_wired_count
834 	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
835 			lim_cur(RLIMIT_MEMLOCK))
836 		return EAGAIN;
837 #else
838 	if ((error = suser(p)) != 0)
839 		return error;
840 #endif
841 
842 	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
843 	    0);
844 	return error == 0 ? 0 : ENOMEM;
845 }
846 
847 /*
848  * sys_munlock: unlock wired pages
849  */
850 
851 int
852 sys_munlock(struct proc *p, void *v, register_t *retval)
853 {
854 	struct sys_munlock_args /* {
855 		syscallarg(const void *) addr;
856 		syscallarg(size_t) len;
857 	} */ *uap = v;
858 	vaddr_t addr;
859 	vsize_t size, pageoff;
860 	int error;
861 
862 	/* extract syscall args from uap */
863 	addr = (vaddr_t)SCARG(uap, addr);
864 	size = (vsize_t)SCARG(uap, len);
865 
866 	/* align address to a page boundary, and adjust size accordingly */
867 	ALIGN_ADDR(addr, size, pageoff);
868 	if (addr > SIZE_MAX - size)
869 		return EINVAL;		/* disallow wrap-around. */
870 
871 #ifndef pmap_wired_count
872 	if ((error = suser(p)) != 0)
873 		return error;
874 #endif
875 
876 	error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
877 	    0);
878 	return error == 0 ? 0 : ENOMEM;
879 }
880 
881 /*
882  * sys_mlockall: lock all pages mapped into an address space.
883  */
884 int
885 sys_mlockall(struct proc *p, void *v, register_t *retval)
886 {
887 	struct sys_mlockall_args /* {
888 		syscallarg(int) flags;
889 	} */ *uap = v;
890 	int error, flags;
891 
892 	flags = SCARG(uap, flags);
893 
894 	if (flags == 0 ||
895 	    (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
896 		return EINVAL;
897 
898 #ifndef pmap_wired_count
899 	if ((error = suser(p)) != 0)
900 		return error;
901 #endif
902 
903 	error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
904 	    lim_cur(RLIMIT_MEMLOCK));
905 	if (error != 0 && error != ENOMEM)
906 		return EAGAIN;
907 	return error;
908 }
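
/*
 * Userland usage sketch (editorial, not part of this file): a
 * latency-sensitive process wiring everything it has mapped plus all
 * future mappings, subject to RLIMIT_MEMLOCK as checked above:
 *
 *	if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
 *		err(1, "mlockall");
 */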
909 
910 /*
911  * sys_munlockall: unlock all pages mapped into an address space.
912  */
913 int
914 sys_munlockall(struct proc *p, void *v, register_t *retval)
915 {
916 
917 	(void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
918 	return 0;
919 }
920 
921 /*
922  * common code for mmapanon and mmapfile to lock a mapping
923  */
924 int
925 uvm_mmaplock(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
926     vsize_t locklimit)
927 {
928 	int error;
929 
930 	/*
931 	 * POSIX 1003.1b -- if our address space was configured
932 	 * to lock all future mappings, wire the one we just made.
933 	 */
934 	if (prot == PROT_NONE) {
935 		/*
936 		 * No more work to do in this case.
937 		 */
938 		return 0;
939 	}
940 
941 	vm_map_lock(map);
942 	if (map->flags & VM_MAP_WIREFUTURE) {
943 		KERNEL_LOCK();
944 		if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
945 #ifdef pmap_wired_count
946 		    || (locklimit != 0 && (size +
947 			 ptoa(pmap_wired_count(vm_map_pmap(map)))) >
948 			locklimit)
949 #endif
950 		) {
951 			error = ENOMEM;
952 			vm_map_unlock(map);
953 			/* unmap the region! */
954 			uvm_unmap(map, *addr, *addr + size);
955 			KERNEL_UNLOCK();
956 			return error;
957 		}
958 		/*
959 		 * uvm_map_pageable() always returns the map
960 		 * unlocked.
961 		 */
962 		error = uvm_map_pageable(map, *addr, *addr + size,
963 		    FALSE, UVM_LK_ENTER);
964 		if (error != 0) {
965 			/* unmap the region! */
966 			uvm_unmap(map, *addr, *addr + size);
967 			KERNEL_UNLOCK();
968 			return error;
969 		}
970 		KERNEL_UNLOCK();
971 		return 0;
972 	}
973 	vm_map_unlock(map);
974 	return 0;
975 }
976 
977 /*
978  * uvm_mmapanon: internal version of mmap for anons
979  *
980  * - used by sys_mmap
981  */
982 int
983 uvm_mmapanon(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
984     vm_prot_t maxprot, int flags, vsize_t locklimit, struct proc *p)
985 {
986 	int error;
987 	int advice = MADV_NORMAL;
988 	unsigned int uvmflag = 0;
989 	vsize_t align = 0;	/* userland page size */
990 
991 	/*
992 	 * for non-fixed mappings, round off the suggested address.
993 	 * for fixed mappings, check alignment and zap old mappings.
994 	 */
995 	if ((flags & MAP_FIXED) == 0) {
996 		*addr = round_page(*addr);	/* round */
997 	} else {
998 		if (*addr & PAGE_MASK)
999 			return EINVAL;
1000 
1001 		uvmflag |= UVM_FLAG_FIXED;
1002 		if ((flags & __MAP_NOREPLACE) == 0)
1003 			uvmflag |= UVM_FLAG_UNMAP;
1004 	}
1005 
1006 	if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
1007 		align = __LDPGSZ;
1008 	if ((flags & MAP_SHARED) == 0)
1009 		/* XXX: defer amap create */
1010 		uvmflag |= UVM_FLAG_COPYONW;
1011 	else
1012 		/* shared: create amap now */
1013 		uvmflag |= UVM_FLAG_OVERLAY;
1014 	if (flags & MAP_STACK)
1015 		uvmflag |= UVM_FLAG_STACK;
1016 	if (flags & MAP_CONCEAL)
1017 		uvmflag |= UVM_FLAG_CONCEAL;
1018 
1019 	/* set up mapping flags */
1020 	uvmflag = UVM_MAPFLAG(prot, maxprot,
1021 	    (flags & MAP_SHARED) ? MAP_INHERIT_SHARE : MAP_INHERIT_COPY,
1022 	    advice, uvmflag);
1023 
1024 	error = uvm_mapanon(map, addr, size, align, uvmflag);
1025 
1026 	if (error == 0)
1027 		error = uvm_mmaplock(map, addr, size, prot, locklimit);
1028 	return error;
1029 }
1030 
1031 /*
1032  * uvm_mmapfile: internal version of mmap for non-anons
1033  *
1034  * - used by sys_mmap
1035  * - caller must page-align the file offset
1036  */
1037 int
1038 uvm_mmapfile(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
1039     vm_prot_t maxprot, int flags, struct vnode *vp, voff_t foff,
1040     vsize_t locklimit, struct proc *p)
1041 {
1042 	struct uvm_object *uobj;
1043 	int error;
1044 	int advice = MADV_NORMAL;
1045 	unsigned int uvmflag = 0;
1046 	vsize_t align = 0;	/* userland page size */
1047 
1048 	/*
1049 	 * for non-fixed mappings, round off the suggested address.
1050 	 * for fixed mappings, check alignment and zap old mappings.
1051 	 */
1052 	if ((flags & MAP_FIXED) == 0) {
1053 		*addr = round_page(*addr);	/* round */
1054 	} else {
1055 		if (*addr & PAGE_MASK)
1056 			return EINVAL;
1057 
1058 		uvmflag |= UVM_FLAG_FIXED;
1059 		if ((flags & __MAP_NOREPLACE) == 0)
1060 			uvmflag |= UVM_FLAG_UNMAP;
1061 	}
1062 
1063 	/*
1064 	 * attach to underlying vm object.
1065 	 */
1066 	if (vp->v_type != VCHR) {
1067 		uobj = uvn_attach(vp, (flags & MAP_SHARED) ?
1068 		   maxprot : (maxprot & ~PROT_WRITE));
1069 
1070 		/*
1071 		 * XXXCDC: hack from old code
1072 		 * don't allow vnodes which have been mapped
1073 		 * shared-writeable to persist [forces them to be
1074 		 * flushed out when last reference goes].
1075 		 * XXXCDC: interesting side effect: avoids a bug.
1076 		 * note that in WRITE [ufs_readwrite.c] that we
1077 		 * allocate buffer, uncache, and then do the write.
1078 		 * the problem with this is that if the uncache causes
1079 		 * VM data to be flushed to the same area of the file
1080 		 * we are writing to... in that case we've got the
1081 		 * buffer locked and our process goes to sleep forever.
1082 		 *
1083 		 * XXXCDC: checking maxprot protects us from the
1084 		 * "persistbug" program but this is not a long term
1085 		 * solution.
1086 		 *
1087 		 * XXXCDC: we don't bother calling uncache with the vp
1088 		 * VOP_LOCKed since we know that we are already
1089 		 * holding a valid reference to the uvn (from the
1090 		 * uvn_attach above), and thus it is impossible for
1091 		 * the uncache to kill the uvn and trigger I/O.
1092 		 */
1093 		if (flags & MAP_SHARED) {
1094 			if ((prot & PROT_WRITE) ||
1095 			    (maxprot & PROT_WRITE)) {
1096 				uvm_vnp_uncache(vp);
1097 			}
1098 		}
1099 	} else {
1100 		uobj = udv_attach(vp->v_rdev,
1101 		    (flags & MAP_SHARED) ? maxprot :
1102 		    (maxprot & ~PROT_WRITE), foff, size);
1103 		/*
1104 		 * XXX Some devices don't like to be mapped with
1105 		 * XXX PROT_EXEC, but we don't really have a
1106 		 * XXX better way of handling this, right now
1107 		 */
1108 		if (uobj == NULL && (prot & PROT_EXEC) == 0) {
1109 			maxprot &= ~PROT_EXEC;
1110 			uobj = udv_attach(vp->v_rdev,
1111 			    (flags & MAP_SHARED) ? maxprot :
1112 			    (maxprot & ~PROT_WRITE), foff, size);
1113 		}
1114 		advice = MADV_RANDOM;
1115 	}
1116 
1117 	if (uobj == NULL)
1118 		return vp->v_type == VREG ? ENOMEM : EINVAL;
1119 
1120 	if ((flags & MAP_SHARED) == 0)
1121 		uvmflag |= UVM_FLAG_COPYONW;
1122 	if (flags & __MAP_NOFAULT)
1123 		uvmflag |= (UVM_FLAG_NOFAULT | UVM_FLAG_OVERLAY);
1124 	if (flags & MAP_STACK)
1125 		uvmflag |= UVM_FLAG_STACK;
1126 	if (flags & MAP_CONCEAL)
1127 		uvmflag |= UVM_FLAG_CONCEAL;
1128 
1129 	/* set up mapping flags */
1130 	uvmflag = UVM_MAPFLAG(prot, maxprot,
1131 	    (flags & MAP_SHARED) ? MAP_INHERIT_SHARE : MAP_INHERIT_COPY,
1132 	    advice, uvmflag);
1133 
1134 	error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
1135 
1136 	if (error == 0)
1137 		return uvm_mmaplock(map, addr, size, prot, locklimit);
1138 
1139 	/* errors: first detach from the uobj, if any.  */
1140 	if (uobj)
1141 		uobj->pgops->pgo_detach(uobj);
1142 
1143 	return error;
1144 }
1145 
1146 int
1147 sys_kbind(struct proc *p, void *v, register_t *retval)
1148 {
1149 	struct sys_kbind_args /* {
1150 		syscallarg(const struct __kbind *) param;
1151 		syscallarg(size_t) psize;
1152 		syscallarg(uint64_t) proc_cookie;
1153 	} */ *uap = v;
1154 	const struct __kbind *paramp;
1155 	union {
1156 		struct __kbind uk[KBIND_BLOCK_MAX];
1157 		char upad[KBIND_BLOCK_MAX * sizeof(*paramp) + KBIND_DATA_MAX];
1158 	} param;
1159 	struct uvm_map_deadq dead_entries;
1160 	struct process *pr = p->p_p;
1161 	const char *data;
1162 	vaddr_t baseva, last_baseva, endva, pageoffset, kva;
1163 	size_t psize, s;
1164 	u_long pc;
1165 	int count, i, extra;
1166 	int error, sigill = 0;
1167 
1168 	/*
1169 	 * extract syscall args from uap
1170 	 */
1171 	paramp = SCARG(uap, param);
1172 	psize = SCARG(uap, psize);
1173 
1174 	/*
1175 	 * If paramp is NULL and we're uninitialized, disable the syscall
1176 	 * for the process.  Raise SIGILL if paramp is NULL and we're
1177 	 * already initialized.
1178 	 *
1179 	 * If paramp is non-NULL and we're uninitialized, do initialization.
1180 	 * Otherwise, do security checks and raise SIGILL on failure.
1181 	 */
1182 	pc = PROC_PC(p);
1183 	mtx_enter(&pr->ps_mtx);
1184 	if (paramp == NULL) {
1185 		/* ld.so disables kbind() when lazy binding is disabled */
1186 		if (pr->ps_kbind_addr == 0)
1187 			pr->ps_kbind_addr = BOGO_PC;
1188 		/* pre-7.3 static binaries disable kbind */
1189 		/* XXX delete check in 2026 */
1190 		else if (pr->ps_kbind_addr != BOGO_PC)
1191 			sigill = 1;
1192 	} else if (pr->ps_kbind_addr == 0) {
1193 		pr->ps_kbind_addr = pc;
1194 		pr->ps_kbind_cookie = SCARG(uap, proc_cookie);
1195 	} else if (pc != pr->ps_kbind_addr || pc == BOGO_PC ||
1196 	    pr->ps_kbind_cookie != SCARG(uap, proc_cookie)) {
1197 		sigill = 1;
1198 	}
1199 	mtx_leave(&pr->ps_mtx);
1200 
1201 	/* Raise SIGILL if something is off. */
1202 	if (sigill) {
1203 		KERNEL_LOCK();
1204 		sigexit(p, SIGILL);
1205 		/* NOTREACHED */
1206 		KERNEL_UNLOCK();
1207 	}
1208 
1209 	/* We're done if we were disabling the syscall. */
1210 	if (paramp == NULL)
1211 		return 0;
1212 
1213 	if (psize < sizeof(struct __kbind) || psize > sizeof(param))
1214 		return EINVAL;
1215 	if ((error = copyin(paramp, &param, psize)))
1216 		return error;
1217 
1218 	/*
1219 	 * The param argument points to an array of __kbind structures
1220 	 * followed by the corresponding new data areas for them.  Verify
1221 	 * that the sizes in the __kbind structures add up to the total
1222 	 * size and find the start of the new area.
1223 	 */
1224 	paramp = &param.uk[0];
1225 	s = psize;
1226 	for (count = 0; s > 0 && count < KBIND_BLOCK_MAX; count++) {
1227 		if (s < sizeof(*paramp))
1228 			return EINVAL;
1229 		s -= sizeof(*paramp);
1230 
1231 		baseva = (vaddr_t)paramp[count].kb_addr;
1232 		endva = baseva + paramp[count].kb_size - 1;
1233 		if (paramp[count].kb_addr == NULL ||
1234 		    paramp[count].kb_size == 0 ||
1235 		    paramp[count].kb_size > KBIND_DATA_MAX ||
1236 		    baseva >= VM_MAXUSER_ADDRESS ||
1237 		    endva >= VM_MAXUSER_ADDRESS ||
1238 		    s < paramp[count].kb_size)
1239 			return EINVAL;
1240 
1241 		s -= paramp[count].kb_size;
1242 	}
1243 	if (s > 0)
1244 		return EINVAL;
1245 	data = (const char *)&paramp[count];
1246 
1247 	/* all looks good, so do the bindings */
1248 	last_baseva = VM_MAXUSER_ADDRESS;
1249 	kva = 0;
1250 	TAILQ_INIT(&dead_entries);
1251 	for (i = 0; i < count; i++) {
1252 		baseva = (vaddr_t)paramp[i].kb_addr;
1253 		s = paramp[i].kb_size;
1254 		pageoffset = baseva & PAGE_MASK;
1255 		baseva = trunc_page(baseva);
1256 
1257 		/* hppa at least runs PLT entries over page edge */
1258 		extra = (pageoffset + s) & PAGE_MASK;
1259 		if (extra > pageoffset)
1260 			extra = 0;
1261 		else
1262 			s -= extra;
1263 redo:
1264 		/* make sure the desired page is mapped into kernel_map */
1265 		if (baseva != last_baseva) {
1266 			if (kva != 0) {
1267 				vm_map_lock(kernel_map);
1268 				uvm_unmap_remove(kernel_map, kva,
1269 				    kva+PAGE_SIZE, &dead_entries,
1270 				    FALSE, TRUE, FALSE);	/* XXX */
1271 				vm_map_unlock(kernel_map);
1272 				kva = 0;
1273 			}
1274 			if ((error = uvm_map_extract(&p->p_vmspace->vm_map,
1275 			    baseva, PAGE_SIZE, &kva, UVM_EXTRACT_FIXPROT)))
1276 				break;
1277 			last_baseva = baseva;
1278 		}
1279 
1280 		/* do the update */
1281 		if ((error = kcopy(data, (char *)kva + pageoffset, s)))
1282 			break;
1283 		data += s;
1284 
1285 		if (extra > 0) {
1286 			baseva += PAGE_SIZE;
1287 			s = extra;
1288 			pageoffset = 0;
1289 			extra = 0;
1290 			goto redo;
1291 		}
1292 	}
1293 
1294 	if (kva != 0) {
1295 		vm_map_lock(kernel_map);
1296 		uvm_unmap_remove(kernel_map, kva, kva+PAGE_SIZE,
1297 		    &dead_entries, FALSE, TRUE, FALSE);		/* XXX */
1298 		vm_map_unlock(kernel_map);
1299 	}
1300 	uvm_unmap_detach(&dead_entries, AMAP_REFALL);
1301 
1302 	return error;
1303 }
1304