xref: /original-bsd/sys/vm/vm_mmap.c (revision 4ba124f7)
1 /*
2  * Copyright (c) 1988 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
13  *
14  *	@(#)vm_mmap.c	8.8 (Berkeley) 06/19/94
15  */
16 
17 /*
18  * Mapped file (mmap) interface to VM
19  */
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/filedesc.h>
24 #include <sys/resourcevar.h>
25 #include <sys/proc.h>
26 #include <sys/vnode.h>
27 #include <sys/file.h>
28 #include <sys/mman.h>
29 #include <sys/conf.h>
30 
31 #include <miscfs/specfs/specdev.h>
32 
33 #include <vm/vm.h>
34 #include <vm/vm_pager.h>
35 #include <vm/vm_prot.h>
36 
#ifdef DEBUG
/*
 * Debug tracing control for this file.  mmapdebug is a bit mask of the
 * MDB_* flags below; each bit that is set enables the printf() calls
 * guarded by that flag.  It starts out zero (no tracing).
 */
int mmapdebug = 0;
#define MDB_FOLLOW	0x01	/* trace entry to the mapping system calls */
#define MDB_SYNC	0x02	/* trace msync() and the range it cleans */
#define MDB_MAPIT	0x04	/* trace mappings made by vm_mmap() */
#endif
43 
/*
 * Argument block handed to sbrk().
 */
struct sbrk_args {
	int	incr;
};

/*
 * sbrk system call.
 *
 * Placeholder: none of the arguments are examined and the call always
 * fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{
	/* Nothing is implemented yet, so the arguments go unused. */
	(void)p;
	(void)uap;
	(void)retval;

	return (EOPNOTSUPP);
}
58 
/*
 * Argument block handed to sstk().
 */
struct sstk_args {
	int	incr;
};

/*
 * sstk system call.
 *
 * Placeholder: none of the arguments are examined and the call always
 * fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{
	/* Nothing is implemented yet, so the arguments go unused. */
	(void)p;
	(void)uap;
	(void)retval;

	return (EOPNOTSUPP);
}
73 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Argument block for ogetpagesize(); the call takes no real arguments.
 */
struct getpagesize_args {
	int	dummy;
};

/*
 * Compatibility getpagesize system call.
 *
 * Stores PAGE_SIZE in *retval and always succeeds.  Only built when
 * COMPAT_43 or COMPAT_SUNOS is defined.
 */
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{
	int pagesize;

	pagesize = PAGE_SIZE;
	*retval = pagesize;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
90 
/*
 * Argument block for mmap().  ommap() also fills one of these in
 * before calling mmap().
 */
struct mmap_args {
	caddr_t	addr;	/* requested address; must be page aligned if MAP_FIXED */
	size_t	len;	/* length of the mapping; rounded up to a page by mmap() */
	int	prot;	/* requested protection; masked with VM_PROT_ALL */
	int	flags;	/* MAP_* flags */
	int	fd;	/* descriptor to map; must be -1 with MAP_ANON */
	long	pad;	/* not read anywhere in this file */
	off_t	pos;	/* offset into the object being mapped */
};
100 
#ifdef COMPAT_43
/*
 * Argument block for the old-style mmap call (COMPAT_43 only).
 */
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};

/*
 * Old-style mmap system call.
 *
 * Converts the old protection and flag encodings to the current PROT_*
 * and MAP_* values, repackages the request as a struct mmap_args and
 * lets mmap() do the real work.  The return value is mmap()'s.
 */
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	/* Indexed by the low three bits of the old protection value. */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800
	struct mmap_args nargs;
	int oflags, nflags;

	oflags = uap->flags;

	/* A mapping that is not marked shared is treated as private. */
	nflags = (oflags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
	if (oflags & OMAP_ANON)
		nflags |= MAP_ANON;
	if (oflags & OMAP_COPY)
		nflags |= MAP_COPY;
	if (oflags & OMAP_FIXED)
		nflags |= MAP_FIXED;
	if (oflags & OMAP_INHERIT)
		nflags |= MAP_INHERIT;

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot&0x7];
	nargs.flags = nflags;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;

	return (mmap(p, &nargs, retval));
}
#endif
154 
155 int
156 mmap(p, uap, retval)
157 	struct proc *p;
158 	register struct mmap_args *uap;
159 	int *retval;
160 {
161 	register struct filedesc *fdp = p->p_fd;
162 	register struct file *fp;
163 	struct vnode *vp;
164 	vm_offset_t addr, pos;
165 	vm_size_t size;
166 	vm_prot_t prot, maxprot;
167 	caddr_t handle;
168 	int flags, error;
169 
170 	prot = uap->prot & VM_PROT_ALL;
171 	flags = uap->flags;
172 	pos = uap->pos;
173 #ifdef DEBUG
174 	if (mmapdebug & MDB_FOLLOW)
175 		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
176 		       p->p_pid, uap->addr, uap->len, prot,
177 		       flags, uap->fd, pos);
178 #endif
179 	/*
180 	 * Address (if FIXED) must be page aligned.
181 	 * Size is implicitly rounded to a page boundary.
182 	 *
183 	 * XXX most (all?) vendors require that the file offset be
184 	 * page aligned as well.  However, we already have applications
185 	 * (e.g. nlist) that rely on unrestricted alignment.  Since we
186 	 * support it, let it happen.
187 	 */
188 	addr = (vm_offset_t) uap->addr;
189 	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
190 #if 0
191 	    ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
192 #endif
193 	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
194 		return (EINVAL);
195 	size = (vm_size_t) round_page(uap->len);
196 	/*
197 	 * Check for illegal addresses.  Watch out for address wrap...
198 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
199 	 */
200 	if (flags & MAP_FIXED) {
201 		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
202 			return (EINVAL);
203 		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
204 			return (EINVAL);
205 		if (addr > addr + size)
206 			return (EINVAL);
207 	}
208 	/*
209 	 * XXX for non-fixed mappings where no hint is provided or
210 	 * the hint would fall in the potential heap space,
211 	 * place it after the end of the largest possible heap.
212 	 *
213 	 * There should really be a pmap call to determine a reasonable
214 	 * location.
215 	 */
216 	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
217 		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
218 	if (flags & MAP_ANON) {
219 		/*
220 		 * Mapping blank space is trivial.
221 		 */
222 		handle = NULL;
223 		maxprot = VM_PROT_ALL;
224 		pos = 0;
225 	} else {
226 		/*
227 		 * Mapping file, get fp for validation.
228 		 * Obtain vnode and make sure it is of appropriate type.
229 		 */
230 		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
231 		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
232 			return (EBADF);
233 		if (fp->f_type != DTYPE_VNODE)
234 			return (EINVAL);
235 		vp = (struct vnode *)fp->f_data;
236 		if (vp->v_type != VREG && vp->v_type != VCHR)
237 			return (EINVAL);
238 		/*
239 		 * XXX hack to handle use of /dev/zero to map anon
240 		 * memory (ala SunOS).
241 		 */
242 		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
243 			handle = NULL;
244 			maxprot = VM_PROT_ALL;
245 			flags |= MAP_ANON;
246 		} else {
247 			/*
248 			 * Ensure that file and memory protections are
249 			 * compatible.  Note that we only worry about
250 			 * writability if mapping is shared; in this case,
251 			 * current and max prot are dictated by the open file.
252 			 * XXX use the vnode instead?  Problem is: what
253 			 * credentials do we use for determination?
254 			 * What if proc does a setuid?
255 			 */
256 			maxprot = VM_PROT_EXECUTE;	/* ??? */
257 			if (fp->f_flag & FREAD)
258 				maxprot |= VM_PROT_READ;
259 			else if (prot & PROT_READ)
260 				return (EACCES);
261 			if (flags & MAP_SHARED) {
262 				if (fp->f_flag & FWRITE)
263 					maxprot |= VM_PROT_WRITE;
264 				else if (prot & PROT_WRITE)
265 					return (EACCES);
266 			} else
267 				maxprot |= VM_PROT_WRITE;
268 			handle = (caddr_t)vp;
269 		}
270 	}
271 	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
272 	    flags, handle, pos);
273 	if (error == 0)
274 		*retval = (int)addr;
275 	return (error);
276 }
277 
278 struct msync_args {
279 	caddr_t	addr;
280 	int	len;
281 };
282 int
283 msync(p, uap, retval)
284 	struct proc *p;
285 	struct msync_args *uap;
286 	int *retval;
287 {
288 	vm_offset_t addr;
289 	vm_size_t size;
290 	vm_map_t map;
291 	int rv;
292 	boolean_t syncio, invalidate;
293 
294 #ifdef DEBUG
295 	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
296 		printf("msync(%d): addr %x len %x\n",
297 		       p->p_pid, uap->addr, uap->len);
298 #endif
299 	if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
300 		return (EINVAL);
301 	map = &p->p_vmspace->vm_map;
302 	addr = (vm_offset_t)uap->addr;
303 	size = (vm_size_t)uap->len;
304 	/*
305 	 * XXX Gak!  If size is zero we are supposed to sync "all modified
306 	 * pages with the region containing addr".  Unfortunately, we
307 	 * don't really keep track of individual mmaps so we approximate
308 	 * by flushing the range of the map entry containing addr.
309 	 * This can be incorrect if the region splits or is coalesced
310 	 * with a neighbor.
311 	 */
312 	if (size == 0) {
313 		vm_map_entry_t entry;
314 
315 		vm_map_lock_read(map);
316 		rv = vm_map_lookup_entry(map, addr, &entry);
317 		vm_map_unlock_read(map);
318 		if (!rv)
319 			return (EINVAL);
320 		addr = entry->start;
321 		size = entry->end - entry->start;
322 	}
323 #ifdef DEBUG
324 	if (mmapdebug & MDB_SYNC)
325 		printf("msync: cleaning/flushing address range [%x-%x)\n",
326 		       addr, addr+size);
327 #endif
328 	/*
329 	 * Could pass this in as a third flag argument to implement
330 	 * Sun's MS_ASYNC.
331 	 */
332 	syncio = TRUE;
333 	/*
334 	 * XXX bummer, gotta flush all cached pages to ensure
335 	 * consistency with the file system cache.  Otherwise, we could
336 	 * pass this in to implement Sun's MS_INVALIDATE.
337 	 */
338 	invalidate = TRUE;
339 	/*
340 	 * Clean the pages and interpret the return value.
341 	 */
342 	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
343 	switch (rv) {
344 	case KERN_SUCCESS:
345 		break;
346 	case KERN_INVALID_ADDRESS:
347 		return (EINVAL);	/* Sun returns ENOMEM? */
348 	case KERN_FAILURE:
349 		return (EIO);
350 	default:
351 		return (EINVAL);
352 	}
353 	return (0);
354 }
355 
356 struct munmap_args {
357 	caddr_t	addr;
358 	int	len;
359 };
360 int
361 munmap(p, uap, retval)
362 	register struct proc *p;
363 	register struct munmap_args *uap;
364 	int *retval;
365 {
366 	vm_offset_t addr;
367 	vm_size_t size;
368 	vm_map_t map;
369 
370 #ifdef DEBUG
371 	if (mmapdebug & MDB_FOLLOW)
372 		printf("munmap(%d): addr %x len %x\n",
373 		       p->p_pid, uap->addr, uap->len);
374 #endif
375 
376 	addr = (vm_offset_t) uap->addr;
377 	if ((addr & PAGE_MASK) || uap->len < 0)
378 		return(EINVAL);
379 	size = (vm_size_t) round_page(uap->len);
380 	if (size == 0)
381 		return(0);
382 	/*
383 	 * Check for illegal addresses.  Watch out for address wrap...
384 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
385 	 */
386 	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
387 		return (EINVAL);
388 	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
389 		return (EINVAL);
390 	if (addr > addr + size)
391 		return (EINVAL);
392 	map = &p->p_vmspace->vm_map;
393 	/*
394 	 * Make sure entire range is allocated.
395 	 * XXX this seemed overly restrictive, so we relaxed it.
396 	 */
397 #if 0
398 	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
399 		return(EINVAL);
400 #endif
401 	/* returns nothing but KERN_SUCCESS anyway */
402 	(void) vm_map_remove(map, addr, addr+size);
403 	return(0);
404 }
405 
406 void
407 munmapfd(fd)
408 	int fd;
409 {
410 #ifdef DEBUG
411 	if (mmapdebug & MDB_FOLLOW)
412 		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
413 #endif
414 
415 	/*
416 	 * XXX should vm_deallocate any regions mapped to this file
417 	 */
418 	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
419 }
420 
421 struct mprotect_args {
422 	caddr_t	addr;
423 	int	len;
424 	int	prot;
425 };
426 int
427 mprotect(p, uap, retval)
428 	struct proc *p;
429 	struct mprotect_args *uap;
430 	int *retval;
431 {
432 	vm_offset_t addr;
433 	vm_size_t size;
434 	register vm_prot_t prot;
435 
436 #ifdef DEBUG
437 	if (mmapdebug & MDB_FOLLOW)
438 		printf("mprotect(%d): addr %x len %x prot %d\n",
439 		       p->p_pid, uap->addr, uap->len, uap->prot);
440 #endif
441 
442 	addr = (vm_offset_t)uap->addr;
443 	if ((addr & PAGE_MASK) || uap->len < 0)
444 		return(EINVAL);
445 	size = (vm_size_t)uap->len;
446 	prot = uap->prot & VM_PROT_ALL;
447 
448 	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
449 	    FALSE)) {
450 	case KERN_SUCCESS:
451 		return (0);
452 	case KERN_PROTECTION_FAILURE:
453 		return (EACCES);
454 	}
455 	return (EINVAL);
456 }
457 
458 struct madvise_args {
459 	caddr_t	addr;
460 	int	len;
461 	int	behav;
462 };
463 /* ARGSUSED */
464 int
465 madvise(p, uap, retval)
466 	struct proc *p;
467 	struct madvise_args *uap;
468 	int *retval;
469 {
470 
471 	/* Not yet implemented */
472 	return (EOPNOTSUPP);
473 }
474 
475 struct mincore_args {
476 	caddr_t	addr;
477 	int	len;
478 	char	*vec;
479 };
480 /* ARGSUSED */
481 int
482 mincore(p, uap, retval)
483 	struct proc *p;
484 	struct mincore_args *uap;
485 	int *retval;
486 {
487 
488 	/* Not yet implemented */
489 	return (EOPNOTSUPP);
490 }
491 
492 struct mlock_args {
493 	caddr_t	addr;
494 	size_t	len;
495 };
496 int
497 mlock(p, uap, retval)
498 	struct proc *p;
499 	struct mlock_args *uap;
500 	int *retval;
501 {
502 	vm_offset_t addr;
503 	vm_size_t size;
504 	int error;
505 	extern int vm_page_max_wired;
506 
507 #ifdef DEBUG
508 	if (mmapdebug & MDB_FOLLOW)
509 		printf("mlock(%d): addr %x len %x\n",
510 		       p->p_pid, uap->addr, uap->len);
511 #endif
512 	addr = (vm_offset_t)uap->addr;
513 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
514 		return (EINVAL);
515 	size = round_page((vm_size_t)uap->len);
516 	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
517 		return (EAGAIN);
518 #ifdef pmap_wired_count
519 	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
520 	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
521 		return (EAGAIN);
522 #else
523 	if (error = suser(p->p_ucred, &p->p_acflag))
524 		return (error);
525 #endif
526 
527 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
528 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
529 }
530 
531 struct munlock_args {
532 	caddr_t	addr;
533 	size_t	len;
534 };
535 int
536 munlock(p, uap, retval)
537 	struct proc *p;
538 	struct munlock_args *uap;
539 	int *retval;
540 {
541 	vm_offset_t addr;
542 	vm_size_t size;
543 	int error;
544 
545 #ifdef DEBUG
546 	if (mmapdebug & MDB_FOLLOW)
547 		printf("munlock(%d): addr %x len %x\n",
548 		       p->p_pid, uap->addr, uap->len);
549 #endif
550 	addr = (vm_offset_t)uap->addr;
551 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
552 		return (EINVAL);
553 #ifndef pmap_wired_count
554 	if (error = suser(p->p_ucred, &p->p_acflag))
555 		return (error);
556 #endif
557 	size = round_page((vm_size_t)uap->len);
558 
559 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
560 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
561 }
562 
563 /*
564  * Internal version of mmap.
565  * Currently used by mmap, exec, and sys5 shared memory.
566  * Handle is either a vnode pointer or NULL for MAP_ANON.
567  */
568 int
569 vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
570 	register vm_map_t map;
571 	register vm_offset_t *addr;
572 	register vm_size_t size;
573 	vm_prot_t prot, maxprot;
574 	register int flags;
575 	caddr_t handle;		/* XXX should be vp */
576 	vm_offset_t foff;
577 {
578 	register vm_pager_t pager;
579 	boolean_t fitit;
580 	vm_object_t object;
581 	struct vnode *vp = NULL;
582 	int type;
583 	int rv = KERN_SUCCESS;
584 
585 	if (size == 0)
586 		return (0);
587 
588 	if ((flags & MAP_FIXED) == 0) {
589 		fitit = TRUE;
590 		*addr = round_page(*addr);
591 	} else {
592 		fitit = FALSE;
593 		(void)vm_deallocate(map, *addr, size);
594 	}
595 
596 	/*
597 	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
598 	 * gain a reference to ensure continued existance of the object.
599 	 * (XXX the exception is to appease the pageout daemon)
600 	 */
601 	if (flags & MAP_ANON)
602 		type = PG_DFLT;
603 	else {
604 		vp = (struct vnode *)handle;
605 		if (vp->v_type == VCHR) {
606 			type = PG_DEVICE;
607 			handle = (caddr_t)vp->v_rdev;
608 		} else
609 			type = PG_VNODE;
610 	}
611 	pager = vm_pager_allocate(type, handle, size, prot, foff);
612 	if (pager == NULL)
613 		return (type == PG_DEVICE ? EINVAL : ENOMEM);
614 	/*
615 	 * Find object and release extra reference gained by lookup
616 	 */
617 	object = vm_object_lookup(pager);
618 	vm_object_deallocate(object);
619 
620 	/*
621 	 * Anonymous memory.
622 	 */
623 	if (flags & MAP_ANON) {
624 		rv = vm_allocate_with_pager(map, addr, size, fitit,
625 					    pager, foff, TRUE);
626 		if (rv != KERN_SUCCESS) {
627 			if (handle == NULL)
628 				vm_pager_deallocate(pager);
629 			else
630 				vm_object_deallocate(object);
631 			goto out;
632 		}
633 		/*
634 		 * Don't cache anonymous objects.
635 		 * Loses the reference gained by vm_pager_allocate.
636 		 * Note that object will be NULL when handle == NULL,
637 		 * this is ok since vm_allocate_with_pager has made
638 		 * sure that these objects are uncached.
639 		 */
640 		(void) pager_cache(object, FALSE);
641 #ifdef DEBUG
642 		if (mmapdebug & MDB_MAPIT)
643 			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
644 			       curproc->p_pid, *addr, size, pager);
645 #endif
646 	}
647 	/*
648 	 * Must be a mapped file.
649 	 * Distinguish between character special and regular files.
650 	 */
651 	else if (vp->v_type == VCHR) {
652 		rv = vm_allocate_with_pager(map, addr, size, fitit,
653 					    pager, foff, FALSE);
654 		/*
655 		 * Uncache the object and lose the reference gained
656 		 * by vm_pager_allocate().  If the call to
657 		 * vm_allocate_with_pager() was sucessful, then we
658 		 * gained an additional reference ensuring the object
659 		 * will continue to exist.  If the call failed then
660 		 * the deallocate call below will terminate the
661 		 * object which is fine.
662 		 */
663 		(void) pager_cache(object, FALSE);
664 		if (rv != KERN_SUCCESS)
665 			goto out;
666 	}
667 	/*
668 	 * A regular file
669 	 */
670 	else {
671 #ifdef DEBUG
672 		if (object == NULL)
673 			printf("vm_mmap: no object: vp %x, pager %x\n",
674 			       vp, pager);
675 #endif
676 		/*
677 		 * Map it directly.
678 		 * Allows modifications to go out to the vnode.
679 		 */
680 		if (flags & MAP_SHARED) {
681 			rv = vm_allocate_with_pager(map, addr, size,
682 						    fitit, pager,
683 						    foff, FALSE);
684 			if (rv != KERN_SUCCESS) {
685 				vm_object_deallocate(object);
686 				goto out;
687 			}
688 			/*
689 			 * Don't cache the object.  This is the easiest way
690 			 * of ensuring that data gets back to the filesystem
691 			 * because vnode_pager_deallocate() will fsync the
692 			 * vnode.  pager_cache() will lose the extra ref.
693 			 */
694 			if (prot & VM_PROT_WRITE)
695 				pager_cache(object, FALSE);
696 			else
697 				vm_object_deallocate(object);
698 		}
699 		/*
700 		 * Copy-on-write of file.  Two flavors.
701 		 * MAP_COPY is true COW, you essentially get a snapshot of
702 		 * the region at the time of mapping.  MAP_PRIVATE means only
703 		 * that your changes are not reflected back to the object.
704 		 * Changes made by others will be seen.
705 		 */
706 		else {
707 			vm_map_t tmap;
708 			vm_offset_t off;
709 
710 			/* locate and allocate the target address space */
711 			rv = vm_map_find(map, NULL, (vm_offset_t)0,
712 					 addr, size, fitit);
713 			if (rv != KERN_SUCCESS) {
714 				vm_object_deallocate(object);
715 				goto out;
716 			}
717 			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
718 					     VM_MIN_ADDRESS+size, TRUE);
719 			off = VM_MIN_ADDRESS;
720 			rv = vm_allocate_with_pager(tmap, &off, size,
721 						    TRUE, pager,
722 						    foff, FALSE);
723 			if (rv != KERN_SUCCESS) {
724 				vm_object_deallocate(object);
725 				vm_map_deallocate(tmap);
726 				goto out;
727 			}
728 			/*
729 			 * (XXX)
730 			 * MAP_PRIVATE implies that we see changes made by
731 			 * others.  To ensure that we need to guarentee that
732 			 * no copy object is created (otherwise original
733 			 * pages would be pushed to the copy object and we
734 			 * would never see changes made by others).  We
735 			 * totally sleeze it right now by marking the object
736 			 * internal temporarily.
737 			 */
738 			if ((flags & MAP_COPY) == 0)
739 				object->flags |= OBJ_INTERNAL;
740 			rv = vm_map_copy(map, tmap, *addr, size, off,
741 					 FALSE, FALSE);
742 			object->flags &= ~OBJ_INTERNAL;
743 			/*
744 			 * (XXX)
745 			 * My oh my, this only gets worse...
746 			 * Force creation of a shadow object so that
747 			 * vm_map_fork will do the right thing.
748 			 */
749 			if ((flags & MAP_COPY) == 0) {
750 				vm_map_t tmap;
751 				vm_map_entry_t tentry;
752 				vm_object_t tobject;
753 				vm_offset_t toffset;
754 				vm_prot_t tprot;
755 				boolean_t twired, tsu;
756 
757 				tmap = map;
758 				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
759 					      &tentry, &tobject, &toffset,
760 					      &tprot, &twired, &tsu);
761 				vm_map_lookup_done(tmap, tentry);
762 			}
763 			/*
764 			 * (XXX)
765 			 * Map copy code cannot detect sharing unless a
766 			 * sharing map is involved.  So we cheat and write
767 			 * protect everything ourselves.
768 			 */
769 			vm_object_pmap_copy(object, foff, foff + size);
770 			vm_object_deallocate(object);
771 			vm_map_deallocate(tmap);
772 			if (rv != KERN_SUCCESS)
773 				goto out;
774 		}
775 #ifdef DEBUG
776 		if (mmapdebug & MDB_MAPIT)
777 			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
778 			       curproc->p_pid, *addr, size, pager);
779 #endif
780 	}
781 	/*
782 	 * Correct protection (default is VM_PROT_ALL).
783 	 * If maxprot is different than prot, we must set both explicitly.
784 	 */
785 	rv = KERN_SUCCESS;
786 	if (maxprot != VM_PROT_ALL)
787 		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
788 	if (rv == KERN_SUCCESS && prot != maxprot)
789 		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
790 	if (rv != KERN_SUCCESS) {
791 		(void) vm_deallocate(map, *addr, size);
792 		goto out;
793 	}
794 	/*
795 	 * Shared memory is also shared with children.
796 	 */
797 	if (flags & MAP_SHARED) {
798 		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
799 		if (rv != KERN_SUCCESS) {
800 			(void) vm_deallocate(map, *addr, size);
801 			goto out;
802 		}
803 	}
804 out:
805 #ifdef DEBUG
806 	if (mmapdebug & MDB_MAPIT)
807 		printf("vm_mmap: rv %d\n", rv);
808 #endif
809 	switch (rv) {
810 	case KERN_SUCCESS:
811 		return (0);
812 	case KERN_INVALID_ADDRESS:
813 	case KERN_NO_SPACE:
814 		return (ENOMEM);
815 	case KERN_PROTECTION_FAILURE:
816 		return (EACCES);
817 	default:
818 		return (EINVAL);
819 	}
820 }
821