xref: /original-bsd/sys/vm/vm_mmap.c (revision 0997b878)
1 /*
2  * Copyright (c) 1988 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
13  *
14  *	@(#)vm_mmap.c	8.9 (Berkeley) 01/09/95
15  */
16 
17 /*
18  * Mapped file (mmap) interface to VM
19  */
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/filedesc.h>
24 #include <sys/resourcevar.h>
25 #include <sys/proc.h>
26 #include <sys/vnode.h>
27 #include <sys/file.h>
28 #include <sys/mman.h>
29 #include <sys/conf.h>
30 
31 #include <miscfs/specfs/specdev.h>
32 
33 #include <vm/vm.h>
34 #include <vm/vm_pager.h>
35 #include <vm/vm_prot.h>
36 
#ifdef DEBUG
/*
 * Debug tracing control for this file (DEBUG kernels only).
 * mmapdebug is a bit mask; each routine below tests it against the
 * MDB_* bits before printing a trace line.
 */
int mmapdebug = 0;
#define MDB_FOLLOW	0x01	/* trace entry to the mapping system calls */
#define MDB_SYNC	0x02	/* trace msync entry and the range it cleans */
#define MDB_MAPIT	0x04	/* trace mappings and result code in vm_mmap */
#endif
43 
/*
 * Argument block for the sbrk system call.
 */
struct sbrk_args {
	int	incr;
};

/*
 * sbrk system call: placeholder only.
 * None of the arguments are examined; the call always fails with
 * EOPNOTSUPP and leaves *retval untouched.
 */
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{
	/* Not yet implemented */
	return EOPNOTSUPP;
}
58 
/*
 * Argument block for the sstk system call.
 */
struct sstk_args {
	int	incr;
};

/*
 * sstk system call: placeholder only.
 * None of the arguments are examined; the call always fails with
 * EOPNOTSUPP and leaves *retval untouched.
 */
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{
	/* Not yet implemented */
	return EOPNOTSUPP;
}
73 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Argument block for the old getpagesize system call (no real arguments).
 */
struct getpagesize_args {
	int	dummy;
};

/*
 * Compatibility getpagesize system call.
 * Stores PAGE_SIZE as the call's result and always succeeds.
 */
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{
	int pgsize = PAGE_SIZE;

	*retval = pgsize;
	return 0;
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
90 
/*
 * Argument block for the mmap system call, as consumed by mmap() below.
 */
struct mmap_args {
	caddr_t	addr;	/* requested address; must be page aligned if MAP_FIXED */
	size_t	len;	/* length in bytes; rounded up to a page multiple */
	int	prot;	/* requested protection; masked with VM_PROT_ALL */
	int	flags;	/* MAP_* flags */
	int	fd;	/* descriptor to map; must be -1 with MAP_ANON */
	long	pad;	/* not read in this file -- presumably aligns pos; confirm */
	off_t	pos;	/* offset into the file */
};
100 
#ifdef COMPAT_43
/*
 * Argument block for the old (4.3BSD compatible) mmap system call.
 */
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};

/*
 * Old mmap system call.
 * Translates the old protection encoding and OMAP_* flag bits into
 * their current PROT_* / MAP_* equivalents, then hands the converted
 * argument block to mmap().  The result is whatever mmap() returns.
 */
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	/*
	 * Old protection value (low three bits) -> new protection:
	 * bit 0 selects PROT_EXEC, bit 1 PROT_WRITE, bit 2 PROT_READ.
	 */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800
	struct mmap_args nargs;
	int oflags, nflags;

	oflags = uap->flags;

	/* A mapping that is not explicitly shared is private. */
	nflags = (oflags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
	if (oflags & OMAP_ANON)
		nflags |= MAP_ANON;
	if (oflags & OMAP_COPY)
		nflags |= MAP_COPY;
	if (oflags & OMAP_FIXED)
		nflags |= MAP_FIXED;
	if (oflags & OMAP_INHERIT)
		nflags |= MAP_INHERIT;

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot&0x7];
	nargs.flags = nflags;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif
154 
155 int
156 mmap(p, uap, retval)
157 	struct proc *p;
158 	register struct mmap_args *uap;
159 	int *retval;
160 {
161 	register struct filedesc *fdp = p->p_fd;
162 	register struct file *fp;
163 	struct vnode *vp;
164 	vm_offset_t addr, pos;
165 	vm_size_t size;
166 	vm_prot_t prot, maxprot;
167 	caddr_t handle;
168 	int flags, error;
169 
170 	prot = uap->prot & VM_PROT_ALL;
171 	flags = uap->flags;
172 	pos = uap->pos;
173 #ifdef DEBUG
174 	if (mmapdebug & MDB_FOLLOW)
175 		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
176 		       p->p_pid, uap->addr, uap->len, prot,
177 		       flags, uap->fd, pos);
178 #endif
179 	/*
180 	 * Address (if FIXED) must be page aligned.
181 	 * Size is implicitly rounded to a page boundary.
182 	 *
183 	 * XXX most (all?) vendors require that the file offset be
184 	 * page aligned as well.  However, we already have applications
185 	 * (e.g. nlist) that rely on unrestricted alignment.  Since we
186 	 * support it, let it happen.
187 	 */
188 	addr = (vm_offset_t) uap->addr;
189 	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
190 #if 0
191 	    ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
192 #endif
193 	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
194 		return (EINVAL);
195 	size = (vm_size_t) round_page(uap->len);
196 	/*
197 	 * Check for illegal addresses.  Watch out for address wrap...
198 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
199 	 */
200 	if (flags & MAP_FIXED) {
201 		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
202 			return (EINVAL);
203 		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
204 			return (EINVAL);
205 		if (addr > addr + size)
206 			return (EINVAL);
207 	}
208 	/*
209 	 * XXX for non-fixed mappings where no hint is provided or
210 	 * the hint would fall in the potential heap space,
211 	 * place it after the end of the largest possible heap.
212 	 *
213 	 * There should really be a pmap call to determine a reasonable
214 	 * location.
215 	 */
216 	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
217 		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
218 	if (flags & MAP_ANON) {
219 		/*
220 		 * Mapping blank space is trivial.
221 		 */
222 		handle = NULL;
223 		maxprot = VM_PROT_ALL;
224 		pos = 0;
225 	} else {
226 		/*
227 		 * Mapping file, get fp for validation.
228 		 * Obtain vnode and make sure it is of appropriate type.
229 		 */
230 		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
231 		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
232 			return (EBADF);
233 		if (fp->f_type != DTYPE_VNODE)
234 			return (EINVAL);
235 		vp = (struct vnode *)fp->f_data;
236 		if (vp->v_type != VREG && vp->v_type != VCHR)
237 			return (EINVAL);
238 		/*
239 		 * XXX hack to handle use of /dev/zero to map anon
240 		 * memory (ala SunOS).
241 		 */
242 		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
243 			handle = NULL;
244 			maxprot = VM_PROT_ALL;
245 			flags |= MAP_ANON;
246 		} else {
247 			/*
248 			 * Ensure that file and memory protections are
249 			 * compatible.  Note that we only worry about
250 			 * writability if mapping is shared; in this case,
251 			 * current and max prot are dictated by the open file.
252 			 * XXX use the vnode instead?  Problem is: what
253 			 * credentials do we use for determination?
254 			 * What if proc does a setuid?
255 			 */
256 			maxprot = VM_PROT_EXECUTE;	/* ??? */
257 			if (fp->f_flag & FREAD)
258 				maxprot |= VM_PROT_READ;
259 			else if (prot & PROT_READ)
260 				return (EACCES);
261 			if (flags & MAP_SHARED) {
262 				if (fp->f_flag & FWRITE)
263 					maxprot |= VM_PROT_WRITE;
264 				else if (prot & PROT_WRITE)
265 					return (EACCES);
266 			} else
267 				maxprot |= VM_PROT_WRITE;
268 			handle = (caddr_t)vp;
269 		}
270 	}
271 	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
272 	    flags, handle, pos);
273 	if (error == 0)
274 		*retval = (int)addr;
275 	return (error);
276 }
277 
278 struct msync_args {
279 	caddr_t	addr;
280 	int	len;
281 };
282 int
283 msync(p, uap, retval)
284 	struct proc *p;
285 	struct msync_args *uap;
286 	int *retval;
287 {
288 	vm_offset_t addr;
289 	vm_size_t size;
290 	vm_map_t map;
291 	int rv;
292 	boolean_t syncio, invalidate;
293 
294 #ifdef DEBUG
295 	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
296 		printf("msync(%d): addr %x len %x\n",
297 		       p->p_pid, uap->addr, uap->len);
298 #endif
299 	if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
300 		return (EINVAL);
301 	map = &p->p_vmspace->vm_map;
302 	addr = (vm_offset_t)uap->addr;
303 	size = (vm_size_t)uap->len;
304 	/*
305 	 * XXX Gak!  If size is zero we are supposed to sync "all modified
306 	 * pages with the region containing addr".  Unfortunately, we
307 	 * don't really keep track of individual mmaps so we approximate
308 	 * by flushing the range of the map entry containing addr.
309 	 * This can be incorrect if the region splits or is coalesced
310 	 * with a neighbor.
311 	 */
312 	if (size == 0) {
313 		vm_map_entry_t entry;
314 
315 		vm_map_lock_read(map);
316 		rv = vm_map_lookup_entry(map, addr, &entry);
317 		vm_map_unlock_read(map);
318 		if (!rv)
319 			return (EINVAL);
320 		addr = entry->start;
321 		size = entry->end - entry->start;
322 	}
323 #ifdef DEBUG
324 	if (mmapdebug & MDB_SYNC)
325 		printf("msync: cleaning/flushing address range [%x-%x)\n",
326 		       addr, addr+size);
327 #endif
328 	/*
329 	 * Could pass this in as a third flag argument to implement
330 	 * Sun's MS_ASYNC.
331 	 */
332 	syncio = TRUE;
333 	/*
334 	 * XXX bummer, gotta flush all cached pages to ensure
335 	 * consistency with the file system cache.  Otherwise, we could
336 	 * pass this in to implement Sun's MS_INVALIDATE.
337 	 */
338 	invalidate = TRUE;
339 	/*
340 	 * Clean the pages and interpret the return value.
341 	 */
342 	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
343 	switch (rv) {
344 	case KERN_SUCCESS:
345 		break;
346 	case KERN_INVALID_ADDRESS:
347 		return (EINVAL);	/* Sun returns ENOMEM? */
348 	case KERN_FAILURE:
349 		return (EIO);
350 	default:
351 		return (EINVAL);
352 	}
353 	return (0);
354 }
355 
356 struct munmap_args {
357 	caddr_t	addr;
358 	int	len;
359 };
360 int
361 munmap(p, uap, retval)
362 	register struct proc *p;
363 	register struct munmap_args *uap;
364 	int *retval;
365 {
366 	vm_offset_t addr;
367 	vm_size_t size;
368 	vm_map_t map;
369 
370 #ifdef DEBUG
371 	if (mmapdebug & MDB_FOLLOW)
372 		printf("munmap(%d): addr %x len %x\n",
373 		       p->p_pid, uap->addr, uap->len);
374 #endif
375 
376 	addr = (vm_offset_t) uap->addr;
377 	if ((addr & PAGE_MASK) || uap->len < 0)
378 		return(EINVAL);
379 	size = (vm_size_t) round_page(uap->len);
380 	if (size == 0)
381 		return(0);
382 	/*
383 	 * Check for illegal addresses.  Watch out for address wrap...
384 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
385 	 */
386 	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
387 		return (EINVAL);
388 	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
389 		return (EINVAL);
390 	if (addr > addr + size)
391 		return (EINVAL);
392 	map = &p->p_vmspace->vm_map;
393 	/*
394 	 * Make sure entire range is allocated.
395 	 * XXX this seemed overly restrictive, so we relaxed it.
396 	 */
397 #if 0
398 	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
399 		return(EINVAL);
400 #endif
401 	/* returns nothing but KERN_SUCCESS anyway */
402 	(void) vm_map_remove(map, addr, addr+size);
403 	return(0);
404 }
405 
406 void
407 munmapfd(p, fd)
408 	struct proc *p;
409 	int fd;
410 {
411 #ifdef DEBUG
412 	if (mmapdebug & MDB_FOLLOW)
413 		printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
414 #endif
415 
416 	/*
417 	 * XXX should vm_deallocate any regions mapped to this file
418 	 */
419 	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
420 }
421 
422 struct mprotect_args {
423 	caddr_t	addr;
424 	int	len;
425 	int	prot;
426 };
427 int
428 mprotect(p, uap, retval)
429 	struct proc *p;
430 	struct mprotect_args *uap;
431 	int *retval;
432 {
433 	vm_offset_t addr;
434 	vm_size_t size;
435 	register vm_prot_t prot;
436 
437 #ifdef DEBUG
438 	if (mmapdebug & MDB_FOLLOW)
439 		printf("mprotect(%d): addr %x len %x prot %d\n",
440 		       p->p_pid, uap->addr, uap->len, uap->prot);
441 #endif
442 
443 	addr = (vm_offset_t)uap->addr;
444 	if ((addr & PAGE_MASK) || uap->len < 0)
445 		return(EINVAL);
446 	size = (vm_size_t)uap->len;
447 	prot = uap->prot & VM_PROT_ALL;
448 
449 	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
450 	    FALSE)) {
451 	case KERN_SUCCESS:
452 		return (0);
453 	case KERN_PROTECTION_FAILURE:
454 		return (EACCES);
455 	}
456 	return (EINVAL);
457 }
458 
459 struct madvise_args {
460 	caddr_t	addr;
461 	int	len;
462 	int	behav;
463 };
464 /* ARGSUSED */
465 int
466 madvise(p, uap, retval)
467 	struct proc *p;
468 	struct madvise_args *uap;
469 	int *retval;
470 {
471 
472 	/* Not yet implemented */
473 	return (EOPNOTSUPP);
474 }
475 
476 struct mincore_args {
477 	caddr_t	addr;
478 	int	len;
479 	char	*vec;
480 };
481 /* ARGSUSED */
482 int
483 mincore(p, uap, retval)
484 	struct proc *p;
485 	struct mincore_args *uap;
486 	int *retval;
487 {
488 
489 	/* Not yet implemented */
490 	return (EOPNOTSUPP);
491 }
492 
493 struct mlock_args {
494 	caddr_t	addr;
495 	size_t	len;
496 };
497 int
498 mlock(p, uap, retval)
499 	struct proc *p;
500 	struct mlock_args *uap;
501 	int *retval;
502 {
503 	vm_offset_t addr;
504 	vm_size_t size;
505 	int error;
506 	extern int vm_page_max_wired;
507 
508 #ifdef DEBUG
509 	if (mmapdebug & MDB_FOLLOW)
510 		printf("mlock(%d): addr %x len %x\n",
511 		       p->p_pid, uap->addr, uap->len);
512 #endif
513 	addr = (vm_offset_t)uap->addr;
514 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
515 		return (EINVAL);
516 	size = round_page((vm_size_t)uap->len);
517 	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
518 		return (EAGAIN);
519 #ifdef pmap_wired_count
520 	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
521 	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
522 		return (EAGAIN);
523 #else
524 	if (error = suser(p->p_ucred, &p->p_acflag))
525 		return (error);
526 #endif
527 
528 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
529 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
530 }
531 
532 struct munlock_args {
533 	caddr_t	addr;
534 	size_t	len;
535 };
536 int
537 munlock(p, uap, retval)
538 	struct proc *p;
539 	struct munlock_args *uap;
540 	int *retval;
541 {
542 	vm_offset_t addr;
543 	vm_size_t size;
544 	int error;
545 
546 #ifdef DEBUG
547 	if (mmapdebug & MDB_FOLLOW)
548 		printf("munlock(%d): addr %x len %x\n",
549 		       p->p_pid, uap->addr, uap->len);
550 #endif
551 	addr = (vm_offset_t)uap->addr;
552 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
553 		return (EINVAL);
554 #ifndef pmap_wired_count
555 	if (error = suser(p->p_ucred, &p->p_acflag))
556 		return (error);
557 #endif
558 	size = round_page((vm_size_t)uap->len);
559 
560 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
561 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
562 }
563 
/*
 * Internal version of mmap.
 * Currently used by mmap, exec, and sys5 shared memory.
 * Handle is either a vnode pointer or NULL for MAP_ANON.
 *
 * Arguments:
 *	map	address map to install the mapping in
 *	addr	in/out address; rounded to a page boundary here when
 *		MAP_FIXED is clear (mmap() hands *addr back to the
 *		user on success)
 *	size	size of the mapping; a size of zero succeeds at once
 *	prot	current protection to apply
 *	maxprot	maximum protection to apply
 *	flags	MAP_* flags (MAP_FIXED, MAP_ANON, MAP_SHARED, MAP_COPY)
 *	handle	backing vnode, or NULL for anonymous memory
 *	foff	offset into the backing object
 *
 * Returns 0 or an errno value: ENOMEM when no pager could be allocated
 * (EINVAL for a device pager), otherwise the final KERN_* code
 * translated by the switch at the end of the function.
 */
int
vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
	register vm_map_t map;
	register vm_offset_t *addr;
	register vm_size_t size;
	vm_prot_t prot, maxprot;
	register int flags;
	caddr_t handle;		/* XXX should be vp */
	vm_offset_t foff;
{
	register vm_pager_t pager;
	boolean_t fitit;
	vm_object_t object;
	struct vnode *vp = NULL;
	int type;
	int rv = KERN_SUCCESS;

	if (size == 0)
		return (0);

	/*
	 * Non-fixed requests let the map code pick the address (fitit);
	 * fixed requests first discard whatever is mapped in the range.
	 */
	if ((flags & MAP_FIXED) == 0) {
		fitit = TRUE;
		*addr = round_page(*addr);
	} else {
		fitit = FALSE;
		(void)vm_deallocate(map, *addr, size);
	}

	/*
	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
	 * gain a reference to ensure continued existence of the object.
	 * (XXX the exception is to appease the pageout daemon)
	 */
	if (flags & MAP_ANON)
		type = PG_DFLT;
	else {
		/*
		 * NOTE(review): handle is dereferenced as a vnode here, so
		 * callers must set MAP_ANON whenever handle is NULL (mmap()
		 * does) -- confirm for the other callers.
		 */
		vp = (struct vnode *)handle;
		if (vp->v_type == VCHR) {
			type = PG_DEVICE;
			/* device pagers are keyed by device number */
			handle = (caddr_t)vp->v_rdev;
		} else
			type = PG_VNODE;
	}
	pager = vm_pager_allocate(type, handle, size, prot, foff);
	if (pager == NULL)
		return (type == PG_DEVICE ? EINVAL : ENOMEM);
	/*
	 * Find object and release extra reference gained by lookup
	 */
	object = vm_object_lookup(pager);
	vm_object_deallocate(object);

	/*
	 * Anonymous memory.
	 */
	if (flags & MAP_ANON) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, TRUE);
		if (rv != KERN_SUCCESS) {
			if (handle == NULL)
				vm_pager_deallocate(pager);
			else
				vm_object_deallocate(object);
			goto out;
		}
		/*
		 * Don't cache anonymous objects.
		 * Loses the reference gained by vm_pager_allocate.
		 * Note that object will be NULL when handle == NULL,
		 * this is ok since vm_allocate_with_pager has made
		 * sure that these objects are uncached.
		 */
		(void) pager_cache(object, FALSE);
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Must be a mapped file.
	 * Distinguish between character special and regular files.
	 */
	else if (vp->v_type == VCHR) {
		rv = vm_allocate_with_pager(map, addr, size, fitit,
					    pager, foff, FALSE);
		/*
		 * Uncache the object and lose the reference gained
		 * by vm_pager_allocate().  If the call to
		 * vm_allocate_with_pager() was successful, then we
		 * gained an additional reference ensuring the object
		 * will continue to exist.  If the call failed then
		 * the deallocate call below will terminate the
		 * object which is fine.
		 */
		(void) pager_cache(object, FALSE);
		if (rv != KERN_SUCCESS)
			goto out;
	}
	/*
	 * A regular file
	 */
	else {
#ifdef DEBUG
		if (object == NULL)
			printf("vm_mmap: no object: vp %x, pager %x\n",
			       vp, pager);
#endif
		/*
		 * Map it directly.
		 * Allows modifications to go out to the vnode.
		 */
		if (flags & MAP_SHARED) {
			rv = vm_allocate_with_pager(map, addr, size,
						    fitit, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Don't cache the object.  This is the easiest way
			 * of ensuring that data gets back to the filesystem
			 * because vnode_pager_deallocate() will fsync the
			 * vnode.  pager_cache() will lose the extra ref.
			 */
			if (prot & VM_PROT_WRITE)
				pager_cache(object, FALSE);
			else
				vm_object_deallocate(object);
		}
		/*
		 * Copy-on-write of file.  Two flavors.
		 * MAP_COPY is true COW, you essentially get a snapshot of
		 * the region at the time of mapping.  MAP_PRIVATE means only
		 * that your changes are not reflected back to the object.
		 * Changes made by others will be seen.
		 */
		else {
			vm_map_t tmap;
			vm_offset_t off;

			/* locate and allocate the target address space */
			rv = vm_map_find(map, NULL, (vm_offset_t)0,
					 addr, size, fitit);
			if (rv != KERN_SUCCESS) {
				vm_object_deallocate(object);
				goto out;
			}
			/*
			 * Map the file into a scratch map first; the range
			 * is then copied from there into the target map.
			 */
			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
					     VM_MIN_ADDRESS+size, TRUE);
			off = VM_MIN_ADDRESS;
			rv = vm_allocate_with_pager(tmap, &off, size,
						    TRUE, pager,
						    foff, FALSE);
			if (rv != KERN_SUCCESS) {
				/*
				 * NOTE(review): the range reserved in `map'
				 * by vm_map_find() above does not appear to
				 * be released on this path -- confirm.
				 */
				vm_object_deallocate(object);
				vm_map_deallocate(tmap);
				goto out;
			}
			/*
			 * (XXX)
			 * MAP_PRIVATE implies that we see changes made by
			 * others.  To ensure that we need to guarantee that
			 * no copy object is created (otherwise original
			 * pages would be pushed to the copy object and we
			 * would never see changes made by others).  We
			 * totally sleaze it right now by marking the object
			 * internal temporarily.
			 */
			if ((flags & MAP_COPY) == 0)
				object->flags |= OBJ_INTERNAL;
			rv = vm_map_copy(map, tmap, *addr, size, off,
					 FALSE, FALSE);
			object->flags &= ~OBJ_INTERNAL;
			/*
			 * (XXX)
			 * My oh my, this only gets worse...
			 * Force creation of a shadow object so that
			 * vm_map_fork will do the right thing.
			 */
			if ((flags & MAP_COPY) == 0) {
				/*
				 * This tmap shadows the scratch map declared
				 * in the enclosing block; it is only used for
				 * the lookup on the target map below.
				 */
				vm_map_t tmap;
				vm_map_entry_t tentry;
				vm_object_t tobject;
				vm_offset_t toffset;
				vm_prot_t tprot;
				boolean_t twired, tsu;

				tmap = map;
				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
					      &tentry, &tobject, &toffset,
					      &tprot, &twired, &tsu);
				vm_map_lookup_done(tmap, tentry);
			}
			/*
			 * (XXX)
			 * Map copy code cannot detect sharing unless a
			 * sharing map is involved.  So we cheat and write
			 * protect everything ourselves.
			 */
			vm_object_pmap_copy(object, foff, foff + size);
			vm_object_deallocate(object);
			/* the scratch map is no longer needed */
			vm_map_deallocate(tmap);
			/* rv still holds the result of vm_map_copy() */
			if (rv != KERN_SUCCESS)
				goto out;
		}
#ifdef DEBUG
		if (mmapdebug & MDB_MAPIT)
			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
			       curproc->p_pid, *addr, size, pager);
#endif
	}
	/*
	 * Correct protection (default is VM_PROT_ALL).
	 * If maxprot is different than prot, we must set both explicitly.
	 */
	rv = KERN_SUCCESS;
	if (maxprot != VM_PROT_ALL)
		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
	if (rv == KERN_SUCCESS && prot != maxprot)
		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
	if (rv != KERN_SUCCESS) {
		/* undo the mapping if the protection could not be set */
		(void) vm_deallocate(map, *addr, size);
		goto out;
	}
	/*
	 * Shared memory is also shared with children.
	 */
	if (flags & MAP_SHARED) {
		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
		if (rv != KERN_SUCCESS) {
			(void) vm_deallocate(map, *addr, size);
			goto out;
		}
	}
out:
#ifdef DEBUG
	if (mmapdebug & MDB_MAPIT)
		printf("vm_mmap: rv %d\n", rv);
#endif
	/* translate the KERN_* result into an errno value */
	switch (rv) {
	case KERN_SUCCESS:
		return (0);
	case KERN_INVALID_ADDRESS:
	case KERN_NO_SPACE:
		return (ENOMEM);
	case KERN_PROTECTION_FAILURE:
		return (EACCES);
	default:
		return (EINVAL);
	}
}
822