xref: /original-bsd/sys/vm/vm_mmap.c (revision 333da485)
1 /*
2  * Copyright (c) 1988 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
13  *
14  *	@(#)vm_mmap.c	8.4 (Berkeley) 01/12/94
15  */
16 
17 /*
18  * Mapped file (mmap) interface to VM
19  */
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/filedesc.h>
24 #include <sys/resourcevar.h>
25 #include <sys/proc.h>
26 #include <sys/vnode.h>
27 #include <sys/file.h>
28 #include <sys/mman.h>
29 #include <sys/conf.h>
30 
31 #include <miscfs/specfs/specdev.h>
32 
33 #include <vm/vm.h>
34 #include <vm/vm_pager.h>
35 #include <vm/vm_prot.h>
36 
#ifdef DEBUG
/*
 * Debug tracing control for the calls in this file.
 * mmapdebug holds a mask of the MDB_* bits below; each printf in this
 * file is guarded by a test of one or more of them.
 */
int mmapdebug = 0;
#define MDB_FOLLOW	0x01	/* announce entry to each call */
#define MDB_SYNC	0x02	/* report msync requests and ranges */
#define MDB_MAPIT	0x04	/* report what vm_mmap mapped and its result */
#endif
43 
44 struct sbrk_args {
45 	int	incr;
46 };
47 /* ARGSUSED */
48 int
49 sbrk(p, uap, retval)
50 	struct proc *p;
51 	struct sbrk_args *uap;
52 	int *retval;
53 {
54 
55 	/* Not yet implemented */
56 	return (EOPNOTSUPP);
57 }
58 
59 struct sstk_args {
60 	int	incr;
61 };
62 /* ARGSUSED */
63 int
64 sstk(p, uap, retval)
65 	struct proc *p;
66 	struct sstk_args *uap;
67 	int *retval;
68 {
69 
70 	/* Not yet implemented */
71 	return (EOPNOTSUPP);
72 }
73 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Argument block for ogetpagesize(); it carries nothing useful.
 */
struct getpagesize_args {
	int	dummy;
};

/*
 * Compatibility getpagesize call.
 *
 * Stores PAGE_SIZE in *retval and always succeeds.  Neither p nor uap
 * is examined.
 */
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{
	int pagesize;

	pagesize = PAGE_SIZE;
	*retval = pagesize;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
90 
91 struct mmap_args {
92 	caddr_t	addr;
93 	size_t	len;
94 	int	prot;
95 	int	flags;
96 	int	fd;
97 	long	pad;
98 	off_t	pos;
99 };
100 
#ifdef COMPAT_43
/*
 * Argument block for the old mmap call.
 */
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};

/*
 * Old-style mmap entry point.
 *
 * Rewrites the old argument block as a struct mmap_args, converting
 * the protection encoding and the flag values on the way, and lets
 * mmap() do the work.  The return value is mmap()'s.
 */
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	/*
	 * Indexed by the low three bits of the old protection value:
	 * bit 0 selects PROT_EXEC, bit 1 PROT_WRITE, bit 2 PROT_READ.
	 */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800
	struct mmap_args nargs;
	int oflags, nflags;

	oflags = uap->flags;

	/* Anything not marked shared is treated as private. */
	nflags = (oflags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
	if (oflags & OMAP_ANON)
		nflags |= MAP_ANON;
	if (oflags & OMAP_COPY)
		nflags |= MAP_COPY;
	if (oflags & OMAP_FIXED)
		nflags |= MAP_FIXED;
	if (oflags & OMAP_INHERIT)
		nflags |= MAP_INHERIT;

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = nflags;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;

	return (mmap(p, &nargs, retval));
}
#endif
154 
155 int
156 mmap(p, uap, retval)
157 	struct proc *p;
158 	register struct mmap_args *uap;
159 	int *retval;
160 {
161 	register struct filedesc *fdp = p->p_fd;
162 	register struct file *fp;
163 	struct vnode *vp;
164 	vm_offset_t addr;
165 	vm_size_t size;
166 	vm_prot_t prot, maxprot;
167 	caddr_t handle;
168 	int flags, error;
169 
170 	prot = uap->prot & VM_PROT_ALL;
171 	flags = uap->flags;
172 #ifdef DEBUG
173 	if (mmapdebug & MDB_FOLLOW)
174 		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
175 		       p->p_pid, uap->addr, uap->len, prot,
176 		       flags, uap->fd, (vm_offset_t)uap->pos);
177 #endif
178 	/*
179 	 * Address (if FIXED) must be page aligned.
180 	 * Size is implicitly rounded to a page boundary.
181 	 */
182 	addr = (vm_offset_t) uap->addr;
183 	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
184 	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
185 		return (EINVAL);
186 	size = (vm_size_t) round_page(uap->len);
187 	/*
188 	 * Check for illegal addresses.  Watch out for address wrap...
189 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
190 	 */
191 	if (flags & MAP_FIXED) {
192 		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
193 			return (EINVAL);
194 		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
195 			return (EINVAL);
196 		if (addr > addr + size)
197 			return (EINVAL);
198 	}
199 	/*
200 	 * XXX if no hint provided for a non-fixed mapping place it after
201 	 * the end of the largest possible heap.
202 	 *
203 	 * There should really be a pmap call to determine a reasonable
204 	 * location.
205 	 */
206 	if (addr == 0 && (flags & MAP_FIXED) == 0)
207 		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
208 	if (flags & MAP_ANON) {
209 		/*
210 		 * Mapping blank space is trivial.
211 		 */
212 		handle = NULL;
213 		maxprot = VM_PROT_ALL;
214 	} else {
215 		/*
216 		 * Mapping file, get fp for validation.
217 		 * Obtain vnode and make sure it is of appropriate type.
218 		 */
219 		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
220 		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
221 			return (EBADF);
222 		if (fp->f_type != DTYPE_VNODE)
223 			return (EINVAL);
224 		vp = (struct vnode *)fp->f_data;
225 		if (vp->v_type != VREG && vp->v_type != VCHR)
226 			return (EINVAL);
227 		/*
228 		 * XXX hack to handle use of /dev/zero to map anon
229 		 * memory (ala SunOS).
230 		 */
231 		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
232 			handle = NULL;
233 			maxprot = VM_PROT_ALL;
234 			flags |= MAP_ANON;
235 		} else {
236 			/*
237 			 * Ensure that file and memory protections are
238 			 * compatible.  Note that we only worry about
239 			 * writability if mapping is shared; in this case,
240 			 * current and max prot are dictated by the open file.
241 			 * XXX use the vnode instead?  Problem is: what
242 			 * credentials do we use for determination?
243 			 * What if proc does a setuid?
244 			 */
245 			maxprot = VM_PROT_EXECUTE;	/* ??? */
246 			if (fp->f_flag & FREAD)
247 				maxprot |= VM_PROT_READ;
248 			else if (prot & PROT_READ)
249 				return (EACCES);
250 			if (flags & MAP_SHARED) {
251 				if (fp->f_flag & FWRITE)
252 					maxprot |= VM_PROT_WRITE;
253 				else if (prot & PROT_WRITE)
254 					return (EACCES);
255 			} else
256 				maxprot |= VM_PROT_WRITE;
257 			handle = (caddr_t)vp;
258 		}
259 	}
260 	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
261 	    flags, handle, (vm_offset_t)uap->pos);
262 	if (error == 0)
263 		*retval = (int)addr;
264 	return (error);
265 }
266 
267 struct msync_args {
268 	caddr_t	addr;
269 	int	len;
270 };
271 int
272 msync(p, uap, retval)
273 	struct proc *p;
274 	struct msync_args *uap;
275 	int *retval;
276 {
277 	vm_offset_t addr;
278 	vm_size_t size;
279 	vm_map_t map;
280 	int rv;
281 	boolean_t syncio, invalidate;
282 
283 #ifdef DEBUG
284 	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
285 		printf("msync(%d): addr %x len %x\n",
286 		       p->p_pid, uap->addr, uap->len);
287 #endif
288 	if (((int)uap->addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
289 		return (EINVAL);
290 	map = &p->p_vmspace->vm_map;
291 	addr = (vm_offset_t)uap->addr;
292 	size = (vm_size_t)uap->len;
293 	/*
294 	 * XXX Gak!  If size is zero we are supposed to sync "all modified
295 	 * pages with the region containing addr".  Unfortunately, we
296 	 * don't really keep track of individual mmaps so we approximate
297 	 * by flushing the range of the map entry containing addr.
298 	 * This can be incorrect if the region splits or is coalesced
299 	 * with a neighbor.
300 	 */
301 	if (size == 0) {
302 		vm_map_entry_t entry;
303 
304 		vm_map_lock_read(map);
305 		rv = vm_map_lookup_entry(map, addr, &entry);
306 		vm_map_unlock_read(map);
307 		if (rv)
308 			return (EINVAL);
309 		addr = entry->start;
310 		size = entry->end - entry->start;
311 	}
312 #ifdef DEBUG
313 	if (mmapdebug & MDB_SYNC)
314 		printf("msync: cleaning/flushing address range [%x-%x)\n",
315 		       addr, addr+size);
316 #endif
317 	/*
318 	 * Could pass this in as a third flag argument to implement
319 	 * Sun's MS_ASYNC.
320 	 */
321 	syncio = TRUE;
322 	/*
323 	 * XXX bummer, gotta flush all cached pages to ensure
324 	 * consistency with the file system cache.  Otherwise, we could
325 	 * pass this in to implement Sun's MS_INVALIDATE.
326 	 */
327 	invalidate = TRUE;
328 	/*
329 	 * Clean the pages and interpret the return value.
330 	 */
331 	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
332 	switch (rv) {
333 	case KERN_SUCCESS:
334 		break;
335 	case KERN_INVALID_ADDRESS:
336 		return (EINVAL);	/* Sun returns ENOMEM? */
337 	case KERN_FAILURE:
338 		return (EIO);
339 	default:
340 		return (EINVAL);
341 	}
342 	return (0);
343 }
344 
345 struct munmap_args {
346 	caddr_t	addr;
347 	int	len;
348 };
349 int
350 munmap(p, uap, retval)
351 	register struct proc *p;
352 	register struct munmap_args *uap;
353 	int *retval;
354 {
355 	vm_offset_t addr;
356 	vm_size_t size;
357 	vm_map_t map;
358 
359 #ifdef DEBUG
360 	if (mmapdebug & MDB_FOLLOW)
361 		printf("munmap(%d): addr %x len %x\n",
362 		       p->p_pid, uap->addr, uap->len);
363 #endif
364 
365 	addr = (vm_offset_t) uap->addr;
366 	if ((addr & PAGE_MASK) || uap->len < 0)
367 		return(EINVAL);
368 	size = (vm_size_t) round_page(uap->len);
369 	if (size == 0)
370 		return(0);
371 	/*
372 	 * Check for illegal addresses.  Watch out for address wrap...
373 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
374 	 */
375 	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
376 		return (EINVAL);
377 	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
378 		return (EINVAL);
379 	if (addr > addr + size)
380 		return (EINVAL);
381 	map = &p->p_vmspace->vm_map;
382 	/*
383 	 * Make sure entire range is allocated.
384 	 */
385 	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
386 		return(EINVAL);
387 	/* returns nothing but KERN_SUCCESS anyway */
388 	(void) vm_map_remove(map, addr, addr+size);
389 	return(0);
390 }
391 
392 void
393 munmapfd(fd)
394 	int fd;
395 {
396 #ifdef DEBUG
397 	if (mmapdebug & MDB_FOLLOW)
398 		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
399 #endif
400 
401 	/*
402 	 * XXX should vm_deallocate any regions mapped to this file
403 	 */
404 	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
405 }
406 
407 struct mprotect_args {
408 	caddr_t	addr;
409 	int	len;
410 	int	prot;
411 };
412 int
413 mprotect(p, uap, retval)
414 	struct proc *p;
415 	struct mprotect_args *uap;
416 	int *retval;
417 {
418 	vm_offset_t addr;
419 	vm_size_t size;
420 	register vm_prot_t prot;
421 
422 #ifdef DEBUG
423 	if (mmapdebug & MDB_FOLLOW)
424 		printf("mprotect(%d): addr %x len %x prot %d\n",
425 		       p->p_pid, uap->addr, uap->len, uap->prot);
426 #endif
427 
428 	addr = (vm_offset_t)uap->addr;
429 	if ((addr & PAGE_MASK) || uap->len < 0)
430 		return(EINVAL);
431 	size = (vm_size_t)uap->len;
432 	prot = uap->prot & VM_PROT_ALL;
433 
434 	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
435 	    FALSE)) {
436 	case KERN_SUCCESS:
437 		return (0);
438 	case KERN_PROTECTION_FAILURE:
439 		return (EACCES);
440 	}
441 	return (EINVAL);
442 }
443 
444 struct madvise_args {
445 	caddr_t	addr;
446 	int	len;
447 	int	behav;
448 };
449 /* ARGSUSED */
450 int
451 madvise(p, uap, retval)
452 	struct proc *p;
453 	struct madvise_args *uap;
454 	int *retval;
455 {
456 
457 	/* Not yet implemented */
458 	return (EOPNOTSUPP);
459 }
460 
461 struct mincore_args {
462 	caddr_t	addr;
463 	int	len;
464 	char	*vec;
465 };
466 /* ARGSUSED */
467 int
468 mincore(p, uap, retval)
469 	struct proc *p;
470 	struct mincore_args *uap;
471 	int *retval;
472 {
473 
474 	/* Not yet implemented */
475 	return (EOPNOTSUPP);
476 }
477 
478 struct mlock_args {
479 	caddr_t	addr;
480 	size_t	len;
481 };
482 int
483 mlock(p, uap, retval)
484 	struct proc *p;
485 	struct mlock_args *uap;
486 	int *retval;
487 {
488 	vm_offset_t addr;
489 	vm_size_t size;
490 	int error;
491 	extern int vm_page_max_wired;
492 
493 #ifdef DEBUG
494 	if (mmapdebug & MDB_FOLLOW)
495 		printf("mlock(%d): addr %x len %x\n",
496 		       p->p_pid, uap->addr, uap->len);
497 #endif
498 	addr = (vm_offset_t)uap->addr;
499 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
500 		return (EINVAL);
501 	size = round_page((vm_size_t)uap->len);
502 	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
503 		return (EAGAIN);
504 #ifdef pmap_wired_count
505 	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
506 	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
507 		return (EAGAIN);
508 #else
509 	if (error = suser(p->p_ucred, &p->p_acflag))
510 		return (error);
511 #endif
512 
513 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
514 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
515 }
516 
517 struct munlock_args {
518 	caddr_t	addr;
519 	size_t	len;
520 };
521 int
522 munlock(p, uap, retval)
523 	struct proc *p;
524 	struct munlock_args *uap;
525 	int *retval;
526 {
527 	vm_offset_t addr;
528 	vm_size_t size;
529 	int error;
530 
531 #ifdef DEBUG
532 	if (mmapdebug & MDB_FOLLOW)
533 		printf("munlock(%d): addr %x len %x\n",
534 		       p->p_pid, uap->addr, uap->len);
535 #endif
536 	addr = (vm_offset_t)uap->addr;
537 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
538 		return (EINVAL);
539 #ifndef pmap_wired_count
540 	if (error = suser(p->p_ucred, &p->p_acflag))
541 		return (error);
542 #endif
543 	size = round_page((vm_size_t)uap->len);
544 
545 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
546 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
547 }
548 
549 /*
550  * Internal version of mmap.
551  * Currently used by mmap, exec, and sys5 shared memory.
552  * Handle is either a vnode pointer or NULL for MAP_ANON.
553  */
554 int
555 vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
556 	register vm_map_t map;
557 	register vm_offset_t *addr;
558 	register vm_size_t size;
559 	vm_prot_t prot, maxprot;
560 	register int flags;
561 	caddr_t handle;		/* XXX should be vp */
562 	vm_offset_t foff;
563 {
564 	register vm_pager_t pager;
565 	boolean_t fitit;
566 	vm_object_t object;
567 	struct vnode *vp = NULL;
568 	int type;
569 	int rv = KERN_SUCCESS;
570 
571 	if (size == 0)
572 		return (0);
573 
574 	if ((flags & MAP_FIXED) == 0) {
575 		fitit = TRUE;
576 		*addr = round_page(*addr);
577 	} else {
578 		fitit = FALSE;
579 		(void)vm_deallocate(map, *addr, size);
580 	}
581 
582 	/*
583 	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
584 	 * gain a reference to ensure continued existance of the object.
585 	 * (XXX the exception is to appease the pageout daemon)
586 	 */
587 	if (flags & MAP_ANON)
588 		type = PG_DFLT;
589 	else {
590 		vp = (struct vnode *)handle;
591 		if (vp->v_type == VCHR) {
592 			type = PG_DEVICE;
593 			handle = (caddr_t)vp->v_rdev;
594 		} else
595 			type = PG_VNODE;
596 	}
597 	pager = vm_pager_allocate(type, handle, size, prot, foff);
598 	if (pager == NULL)
599 		return (type == PG_DEVICE ? EINVAL : ENOMEM);
600 	/*
601 	 * Find object and release extra reference gained by lookup
602 	 */
603 	object = vm_object_lookup(pager);
604 	vm_object_deallocate(object);
605 
606 	/*
607 	 * Anonymous memory.
608 	 */
609 	if (flags & MAP_ANON) {
610 		rv = vm_allocate_with_pager(map, addr, size, fitit,
611 					    pager, foff, TRUE);
612 		if (rv != KERN_SUCCESS) {
613 			if (handle == NULL)
614 				vm_pager_deallocate(pager);
615 			else
616 				vm_object_deallocate(object);
617 			goto out;
618 		}
619 		/*
620 		 * Don't cache anonymous objects.
621 		 * Loses the reference gained by vm_pager_allocate.
622 		 * Note that object will be NULL when handle == NULL,
623 		 * this is ok since vm_allocate_with_pager has made
624 		 * sure that these objects are uncached.
625 		 */
626 		(void) pager_cache(object, FALSE);
627 #ifdef DEBUG
628 		if (mmapdebug & MDB_MAPIT)
629 			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
630 			       curproc->p_pid, *addr, size, pager);
631 #endif
632 	}
633 	/*
634 	 * Must be a mapped file.
635 	 * Distinguish between character special and regular files.
636 	 */
637 	else if (vp->v_type == VCHR) {
638 		rv = vm_allocate_with_pager(map, addr, size, fitit,
639 					    pager, foff, FALSE);
640 		/*
641 		 * Uncache the object and lose the reference gained
642 		 * by vm_pager_allocate().  If the call to
643 		 * vm_allocate_with_pager() was sucessful, then we
644 		 * gained an additional reference ensuring the object
645 		 * will continue to exist.  If the call failed then
646 		 * the deallocate call below will terminate the
647 		 * object which is fine.
648 		 */
649 		(void) pager_cache(object, FALSE);
650 		if (rv != KERN_SUCCESS)
651 			goto out;
652 	}
653 	/*
654 	 * A regular file
655 	 */
656 	else {
657 #ifdef DEBUG
658 		if (object == NULL)
659 			printf("vm_mmap: no object: vp %x, pager %x\n",
660 			       vp, pager);
661 #endif
662 		/*
663 		 * Map it directly.
664 		 * Allows modifications to go out to the vnode.
665 		 */
666 		if (flags & MAP_SHARED) {
667 			rv = vm_allocate_with_pager(map, addr, size,
668 						    fitit, pager,
669 						    foff, FALSE);
670 			if (rv != KERN_SUCCESS) {
671 				vm_object_deallocate(object);
672 				goto out;
673 			}
674 			/*
675 			 * Don't cache the object.  This is the easiest way
676 			 * of ensuring that data gets back to the filesystem
677 			 * because vnode_pager_deallocate() will fsync the
678 			 * vnode.  pager_cache() will lose the extra ref.
679 			 */
680 			if (prot & VM_PROT_WRITE)
681 				pager_cache(object, FALSE);
682 			else
683 				vm_object_deallocate(object);
684 		}
685 		/*
686 		 * Copy-on-write of file.  Two flavors.
687 		 * MAP_COPY is true COW, you essentially get a snapshot of
688 		 * the region at the time of mapping.  MAP_PRIVATE means only
689 		 * that your changes are not reflected back to the object.
690 		 * Changes made by others will be seen.
691 		 */
692 		else {
693 			vm_map_t tmap;
694 			vm_offset_t off;
695 
696 			/* locate and allocate the target address space */
697 			rv = vm_map_find(map, NULL, (vm_offset_t)0,
698 					 addr, size, fitit);
699 			if (rv != KERN_SUCCESS) {
700 				vm_object_deallocate(object);
701 				goto out;
702 			}
703 			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
704 					     VM_MIN_ADDRESS+size, TRUE);
705 			off = VM_MIN_ADDRESS;
706 			rv = vm_allocate_with_pager(tmap, &off, size,
707 						    TRUE, pager,
708 						    foff, FALSE);
709 			if (rv != KERN_SUCCESS) {
710 				vm_object_deallocate(object);
711 				vm_map_deallocate(tmap);
712 				goto out;
713 			}
714 			/*
715 			 * (XXX)
716 			 * MAP_PRIVATE implies that we see changes made by
717 			 * others.  To ensure that we need to guarentee that
718 			 * no copy object is created (otherwise original
719 			 * pages would be pushed to the copy object and we
720 			 * would never see changes made by others).  We
721 			 * totally sleeze it right now by marking the object
722 			 * internal temporarily.
723 			 */
724 			if ((flags & MAP_COPY) == 0)
725 				object->flags |= OBJ_INTERNAL;
726 			rv = vm_map_copy(map, tmap, *addr, size, off,
727 					 FALSE, FALSE);
728 			object->flags &= ~OBJ_INTERNAL;
729 			/*
730 			 * (XXX)
731 			 * My oh my, this only gets worse...
732 			 * Force creation of a shadow object so that
733 			 * vm_map_fork will do the right thing.
734 			 */
735 			if ((flags & MAP_COPY) == 0) {
736 				vm_map_t tmap;
737 				vm_map_entry_t tentry;
738 				vm_object_t tobject;
739 				vm_offset_t toffset;
740 				vm_prot_t tprot;
741 				boolean_t twired, tsu;
742 
743 				tmap = map;
744 				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
745 					      &tentry, &tobject, &toffset,
746 					      &tprot, &twired, &tsu);
747 				vm_map_lookup_done(tmap, tentry);
748 			}
749 			/*
750 			 * (XXX)
751 			 * Map copy code cannot detect sharing unless a
752 			 * sharing map is involved.  So we cheat and write
753 			 * protect everything ourselves.
754 			 */
755 			vm_object_pmap_copy(object, foff, foff + size);
756 			vm_object_deallocate(object);
757 			vm_map_deallocate(tmap);
758 			if (rv != KERN_SUCCESS)
759 				goto out;
760 		}
761 #ifdef DEBUG
762 		if (mmapdebug & MDB_MAPIT)
763 			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
764 			       curproc->p_pid, *addr, size, pager);
765 #endif
766 	}
767 	/*
768 	 * Correct protection (default is VM_PROT_ALL).
769 	 * If maxprot is different than prot, we must set both explicitly.
770 	 */
771 	rv = KERN_SUCCESS;
772 	if (maxprot != VM_PROT_ALL)
773 		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
774 	if (rv == KERN_SUCCESS && prot != maxprot)
775 		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
776 	if (rv != KERN_SUCCESS) {
777 		(void) vm_deallocate(map, *addr, size);
778 		goto out;
779 	}
780 	/*
781 	 * Shared memory is also shared with children.
782 	 */
783 	if (flags & MAP_SHARED) {
784 		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
785 		if (rv != KERN_SUCCESS) {
786 			(void) vm_deallocate(map, *addr, size);
787 			goto out;
788 		}
789 	}
790 out:
791 #ifdef DEBUG
792 	if (mmapdebug & MDB_MAPIT)
793 		printf("vm_mmap: rv %d\n", rv);
794 #endif
795 	switch (rv) {
796 	case KERN_SUCCESS:
797 		return (0);
798 	case KERN_INVALID_ADDRESS:
799 	case KERN_NO_SPACE:
800 		return (ENOMEM);
801 	case KERN_PROTECTION_FAILURE:
802 		return (EACCES);
803 	default:
804 		return (EINVAL);
805 	}
806 }
807