xref: /original-bsd/sys/vm/vm_mmap.c (revision 95ecee29)
1 /*
2  * Copyright (c) 1988 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
13  *
14  *	@(#)vm_mmap.c	8.3 (Berkeley) 11/14/93
15  */
16 
17 /*
18  * Mapped file (mmap) interface to VM
19  */
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/filedesc.h>
24 #include <sys/resourcevar.h>
25 #include <sys/proc.h>
26 #include <sys/vnode.h>
27 #include <sys/file.h>
28 #include <sys/mman.h>
29 #include <sys/conf.h>
30 
31 #include <miscfs/specfs/specdev.h>
32 
33 #include <vm/vm.h>
34 #include <vm/vm_pager.h>
35 #include <vm/vm_prot.h>
36 
#ifdef DEBUG
/*
 * Debug tracing control.  mmapdebug is a bit mask that the DEBUG-only
 * printf calls in this file test before printing; the default of zero
 * keeps them all quiet.
 */
int mmapdebug = 0;
#define MDB_FOLLOW	0x01	/* trace entry to the mmap-family system calls */
#define MDB_SYNC	0x02	/* trace msync region lookup and flushing */
#define MDB_MAPIT	0x04	/* trace the mappings made by vm_mmap */
#endif
43 
/*
 * Argument record for the sbrk system call.
 */
struct sbrk_args {
	int	incr;		/* never examined by sbrk() below */
};

/*
 * sbrk system call: placeholder only.
 *
 * No argument is looked at and *retval is left untouched; the call
 * always fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sbrk(p, uap, retval)
	struct proc *p;
	struct sbrk_args *uap;
	int *retval;
{

	return (EOPNOTSUPP);	/* Not yet implemented */
}
58 
/*
 * Argument record for the sstk system call.
 */
struct sstk_args {
	int	incr;		/* never examined by sstk() below */
};

/*
 * sstk system call: placeholder only.
 *
 * No argument is looked at and *retval is left untouched; the call
 * always fails with EOPNOTSUPP.
 */
/* ARGSUSED */
int
sstk(p, uap, retval)
	struct proc *p;
	struct sstk_args *uap;
	int *retval;
{

	return (EOPNOTSUPP);	/* Not yet implemented */
}
73 
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Argument record for the old getpagesize system call; the call takes
 * no real arguments.
 */
struct getpagesize_args {
	int	dummy;
};

/*
 * Old getpagesize system call (built only for COMPAT_43 or
 * COMPAT_SUNOS kernels).
 *
 * Stores PAGE_SIZE in *retval and always returns 0.
 */
/* ARGSUSED */
int
ogetpagesize(p, uap, retval)
	struct proc *p;
	struct getpagesize_args *uap;
	int *retval;
{

	retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
90 
/*
 * Argument record for the mmap system call.  ommap() also builds one of
 * these to hand its translated arguments to mmap().
 */
struct mmap_args {
	caddr_t	addr;	/* requested address; page aligned if MAP_FIXED */
	size_t	len;	/* length in bytes; mmap rounds it up to a page */
	int	prot;	/* requested protection; masked with VM_PROT_ALL */
	int	flags;	/* MAP_* flags */
	int	fd;	/* descriptor to map; must be -1 with MAP_ANON */
	long	pad;	/* never read in this file; presumably padding */
	off_t	pos;	/* offset into the mapped object; goes to vm_mmap */
};
100 
#ifdef COMPAT_43
/*
 * Argument record for the old (COMPAT_43) mmap system call.
 */
struct ommap_args {
	caddr_t	addr;
	int	len;
	int	prot;
	int	flags;
	int	fd;
	long	pos;
};

/*
 * Old mmap system call.
 *
 * Repackages the old-style arguments as a struct mmap_args and hands
 * them to mmap(): the protection value is translated through a lookup
 * table and every old flag bit is replaced by its current counterpart.
 * The pad member of the new argument record is left unset.
 * Returns whatever mmap() returns.
 */
int
ommap(p, uap, retval)
	struct proc *p;
	register struct ommap_args *uap;
	int *retval;
{
	/* Indexed by the low three bits of the old protection value. */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
	struct mmap_args nargs;
	int oflags, nflags;
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	oflags = uap->flags;

	/* Anything not explicitly shared becomes a private mapping. */
	nflags = (oflags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
	if (oflags & OMAP_ANON)
		nflags |= MAP_ANON;
	if (oflags & OMAP_COPY)
		nflags |= MAP_COPY;
	if (oflags & OMAP_FIXED)
		nflags |= MAP_FIXED;
	if (oflags & OMAP_INHERIT)
		nflags |= MAP_INHERIT;

	nargs.addr = uap->addr;
	nargs.len = uap->len;
	nargs.prot = cvtbsdprot[uap->prot & 0x7];
	nargs.flags = nflags;
	nargs.fd = uap->fd;
	nargs.pos = uap->pos;
	return (mmap(p, &nargs, retval));
}
#endif
154 
155 int
156 mmap(p, uap, retval)
157 	struct proc *p;
158 	register struct mmap_args *uap;
159 	int *retval;
160 {
161 	register struct filedesc *fdp = p->p_fd;
162 	register struct file *fp;
163 	struct vnode *vp;
164 	vm_offset_t addr;
165 	vm_size_t size;
166 	vm_prot_t prot, maxprot;
167 	caddr_t handle;
168 	int flags, error;
169 
170 	prot = uap->prot & VM_PROT_ALL;
171 	flags = uap->flags;
172 #ifdef DEBUG
173 	if (mmapdebug & MDB_FOLLOW)
174 		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
175 		       p->p_pid, uap->addr, uap->len, prot,
176 		       flags, uap->fd, (vm_offset_t)uap->pos);
177 #endif
178 	/*
179 	 * Address (if FIXED) must be page aligned.
180 	 * Size is implicitly rounded to a page boundary.
181 	 */
182 	addr = (vm_offset_t) uap->addr;
183 	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
184 	    (ssize_t)uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
185 		return (EINVAL);
186 	size = (vm_size_t) round_page(uap->len);
187 	/*
188 	 * Check for illegal addresses.  Watch out for address wrap...
189 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
190 	 */
191 	if (flags & MAP_FIXED) {
192 		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
193 			return (EINVAL);
194 		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
195 			return (EINVAL);
196 		if (addr > addr + size)
197 			return (EINVAL);
198 	}
199 	/*
200 	 * XXX if no hint provided for a non-fixed mapping place it after
201 	 * the end of the largest possible heap.
202 	 *
203 	 * There should really be a pmap call to determine a reasonable
204 	 * location.
205 	 */
206 	if (addr == 0 && (flags & MAP_FIXED) == 0)
207 		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
208 	if (flags & MAP_ANON) {
209 		/*
210 		 * Mapping blank space is trivial.
211 		 */
212 		handle = NULL;
213 		maxprot = VM_PROT_ALL;
214 	} else {
215 		/*
216 		 * Mapping file, get fp for validation.
217 		 * Obtain vnode and make sure it is of appropriate type.
218 		 */
219 		if (((unsigned)uap->fd) >= fdp->fd_nfiles ||
220 		    (fp = fdp->fd_ofiles[uap->fd]) == NULL)
221 			return (EBADF);
222 		if (fp->f_type != DTYPE_VNODE)
223 			return (EINVAL);
224 		vp = (struct vnode *)fp->f_data;
225 		if (vp->v_type != VREG && vp->v_type != VCHR)
226 			return (EINVAL);
227 		/*
228 		 * XXX hack to handle use of /dev/zero to map anon
229 		 * memory (ala SunOS).
230 		 */
231 		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
232 			handle = NULL;
233 			maxprot = VM_PROT_ALL;
234 			flags |= MAP_ANON;
235 		} else {
236 			/*
237 			 * Ensure that file and memory protections are
238 			 * compatible.  Note that we only worry about
239 			 * writability if mapping is shared; in this case,
240 			 * current and max prot are dictated by the open file.
241 			 * XXX use the vnode instead?  Problem is: what
242 			 * credentials do we use for determination?
243 			 * What if proc does a setuid?
244 			 */
245 			maxprot = VM_PROT_EXECUTE;	/* ??? */
246 			if (fp->f_flag & FREAD)
247 				maxprot |= VM_PROT_READ;
248 			else if (prot & PROT_READ)
249 				return (EACCES);
250 			if (flags & MAP_SHARED) {
251 				if (fp->f_flag & FWRITE)
252 					maxprot |= VM_PROT_WRITE;
253 				else if (prot & PROT_WRITE)
254 					return (EACCES);
255 			} else
256 				maxprot |= VM_PROT_WRITE;
257 			handle = (caddr_t)vp;
258 		}
259 	}
260 	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
261 	    flags, handle, (vm_offset_t)uap->pos);
262 	if (error == 0)
263 		*retval = (int)addr;
264 	return (error);
265 }
266 
267 struct msync_args {
268 	caddr_t	addr;
269 	int	len;
270 };
271 int
272 msync(p, uap, retval)
273 	struct proc *p;
274 	struct msync_args *uap;
275 	int *retval;
276 {
277 	vm_offset_t addr, objoff, oaddr;
278 	vm_size_t size, osize;
279 	vm_prot_t prot, mprot;
280 	vm_inherit_t inherit;
281 	vm_object_t object;
282 	boolean_t shared;
283 	int rv;
284 
285 #ifdef DEBUG
286 	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
287 		printf("msync(%d): addr %x len %x\n",
288 		       p->p_pid, uap->addr, uap->len);
289 #endif
290 	if (((int)uap->addr & PAGE_MASK) || uap->len < 0)
291 		return(EINVAL);
292 	addr = oaddr = (vm_offset_t)uap->addr;
293 	osize = (vm_size_t)uap->len;
294 	/*
295 	 * Region must be entirely contained in a single entry
296 	 */
297 	if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr+osize,
298 	    TRUE))
299 		return(EINVAL);
300 	/*
301 	 * Determine the object associated with that entry
302 	 * (object is returned locked on KERN_SUCCESS)
303 	 */
304 	rv = vm_region(&p->p_vmspace->vm_map, &addr, &size, &prot, &mprot,
305 		       &inherit, &shared, &object, &objoff);
306 	if (rv != KERN_SUCCESS)
307 		return(EINVAL);
308 #ifdef DEBUG
309 	if (mmapdebug & MDB_SYNC)
310 		printf("msync: region: object %x addr %x size %d objoff %d\n",
311 		       object, addr, size, objoff);
312 #endif
313 	/*
314 	 * Do not msync non-vnoded backed objects.
315 	 */
316 	if ((object->flags & OBJ_INTERNAL) || object->pager == NULL ||
317 	    object->pager->pg_type != PG_VNODE) {
318 		vm_object_unlock(object);
319 		return(EINVAL);
320 	}
321 	objoff += oaddr - addr;
322 	if (osize == 0)
323 		osize = size;
324 #ifdef DEBUG
325 	if (mmapdebug & MDB_SYNC)
326 		printf("msync: cleaning/flushing object range [%x-%x)\n",
327 		       objoff, objoff+osize);
328 #endif
329 	if (prot & VM_PROT_WRITE)
330 		vm_object_page_clean(object, objoff, objoff+osize, FALSE);
331 	/*
332 	 * (XXX)
333 	 * Bummer, gotta flush all cached pages to ensure
334 	 * consistency with the file system cache.
335 	 */
336 	vm_object_page_remove(object, objoff, objoff+osize);
337 	vm_object_unlock(object);
338 	return(0);
339 }
340 
341 struct munmap_args {
342 	caddr_t	addr;
343 	int	len;
344 };
345 int
346 munmap(p, uap, retval)
347 	register struct proc *p;
348 	register struct munmap_args *uap;
349 	int *retval;
350 {
351 	vm_offset_t addr;
352 	vm_size_t size;
353 
354 #ifdef DEBUG
355 	if (mmapdebug & MDB_FOLLOW)
356 		printf("munmap(%d): addr %x len %x\n",
357 		       p->p_pid, uap->addr, uap->len);
358 #endif
359 
360 	addr = (vm_offset_t) uap->addr;
361 	if ((addr & PAGE_MASK) || uap->len < 0)
362 		return(EINVAL);
363 	size = (vm_size_t) round_page(uap->len);
364 	if (size == 0)
365 		return(0);
366 	/*
367 	 * Check for illegal addresses.  Watch out for address wrap...
368 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
369 	 */
370 	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
371 		return (EINVAL);
372 	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
373 		return (EINVAL);
374 	if (addr > addr + size)
375 		return (EINVAL);
376 	if (!vm_map_is_allocated(&p->p_vmspace->vm_map, addr, addr + size,
377 	    FALSE))
378 		return(EINVAL);
379 	/* returns nothing but KERN_SUCCESS anyway */
380 	(void) vm_map_remove(&p->p_vmspace->vm_map, addr, addr+size);
381 	return(0);
382 }
383 
384 void
385 munmapfd(fd)
386 	int fd;
387 {
388 #ifdef DEBUG
389 	if (mmapdebug & MDB_FOLLOW)
390 		printf("munmapfd(%d): fd %d\n", curproc->p_pid, fd);
391 #endif
392 
393 	/*
394 	 * XXX -- should vm_deallocate any regions mapped to this file
395 	 */
396 	curproc->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
397 }
398 
399 struct mprotect_args {
400 	caddr_t	addr;
401 	int	len;
402 	int	prot;
403 };
404 int
405 mprotect(p, uap, retval)
406 	struct proc *p;
407 	struct mprotect_args *uap;
408 	int *retval;
409 {
410 	vm_offset_t addr;
411 	vm_size_t size;
412 	register vm_prot_t prot;
413 
414 #ifdef DEBUG
415 	if (mmapdebug & MDB_FOLLOW)
416 		printf("mprotect(%d): addr %x len %x prot %d\n",
417 		       p->p_pid, uap->addr, uap->len, uap->prot);
418 #endif
419 
420 	addr = (vm_offset_t)uap->addr;
421 	if ((addr & PAGE_MASK) || uap->len < 0)
422 		return(EINVAL);
423 	size = (vm_size_t)uap->len;
424 	prot = uap->prot & VM_PROT_ALL;
425 
426 	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
427 	    FALSE)) {
428 	case KERN_SUCCESS:
429 		return (0);
430 	case KERN_PROTECTION_FAILURE:
431 		return (EACCES);
432 	}
433 	return (EINVAL);
434 }
435 
436 struct madvise_args {
437 	caddr_t	addr;
438 	int	len;
439 	int	behav;
440 };
441 /* ARGSUSED */
442 int
443 madvise(p, uap, retval)
444 	struct proc *p;
445 	struct madvise_args *uap;
446 	int *retval;
447 {
448 
449 	/* Not yet implemented */
450 	return (EOPNOTSUPP);
451 }
452 
453 struct mincore_args {
454 	caddr_t	addr;
455 	int	len;
456 	char	*vec;
457 };
458 /* ARGSUSED */
459 int
460 mincore(p, uap, retval)
461 	struct proc *p;
462 	struct mincore_args *uap;
463 	int *retval;
464 {
465 
466 	/* Not yet implemented */
467 	return (EOPNOTSUPP);
468 }
469 
470 struct mlock_args {
471 	caddr_t	addr;
472 	size_t	len;
473 };
474 int
475 mlock(p, uap, retval)
476 	struct proc *p;
477 	struct mlock_args *uap;
478 	int *retval;
479 {
480 	vm_offset_t addr;
481 	vm_size_t size;
482 	int error;
483 	extern int vm_page_max_wired;
484 
485 #ifdef DEBUG
486 	if (mmapdebug & MDB_FOLLOW)
487 		printf("mlock(%d): addr %x len %x\n",
488 		       p->p_pid, uap->addr, uap->len);
489 #endif
490 	addr = (vm_offset_t)uap->addr;
491 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
492 		return (EINVAL);
493 	size = round_page((vm_size_t)uap->len);
494 	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
495 		return (EAGAIN);
496 #ifdef pmap_wired_count
497 	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
498 	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
499 		return (EAGAIN);
500 #else
501 	if (error = suser(p->p_ucred, &p->p_acflag))
502 		return (error);
503 #endif
504 
505 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
506 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
507 }
508 
509 struct munlock_args {
510 	caddr_t	addr;
511 	size_t	len;
512 };
513 int
514 munlock(p, uap, retval)
515 	struct proc *p;
516 	struct munlock_args *uap;
517 	int *retval;
518 {
519 	vm_offset_t addr;
520 	vm_size_t size;
521 	int error;
522 
523 #ifdef DEBUG
524 	if (mmapdebug & MDB_FOLLOW)
525 		printf("munlock(%d): addr %x len %x\n",
526 		       p->p_pid, uap->addr, uap->len);
527 #endif
528 	addr = (vm_offset_t)uap->addr;
529 	if ((addr & PAGE_MASK) || uap->addr + uap->len < uap->addr)
530 		return (EINVAL);
531 #ifndef pmap_wired_count
532 	if (error = suser(p->p_ucred, &p->p_acflag))
533 		return (error);
534 #endif
535 	size = round_page((vm_size_t)uap->len);
536 
537 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
538 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
539 }
540 
541 /*
542  * Internal version of mmap.
543  * Currently used by mmap, exec, and sys5 shared memory.
544  * Handle is either a vnode pointer or NULL for MAP_ANON.
545  */
546 int
547 vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
548 	register vm_map_t map;
549 	register vm_offset_t *addr;
550 	register vm_size_t size;
551 	vm_prot_t prot, maxprot;
552 	register int flags;
553 	caddr_t handle;		/* XXX should be vp */
554 	vm_offset_t foff;
555 {
556 	register vm_pager_t pager;
557 	boolean_t fitit;
558 	vm_object_t object;
559 	struct vnode *vp = NULL;
560 	int type;
561 	int rv = KERN_SUCCESS;
562 
563 	if (size == 0)
564 		return (0);
565 
566 	if ((flags & MAP_FIXED) == 0) {
567 		fitit = TRUE;
568 		*addr = round_page(*addr);
569 	} else {
570 		fitit = FALSE;
571 		(void)vm_deallocate(map, *addr, size);
572 	}
573 
574 	/*
575 	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
576 	 * gain a reference to ensure continued existance of the object.
577 	 * (XXX the exception is to appease the pageout daemon)
578 	 */
579 	if (flags & MAP_ANON)
580 		type = PG_DFLT;
581 	else {
582 		vp = (struct vnode *)handle;
583 		if (vp->v_type == VCHR) {
584 			type = PG_DEVICE;
585 			handle = (caddr_t)vp->v_rdev;
586 		} else
587 			type = PG_VNODE;
588 	}
589 	pager = vm_pager_allocate(type, handle, size, prot, foff);
590 	if (pager == NULL)
591 		return (type == PG_DEVICE ? EINVAL : ENOMEM);
592 	/*
593 	 * Find object and release extra reference gained by lookup
594 	 */
595 	object = vm_object_lookup(pager);
596 	vm_object_deallocate(object);
597 
598 	/*
599 	 * Anonymous memory.
600 	 */
601 	if (flags & MAP_ANON) {
602 		rv = vm_allocate_with_pager(map, addr, size, fitit,
603 					    pager, foff, TRUE);
604 		if (rv != KERN_SUCCESS) {
605 			if (handle == NULL)
606 				vm_pager_deallocate(pager);
607 			else
608 				vm_object_deallocate(object);
609 			goto out;
610 		}
611 		/*
612 		 * Don't cache anonymous objects.
613 		 * Loses the reference gained by vm_pager_allocate.
614 		 * Note that object will be NULL when handle == NULL,
615 		 * this is ok since vm_allocate_with_pager has made
616 		 * sure that these objects are uncached.
617 		 */
618 		(void) pager_cache(object, FALSE);
619 #ifdef DEBUG
620 		if (mmapdebug & MDB_MAPIT)
621 			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
622 			       curproc->p_pid, *addr, size, pager);
623 #endif
624 	}
625 	/*
626 	 * Must be a mapped file.
627 	 * Distinguish between character special and regular files.
628 	 */
629 	else if (vp->v_type == VCHR) {
630 		rv = vm_allocate_with_pager(map, addr, size, fitit,
631 					    pager, foff, FALSE);
632 		/*
633 		 * Uncache the object and lose the reference gained
634 		 * by vm_pager_allocate().  If the call to
635 		 * vm_allocate_with_pager() was sucessful, then we
636 		 * gained an additional reference ensuring the object
637 		 * will continue to exist.  If the call failed then
638 		 * the deallocate call below will terminate the
639 		 * object which is fine.
640 		 */
641 		(void) pager_cache(object, FALSE);
642 		if (rv != KERN_SUCCESS)
643 			goto out;
644 	}
645 	/*
646 	 * A regular file
647 	 */
648 	else {
649 #ifdef DEBUG
650 		if (object == NULL)
651 			printf("vm_mmap: no object: vp %x, pager %x\n",
652 			       vp, pager);
653 #endif
654 		/*
655 		 * Map it directly.
656 		 * Allows modifications to go out to the vnode.
657 		 */
658 		if (flags & MAP_SHARED) {
659 			rv = vm_allocate_with_pager(map, addr, size,
660 						    fitit, pager,
661 						    foff, FALSE);
662 			if (rv != KERN_SUCCESS) {
663 				vm_object_deallocate(object);
664 				goto out;
665 			}
666 			/*
667 			 * Don't cache the object.  This is the easiest way
668 			 * of ensuring that data gets back to the filesystem
669 			 * because vnode_pager_deallocate() will fsync the
670 			 * vnode.  pager_cache() will lose the extra ref.
671 			 */
672 			if (prot & VM_PROT_WRITE)
673 				pager_cache(object, FALSE);
674 			else
675 				vm_object_deallocate(object);
676 		}
677 		/*
678 		 * Copy-on-write of file.  Two flavors.
679 		 * MAP_COPY is true COW, you essentially get a snapshot of
680 		 * the region at the time of mapping.  MAP_PRIVATE means only
681 		 * that your changes are not reflected back to the object.
682 		 * Changes made by others will be seen.
683 		 */
684 		else {
685 			vm_map_t tmap;
686 			vm_offset_t off;
687 
688 			/* locate and allocate the target address space */
689 			rv = vm_map_find(map, NULL, (vm_offset_t)0,
690 					 addr, size, fitit);
691 			if (rv != KERN_SUCCESS) {
692 				vm_object_deallocate(object);
693 				goto out;
694 			}
695 			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
696 					     VM_MIN_ADDRESS+size, TRUE);
697 			off = VM_MIN_ADDRESS;
698 			rv = vm_allocate_with_pager(tmap, &off, size,
699 						    TRUE, pager,
700 						    foff, FALSE);
701 			if (rv != KERN_SUCCESS) {
702 				vm_object_deallocate(object);
703 				vm_map_deallocate(tmap);
704 				goto out;
705 			}
706 			/*
707 			 * (XXX)
708 			 * MAP_PRIVATE implies that we see changes made by
709 			 * others.  To ensure that we need to guarentee that
710 			 * no copy object is created (otherwise original
711 			 * pages would be pushed to the copy object and we
712 			 * would never see changes made by others).  We
713 			 * totally sleeze it right now by marking the object
714 			 * internal temporarily.
715 			 */
716 			if ((flags & MAP_COPY) == 0)
717 				object->flags |= OBJ_INTERNAL;
718 			rv = vm_map_copy(map, tmap, *addr, size, off,
719 					 FALSE, FALSE);
720 			object->flags &= ~OBJ_INTERNAL;
721 			/*
722 			 * (XXX)
723 			 * My oh my, this only gets worse...
724 			 * Force creation of a shadow object so that
725 			 * vm_map_fork will do the right thing.
726 			 */
727 			if ((flags & MAP_COPY) == 0) {
728 				vm_map_t tmap;
729 				vm_map_entry_t tentry;
730 				vm_object_t tobject;
731 				vm_offset_t toffset;
732 				vm_prot_t tprot;
733 				boolean_t twired, tsu;
734 
735 				tmap = map;
736 				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
737 					      &tentry, &tobject, &toffset,
738 					      &tprot, &twired, &tsu);
739 				vm_map_lookup_done(tmap, tentry);
740 			}
741 			/*
742 			 * (XXX)
743 			 * Map copy code cannot detect sharing unless a
744 			 * sharing map is involved.  So we cheat and write
745 			 * protect everything ourselves.
746 			 */
747 			vm_object_pmap_copy(object, foff, foff + size);
748 			vm_object_deallocate(object);
749 			vm_map_deallocate(tmap);
750 			if (rv != KERN_SUCCESS)
751 				goto out;
752 		}
753 #ifdef DEBUG
754 		if (mmapdebug & MDB_MAPIT)
755 			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
756 			       curproc->p_pid, *addr, size, pager);
757 #endif
758 	}
759 	/*
760 	 * Correct protection (default is VM_PROT_ALL).
761 	 * If maxprot is different than prot, we must set both explicitly.
762 	 */
763 	rv = KERN_SUCCESS;
764 	if (maxprot != VM_PROT_ALL)
765 		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
766 	if (rv == KERN_SUCCESS && prot != maxprot)
767 		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
768 	if (rv != KERN_SUCCESS) {
769 		(void) vm_deallocate(map, *addr, size);
770 		goto out;
771 	}
772 	/*
773 	 * Shared memory is also shared with children.
774 	 */
775 	if (flags & MAP_SHARED) {
776 		rv = vm_inherit(map, *addr, size, VM_INHERIT_SHARE);
777 		if (rv != KERN_SUCCESS) {
778 			(void) vm_deallocate(map, *addr, size);
779 			goto out;
780 		}
781 	}
782 out:
783 #ifdef DEBUG
784 	if (mmapdebug & MDB_MAPIT)
785 		printf("vm_mmap: rv %d\n", rv);
786 #endif
787 	switch (rv) {
788 	case KERN_SUCCESS:
789 		return (0);
790 	case KERN_INVALID_ADDRESS:
791 	case KERN_NO_SPACE:
792 		return (ENOMEM);
793 	case KERN_PROTECTION_FAILURE:
794 		return (EACCES);
795 	default:
796 		return (EINVAL);
797 	}
798 }
799 
800 /*
801  * Internal bastardized version of MACHs vm_region system call.
802  * Given address and size it returns map attributes as well
803  * as the (locked) object mapped at that location.
804  */
805 int
806 vm_region(map, addr, size, prot, max_prot, inheritance, shared, object, objoff)
807 	vm_map_t	map;
808 	vm_offset_t	*addr;		/* IN/OUT */
809 	vm_size_t	*size;		/* OUT */
810 	vm_prot_t	*prot;		/* OUT */
811 	vm_prot_t	*max_prot;	/* OUT */
812 	vm_inherit_t	*inheritance;	/* OUT */
813 	boolean_t	*shared;	/* OUT */
814 	vm_object_t	*object;	/* OUT */
815 	vm_offset_t	*objoff;	/* OUT */
816 {
817 	vm_map_entry_t	tmp_entry;
818 	register
819 	vm_map_entry_t	entry;
820 	register
821 	vm_offset_t	tmp_offset;
822 	vm_offset_t	start;
823 
824 	if (map == NULL)
825 		return(KERN_INVALID_ARGUMENT);
826 
827 	start = *addr;
828 
829 	vm_map_lock_read(map);
830 	if (!vm_map_lookup_entry(map, start, &tmp_entry)) {
831 		if ((entry = tmp_entry->next) == &map->header) {
832 			vm_map_unlock_read(map);
833 		   	return(KERN_NO_SPACE);
834 		}
835 		start = entry->start;
836 		*addr = start;
837 	} else
838 		entry = tmp_entry;
839 
840 	*prot = entry->protection;
841 	*max_prot = entry->max_protection;
842 	*inheritance = entry->inheritance;
843 
844 	tmp_offset = entry->offset + (start - entry->start);
845 	*size = (entry->end - start);
846 
847 	if (entry->is_a_map) {
848 		register vm_map_t share_map;
849 		vm_size_t share_size;
850 
851 		share_map = entry->object.share_map;
852 
853 		vm_map_lock_read(share_map);
854 		(void) vm_map_lookup_entry(share_map, tmp_offset, &tmp_entry);
855 
856 		if ((share_size = (tmp_entry->end - tmp_offset)) < *size)
857 			*size = share_size;
858 
859 		vm_object_lock(tmp_entry->object);
860 		*object = tmp_entry->object.vm_object;
861 		*objoff = tmp_entry->offset + (tmp_offset - tmp_entry->start);
862 
863 		*shared = (share_map->ref_count != 1);
864 		vm_map_unlock_read(share_map);
865 	} else {
866 		vm_object_lock(entry->object);
867 		*object = entry->object.vm_object;
868 		*objoff = tmp_offset;
869 
870 		*shared = FALSE;
871 	}
872 
873 	vm_map_unlock_read(map);
874 
875 	return(KERN_SUCCESS);
876 }
877 
878 /*
879  * Yet another bastard routine.
880  */
881 int
882 vm_allocate_with_pager(map, addr, size, fitit, pager, poffset, internal)
883 	register vm_map_t	map;
884 	register vm_offset_t	*addr;
885 	register vm_size_t	size;
886 	boolean_t		fitit;
887 	vm_pager_t		pager;
888 	vm_offset_t		poffset;
889 	boolean_t		internal;
890 {
891 	register vm_object_t	object;
892 	register int		result;
893 
894 	if (map == NULL)
895 		return(KERN_INVALID_ARGUMENT);
896 
897 	*addr = trunc_page(*addr);
898 	size = round_page(size);
899 
900 	/*
901 	 *	Lookup the pager/paging-space in the object cache.
902 	 *	If it's not there, then create a new object and cache
903 	 *	it.
904 	 */
905 	object = vm_object_lookup(pager);
906 	cnt.v_lookups++;
907 	if (object == NULL) {
908 		object = vm_object_allocate(size);
909 		/*
910 		 * From Mike Hibler: "unnamed anonymous objects should never
911 		 * be on the hash list ... For now you can just change
912 		 * vm_allocate_with_pager to not do vm_object_enter if this
913 		 * is an internal object ..."
914 		 */
915 		if (!internal)
916 			vm_object_enter(object, pager);
917 	} else
918 		cnt.v_hits++;
919 	if (internal)
920 		object->flags |= OBJ_INTERNAL;
921 	else
922 		object->flags &= ~OBJ_INTERNAL;
923 
924 	result = vm_map_find(map, object, poffset, addr, size, fitit);
925 	if (result != KERN_SUCCESS)
926 		vm_object_deallocate(object);
927 	else if (pager != NULL)
928 		vm_object_setpager(object, pager, (vm_offset_t) 0, TRUE);
929 	return(result);
930 }
931 
932 /*
933  * XXX: this routine belongs in vm_map.c.
934  *
935  * Returns TRUE if the range [start - end) is allocated in either
936  * a single entry (single_entry == TRUE) or multiple contiguous
937  * entries (single_entry == FALSE).
938  *
939  * start and end should be page aligned.
940  */
941 boolean_t
942 vm_map_is_allocated(map, start, end, single_entry)
943 	vm_map_t map;
944 	vm_offset_t start, end;
945 	boolean_t single_entry;
946 {
947 	vm_map_entry_t mapent;
948 	register vm_offset_t nend;
949 
950 	vm_map_lock_read(map);
951 
952 	/*
953 	 * Start address not in any entry
954 	 */
955 	if (!vm_map_lookup_entry(map, start, &mapent)) {
956 		vm_map_unlock_read(map);
957 		return (FALSE);
958 	}
959 	/*
960 	 * Find the maximum stretch of contiguously allocated space
961 	 */
962 	nend = mapent->end;
963 	if (!single_entry) {
964 		mapent = mapent->next;
965 		while (mapent != &map->header && mapent->start == nend) {
966 			nend = mapent->end;
967 			mapent = mapent->next;
968 		}
969 	}
970 
971 	vm_map_unlock_read(map);
972 	return (end <= nend);
973 }
974