xref: /original-bsd/sys/vm/vm_mmap.c (revision fac0c393)
1 /*
2  * Copyright (c) 1988 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
13  *
14  *	@(#)vm_mmap.c	8.10 (Berkeley) 02/19/95
15  */
16 
17 /*
18  * Mapped file (mmap) interface to VM
19  */
20 
21 #include <sys/param.h>
22 #include <sys/systm.h>
23 #include <sys/filedesc.h>
24 #include <sys/resourcevar.h>
25 #include <sys/proc.h>
26 #include <sys/vnode.h>
27 #include <sys/file.h>
28 #include <sys/mman.h>
29 #include <sys/conf.h>
30 
31 #include <sys/mount.h>
32 #include <sys/syscallargs.h>
33 
34 #include <miscfs/specfs/specdev.h>
35 
36 #include <vm/vm.h>
37 #include <vm/vm_pager.h>
38 #include <vm/vm_prot.h>
39 
#ifdef DEBUG
/*
 * Debug tracing control for this file: a bitmask of the MDB_* flags
 * below.  Each flag enables a group of printf()s; zero (the default)
 * keeps everything quiet.
 */
int mmapdebug = 0;
#define MDB_FOLLOW	0x01	/* trace entry to the mmap-family calls */
#define MDB_SYNC	0x02	/* trace msync() and the range it cleans */
#define MDB_MAPIT	0x04	/* trace vm_mmap() mappings and its result */
#endif

46 
47 /* ARGSUSED */
48 int
49 sbrk(p, uap, retval)
50 	struct proc *p;
51 	struct sbrk_args /* {
52 		syscallarg(int) incr;
53 	} */ *uap;
54 	register_t *retval;
55 {
56 
57 	/* Not yet implemented */
58 	return (EOPNOTSUPP);
59 }
60 
61 /* ARGSUSED */
62 int
63 sstk(p, uap, retval)
64 	struct proc *p;
65 	struct sstk_args /* {
66 		syscallarg(int) incr;
67 	} */ *uap;
68 	register_t *retval;
69 {
70 
71 	/* Not yet implemented */
72 	return (EOPNOTSUPP);
73 }
74 
75 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
76 /* ARGSUSED */
77 int
78 compat_43_getpagesize(p, uap, retval)
79 	struct proc *p;
80 	void *uap;
81 	register_t *retval;
82 {
83 
84 	*retval = PAGE_SIZE;
85 	return (0);
86 }
87 #endif /* COMPAT_43 || COMPAT_SUNOS */
88 
89 #ifdef COMPAT_43
90 int
91 compat_43_mmap(p, uap, retval)
92 	struct proc *p;
93 	register struct compat_43_mmap_args /* {
94 		syscallarg(caddr_t) addr;
95 		syscallarg(int) len;
96 		syscallarg(int) prot;
97 		syscallarg(int) flags;
98 		syscallarg(int) fd;
99 		syscallarg(long) pos;
100 	} */ *uap;
101 	register_t *retval;
102 {
103 	struct mmap_args /* {
104 		syscallarg(caddr_t) addr;
105 		syscallarg(size_t) len;
106 		syscallarg(int) prot;
107 		syscallarg(int) flags;
108 		syscallarg(int) fd;
109 		syscallarg(long) pad;
110 		syscallarg(off_t) pos;
111 	} */ nargs;
112 	static const char cvtbsdprot[8] = {
113 		0,
114 		PROT_EXEC,
115 		PROT_WRITE,
116 		PROT_EXEC|PROT_WRITE,
117 		PROT_READ,
118 		PROT_EXEC|PROT_READ,
119 		PROT_WRITE|PROT_READ,
120 		PROT_EXEC|PROT_WRITE|PROT_READ,
121 	};
122 #define	OMAP_ANON	0x0002
123 #define	OMAP_COPY	0x0020
124 #define	OMAP_SHARED	0x0010
125 #define	OMAP_FIXED	0x0100
126 #define	OMAP_INHERIT	0x0800
127 
128 	SCARG(&nargs, addr) = SCARG(uap, addr);
129 	SCARG(&nargs, len) = SCARG(uap, len);
130 	SCARG(&nargs, prot) = cvtbsdprot[SCARG(uap, prot)&0x7];
131 	SCARG(&nargs, flags) = 0;
132 	if (SCARG(uap, flags) & OMAP_ANON)
133 		SCARG(&nargs, flags) |= MAP_ANON;
134 	if (SCARG(uap, flags) & OMAP_COPY)
135 		SCARG(&nargs, flags) |= MAP_COPY;
136 	if (SCARG(uap, flags) & OMAP_SHARED)
137 		SCARG(&nargs, flags) |= MAP_SHARED;
138 	else
139 		SCARG(&nargs, flags) |= MAP_PRIVATE;
140 	if (SCARG(uap, flags) & OMAP_FIXED)
141 		SCARG(&nargs, flags) |= MAP_FIXED;
142 	if (SCARG(uap, flags) & OMAP_INHERIT)
143 		SCARG(&nargs, flags) |= MAP_INHERIT;
144 	SCARG(&nargs, fd) = SCARG(uap, fd);
145 	SCARG(&nargs, pos) = SCARG(uap, pos);
146 	return (mmap(p, &nargs, retval));
147 }
148 #endif
149 
150 int
151 mmap(p, uap, retval)
152 	struct proc *p;
153 	register struct mmap_args /* {
154 		syscallarg(caddr_t) addr;
155 		syscallarg(size_t) len;
156 		syscallarg(int) prot;
157 		syscallarg(int) flags;
158 		syscallarg(int) fd;
159 		syscallarg(long) pad;
160 		syscallarg(off_t) pos;
161 	} */ *uap;
162 	register_t *retval;
163 {
164 	register struct filedesc *fdp = p->p_fd;
165 	register struct file *fp;
166 	struct vnode *vp;
167 	vm_offset_t addr, pos;
168 	vm_size_t size;
169 	vm_prot_t prot, maxprot;
170 	caddr_t handle;
171 	int flags, error;
172 
173 	prot = SCARG(uap, prot) & VM_PROT_ALL;
174 	flags = SCARG(uap, flags);
175 	pos = SCARG(uap, pos);
176 #ifdef DEBUG
177 	if (mmapdebug & MDB_FOLLOW)
178 		printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
179 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len), prot,
180 		       flags, SCARG(uap, fd), pos);
181 #endif
182 	/*
183 	 * Address (if FIXED) must be page aligned.
184 	 * Size is implicitly rounded to a page boundary.
185 	 *
186 	 * XXX most (all?) vendors require that the file offset be
187 	 * page aligned as well.  However, we already have applications
188 	 * (e.g. nlist) that rely on unrestricted alignment.  Since we
189 	 * support it, let it happen.
190 	 */
191 	addr = (vm_offset_t) SCARG(uap, addr);
192 	if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
193 #if 0
194 	    ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
195 #endif
196 	    (ssize_t)SCARG(uap, len) < 0 || ((flags & MAP_ANON) && SCARG(uap, fd) != -1))
197 		return (EINVAL);
198 	size = (vm_size_t) round_page(SCARG(uap, len));
199 	/*
200 	 * Check for illegal addresses.  Watch out for address wrap...
201 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
202 	 */
203 	if (flags & MAP_FIXED) {
204 		if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
205 			return (EINVAL);
206 		if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
207 			return (EINVAL);
208 		if (addr > addr + size)
209 			return (EINVAL);
210 	}
211 	/*
212 	 * XXX for non-fixed mappings where no hint is provided or
213 	 * the hint would fall in the potential heap space,
214 	 * place it after the end of the largest possible heap.
215 	 *
216 	 * There should really be a pmap call to determine a reasonable
217 	 * location.
218 	 */
219 	else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
220 		addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
221 	if (flags & MAP_ANON) {
222 		/*
223 		 * Mapping blank space is trivial.
224 		 */
225 		handle = NULL;
226 		maxprot = VM_PROT_ALL;
227 		pos = 0;
228 	} else {
229 		/*
230 		 * Mapping file, get fp for validation.
231 		 * Obtain vnode and make sure it is of appropriate type.
232 		 */
233 		if (((unsigned)SCARG(uap, fd)) >= fdp->fd_nfiles ||
234 		    (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
235 			return (EBADF);
236 		if (fp->f_type != DTYPE_VNODE)
237 			return (EINVAL);
238 		vp = (struct vnode *)fp->f_data;
239 		if (vp->v_type != VREG && vp->v_type != VCHR)
240 			return (EINVAL);
241 		/*
242 		 * XXX hack to handle use of /dev/zero to map anon
243 		 * memory (ala SunOS).
244 		 */
245 		if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
246 			handle = NULL;
247 			maxprot = VM_PROT_ALL;
248 			flags |= MAP_ANON;
249 		} else {
250 			/*
251 			 * Ensure that file and memory protections are
252 			 * compatible.  Note that we only worry about
253 			 * writability if mapping is shared; in this case,
254 			 * current and max prot are dictated by the open file.
255 			 * XXX use the vnode instead?  Problem is: what
256 			 * credentials do we use for determination?
257 			 * What if proc does a setuid?
258 			 */
259 			maxprot = VM_PROT_EXECUTE;	/* ??? */
260 			if (fp->f_flag & FREAD)
261 				maxprot |= VM_PROT_READ;
262 			else if (prot & PROT_READ)
263 				return (EACCES);
264 			if (flags & MAP_SHARED) {
265 				if (fp->f_flag & FWRITE)
266 					maxprot |= VM_PROT_WRITE;
267 				else if (prot & PROT_WRITE)
268 					return (EACCES);
269 			} else
270 				maxprot |= VM_PROT_WRITE;
271 			handle = (caddr_t)vp;
272 		}
273 	}
274 	error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
275 	    flags, handle, pos);
276 	if (error == 0)
277 		*retval = (register_t)addr;
278 	return (error);
279 }
280 
281 int
282 msync(p, uap, retval)
283 	struct proc *p;
284 	struct msync_args /* {
285 		syscallarg(caddr_t) addr;
286 		syscallarg(int) len;
287 	} */ *uap;
288 	register_t *retval;
289 {
290 	vm_offset_t addr;
291 	vm_size_t size;
292 	vm_map_t map;
293 	int rv;
294 	boolean_t syncio, invalidate;
295 
296 #ifdef DEBUG
297 	if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
298 		printf("msync(%d): addr %x len %x\n",
299 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
300 #endif
301 	if (((vm_offset_t)SCARG(uap, addr) & PAGE_MASK) ||
302 	    SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
303 		return (EINVAL);
304 	map = &p->p_vmspace->vm_map;
305 	addr = (vm_offset_t)SCARG(uap, addr);
306 	size = (vm_size_t)SCARG(uap, len);
307 	/*
308 	 * XXX Gak!  If size is zero we are supposed to sync "all modified
309 	 * pages with the region containing addr".  Unfortunately, we
310 	 * don't really keep track of individual mmaps so we approximate
311 	 * by flushing the range of the map entry containing addr.
312 	 * This can be incorrect if the region splits or is coalesced
313 	 * with a neighbor.
314 	 */
315 	if (size == 0) {
316 		vm_map_entry_t entry;
317 
318 		vm_map_lock_read(map);
319 		rv = vm_map_lookup_entry(map, addr, &entry);
320 		vm_map_unlock_read(map);
321 		if (!rv)
322 			return (EINVAL);
323 		addr = entry->start;
324 		size = entry->end - entry->start;
325 	}
326 #ifdef DEBUG
327 	if (mmapdebug & MDB_SYNC)
328 		printf("msync: cleaning/flushing address range [%x-%x)\n",
329 		       addr, addr+size);
330 #endif
331 	/*
332 	 * Could pass this in as a third flag argument to implement
333 	 * Sun's MS_ASYNC.
334 	 */
335 	syncio = TRUE;
336 	/*
337 	 * XXX bummer, gotta flush all cached pages to ensure
338 	 * consistency with the file system cache.  Otherwise, we could
339 	 * pass this in to implement Sun's MS_INVALIDATE.
340 	 */
341 	invalidate = TRUE;
342 	/*
343 	 * Clean the pages and interpret the return value.
344 	 */
345 	rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
346 	switch (rv) {
347 	case KERN_SUCCESS:
348 		break;
349 	case KERN_INVALID_ADDRESS:
350 		return (EINVAL);	/* Sun returns ENOMEM? */
351 	case KERN_FAILURE:
352 		return (EIO);
353 	default:
354 		return (EINVAL);
355 	}
356 	return (0);
357 }
358 
359 int
360 munmap(p, uap, retval)
361 	register struct proc *p;
362 	register struct munmap_args /* {
363 		syscallarg(caddr_t) addr;
364 		syscallarg(int) len;
365 	} */ *uap;
366 	register_t *retval;
367 {
368 	vm_offset_t addr;
369 	vm_size_t size;
370 	vm_map_t map;
371 
372 #ifdef DEBUG
373 	if (mmapdebug & MDB_FOLLOW)
374 		printf("munmap(%d): addr %x len %x\n",
375 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
376 #endif
377 
378 	addr = (vm_offset_t) SCARG(uap, addr);
379 	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
380 		return(EINVAL);
381 	size = (vm_size_t) round_page(SCARG(uap, len));
382 	if (size == 0)
383 		return(0);
384 	/*
385 	 * Check for illegal addresses.  Watch out for address wrap...
386 	 * Note that VM_*_ADDRESS are not constants due to casts (argh).
387 	 */
388 	if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
389 		return (EINVAL);
390 	if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
391 		return (EINVAL);
392 	if (addr > addr + size)
393 		return (EINVAL);
394 	map = &p->p_vmspace->vm_map;
395 	/*
396 	 * Make sure entire range is allocated.
397 	 * XXX this seemed overly restrictive, so we relaxed it.
398 	 */
399 #if 0
400 	if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
401 		return(EINVAL);
402 #endif
403 	/* returns nothing but KERN_SUCCESS anyway */
404 	(void) vm_map_remove(map, addr, addr+size);
405 	return(0);
406 }
407 
408 void
409 munmapfd(p, fd)
410 	struct proc *p;
411 	int fd;
412 {
413 #ifdef DEBUG
414 	if (mmapdebug & MDB_FOLLOW)
415 		printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
416 #endif
417 
418 	/*
419 	 * XXX should vm_deallocate any regions mapped to this file
420 	 */
421 	p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
422 }
423 
424 int
425 mprotect(p, uap, retval)
426 	struct proc *p;
427 	struct mprotect_args /* {
428 		syscallarg(caddr_t) addr;
429 		syscallarg(int) len;
430 		syscallarg(int) prot;
431 	} */ *uap;
432 	register_t *retval;
433 {
434 	vm_offset_t addr;
435 	vm_size_t size;
436 	register vm_prot_t prot;
437 
438 #ifdef DEBUG
439 	if (mmapdebug & MDB_FOLLOW)
440 		printf("mprotect(%d): addr %x len %x prot %d\n",
441 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len), SCARG(uap, prot));
442 #endif
443 
444 	addr = (vm_offset_t)SCARG(uap, addr);
445 	if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
446 		return(EINVAL);
447 	size = (vm_size_t)SCARG(uap, len);
448 	prot = SCARG(uap, prot) & VM_PROT_ALL;
449 
450 	switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
451 	    FALSE)) {
452 	case KERN_SUCCESS:
453 		return (0);
454 	case KERN_PROTECTION_FAILURE:
455 		return (EACCES);
456 	}
457 	return (EINVAL);
458 }
459 
460 /* ARGSUSED */
461 int
462 madvise(p, uap, retval)
463 	struct proc *p;
464 	struct madvise_args /* {
465 		syscallarg(caddr_t) addr;
466 		syscallarg(int) len;
467 		syscallarg(int) behav;
468 	} */ *uap;
469 	register_t *retval;
470 {
471 
472 	/* Not yet implemented */
473 	return (EOPNOTSUPP);
474 }
475 
476 /* ARGSUSED */
477 int
478 mincore(p, uap, retval)
479 	struct proc *p;
480 	struct mincore_args /* {
481 		syscallarg(caddr_t) addr;
482 		syscallarg(int) len;
483 		syscallarg(char *) vec;
484 	} */ *uap;
485 	register_t *retval;
486 {
487 
488 	/* Not yet implemented */
489 	return (EOPNOTSUPP);
490 }
491 
492 int
493 mlock(p, uap, retval)
494 	struct proc *p;
495 	struct mlock_args /* {
496 		syscallarg(caddr_t) addr;
497 		syscallarg(size_t) len;
498 	} */ *uap;
499 	register_t *retval;
500 {
501 	vm_offset_t addr;
502 	vm_size_t size;
503 	int error;
504 	extern int vm_page_max_wired;
505 
506 #ifdef DEBUG
507 	if (mmapdebug & MDB_FOLLOW)
508 		printf("mlock(%d): addr %x len %x\n",
509 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
510 #endif
511 	addr = (vm_offset_t)SCARG(uap, addr);
512 	if ((addr & PAGE_MASK) || SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
513 		return (EINVAL);
514 	size = round_page((vm_size_t)SCARG(uap, len));
515 	if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
516 		return (EAGAIN);
517 #ifdef pmap_wired_count
518 	if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
519 	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
520 		return (EAGAIN);
521 #else
522 	if (error = suser(p->p_ucred, &p->p_acflag))
523 		return (error);
524 #endif
525 
526 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
527 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
528 }
529 
530 int
531 munlock(p, uap, retval)
532 	struct proc *p;
533 	struct munlock_args /* {
534 		syscallarg(caddr_t) addr;
535 		syscallarg(size_t) len;
536 	} */ *uap;
537 	register_t *retval;
538 {
539 	vm_offset_t addr;
540 	vm_size_t size;
541 	int error;
542 
543 #ifdef DEBUG
544 	if (mmapdebug & MDB_FOLLOW)
545 		printf("munlock(%d): addr %x len %x\n",
546 		       p->p_pid, SCARG(uap, addr), SCARG(uap, len));
547 #endif
548 	addr = (vm_offset_t)SCARG(uap, addr);
549 	if ((addr & PAGE_MASK) || SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
550 		return (EINVAL);
551 #ifndef pmap_wired_count
552 	if (error = suser(p->p_ucred, &p->p_acflag))
553 		return (error);
554 #endif
555 	size = round_page((vm_size_t)SCARG(uap, len));
556 
557 	error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
558 	return (error == KERN_SUCCESS ? 0 : ENOMEM);
559 }
560 
561 /*
562  * Internal version of mmap.
563  * Currently used by mmap, exec, and sys5 shared memory.
564  * Handle is either a vnode pointer or NULL for MAP_ANON.
565  */
566 int
567 vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
568 	register vm_map_t map;
569 	register vm_offset_t *addr;
570 	register vm_size_t size;
571 	vm_prot_t prot, maxprot;
572 	register int flags;
573 	caddr_t handle;		/* XXX should be vp */
574 	vm_offset_t foff;
575 {
576 	register vm_pager_t pager;
577 	boolean_t fitit;
578 	vm_object_t object;
579 	struct vnode *vp = NULL;
580 	int type;
581 	int rv = KERN_SUCCESS;
582 
583 	if (size == 0)
584 		return (0);
585 
586 	if ((flags & MAP_FIXED) == 0) {
587 		fitit = TRUE;
588 		*addr = round_page(*addr);
589 	} else {
590 		fitit = FALSE;
591 		(void)vm_deallocate(map, *addr, size);
592 	}
593 
594 	/*
595 	 * Lookup/allocate pager.  All except an unnamed anonymous lookup
596 	 * gain a reference to ensure continued existance of the object.
597 	 * (XXX the exception is to appease the pageout daemon)
598 	 */
599 	if (flags & MAP_ANON)
600 		type = PG_DFLT;
601 	else {
602 		vp = (struct vnode *)handle;
603 		if (vp->v_type == VCHR) {
604 			type = PG_DEVICE;
605 			handle = (caddr_t)vp->v_rdev;
606 		} else
607 			type = PG_VNODE;
608 	}
609 	pager = vm_pager_allocate(type, handle, size, prot, foff);
610 	if (pager == NULL)
611 		return (type == PG_DEVICE ? EINVAL : ENOMEM);
612 	/*
613 	 * Find object and release extra reference gained by lookup
614 	 */
615 	object = vm_object_lookup(pager);
616 	vm_object_deallocate(object);
617 
618 	/*
619 	 * Anonymous memory.
620 	 */
621 	if (flags & MAP_ANON) {
622 		rv = vm_allocate_with_pager(map, addr, size, fitit,
623 					    pager, foff, TRUE);
624 		if (rv != KERN_SUCCESS) {
625 			if (handle == NULL)
626 				vm_pager_deallocate(pager);
627 			else
628 				vm_object_deallocate(object);
629 			goto out;
630 		}
631 		/*
632 		 * Don't cache anonymous objects.
633 		 * Loses the reference gained by vm_pager_allocate.
634 		 * Note that object will be NULL when handle == NULL,
635 		 * this is ok since vm_allocate_with_pager has made
636 		 * sure that these objects are uncached.
637 		 */
638 		(void) pager_cache(object, FALSE);
639 #ifdef DEBUG
640 		if (mmapdebug & MDB_MAPIT)
641 			printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
642 			       curproc->p_pid, *addr, size, pager);
643 #endif
644 	}
645 	/*
646 	 * Must be a mapped file.
647 	 * Distinguish between character special and regular files.
648 	 */
649 	else if (vp->v_type == VCHR) {
650 		rv = vm_allocate_with_pager(map, addr, size, fitit,
651 					    pager, foff, FALSE);
652 		/*
653 		 * Uncache the object and lose the reference gained
654 		 * by vm_pager_allocate().  If the call to
655 		 * vm_allocate_with_pager() was sucessful, then we
656 		 * gained an additional reference ensuring the object
657 		 * will continue to exist.  If the call failed then
658 		 * the deallocate call below will terminate the
659 		 * object which is fine.
660 		 */
661 		(void) pager_cache(object, FALSE);
662 		if (rv != KERN_SUCCESS)
663 			goto out;
664 	}
665 	/*
666 	 * A regular file
667 	 */
668 	else {
669 #ifdef DEBUG
670 		if (object == NULL)
671 			printf("vm_mmap: no object: vp %x, pager %x\n",
672 			       vp, pager);
673 #endif
674 		/*
675 		 * Map it directly.
676 		 * Allows modifications to go out to the vnode.
677 		 */
678 		if (flags & MAP_SHARED) {
679 			rv = vm_allocate_with_pager(map, addr, size,
680 						    fitit, pager,
681 						    foff, FALSE);
682 			if (rv != KERN_SUCCESS) {
683 				vm_object_deallocate(object);
684 				goto out;
685 			}
686 			/*
687 			 * Don't cache the object.  This is the easiest way
688 			 * of ensuring that data gets back to the filesystem
689 			 * because vnode_pager_deallocate() will fsync the
690 			 * vnode.  pager_cache() will lose the extra ref.
691 			 */
692 			if (prot & VM_PROT_WRITE)
693 				pager_cache(object, FALSE);
694 			else
695 				vm_object_deallocate(object);
696 		}
697 		/*
698 		 * Copy-on-write of file.  Two flavors.
699 		 * MAP_COPY is true COW, you essentially get a snapshot of
700 		 * the region at the time of mapping.  MAP_PRIVATE means only
701 		 * that your changes are not reflected back to the object.
702 		 * Changes made by others will be seen.
703 		 */
704 		else {
705 			vm_map_t tmap;
706 			vm_offset_t off;
707 
708 			/* locate and allocate the target address space */
709 			rv = vm_map_find(map, NULL, (vm_offset_t)0,
710 					 addr, size, fitit);
711 			if (rv != KERN_SUCCESS) {
712 				vm_object_deallocate(object);
713 				goto out;
714 			}
715 			tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
716 					     VM_MIN_ADDRESS+size, TRUE);
717 			off = VM_MIN_ADDRESS;
718 			rv = vm_allocate_with_pager(tmap, &off, size,
719 						    TRUE, pager,
720 						    foff, FALSE);
721 			if (rv != KERN_SUCCESS) {
722 				vm_object_deallocate(object);
723 				vm_map_deallocate(tmap);
724 				goto out;
725 			}
726 			/*
727 			 * (XXX)
728 			 * MAP_PRIVATE implies that we see changes made by
729 			 * others.  To ensure that we need to guarentee that
730 			 * no copy object is created (otherwise original
731 			 * pages would be pushed to the copy object and we
732 			 * would never see changes made by others).  We
733 			 * totally sleeze it right now by marking the object
734 			 * internal temporarily.
735 			 */
736 			if ((flags & MAP_COPY) == 0)
737 				object->flags |= OBJ_INTERNAL;
738 			rv = vm_map_copy(map, tmap, *addr, size, off,
739 					 FALSE, FALSE);
740 			object->flags &= ~OBJ_INTERNAL;
741 			/*
742 			 * (XXX)
743 			 * My oh my, this only gets worse...
744 			 * Force creation of a shadow object so that
745 			 * vm_map_fork will do the right thing.
746 			 */
747 			if ((flags & MAP_COPY) == 0) {
748 				vm_map_t tmap;
749 				vm_map_entry_t tentry;
750 				vm_object_t tobject;
751 				vm_offset_t toffset;
752 				vm_prot_t tprot;
753 				boolean_t twired, tsu;
754 
755 				tmap = map;
756 				vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
757 					      &tentry, &tobject, &toffset,
758 					      &tprot, &twired, &tsu);
759 				vm_map_lookup_done(tmap, tentry);
760 			}
761 			/*
762 			 * (XXX)
763 			 * Map copy code cannot detect sharing unless a
764 			 * sharing map is involved.  So we cheat and write
765 			 * protect everything ourselves.
766 			 */
767 			vm_object_pmap_copy(object, foff, foff + size);
768 			vm_object_deallocate(object);
769 			vm_map_deallocate(tmap);
770 			if (rv != KERN_SUCCESS)
771 				goto out;
772 		}
773 #ifdef DEBUG
774 		if (mmapdebug & MDB_MAPIT)
775 			printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
776 			       curproc->p_pid, *addr, size, pager);
777 #endif
778 	}
779 	/*
780 	 * Correct protection (default is VM_PROT_ALL).
781 	 * If maxprot is different than prot, we must set both explicitly.
782 	 */
783 	rv = KERN_SUCCESS;
784 	if (maxprot != VM_PROT_ALL)
785 		rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
786 	if (rv == KERN_SUCCESS && prot != maxprot)
787 		rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
788 	if (rv != KERN_SUCCESS) {
789 		(void) vm_deallocate(map, *addr, size);
790 		goto out;
791 	}
792 	/*
793 	 * Shared memory is also shared with children.
794 	 */
795 	if (flags & MAP_SHARED) {
796 		rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
797 		if (rv != KERN_SUCCESS) {
798 			(void) vm_deallocate(map, *addr, size);
799 			goto out;
800 		}
801 	}
802 out:
803 #ifdef DEBUG
804 	if (mmapdebug & MDB_MAPIT)
805 		printf("vm_mmap: rv %d\n", rv);
806 #endif
807 	switch (rv) {
808 	case KERN_SUCCESS:
809 		return (0);
810 	case KERN_INVALID_ADDRESS:
811 	case KERN_NO_SPACE:
812 		return (ENOMEM);
813 	case KERN_PROTECTION_FAILURE:
814 		return (EACCES);
815 	default:
816 		return (EINVAL);
817 	}
818 }
819