1808a4de1Smckusick /*
2808a4de1Smckusick * Copyright (c) 1988 University of Utah.
313c4e08cSbostic * Copyright (c) 1991, 1993
413c4e08cSbostic * The Regents of the University of California. All rights reserved.
5808a4de1Smckusick *
6808a4de1Smckusick * This code is derived from software contributed to Berkeley by
7808a4de1Smckusick * the Systems Programming Group of the University of Utah Computer
8808a4de1Smckusick * Science Department.
9808a4de1Smckusick *
10808a4de1Smckusick * %sccs.include.redist.c%
11808a4de1Smckusick *
12ff2bfc3fShibler * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
13808a4de1Smckusick *
14*7ee749f6Scgd * @(#)vm_mmap.c 8.10 (Berkeley) 02/19/95
15808a4de1Smckusick */
16808a4de1Smckusick
17808a4de1Smckusick /*
18808a4de1Smckusick * Mapped file (mmap) interface to VM
19808a4de1Smckusick */
20808a4de1Smckusick
2164f61df8Sbostic #include <sys/param.h>
2264f61df8Sbostic #include <sys/systm.h>
2364f61df8Sbostic #include <sys/filedesc.h>
2480376accShibler #include <sys/resourcevar.h>
2564f61df8Sbostic #include <sys/proc.h>
2664f61df8Sbostic #include <sys/vnode.h>
2764f61df8Sbostic #include <sys/file.h>
2864f61df8Sbostic #include <sys/mman.h>
2964f61df8Sbostic #include <sys/conf.h>
30808a4de1Smckusick
31*7ee749f6Scgd #include <sys/mount.h>
32*7ee749f6Scgd #include <sys/syscallargs.h>
33*7ee749f6Scgd
344938844dSpendry #include <miscfs/specfs/specdev.h>
354938844dSpendry
3664f61df8Sbostic #include <vm/vm.h>
3764f61df8Sbostic #include <vm/vm_pager.h>
3864f61df8Sbostic #include <vm/vm_prot.h>
39808a4de1Smckusick
#ifdef DEBUG
/*
 * Debug tracing for this file.  mmapdebug is a bitmask of the MDB_*
 * values below; each diagnostic printf in the file is guarded by a test
 * of one or more of these bits.  Zero (the default) disables tracing.
 */
#define	MDB_FOLLOW	0x01	/* trace entry to the system calls */
#define	MDB_SYNC	0x02	/* trace msync and the range it cleans */
#define	MDB_MAPIT	0x04	/* trace vm_mmap mappings and result */
int	mmapdebug = 0;
#endif /* DEBUG */
46808a4de1Smckusick
47808a4de1Smckusick /* ARGSUSED */
4864f61df8Sbostic int
sbrk(p,uap,retval)49808a4de1Smckusick sbrk(p, uap, retval)
50808a4de1Smckusick struct proc *p;
51*7ee749f6Scgd struct sbrk_args /* {
52*7ee749f6Scgd syscallarg(int) incr;
53*7ee749f6Scgd } */ *uap;
54*7ee749f6Scgd register_t *retval;
55808a4de1Smckusick {
56808a4de1Smckusick
57808a4de1Smckusick /* Not yet implemented */
58808a4de1Smckusick return (EOPNOTSUPP);
59808a4de1Smckusick }
60808a4de1Smckusick
61808a4de1Smckusick /* ARGSUSED */
6264f61df8Sbostic int
sstk(p,uap,retval)63808a4de1Smckusick sstk(p, uap, retval)
64808a4de1Smckusick struct proc *p;
65*7ee749f6Scgd struct sstk_args /* {
66*7ee749f6Scgd syscallarg(int) incr;
67*7ee749f6Scgd } */ *uap;
68*7ee749f6Scgd register_t *retval;
69808a4de1Smckusick {
70808a4de1Smckusick
71808a4de1Smckusick /* Not yet implemented */
72808a4de1Smckusick return (EOPNOTSUPP);
73808a4de1Smckusick }
74808a4de1Smckusick
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
/*
 * Compatibility getpagesize system call, built only for COMPAT_43 or
 * COMPAT_SUNOS kernels.
 *
 * Takes no arguments (uap is ignored).  Stores PAGE_SIZE in *retval
 * and always returns 0.
 */
/* ARGSUSED */
int
compat_43_getpagesize(p, uap, retval)
	struct proc *p;
	void *uap;
	register_t *retval;
{

	retval[0] = PAGE_SIZE;
	return (0);
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
884fc8e28eSmckusick
#ifdef COMPAT_43
/*
 * Compatibility mmap system call (COMPAT_43 kernels only).
 *
 * Repackages the old argument structure as a struct mmap_args and hands
 * it to mmap():
 *   - addr, len, fd and pos are copied across unchanged (len and pos
 *     are converted to the wider types of the new structure);
 *   - the low three bits of prot index cvtbsdprot[] to obtain the
 *     PROT_* combination;
 *   - each OMAP_* flag bit is translated to its MAP_* counterpart, and
 *     a request without OMAP_SHARED becomes MAP_PRIVATE.
 * The pad member of the new structure is not set here.
 *
 * Returns whatever mmap() returns; *retval is filled in by mmap().
 */
int
compat_43_mmap(p, uap, retval)
	struct proc *p;
	register struct compat_43_mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(int) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pos;
	} */ *uap;
	register_t *retval;
{
	struct mmap_args /* {
		syscallarg(caddr_t) addr;
		syscallarg(size_t) len;
		syscallarg(int) prot;
		syscallarg(int) flags;
		syscallarg(int) fd;
		syscallarg(long) pad;
		syscallarg(off_t) pos;
	} */ nargs;
	/* Old protection value (three bits) -> PROT_* mask. */
	static const char cvtbsdprot[8] = {
		0,
		PROT_EXEC,
		PROT_WRITE,
		PROT_EXEC|PROT_WRITE,
		PROT_READ,
		PROT_EXEC|PROT_READ,
		PROT_WRITE|PROT_READ,
		PROT_EXEC|PROT_WRITE|PROT_READ,
	};
	int oflags, nflags;
	/* Flag bits as encoded in the old interface. */
#define	OMAP_ANON	0x0002
#define	OMAP_COPY	0x0020
#define	OMAP_SHARED	0x0010
#define	OMAP_FIXED	0x0100
#define	OMAP_INHERIT	0x0800

	/* Translate the flag word; sharing mode defaults to private. */
	oflags = SCARG(uap, flags);
	nflags = (oflags & OMAP_SHARED) ? MAP_SHARED : MAP_PRIVATE;
	if (oflags & OMAP_ANON)
		nflags |= MAP_ANON;
	if (oflags & OMAP_COPY)
		nflags |= MAP_COPY;
	if (oflags & OMAP_FIXED)
		nflags |= MAP_FIXED;
	if (oflags & OMAP_INHERIT)
		nflags |= MAP_INHERIT;

	SCARG(&nargs, addr) = SCARG(uap, addr);
	SCARG(&nargs, len) = SCARG(uap, len);
	SCARG(&nargs, prot) = cvtbsdprot[SCARG(uap, prot) & 0x7];
	SCARG(&nargs, flags) = nflags;
	SCARG(&nargs, fd) = SCARG(uap, fd);
	SCARG(&nargs, pos) = SCARG(uap, pos);

	return (mmap(p, &nargs, retval));
}
#endif /* COMPAT_43 */
1499cf4a8a8Smckusick
1509cf4a8a8Smckusick int
mmap(p,uap,retval)1513d556935Sbostic mmap(p, uap, retval)
1529cf4a8a8Smckusick struct proc *p;
153*7ee749f6Scgd register struct mmap_args /* {
154*7ee749f6Scgd syscallarg(caddr_t) addr;
155*7ee749f6Scgd syscallarg(size_t) len;
156*7ee749f6Scgd syscallarg(int) prot;
157*7ee749f6Scgd syscallarg(int) flags;
158*7ee749f6Scgd syscallarg(int) fd;
159*7ee749f6Scgd syscallarg(long) pad;
160*7ee749f6Scgd syscallarg(off_t) pos;
161*7ee749f6Scgd } */ *uap;
162*7ee749f6Scgd register_t *retval;
1639cf4a8a8Smckusick {
164b5a4ea96Smckusick register struct filedesc *fdp = p->p_fd;
165b5a4ea96Smckusick register struct file *fp;
166808a4de1Smckusick struct vnode *vp;
167ef7b4811Shibler vm_offset_t addr, pos;
168808a4de1Smckusick vm_size_t size;
169525ac35aShibler vm_prot_t prot, maxprot;
170808a4de1Smckusick caddr_t handle;
1718c153b3cSmckusick int flags, error;
172808a4de1Smckusick
173*7ee749f6Scgd prot = SCARG(uap, prot) & VM_PROT_ALL;
174*7ee749f6Scgd flags = SCARG(uap, flags);
175*7ee749f6Scgd pos = SCARG(uap, pos);
176808a4de1Smckusick #ifdef DEBUG
177808a4de1Smckusick if (mmapdebug & MDB_FOLLOW)
178808a4de1Smckusick printf("mmap(%d): addr %x len %x pro %x flg %x fd %d pos %x\n",
179*7ee749f6Scgd p->p_pid, SCARG(uap, addr), SCARG(uap, len), prot,
180*7ee749f6Scgd flags, SCARG(uap, fd), pos);
181808a4de1Smckusick #endif
182808a4de1Smckusick /*
1830c4a53bbShibler * Address (if FIXED) must be page aligned.
1840c4a53bbShibler * Size is implicitly rounded to a page boundary.
185888046efShibler *
186888046efShibler * XXX most (all?) vendors require that the file offset be
187888046efShibler * page aligned as well. However, we already have applications
188888046efShibler * (e.g. nlist) that rely on unrestricted alignment. Since we
189888046efShibler * support it, let it happen.
190808a4de1Smckusick */
191*7ee749f6Scgd addr = (vm_offset_t) SCARG(uap, addr);
1929f95614cSmckusick if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
193888046efShibler #if 0
194ca65b693Shibler ((flags & MAP_ANON) == 0 && (pos & PAGE_MASK)) ||
195888046efShibler #endif
196*7ee749f6Scgd (ssize_t)SCARG(uap, len) < 0 || ((flags & MAP_ANON) && SCARG(uap, fd) != -1))
197808a4de1Smckusick return (EINVAL);
198*7ee749f6Scgd size = (vm_size_t) round_page(SCARG(uap, len));
1993e8d6014Smckusick /*
2003e8d6014Smckusick * Check for illegal addresses. Watch out for address wrap...
2013e8d6014Smckusick * Note that VM_*_ADDRESS are not constants due to casts (argh).
2023e8d6014Smckusick */
2038c153b3cSmckusick if (flags & MAP_FIXED) {
2043e8d6014Smckusick if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
20549e1a72aSmckusick return (EINVAL);
2063e8d6014Smckusick if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
2073e8d6014Smckusick return (EINVAL);
2083e8d6014Smckusick if (addr > addr + size)
2093e8d6014Smckusick return (EINVAL);
2103e8d6014Smckusick }
2110c4a53bbShibler /*
212ef7b4811Shibler * XXX for non-fixed mappings where no hint is provided or
213ef7b4811Shibler * the hint would fall in the potential heap space,
214ef7b4811Shibler * place it after the end of the largest possible heap.
2150c4a53bbShibler *
2160c4a53bbShibler * There should really be a pmap call to determine a reasonable
2170c4a53bbShibler * location.
2180c4a53bbShibler */
219ef7b4811Shibler else if (addr < round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
2200c4a53bbShibler addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
221525ac35aShibler if (flags & MAP_ANON) {
2229f95614cSmckusick /*
2239f95614cSmckusick * Mapping blank space is trivial.
2249f95614cSmckusick */
2259cf4a8a8Smckusick handle = NULL;
226525ac35aShibler maxprot = VM_PROT_ALL;
227ef7b4811Shibler pos = 0;
228525ac35aShibler } else {
229808a4de1Smckusick /*
2309cf4a8a8Smckusick * Mapping file, get fp for validation.
2319f95614cSmckusick * Obtain vnode and make sure it is of appropriate type.
232808a4de1Smckusick */
233*7ee749f6Scgd if (((unsigned)SCARG(uap, fd)) >= fdp->fd_nfiles ||
234*7ee749f6Scgd (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL)
2359cf4a8a8Smckusick return (EBADF);
236808a4de1Smckusick if (fp->f_type != DTYPE_VNODE)
237808a4de1Smckusick return (EINVAL);
238808a4de1Smckusick vp = (struct vnode *)fp->f_data;
239808a4de1Smckusick if (vp->v_type != VREG && vp->v_type != VCHR)
240808a4de1Smckusick return (EINVAL);
241808a4de1Smckusick /*
242d08ee885Shibler * XXX hack to handle use of /dev/zero to map anon
243d08ee885Shibler * memory (ala SunOS).
244d08ee885Shibler */
245d08ee885Shibler if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
246d08ee885Shibler handle = NULL;
247d08ee885Shibler maxprot = VM_PROT_ALL;
248d08ee885Shibler flags |= MAP_ANON;
249d08ee885Shibler } else {
250d08ee885Shibler /*
251d08ee885Shibler * Ensure that file and memory protections are
252d08ee885Shibler * compatible. Note that we only worry about
253d08ee885Shibler * writability if mapping is shared; in this case,
254d08ee885Shibler * current and max prot are dictated by the open file.
255d08ee885Shibler * XXX use the vnode instead? Problem is: what
256d08ee885Shibler * credentials do we use for determination?
257d08ee885Shibler * What if proc does a setuid?
258808a4de1Smckusick */
2599f95614cSmckusick maxprot = VM_PROT_EXECUTE; /* ??? */
260525ac35aShibler if (fp->f_flag & FREAD)
2619f95614cSmckusick maxprot |= VM_PROT_READ;
2629f95614cSmckusick else if (prot & PROT_READ)
2639f95614cSmckusick return (EACCES);
2649f95614cSmckusick if (flags & MAP_SHARED) {
265525ac35aShibler if (fp->f_flag & FWRITE)
266525ac35aShibler maxprot |= VM_PROT_WRITE;
2679f95614cSmckusick else if (prot & PROT_WRITE)
2689f95614cSmckusick return (EACCES);
2699f95614cSmckusick } else
2709f95614cSmckusick maxprot |= VM_PROT_WRITE;
2719f95614cSmckusick handle = (caddr_t)vp;
272525ac35aShibler }
273d08ee885Shibler }
274525ac35aShibler error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
275ca65b693Shibler flags, handle, pos);
276808a4de1Smckusick if (error == 0)
277*7ee749f6Scgd *retval = (register_t)addr;
278808a4de1Smckusick return (error);
279808a4de1Smckusick }
280808a4de1Smckusick
28164f61df8Sbostic int
msync(p,uap,retval)282808a4de1Smckusick msync(p, uap, retval)
283808a4de1Smckusick struct proc *p;
284*7ee749f6Scgd struct msync_args /* {
285*7ee749f6Scgd syscallarg(caddr_t) addr;
286*7ee749f6Scgd syscallarg(int) len;
287*7ee749f6Scgd } */ *uap;
288*7ee749f6Scgd register_t *retval;
289808a4de1Smckusick {
29009b1e4f3Shibler vm_offset_t addr;
29109b1e4f3Shibler vm_size_t size;
29209b1e4f3Shibler vm_map_t map;
293808a4de1Smckusick int rv;
29409b1e4f3Shibler boolean_t syncio, invalidate;
295808a4de1Smckusick
296808a4de1Smckusick #ifdef DEBUG
297808a4de1Smckusick if (mmapdebug & (MDB_FOLLOW|MDB_SYNC))
298808a4de1Smckusick printf("msync(%d): addr %x len %x\n",
299*7ee749f6Scgd p->p_pid, SCARG(uap, addr), SCARG(uap, len));
300808a4de1Smckusick #endif
301*7ee749f6Scgd if (((vm_offset_t)SCARG(uap, addr) & PAGE_MASK) ||
302*7ee749f6Scgd SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
303808a4de1Smckusick return (EINVAL);
30409b1e4f3Shibler map = &p->p_vmspace->vm_map;
305*7ee749f6Scgd addr = (vm_offset_t)SCARG(uap, addr);
306*7ee749f6Scgd size = (vm_size_t)SCARG(uap, len);
307808a4de1Smckusick /*
30809b1e4f3Shibler * XXX Gak! If size is zero we are supposed to sync "all modified
30909b1e4f3Shibler * pages with the region containing addr". Unfortunately, we
31009b1e4f3Shibler * don't really keep track of individual mmaps so we approximate
31109b1e4f3Shibler * by flushing the range of the map entry containing addr.
31209b1e4f3Shibler * This can be incorrect if the region splits or is coalesced
31309b1e4f3Shibler * with a neighbor.
314808a4de1Smckusick */
31509b1e4f3Shibler if (size == 0) {
31609b1e4f3Shibler vm_map_entry_t entry;
31709b1e4f3Shibler
31809b1e4f3Shibler vm_map_lock_read(map);
31909b1e4f3Shibler rv = vm_map_lookup_entry(map, addr, &entry);
32009b1e4f3Shibler vm_map_unlock_read(map);
321213f695aShibler if (!rv)
322808a4de1Smckusick return (EINVAL);
32309b1e4f3Shibler addr = entry->start;
32409b1e4f3Shibler size = entry->end - entry->start;
32509b1e4f3Shibler }
326808a4de1Smckusick #ifdef DEBUG
327808a4de1Smckusick if (mmapdebug & MDB_SYNC)
32809b1e4f3Shibler printf("msync: cleaning/flushing address range [%x-%x)\n",
32909b1e4f3Shibler addr, addr+size);
330808a4de1Smckusick #endif
331808a4de1Smckusick /*
33209b1e4f3Shibler * Could pass this in as a third flag argument to implement
33309b1e4f3Shibler * Sun's MS_ASYNC.
334808a4de1Smckusick */
33509b1e4f3Shibler syncio = TRUE;
33609b1e4f3Shibler /*
33709b1e4f3Shibler * XXX bummer, gotta flush all cached pages to ensure
33809b1e4f3Shibler * consistency with the file system cache. Otherwise, we could
33909b1e4f3Shibler * pass this in to implement Sun's MS_INVALIDATE.
34009b1e4f3Shibler */
34109b1e4f3Shibler invalidate = TRUE;
34209b1e4f3Shibler /*
34309b1e4f3Shibler * Clean the pages and interpret the return value.
34409b1e4f3Shibler */
34509b1e4f3Shibler rv = vm_map_clean(map, addr, addr+size, syncio, invalidate);
34609b1e4f3Shibler switch (rv) {
34709b1e4f3Shibler case KERN_SUCCESS:
34809b1e4f3Shibler break;
34909b1e4f3Shibler case KERN_INVALID_ADDRESS:
35009b1e4f3Shibler return (EINVAL); /* Sun returns ENOMEM? */
35109b1e4f3Shibler case KERN_FAILURE:
35209b1e4f3Shibler return (EIO);
35309b1e4f3Shibler default:
354808a4de1Smckusick return (EINVAL);
355808a4de1Smckusick }
356808a4de1Smckusick return (0);
357808a4de1Smckusick }
358808a4de1Smckusick
35964f61df8Sbostic int
munmap(p,uap,retval)360808a4de1Smckusick munmap(p, uap, retval)
361808a4de1Smckusick register struct proc *p;
362*7ee749f6Scgd register struct munmap_args /* {
363*7ee749f6Scgd syscallarg(caddr_t) addr;
364*7ee749f6Scgd syscallarg(int) len;
365*7ee749f6Scgd } */ *uap;
366*7ee749f6Scgd register_t *retval;
367808a4de1Smckusick {
368808a4de1Smckusick vm_offset_t addr;
369808a4de1Smckusick vm_size_t size;
37009b1e4f3Shibler vm_map_t map;
371808a4de1Smckusick
372808a4de1Smckusick #ifdef DEBUG
373808a4de1Smckusick if (mmapdebug & MDB_FOLLOW)
374808a4de1Smckusick printf("munmap(%d): addr %x len %x\n",
375*7ee749f6Scgd p->p_pid, SCARG(uap, addr), SCARG(uap, len));
376808a4de1Smckusick #endif
377808a4de1Smckusick
378*7ee749f6Scgd addr = (vm_offset_t) SCARG(uap, addr);
379*7ee749f6Scgd if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
380808a4de1Smckusick return(EINVAL);
381*7ee749f6Scgd size = (vm_size_t) round_page(SCARG(uap, len));
382808a4de1Smckusick if (size == 0)
383808a4de1Smckusick return(0);
3843e8d6014Smckusick /*
3853e8d6014Smckusick * Check for illegal addresses. Watch out for address wrap...
3863e8d6014Smckusick * Note that VM_*_ADDRESS are not constants due to casts (argh).
3873e8d6014Smckusick */
3883e8d6014Smckusick if (VM_MAXUSER_ADDRESS > 0 && addr + size >= VM_MAXUSER_ADDRESS)
3893e8d6014Smckusick return (EINVAL);
3903e8d6014Smckusick if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
3913e8d6014Smckusick return (EINVAL);
3923e8d6014Smckusick if (addr > addr + size)
3933e8d6014Smckusick return (EINVAL);
39409b1e4f3Shibler map = &p->p_vmspace->vm_map;
39509b1e4f3Shibler /*
39609b1e4f3Shibler * Make sure entire range is allocated.
397888046efShibler * XXX this seemed overly restrictive, so we relaxed it.
39809b1e4f3Shibler */
399888046efShibler #if 0
40009b1e4f3Shibler if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
401808a4de1Smckusick return(EINVAL);
402888046efShibler #endif
403808a4de1Smckusick /* returns nothing but KERN_SUCCESS anyway */
40409b1e4f3Shibler (void) vm_map_remove(map, addr, addr+size);
405808a4de1Smckusick return(0);
406808a4de1Smckusick }
407808a4de1Smckusick
40864f61df8Sbostic void
munmapfd(p,fd)4097cc71247Scgd munmapfd(p, fd)
4107cc71247Scgd struct proc *p;
411a5fa67a9Smckusick int fd;
412808a4de1Smckusick {
413808a4de1Smckusick #ifdef DEBUG
414808a4de1Smckusick if (mmapdebug & MDB_FOLLOW)
4157cc71247Scgd printf("munmapfd(%d): fd %d\n", p->p_pid, fd);
416808a4de1Smckusick #endif
417808a4de1Smckusick
418808a4de1Smckusick /*
41909b1e4f3Shibler * XXX should vm_deallocate any regions mapped to this file
420808a4de1Smckusick */
4217cc71247Scgd p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
422808a4de1Smckusick }
423808a4de1Smckusick
424499c7c41Storek int
mprotect(p,uap,retval)425499c7c41Storek mprotect(p, uap, retval)
426499c7c41Storek struct proc *p;
427*7ee749f6Scgd struct mprotect_args /* {
428*7ee749f6Scgd syscallarg(caddr_t) addr;
429*7ee749f6Scgd syscallarg(int) len;
430*7ee749f6Scgd syscallarg(int) prot;
431*7ee749f6Scgd } */ *uap;
432*7ee749f6Scgd register_t *retval;
433808a4de1Smckusick {
434808a4de1Smckusick vm_offset_t addr;
435808a4de1Smckusick vm_size_t size;
436808a4de1Smckusick register vm_prot_t prot;
437808a4de1Smckusick
438808a4de1Smckusick #ifdef DEBUG
439808a4de1Smckusick if (mmapdebug & MDB_FOLLOW)
440808a4de1Smckusick printf("mprotect(%d): addr %x len %x prot %d\n",
441*7ee749f6Scgd p->p_pid, SCARG(uap, addr), SCARG(uap, len), SCARG(uap, prot));
442808a4de1Smckusick #endif
443808a4de1Smckusick
444*7ee749f6Scgd addr = (vm_offset_t)SCARG(uap, addr);
445*7ee749f6Scgd if ((addr & PAGE_MASK) || SCARG(uap, len) < 0)
446808a4de1Smckusick return(EINVAL);
447*7ee749f6Scgd size = (vm_size_t)SCARG(uap, len);
448*7ee749f6Scgd prot = SCARG(uap, prot) & VM_PROT_ALL;
449808a4de1Smckusick
450d2b14339Skarels switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr+size, prot,
451d2b14339Skarels FALSE)) {
452808a4de1Smckusick case KERN_SUCCESS:
453808a4de1Smckusick return (0);
454808a4de1Smckusick case KERN_PROTECTION_FAILURE:
455808a4de1Smckusick return (EACCES);
456808a4de1Smckusick }
457808a4de1Smckusick return (EINVAL);
458808a4de1Smckusick }
459808a4de1Smckusick
460808a4de1Smckusick /* ARGSUSED */
46164f61df8Sbostic int
madvise(p,uap,retval)462808a4de1Smckusick madvise(p, uap, retval)
463808a4de1Smckusick struct proc *p;
464*7ee749f6Scgd struct madvise_args /* {
465*7ee749f6Scgd syscallarg(caddr_t) addr;
466*7ee749f6Scgd syscallarg(int) len;
467*7ee749f6Scgd syscallarg(int) behav;
468*7ee749f6Scgd } */ *uap;
469*7ee749f6Scgd register_t *retval;
470808a4de1Smckusick {
471808a4de1Smckusick
472808a4de1Smckusick /* Not yet implemented */
473808a4de1Smckusick return (EOPNOTSUPP);
474808a4de1Smckusick }
475808a4de1Smckusick
476808a4de1Smckusick /* ARGSUSED */
47764f61df8Sbostic int
mincore(p,uap,retval)478808a4de1Smckusick mincore(p, uap, retval)
479808a4de1Smckusick struct proc *p;
480*7ee749f6Scgd struct mincore_args /* {
481*7ee749f6Scgd syscallarg(caddr_t) addr;
482*7ee749f6Scgd syscallarg(int) len;
483*7ee749f6Scgd syscallarg(char *) vec;
484*7ee749f6Scgd } */ *uap;
485*7ee749f6Scgd register_t *retval;
486808a4de1Smckusick {
487808a4de1Smckusick
488808a4de1Smckusick /* Not yet implemented */
489808a4de1Smckusick return (EOPNOTSUPP);
490808a4de1Smckusick }
491808a4de1Smckusick
49280376accShibler int
mlock(p,uap,retval)49380376accShibler mlock(p, uap, retval)
49480376accShibler struct proc *p;
495*7ee749f6Scgd struct mlock_args /* {
496*7ee749f6Scgd syscallarg(caddr_t) addr;
497*7ee749f6Scgd syscallarg(size_t) len;
498*7ee749f6Scgd } */ *uap;
499*7ee749f6Scgd register_t *retval;
50080376accShibler {
50180376accShibler vm_offset_t addr;
50280376accShibler vm_size_t size;
50380376accShibler int error;
50480376accShibler extern int vm_page_max_wired;
50580376accShibler
50680376accShibler #ifdef DEBUG
50780376accShibler if (mmapdebug & MDB_FOLLOW)
50880376accShibler printf("mlock(%d): addr %x len %x\n",
509*7ee749f6Scgd p->p_pid, SCARG(uap, addr), SCARG(uap, len));
51080376accShibler #endif
511*7ee749f6Scgd addr = (vm_offset_t)SCARG(uap, addr);
512*7ee749f6Scgd if ((addr & PAGE_MASK) || SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
51380376accShibler return (EINVAL);
514*7ee749f6Scgd size = round_page((vm_size_t)SCARG(uap, len));
51580376accShibler if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
5168cb0bc2dShibler return (EAGAIN);
51780376accShibler #ifdef pmap_wired_count
51880376accShibler if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
51980376accShibler p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
5208cb0bc2dShibler return (EAGAIN);
52180376accShibler #else
52280376accShibler if (error = suser(p->p_ucred, &p->p_acflag))
52380376accShibler return (error);
52480376accShibler #endif
52580376accShibler
52680376accShibler error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE);
52780376accShibler return (error == KERN_SUCCESS ? 0 : ENOMEM);
52880376accShibler }
52980376accShibler
53080376accShibler int
munlock(p,uap,retval)53180376accShibler munlock(p, uap, retval)
53280376accShibler struct proc *p;
533*7ee749f6Scgd struct munlock_args /* {
534*7ee749f6Scgd syscallarg(caddr_t) addr;
535*7ee749f6Scgd syscallarg(size_t) len;
536*7ee749f6Scgd } */ *uap;
537*7ee749f6Scgd register_t *retval;
53880376accShibler {
53980376accShibler vm_offset_t addr;
54080376accShibler vm_size_t size;
54180376accShibler int error;
54280376accShibler
54380376accShibler #ifdef DEBUG
54480376accShibler if (mmapdebug & MDB_FOLLOW)
54580376accShibler printf("munlock(%d): addr %x len %x\n",
546*7ee749f6Scgd p->p_pid, SCARG(uap, addr), SCARG(uap, len));
54780376accShibler #endif
548*7ee749f6Scgd addr = (vm_offset_t)SCARG(uap, addr);
549*7ee749f6Scgd if ((addr & PAGE_MASK) || SCARG(uap, addr) + SCARG(uap, len) < SCARG(uap, addr))
55080376accShibler return (EINVAL);
55180376accShibler #ifndef pmap_wired_count
55280376accShibler if (error = suser(p->p_ucred, &p->p_acflag))
55380376accShibler return (error);
55480376accShibler #endif
555*7ee749f6Scgd size = round_page((vm_size_t)SCARG(uap, len));
55680376accShibler
55780376accShibler error = vm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE);
55880376accShibler return (error == KERN_SUCCESS ? 0 : ENOMEM);
55980376accShibler }
56080376accShibler
561808a4de1Smckusick /*
562808a4de1Smckusick * Internal version of mmap.
563808a4de1Smckusick * Currently used by mmap, exec, and sys5 shared memory.
5649cf4a8a8Smckusick * Handle is either a vnode pointer or NULL for MAP_ANON.
565808a4de1Smckusick */
56664f61df8Sbostic int
vm_mmap(map,addr,size,prot,maxprot,flags,handle,foff)567525ac35aShibler vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
568808a4de1Smckusick register vm_map_t map;
569808a4de1Smckusick register vm_offset_t *addr;
570808a4de1Smckusick register vm_size_t size;
571525ac35aShibler vm_prot_t prot, maxprot;
572808a4de1Smckusick register int flags;
573808a4de1Smckusick caddr_t handle; /* XXX should be vp */
574808a4de1Smckusick vm_offset_t foff;
575808a4de1Smckusick {
576808a4de1Smckusick register vm_pager_t pager;
577808a4de1Smckusick boolean_t fitit;
578808a4de1Smckusick vm_object_t object;
579bffd8654Smckusick struct vnode *vp = NULL;
580808a4de1Smckusick int type;
581808a4de1Smckusick int rv = KERN_SUCCESS;
582808a4de1Smckusick
583808a4de1Smckusick if (size == 0)
584808a4de1Smckusick return (0);
585808a4de1Smckusick
586808a4de1Smckusick if ((flags & MAP_FIXED) == 0) {
587808a4de1Smckusick fitit = TRUE;
588808a4de1Smckusick *addr = round_page(*addr);
589808a4de1Smckusick } else {
590808a4de1Smckusick fitit = FALSE;
591808a4de1Smckusick (void)vm_deallocate(map, *addr, size);
592808a4de1Smckusick }
593808a4de1Smckusick
594808a4de1Smckusick /*
595808a4de1Smckusick * Lookup/allocate pager. All except an unnamed anonymous lookup
596808a4de1Smckusick * gain a reference to ensure continued existance of the object.
597808a4de1Smckusick * (XXX the exception is to appease the pageout daemon)
598808a4de1Smckusick */
5999cf4a8a8Smckusick if (flags & MAP_ANON)
600808a4de1Smckusick type = PG_DFLT;
601808a4de1Smckusick else {
602808a4de1Smckusick vp = (struct vnode *)handle;
603808a4de1Smckusick if (vp->v_type == VCHR) {
604808a4de1Smckusick type = PG_DEVICE;
605808a4de1Smckusick handle = (caddr_t)vp->v_rdev;
606808a4de1Smckusick } else
607808a4de1Smckusick type = PG_VNODE;
608808a4de1Smckusick }
609eed4f36eStorek pager = vm_pager_allocate(type, handle, size, prot, foff);
610d2b14339Skarels if (pager == NULL)
611808a4de1Smckusick return (type == PG_DEVICE ? EINVAL : ENOMEM);
612808a4de1Smckusick /*
613808a4de1Smckusick * Find object and release extra reference gained by lookup
614808a4de1Smckusick */
615808a4de1Smckusick object = vm_object_lookup(pager);
616808a4de1Smckusick vm_object_deallocate(object);
617808a4de1Smckusick
618808a4de1Smckusick /*
619808a4de1Smckusick * Anonymous memory.
620808a4de1Smckusick */
6219cf4a8a8Smckusick if (flags & MAP_ANON) {
622808a4de1Smckusick rv = vm_allocate_with_pager(map, addr, size, fitit,
623af75fa5dSralph pager, foff, TRUE);
624808a4de1Smckusick if (rv != KERN_SUCCESS) {
625808a4de1Smckusick if (handle == NULL)
626808a4de1Smckusick vm_pager_deallocate(pager);
627808a4de1Smckusick else
628808a4de1Smckusick vm_object_deallocate(object);
629808a4de1Smckusick goto out;
630808a4de1Smckusick }
631808a4de1Smckusick /*
632808a4de1Smckusick * Don't cache anonymous objects.
633808a4de1Smckusick * Loses the reference gained by vm_pager_allocate.
634525ac35aShibler * Note that object will be NULL when handle == NULL,
635525ac35aShibler * this is ok since vm_allocate_with_pager has made
636525ac35aShibler * sure that these objects are uncached.
637808a4de1Smckusick */
638808a4de1Smckusick (void) pager_cache(object, FALSE);
639808a4de1Smckusick #ifdef DEBUG
640808a4de1Smckusick if (mmapdebug & MDB_MAPIT)
641808a4de1Smckusick printf("vm_mmap(%d): ANON *addr %x size %x pager %x\n",
642d2b14339Skarels curproc->p_pid, *addr, size, pager);
643808a4de1Smckusick #endif
644808a4de1Smckusick }
645808a4de1Smckusick /*
6469cf4a8a8Smckusick * Must be a mapped file.
647808a4de1Smckusick * Distinguish between character special and regular files.
648808a4de1Smckusick */
649808a4de1Smckusick else if (vp->v_type == VCHR) {
650808a4de1Smckusick rv = vm_allocate_with_pager(map, addr, size, fitit,
651af75fa5dSralph pager, foff, FALSE);
652808a4de1Smckusick /*
653808a4de1Smckusick * Uncache the object and lose the reference gained
654808a4de1Smckusick * by vm_pager_allocate(). If the call to
655808a4de1Smckusick * vm_allocate_with_pager() was sucessful, then we
656808a4de1Smckusick * gained an additional reference ensuring the object
657808a4de1Smckusick * will continue to exist. If the call failed then
658808a4de1Smckusick * the deallocate call below will terminate the
659808a4de1Smckusick * object which is fine.
660808a4de1Smckusick */
661808a4de1Smckusick (void) pager_cache(object, FALSE);
662808a4de1Smckusick if (rv != KERN_SUCCESS)
663808a4de1Smckusick goto out;
664808a4de1Smckusick }
665808a4de1Smckusick /*
666808a4de1Smckusick * A regular file
667808a4de1Smckusick */
668808a4de1Smckusick else {
669808a4de1Smckusick #ifdef DEBUG
670d2b14339Skarels if (object == NULL)
671808a4de1Smckusick printf("vm_mmap: no object: vp %x, pager %x\n",
672808a4de1Smckusick vp, pager);
673808a4de1Smckusick #endif
674808a4de1Smckusick /*
675808a4de1Smckusick * Map it directly.
676808a4de1Smckusick * Allows modifications to go out to the vnode.
677808a4de1Smckusick */
678808a4de1Smckusick if (flags & MAP_SHARED) {
679808a4de1Smckusick rv = vm_allocate_with_pager(map, addr, size,
680808a4de1Smckusick fitit, pager,
681af75fa5dSralph foff, FALSE);
682808a4de1Smckusick if (rv != KERN_SUCCESS) {
683808a4de1Smckusick vm_object_deallocate(object);
684808a4de1Smckusick goto out;
685808a4de1Smckusick }
686808a4de1Smckusick /*
687808a4de1Smckusick * Don't cache the object. This is the easiest way
688808a4de1Smckusick * of ensuring that data gets back to the filesystem
689808a4de1Smckusick * because vnode_pager_deallocate() will fsync the
690808a4de1Smckusick * vnode. pager_cache() will lose the extra ref.
691808a4de1Smckusick */
692808a4de1Smckusick if (prot & VM_PROT_WRITE)
693808a4de1Smckusick pager_cache(object, FALSE);
694808a4de1Smckusick else
695808a4de1Smckusick vm_object_deallocate(object);
696808a4de1Smckusick }
697808a4de1Smckusick /*
698808a4de1Smckusick * Copy-on-write of file. Two flavors.
699808a4de1Smckusick * MAP_COPY is true COW, you essentially get a snapshot of
700808a4de1Smckusick * the region at the time of mapping. MAP_PRIVATE means only
701808a4de1Smckusick * that your changes are not reflected back to the object.
702808a4de1Smckusick * Changes made by others will be seen.
703808a4de1Smckusick */
704808a4de1Smckusick else {
705808a4de1Smckusick vm_map_t tmap;
706808a4de1Smckusick vm_offset_t off;
707808a4de1Smckusick
708808a4de1Smckusick /* locate and allocate the target address space */
709d2b14339Skarels rv = vm_map_find(map, NULL, (vm_offset_t)0,
710808a4de1Smckusick addr, size, fitit);
711808a4de1Smckusick if (rv != KERN_SUCCESS) {
712808a4de1Smckusick vm_object_deallocate(object);
713808a4de1Smckusick goto out;
714808a4de1Smckusick }
715808a4de1Smckusick tmap = vm_map_create(pmap_create(size), VM_MIN_ADDRESS,
716808a4de1Smckusick VM_MIN_ADDRESS+size, TRUE);
717808a4de1Smckusick off = VM_MIN_ADDRESS;
718808a4de1Smckusick rv = vm_allocate_with_pager(tmap, &off, size,
719808a4de1Smckusick TRUE, pager,
720af75fa5dSralph foff, FALSE);
721808a4de1Smckusick if (rv != KERN_SUCCESS) {
722808a4de1Smckusick vm_object_deallocate(object);
723808a4de1Smckusick vm_map_deallocate(tmap);
724808a4de1Smckusick goto out;
725808a4de1Smckusick }
726808a4de1Smckusick /*
727808a4de1Smckusick * (XXX)
728808a4de1Smckusick * MAP_PRIVATE implies that we see changes made by
729808a4de1Smckusick * others. To ensure that we need to guarentee that
730808a4de1Smckusick * no copy object is created (otherwise original
731808a4de1Smckusick * pages would be pushed to the copy object and we
732808a4de1Smckusick * would never see changes made by others). We
733808a4de1Smckusick * totally sleeze it right now by marking the object
734808a4de1Smckusick * internal temporarily.
735808a4de1Smckusick */
736808a4de1Smckusick if ((flags & MAP_COPY) == 0)
7376fa1e730Smckusick object->flags |= OBJ_INTERNAL;
738808a4de1Smckusick rv = vm_map_copy(map, tmap, *addr, size, off,
739808a4de1Smckusick FALSE, FALSE);
7406fa1e730Smckusick object->flags &= ~OBJ_INTERNAL;
741808a4de1Smckusick /*
742808a4de1Smckusick * (XXX)
743808a4de1Smckusick * My oh my, this only gets worse...
744808a4de1Smckusick * Force creation of a shadow object so that
745808a4de1Smckusick * vm_map_fork will do the right thing.
746808a4de1Smckusick */
747808a4de1Smckusick if ((flags & MAP_COPY) == 0) {
748808a4de1Smckusick vm_map_t tmap;
749808a4de1Smckusick vm_map_entry_t tentry;
750808a4de1Smckusick vm_object_t tobject;
751808a4de1Smckusick vm_offset_t toffset;
752808a4de1Smckusick vm_prot_t tprot;
753808a4de1Smckusick boolean_t twired, tsu;
754808a4de1Smckusick
755808a4de1Smckusick tmap = map;
756808a4de1Smckusick vm_map_lookup(&tmap, *addr, VM_PROT_WRITE,
757808a4de1Smckusick &tentry, &tobject, &toffset,
758808a4de1Smckusick &tprot, &twired, &tsu);
759808a4de1Smckusick vm_map_lookup_done(tmap, tentry);
760808a4de1Smckusick }
761808a4de1Smckusick /*
762808a4de1Smckusick * (XXX)
763808a4de1Smckusick * Map copy code cannot detect sharing unless a
764808a4de1Smckusick * sharing map is involved. So we cheat and write
76586863962Shibler * protect everything ourselves.
766808a4de1Smckusick */
767af75fa5dSralph vm_object_pmap_copy(object, foff, foff + size);
768808a4de1Smckusick vm_object_deallocate(object);
769808a4de1Smckusick vm_map_deallocate(tmap);
770808a4de1Smckusick if (rv != KERN_SUCCESS)
771808a4de1Smckusick goto out;
772808a4de1Smckusick }
773808a4de1Smckusick #ifdef DEBUG
774808a4de1Smckusick if (mmapdebug & MDB_MAPIT)
775808a4de1Smckusick printf("vm_mmap(%d): FILE *addr %x size %x pager %x\n",
776d2b14339Skarels curproc->p_pid, *addr, size, pager);
777808a4de1Smckusick #endif
778808a4de1Smckusick }
779808a4de1Smckusick /*
780808a4de1Smckusick * Correct protection (default is VM_PROT_ALL).
781525ac35aShibler * If maxprot is different than prot, we must set both explicitly.
782808a4de1Smckusick */
783525ac35aShibler rv = KERN_SUCCESS;
784525ac35aShibler if (maxprot != VM_PROT_ALL)
785525ac35aShibler rv = vm_map_protect(map, *addr, *addr+size, maxprot, TRUE);
786525ac35aShibler if (rv == KERN_SUCCESS && prot != maxprot)
787808a4de1Smckusick rv = vm_map_protect(map, *addr, *addr+size, prot, FALSE);
788808a4de1Smckusick if (rv != KERN_SUCCESS) {
789808a4de1Smckusick (void) vm_deallocate(map, *addr, size);
790808a4de1Smckusick goto out;
791808a4de1Smckusick }
792808a4de1Smckusick /*
793808a4de1Smckusick * Shared memory is also shared with children.
794808a4de1Smckusick */
795808a4de1Smckusick if (flags & MAP_SHARED) {
79609b1e4f3Shibler rv = vm_map_inherit(map, *addr, *addr+size, VM_INHERIT_SHARE);
797808a4de1Smckusick if (rv != KERN_SUCCESS) {
798808a4de1Smckusick (void) vm_deallocate(map, *addr, size);
799808a4de1Smckusick goto out;
800808a4de1Smckusick }
801808a4de1Smckusick }
802808a4de1Smckusick out:
803808a4de1Smckusick #ifdef DEBUG
804808a4de1Smckusick if (mmapdebug & MDB_MAPIT)
805808a4de1Smckusick printf("vm_mmap: rv %d\n", rv);
806808a4de1Smckusick #endif
807808a4de1Smckusick switch (rv) {
808808a4de1Smckusick case KERN_SUCCESS:
809808a4de1Smckusick return (0);
810808a4de1Smckusick case KERN_INVALID_ADDRESS:
811808a4de1Smckusick case KERN_NO_SPACE:
812808a4de1Smckusick return (ENOMEM);
813808a4de1Smckusick case KERN_PROTECTION_FAILURE:
814808a4de1Smckusick return (EACCES);
815808a4de1Smckusick default:
816808a4de1Smckusick return (EINVAL);
817808a4de1Smckusick }
818808a4de1Smckusick }
819