xref: /original-bsd/sys/vm/vnode_pager.c (revision 08a3d239)
1 /*
2  * Copyright (c) 1990 University of Utah.
3  * Copyright (c) 1991, 1993
4  *	The Regents of the University of California.  All rights reserved.
5  *
6  * This code is derived from software contributed to Berkeley by
7  * the Systems Programming Group of the University of Utah Computer
8  * Science Department.
9  *
10  * %sccs.include.redist.c%
11  *
12  *	@(#)vnode_pager.c	8.7 (Berkeley) 02/13/94
13  */
14 
/*
 * Page to/from files (vnodes).
16  *
17  * TODO:
18  *	pageouts
19  *	fix credential use (uses current process credentials now)
20  */
21 
22 #include <sys/param.h>
23 #include <sys/systm.h>
24 #include <sys/proc.h>
25 #include <sys/malloc.h>
26 #include <sys/vnode.h>
27 #include <sys/uio.h>
28 #include <sys/mount.h>
29 
30 #include <vm/vm.h>
31 #include <vm/vm_page.h>
32 #include <vm/vnode_pager.h>
33 
34 struct pagerlst	vnode_pager_list;	/* list of managed vnodes */
35 
36 #ifdef DEBUG
37 int	vpagerdebug = 0x00;
38 #define	VDB_FOLLOW	0x01
39 #define VDB_INIT	0x02
40 #define VDB_IO		0x04
41 #define VDB_FAIL	0x08
42 #define VDB_ALLOC	0x10
43 #define VDB_SIZE	0x20
44 #endif
45 
46 static vm_pager_t	 vnode_pager_alloc
47 			    __P((caddr_t, vm_size_t, vm_prot_t, vm_offset_t));
48 static void		 vnode_pager_cluster
49 			    __P((vm_pager_t, vm_offset_t,
50 				 vm_offset_t *, vm_offset_t *));
51 static void		 vnode_pager_dealloc __P((vm_pager_t));
52 static int		 vnode_pager_getpage
53 			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
54 static boolean_t	 vnode_pager_haspage __P((vm_pager_t, vm_offset_t));
55 static void		 vnode_pager_init __P((void));
56 static int		 vnode_pager_io
57 			    __P((vn_pager_t, vm_page_t *, int,
58 				 boolean_t, enum uio_rw));
59 static boolean_t	 vnode_pager_putpage
60 			    __P((vm_pager_t, vm_page_t *, int, boolean_t));
61 
/*
 * Pager operations vector for vnode-backed VM objects; the generic
 * pager code dispatches through this table.
 */
struct pagerops vnodepagerops = {
	vnode_pager_init,
	vnode_pager_alloc,
	vnode_pager_dealloc,
	vnode_pager_getpage,
	vnode_pager_putpage,
	vnode_pager_haspage,
	vnode_pager_cluster
};
71 
/*
 * One-time initialization: set up the list of vnode-managed pagers.
 */
static void
vnode_pager_init()
{
#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_init()\n");
#endif
	TAILQ_INIT(&vnode_pager_list);
}
81 
/*
 * Allocate (or lookup) pager for a vnode.
 * Handle is a vnode pointer; size/prot/foff are unused here since the
 * object is sized from the vnode's attributes.
 * Returns the pager, or NULL on failure (anonymous handle or failed
 * VOP_GETATTR).
 */
static vm_pager_t
vnode_pager_alloc(handle, size, prot, foff)
	caddr_t handle;
	vm_size_t size;
	vm_prot_t prot;
	vm_offset_t foff;
{
	register vm_pager_t pager;
	register vn_pager_t vnp;
	vm_object_t object;
	struct vattr vattr;
	struct vnode *vp;
	struct proc *p = curproc;	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_ALLOC))
		printf("vnode_pager_alloc(%x, %x, %x)\n", handle, size, prot);
#endif
	/*
	 * Pageout to vnode, no can do yet.
	 */
	if (handle == NULL)
		return(NULL);

	/*
	 * Vnodes keep a pointer to any associated pager so no need to
	 * lookup with vm_pager_lookup.
	 */
	vp = (struct vnode *)handle;
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL) {
		/*
		 * Allocate pager structures
		 */
		pager = (vm_pager_t)malloc(sizeof *pager, M_VMPAGER, M_WAITOK);
		if (pager == NULL)
			return(NULL);
		vnp = (vn_pager_t)malloc(sizeof *vnp, M_VMPGDATA, M_WAITOK);
		if (vnp == NULL) {
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * And an object of the appropriate size, sized from the
		 * vnode's current attributes.  On attribute failure undo
		 * the allocations and fail.
		 */
		if (VOP_GETATTR(vp, &vattr, p->p_ucred, p) == 0) {
			object = vm_object_allocate(round_page(vattr.va_size));
			vm_object_enter(object, pager);
			vm_object_setpager(object, pager, 0, TRUE);
		} else {
			free((caddr_t)vnp, M_VMPGDATA);
			free((caddr_t)pager, M_VMPAGER);
			return(NULL);
		}
		/*
		 * Hold a reference to the vnode and initialize pager data.
		 */
		VREF(vp);
		vnp->vnp_flags = 0;
		vnp->vnp_vp = vp;
		vnp->vnp_size = vattr.va_size;
		TAILQ_INSERT_TAIL(&vnode_pager_list, pager, pg_list);
		pager->pg_handle = handle;
		pager->pg_type = PG_VNODE;
		pager->pg_flags = 0;
		pager->pg_ops = &vnodepagerops;
		pager->pg_data = vnp;
		vp->v_vmdata = (caddr_t)pager;
	} else {
		/*
		 * vm_object_lookup() will remove the object from the
		 * cache if found and also gain a reference to the object.
		 */
		object = vm_object_lookup(pager);
#ifdef DEBUG
		vnp = (vn_pager_t)pager->pg_data;
#endif
	}
#ifdef DEBUG
	if (vpagerdebug & VDB_ALLOC)
		printf("vnode_pager_setup: vp %x sz %x pager %x object %x\n",
		       vp, vnp->vnp_size, pager, object);
#endif
	return(pager);
}
171 
172 static void
173 vnode_pager_dealloc(pager)
174 	vm_pager_t pager;
175 {
176 	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
177 	register struct vnode *vp;
178 #ifdef NOTDEF
179 	struct proc *p = curproc;		/* XXX */
180 #endif
181 
182 #ifdef DEBUG
183 	if (vpagerdebug & VDB_FOLLOW)
184 		printf("vnode_pager_dealloc(%x)\n", pager);
185 #endif
186 	if (vp = vnp->vnp_vp) {
187 		vp->v_vmdata = NULL;
188 		vp->v_flag &= ~VTEXT;
189 #if NOTDEF
190 		/* can hang if done at reboot on NFS FS */
191 		(void) VOP_FSYNC(vp, p->p_ucred, p);
192 #endif
193 		vrele(vp);
194 	}
195 	TAILQ_REMOVE(&vnode_pager_list, pager, pg_list);
196 	free((caddr_t)vnp, M_VMPGDATA);
197 	free((caddr_t)pager, M_VMPAGER);
198 }
199 
200 static int
201 vnode_pager_getpage(pager, mlist, npages, sync)
202 	vm_pager_t pager;
203 	vm_page_t *mlist;
204 	int npages;
205 	boolean_t sync;
206 {
207 
208 #ifdef DEBUG
209 	if (vpagerdebug & VDB_FOLLOW)
210 		printf("vnode_pager_getpage(%x, %x, %x, %x)\n",
211 		       pager, mlist, npages, sync);
212 #endif
213 	return(vnode_pager_io((vn_pager_t)pager->pg_data,
214 			      mlist, npages, sync, UIO_READ));
215 }
216 
217 static boolean_t
218 vnode_pager_putpage(pager, mlist, npages, sync)
219 	vm_pager_t pager;
220 	vm_page_t *mlist;
221 	int npages;
222 	boolean_t sync;
223 {
224 	int err;
225 
226 #ifdef DEBUG
227 	if (vpagerdebug & VDB_FOLLOW)
228 		printf("vnode_pager_putpage(%x, %x, %x, %x)\n",
229 		       pager, mlist, npages, sync);
230 #endif
231 	if (pager == NULL)
232 		return (FALSE);			/* ??? */
233 	err = vnode_pager_io((vn_pager_t)pager->pg_data,
234 			     mlist, npages, sync, UIO_WRITE);
235 	/*
236 	 * If the operation was successful, mark the pages clean.
237 	 */
238 	if (err == VM_PAGER_OK) {
239 		while (npages--) {
240 			(*mlist)->flags |= PG_CLEAN;
241 			pmap_clear_modify(VM_PAGE_TO_PHYS(*mlist));
242 			mlist++;
243 		}
244 	}
245 	return(err);
246 }
247 
/*
 * Return TRUE if the backing file can supply the page at the given
 * offset: the offset must lie below EOF and the file must have a
 * block allocated there.  If VOP_BMAP fails we conservatively claim
 * to have the page.
 */
static boolean_t
vnode_pager_haspage(pager, offset)
	vm_pager_t pager;
	vm_offset_t offset;
{
	register vn_pager_t vnp = (vn_pager_t)pager->pg_data;
	daddr_t bn;
	int err;

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_haspage(%x, %x)\n", pager, offset);
#endif

	/*
	 * Offset beyond end of file, do not have the page
	 * Lock the vnode first to make sure we have the most recent
	 * version of the size.
	 */
	VOP_LOCK(vnp->vnp_vp);
	if (offset >= vnp->vnp_size) {
		VOP_UNLOCK(vnp->vnp_vp);
#ifdef DEBUG
		if (vpagerdebug & (VDB_FAIL|VDB_SIZE))
			printf("vnode_pager_haspage: pg %x, off %x, size %x\n",
			       pager, offset, vnp->vnp_size);
#endif
		return(FALSE);
	}

	/*
	 * Read the index to find the disk block to read
	 * from.  If there is no block, report that we don't
	 * have this data.
	 *
	 * Assumes that the vnode has whole page or nothing.
	 */
	err = VOP_BMAP(vnp->vnp_vp,
		       offset / vnp->vnp_vp->v_mount->mnt_stat.f_iosize,
		       (struct vnode **)0, &bn, NULL);
	VOP_UNLOCK(vnp->vnp_vp);
	if (err) {
#ifdef DEBUG
		if (vpagerdebug & VDB_FAIL)
			printf("vnode_pager_haspage: BMAP err %d, pg %x, off %x\n",
			       err, pager, offset);
#endif
		return(TRUE);
	}
	/* A negative block number denotes an unallocated hole. */
	return((long)bn < 0 ? FALSE : TRUE);
}
299 
300 static void
301 vnode_pager_cluster(pager, offset, loffset, hoffset)
302 	vm_pager_t	pager;
303 	vm_offset_t	offset;
304 	vm_offset_t	*loffset;
305 	vm_offset_t	*hoffset;
306 {
307 	vn_pager_t vnp = (vn_pager_t)pager->pg_data;
308 	vm_offset_t loff, hoff;
309 
310 #ifdef DEBUG
311 	if (vpagerdebug & VDB_FOLLOW)
312 		printf("vnode_pager_cluster(%x, %x) ", pager, offset);
313 #endif
314 	loff = offset;
315 	if (loff >= vnp->vnp_size)
316 		panic("vnode_pager_cluster: bad offset");
317 	/*
318 	 * XXX could use VOP_BMAP to get maxcontig value
319 	 */
320 	hoff = loff + MAXBSIZE;
321 	if (hoff > round_page(vnp->vnp_size))
322 		hoff = round_page(vnp->vnp_size);
323 
324 	*loffset = loff;
325 	*hoffset = hoff;
326 #ifdef DEBUG
327 	if (vpagerdebug & VDB_FOLLOW)
328 		printf("returns [%x-%x]\n", loff, hoff);
329 #endif
330 }
331 
332 /*
333  * (XXX)
334  * Lets the VM system know about a change in size for a file.
335  * If this vnode is mapped into some address space (i.e. we have a pager
336  * for it) we adjust our own internal size and flush any cached pages in
337  * the associated object that are affected by the size change.
338  *
339  * Note: this routine may be invoked as a result of a pager put
340  * operation (possibly at object termination time), so we must be careful.
341  */
void
vnode_pager_setsize(vp, nsize)
	struct vnode *vp;
	u_long nsize;
{
	register vn_pager_t vnp;
	register vm_object_t object;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	if (vp == NULL || vp->v_type != VREG || vp->v_vmdata == NULL)
		return;
	/*
	 * Hasn't changed size
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	vnp = (vn_pager_t)pager->pg_data;
	if (nsize == vnp->vnp_size)
		return;
	/*
	 * No object.
	 * This can happen during object termination since
	 * vm_object_page_clean is called after the object
	 * has been removed from the hash table, and clean
	 * may cause vnode write operations which can wind
	 * up back here.
	 */
	object = vm_object_lookup(pager);
	if (object == NULL)
		return;

#ifdef DEBUG
	if (vpagerdebug & (VDB_FOLLOW|VDB_SIZE))
		printf("vnode_pager_setsize: vp %x obj %x osz %d nsz %d\n",
		       vp, object, vnp->vnp_size, nsize);
#endif
	/*
	 * File has shrunk.
	 * Toss any cached pages beyond the new EOF.
	 */
	if (nsize < vnp->vnp_size) {
		vm_object_lock(object);
		vm_object_page_remove(object,
				      (vm_offset_t)nsize, vnp->vnp_size);
		vm_object_unlock(object);
	}
	vnp->vnp_size = (vm_offset_t)nsize;
	/* Drop the reference gained by vm_object_lookup(). */
	vm_object_deallocate(object);
}
393 
/*
 * Uncache all vnode-backed objects belonging to the given mount point
 * (or to every mount if mp is NULL).  Called at unmount time.
 */
void
vnode_pager_umount(mp)
	register struct mount *mp;
{
	register vm_pager_t pager, npager;
	struct vnode *vp;

	for (pager = vnode_pager_list.tqh_first; pager != NULL; pager = npager){
		/*
		 * Save the next pointer now since uncaching may
		 * terminate the object and render pager invalid
		 */
		npager = pager->pg_list.tqe_next;
		vp = ((vn_pager_t)pager->pg_data)->vnp_vp;
		if (mp == (struct mount *)0 || vp->v_mount == mp) {
			/* vnode_pager_uncache() expects a locked vnode */
			VOP_LOCK(vp);
			(void) vnode_pager_uncache(vp);
			VOP_UNLOCK(vp);
		}
	}
}
415 
416 /*
417  * Remove vnode associated object from the object cache.
418  *
419  * XXX unlock the vnode if it is currently locked.
420  * We must do this since uncaching the object may result in its
421  * destruction which may initiate paging activity which may necessitate
422  * re-locking the vnode.
423  */
/*
 * Remove the vnode's associated object from the object cache.
 * Returns TRUE if the object is (or becomes) uncached.
 * The vnode is expected locked on entry; see the unlock note below.
 */
boolean_t
vnode_pager_uncache(vp)
	register struct vnode *vp;
{
	register vm_object_t object;
	boolean_t uncached;
	vm_pager_t pager;

	/*
	 * Not a mapped vnode
	 */
	pager = (vm_pager_t)vp->v_vmdata;
	if (pager == NULL)
		return (TRUE);
#ifdef DEBUG
	if (!VOP_ISLOCKED(vp)) {
		extern int (**nfsv2_vnodeop_p)();

		/* NFS vnodes may legitimately arrive here unlocked */
		if (vp->v_op != nfsv2_vnodeop_p)
			panic("vnode_pager_uncache: vnode not locked!");
	}
#endif
	/*
	 * Must use vm_object_lookup() as it actually removes
	 * the object from the cache list.
	 */
	object = vm_object_lookup(pager);
	if (object) {
		uncached = (object->ref_count <= 1);
		/*
		 * Unlock the vnode around pager_cache(): dropping the
		 * last reference may destroy the object and initiate
		 * paging activity that re-locks the vnode.
		 */
		VOP_UNLOCK(vp);
		pager_cache(object, FALSE);
		VOP_LOCK(vp);
	} else
		uncached = TRUE;
	return(uncached);
}
460 
/*
 * Common read/write back end: map the pages at a kernel virtual
 * address and move the data to/from the backing vnode with
 * VOP_READ/VOP_WRITE.  Returns a VM_PAGER_* status code.
 *
 * Currently limited to a single page per call (panics otherwise).
 */
static int
vnode_pager_io(vnp, mlist, npages, sync, rw)
	register vn_pager_t vnp;
	vm_page_t *mlist;
	int npages;
	boolean_t sync;
	enum uio_rw rw;
{
	struct uio auio;
	struct iovec aiov;
	vm_offset_t kva, foff;
	int error, size;
	struct proc *p = curproc;		/* XXX */

	/* XXX: one page at a time for now */
	vm_page_t m;
	if (npages != 1)
		panic("vnode_pager_io: cannot handle multiple pages");
	m = *mlist;
	/* XXX */

#ifdef DEBUG
	if (vpagerdebug & VDB_FOLLOW)
		printf("vnode_pager_io(%x, %x, %c): vnode %x\n",
		       vnp, m, rw == UIO_READ ? 'R' : 'W', vnp->vnp_vp);
#endif
	/* Byte offset of this page within the backing file. */
	foff = m->offset + m->object->paging_offset;
	/*
	 * Allocate a kernel virtual address and initialize so that
	 * we can use VOP_READ/WRITE routines.
	 */
	kva = vm_pager_map_pages(mlist, npages, sync);
	if (kva == NULL)
		return(VM_PAGER_AGAIN);
	/*
	 * After all of the potentially blocking operations have been
	 * performed, we can do the size checks:
	 *	read beyond EOF (returns error)
	 *	short read
	 */
	VOP_LOCK(vnp->vnp_vp);
	if (foff >= vnp->vnp_size) {
		VOP_UNLOCK(vnp->vnp_vp);
		vm_pager_unmap_pages(kva, npages);
#ifdef DEBUG
		if (vpagerdebug & VDB_SIZE)
			printf("vnode_pager_io: vp %x, off %d size %d\n",
			       vnp->vnp_vp, foff, vnp->vnp_size);
#endif
		return(VM_PAGER_BAD);
	}
	/* Clip the transfer length to end of file. */
	if (foff + PAGE_SIZE > vnp->vnp_size)
		size = vnp->vnp_size - foff;
	else
		size = PAGE_SIZE;
	aiov.iov_base = (caddr_t)kva;
	aiov.iov_len = size;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_offset = foff;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_rw = rw;
	auio.uio_resid = size;
	auio.uio_procp = (struct proc *)0;
#ifdef DEBUG
	if (vpagerdebug & VDB_IO)
		printf("vnode_pager_io: vp %x kva %x foff %x size %x",
		       vnp->vnp_vp, kva, foff, size);
#endif
	if (rw == UIO_READ)
		error = VOP_READ(vnp->vnp_vp, &auio, 0, p->p_ucred);
	else
		error = VOP_WRITE(vnp->vnp_vp, &auio, 0, p->p_ucred);
	VOP_UNLOCK(vnp->vnp_vp);
#ifdef DEBUG
	if (vpagerdebug & VDB_IO) {
		if (error || auio.uio_resid)
			printf(" returns error %x, resid %x",
			       error, auio.uio_resid);
		printf("\n");
	}
#endif
	if (!error) {
		register int count = size - auio.uio_resid;

		if (count == 0)
			error = EINVAL;
		else if (count != PAGE_SIZE && rw == UIO_READ)
			/* Zero the tail of a short read. */
			bzero((void *)(kva + count), PAGE_SIZE - count);
	}
	vm_pager_unmap_pages(kva, npages);
	return (error ? VM_PAGER_ERROR : VM_PAGER_OK);
}
554