xref: /original-bsd/sys/ufs/lfs/lfs_syscalls.c (revision 6996549e)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)lfs_syscalls.c	7.22 (Berkeley) 09/02/92
8  */
9 
10 #include <sys/param.h>
11 #include <sys/proc.h>
12 #include <sys/buf.h>
13 #include <sys/mount.h>
14 #include <sys/vnode.h>
15 #include <sys/malloc.h>
16 #include <sys/kernel.h>
17 
18 #include <ufs/ufs/quota.h>
19 #include <ufs/ufs/inode.h>
20 #include <ufs/ufs/ufsmount.h>
21 #include <ufs/ufs/ufs_extern.h>
22 
23 #include <ufs/lfs/lfs.h>
24 #include <ufs/lfs/lfs_extern.h>
25 
26 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
27 
28 /*
29  * lfs_markv:
30  *
31  * This will mark inodes and blocks dirty, so they are written into the log.
32  * It will block until all the blocks have been written.  The segment create
33  * time passed in the block_info and inode_info structures is used to decide
34  * if the data is valid for each block (in case some process dirtied a block
35  * or inode that is being cleaned between the determination that a block is
36  * live and the lfs_markv call).
37  *
38  *  0 on success
39  * -1/errno is return on error.
40  */
41 int
42 lfs_markv(p, uap, retval)
43 	struct proc *p;
44 	struct args {
45 		fsid_t fsid;		/* file system */
46 		BLOCK_INFO *blkiov;	/* block array */
47 		int blkcnt;		/* count of block array entries */
48 	} *uap;
49 	int *retval;
50 {
51 	struct segment *sp;
52 	BLOCK_INFO *blkp;
53 	IFILE *ifp;
54 	struct buf *bp, **bpp;
55 	struct inode *ip;
56 	struct lfs *fs;
57 	struct mount *mntp;
58 	struct vnode *vp;
59 	void *start;
60 	ino_t lastino;
61 	daddr_t b_daddr, v_daddr;
62 	u_long bsize;
63 	int cnt, error;
64 
65 	if (error = suser(p->p_ucred, &p->p_acflag))
66 		return (error);
67 	if ((mntp = getvfs(&uap->fsid)) == NULL)
68 		return (EINVAL);
69 	/* Initialize a segment. */
70 	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
71 	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
72 	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
73 	sp->seg_flags = SEGM_CKP;
74 	sp->vp = NULL;
75 
76 	cnt = uap->blkcnt;
77 	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
78 	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
79 		goto err1;
80 
81 	/* Mark blocks/inodes dirty.  */
82 	fs = VFSTOUFS(mntp)->um_lfs;
83 	bsize = fs->lfs_bsize;
84 	error = 0;
85 
86 	lfs_seglock(fs);
87 	lfs_initseg(fs, sp);
88 	sp->seg_flags |= SEGM_CLEAN;
89 	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
90 	    blkp = start; cnt--; ++blkp) {
91 		/*
92 		 * Get the IFILE entry (only once) and see if the file still
93 		 * exists.
94 		 */
95 		if (lastino != blkp->bi_inode) {
96 			if (lastino != LFS_UNUSED_INUM) {
97 				lfs_updatemeta(sp);
98 				lfs_writeinode(fs, sp, ip);
99 				vput(vp);
100 				sp->vp = NULL;
101 			}
102 			lastino = blkp->bi_inode;
103 			if (blkp->bi_inode == LFS_IFILE_INUM)
104 				v_daddr = fs->lfs_idaddr;
105 			else {
106 				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
107 				v_daddr = ifp->if_daddr;
108 				brelse(bp);
109 			}
110 			if (v_daddr == LFS_UNUSED_DADDR)
111 				continue;
112 			/* Get the vnode/inode. */
113 			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
114 			    blkp->bi_lbn == LFS_UNUSED_LBN ? NULL :
115 			    blkp->bi_bp)) {
116 #ifdef DIAGNOSTIC
117 				printf("lfs_markv: VFS_VGET failed (%d)\n",
118 				    blkp->bi_inode);
119 #endif
120 				lastino = LFS_UNUSED_INUM;
121 				v_daddr == LFS_UNUSED_DADDR;
122 				continue;
123 			}
124 			sp->vp = vp;
125 			ip = VTOI(vp);
126 		} else if (v_daddr == LFS_UNUSED_DADDR)
127 			continue;
128 
129 		/* If this BLOCK_INFO didn't contain a block, keep going. */
130 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
131 			continue;
132 		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr) ||
133 		    b_daddr != blkp->bi_daddr)
134 			continue;
135 		/*
136 		 * If we got to here, then we are keeping the block.  If it
137 		 * is an indirect block, we want to actually put it in the
138 		 * buffer cache so that it can be updated in the finish_meta
139 		 * section.  If it's not, we need to allocate a fake buffer
140 		 * so that writeseg can perform the copyin and write the buffer.
141 		 */
142 		if (blkp->bi_lbn >= 0)	/* Data Block */
143 			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
144 			    blkp->bi_bp);
145 		else {
146 			bp = getblk(vp, blkp->bi_lbn, bsize);
147 			if (!(bp->b_flags & B_CACHE) &&
148 			    (error = copyin(blkp->bi_bp, bp->b_un.b_addr,
149 			    bsize)))
150 				goto err2;
151 			if (error = VOP_BWRITE(bp))
152 				goto err2;
153 		}
154 		while (lfs_gatherblock(sp, bp, NULL));
155 	}
156 	if (sp->vp) {
157 		lfs_updatemeta(sp);
158 		lfs_writeinode(fs, sp, ip);
159 		vput(vp);
160 	}
161 	(void) lfs_writeseg(fs, sp);
162 	lfs_segunlock(fs);
163 	free(start, M_SEGMENT);
164 	free(sp->bpp, M_SEGMENT);
165 	free(sp, M_SEGMENT);
166 	return (error);
167 /*
168  * XXX If we come in to error 2, we might have indirect blocks that were
169  * updated and now have bad block pointers.  I don't know what to do
170  * about this.
171  */
172 
173 err2:	vput(vp);
174 	/* Free up fakebuffers */
175 	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
176 		if ((*bpp)->b_flags & B_CALL) {
177 			brelvp(*bpp);
178 			free(*bpp, M_SEGMENT);
179 		} else
180 			brelse(*bpp);
181 	lfs_segunlock(fs);
182 err1:
183 	free(sp->bpp, M_SEGMENT);
184 	free(sp, M_SEGMENT);
185 	free(start, M_SEGMENT);
186 	return(error);
187 }
188 
189 /*
190  * lfs_bmapv:
191  *
192  * This will fill in the current disk address for arrays of blocks.
193  *
194  *  0 on success
195  * -1/errno is return on error.
196  */
197 int
198 lfs_bmapv(p, uap, retval)
199 	struct proc *p;
200 	struct args {
201 		fsid_t fsid;		/* file system */
202 		BLOCK_INFO *blkiov;	/* block array */
203 		int blkcnt;		/* count of block array entries */
204 	} *uap;
205 	int *retval;
206 {
207 	BLOCK_INFO *blkp;
208 	struct mount *mntp;
209 	struct vnode *vp;
210 	void *start;
211 	daddr_t daddr;
212 	int cnt, error, step;
213 
214 	if (error = suser(p->p_ucred, &p->p_acflag))
215 		return (error);
216 	if ((mntp = getvfs(&uap->fsid)) == NULL)
217 		return (EINVAL);
218 
219 	cnt = uap->blkcnt;
220 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
221 	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
222 		free(blkp, M_SEGMENT);
223 		return (error);
224 	}
225 
226 	for (step = cnt; step--; ++blkp) {
227 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
228 			continue;
229 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
230 			daddr = LFS_UNUSED_DADDR;
231 		else {
232 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr))
233 				daddr = LFS_UNUSED_DADDR;
234 			vput(vp);
235 		}
236 		blkp->bi_daddr = daddr;
237         }
238 	copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
239 	free(start, M_SEGMENT);
240 	return (0);
241 }
242 
243 /*
244  * lfs_segclean:
245  *
246  * Mark the segment clean.
247  *
248  *  0 on success
249  * -1/errno is return on error.
250  */
251 int
252 lfs_segclean(p, uap, retval)
253 	struct proc *p;
254 	struct args {
255 		fsid_t fsid;		/* file system */
256 		u_long segment;		/* segment number */
257 	} *uap;
258 	int *retval;
259 {
260 	CLEANERINFO *cip;
261 	SEGUSE *sup;
262 	struct buf *bp;
263 	struct mount *mntp;
264 	struct lfs *fs;
265 	int error;
266 
267 	if (error = suser(p->p_ucred, &p->p_acflag))
268 		return (error);
269 	if ((mntp = getvfs(&uap->fsid)) == NULL)
270 		return (EINVAL);
271 
272 	fs = VFSTOUFS(mntp)->um_lfs;
273 
274 	LFS_SEGENTRY(sup, fs, uap->segment, bp);
275 	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
276 	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
277 	    sup->su_ninos * btodb(fs->lfs_bsize);
278 	sup->su_flags &= ~SEGUSE_DIRTY;
279 	(void) VOP_BWRITE(bp);
280 
281 	LFS_CLEANERINFO(cip, fs, bp);
282 	++cip->clean;
283 	--cip->dirty;
284 	(void) VOP_BWRITE(bp);
285 	wakeup(&fs->lfs_avail);
286 	return (0);
287 }
288 
289 /*
290  * lfs_segwait:
291  *
292  * This will block until a segment in file system fsid is written.  A timeout
293  * in milliseconds may be specified which will awake the cleaner automatically.
294  * An fsid of -1 means any file system, and a timeout of 0 means forever.
295  *
296  *  0 on success
297  *  1 on timeout
298  * -1/errno is return on error.
299  */
300 int
301 lfs_segwait(p, uap, retval)
302 	struct proc *p;
303 	struct args {
304 		fsid_t fsid;		/* file system */
305 		struct timeval *tv;	/* timeout */
306 	} *uap;
307 	int *retval;
308 {
309 	extern int lfs_allclean_wakeup;
310 	struct mount *mntp;
311 	struct timeval atv;
312 	void *addr;
313 	u_long timeout;
314 	int error, s;
315 
316 	if (error = suser(p->p_ucred, &p->p_acflag)) {
317 		return (error);
318 }
319 #ifdef WHEN_QUADS_WORK
320 	if (uap->fsid == (fsid_t)-1)
321 		addr = &lfs_allclean_wakeup;
322 	else {
323 		if ((mntp = getvfs(&uap->fsid)) == NULL)
324 			return (EINVAL);
325 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
326 	}
327 #else
328 	if ((mntp = getvfs(&uap->fsid)) == NULL)
329 		addr = &lfs_allclean_wakeup;
330 	else
331 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
332 #endif
333 
334 	if (uap->tv) {
335 		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
336 			return (error);
337 		if (itimerfix(&atv))
338 			return (EINVAL);
339 		s = splclock();
340 		timevaladd(&atv, (struct timeval *)&time);
341 		timeout = hzto(&atv);
342 		splx(s);
343 	} else
344 		timeout = 0;
345 
346 	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
347 	return (error == ERESTART ? EINTR : 0);
348 }
349 
350 /*
351  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
352  * daddr from the ifile, so don't look it up again.  If the cleaner is
353  * processing IINFO structures, it may have the ondisk inode already, so
354  * don't go retrieving it again.
355  */
356 int
357 lfs_fastvget(mp, ino, daddr, vpp, dinp)
358 	struct mount *mp;
359 	ino_t ino;
360 	daddr_t daddr;
361 	struct vnode **vpp;
362 	struct dinode *dinp;
363 {
364 	register struct inode *ip;
365 	struct vnode *vp;
366 	struct ufsmount *ump;
367 	struct buf *bp;
368 	dev_t dev;
369 	int error;
370 
371 	ump = VFSTOUFS(mp);
372 	dev = ump->um_dev;
373 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL)
374 		return (0);
375 
376 	/* Allocate new vnode/inode. */
377 	if (error = lfs_vcreate(mp, ino, &vp)) {
378 		*vpp = NULL;
379 		return (error);
380 	}
381 
382 	/*
383 	 * Put it onto its hash chain and lock it so that other requests for
384 	 * this inode will block if they arrive while we are sleeping waiting
385 	 * for old data structures to be purged or for the contents of the
386 	 * disk portion of this inode to be read.
387 	 */
388 	ip = VTOI(vp);
389 	ufs_ihashins(ip);
390 
391 	/*
392 	 * XXX
393 	 * This may not need to be here, logically it should go down with
394 	 * the i_devvp initialization.
395 	 * Ask Kirk.
396 	 */
397 	ip->i_lfs = ump->um_lfs;
398 
399 	/* Read in the disk contents for the inode, copy into the inode. */
400 	if (dinp)
401 		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
402 			return (error);
403 	else {
404 		if (error = bread(ump->um_devvp, daddr,
405 		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
406 			/*
407 			 * The inode does not contain anything useful, so it
408 			 * would be misleading to leave it on its hash chain.
409 			 * Iput() will return it to the free list.
410 			 */
411 			ufs_ihashrem(ip);
412 
413 			/* Unlock and discard unneeded inode. */
414 			ufs_iput(ip);
415 			brelse(bp);
416 			*vpp = NULL;
417 			return (error);
418 		}
419 		ip->i_din = *lfs_ifind(ump->um_lfs, ino, bp->b_un.b_dino);
420 		brelse(bp);
421 	}
422 
423 	/* Inode was just read from user space or disk, make sure it's locked */
424 	ip->i_flag |= ILOCKED;
425 
426 	/*
427 	 * Initialize the vnode from the inode, check for aliases.  In all
428 	 * cases re-init ip, the underlying vnode/inode may have changed.
429 	 */
430 	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
431 		ufs_iput(ip);
432 		*vpp = NULL;
433 		return (error);
434 	}
435 	/*
436 	 * Finish inode initialization now that aliasing has been resolved.
437 	 */
438 	ip->i_devvp = ump->um_devvp;
439 	ip->i_flag |= IMOD;
440 	++ump->um_lfs->lfs_uinodes;
441 	VREF(ip->i_devvp);
442 	*vpp = vp;
443 	return (0);
444 }
445 struct buf *
446 lfs_fakebuf(vp, lbn, size, uaddr)
447 	struct vnode *vp;
448 	int lbn;
449 	size_t size;
450 	caddr_t uaddr;
451 {
452 	struct buf *bp;
453 
454 	bp = lfs_newbuf(vp, lbn, 0);
455 	bp->b_saveaddr = uaddr;
456 	bp->b_bufsize = size;
457 	bp->b_bcount = size;
458 	bp->b_flags |= B_INVAL;
459 	return(bp);
460 }
461