xref: /original-bsd/sys/ufs/lfs/lfs_syscalls.c (revision 860e07fc)
1 /*-
2  * Copyright (c) 1991 The Regents of the University of California.
3  * All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)lfs_syscalls.c	7.23 (Berkeley) 09/03/92
8  */
9 
10 #include <sys/param.h>
11 #include <sys/proc.h>
12 #include <sys/buf.h>
13 #include <sys/mount.h>
14 #include <sys/vnode.h>
15 #include <sys/malloc.h>
16 #include <sys/kernel.h>
17 
18 #include <ufs/ufs/quota.h>
19 #include <ufs/ufs/inode.h>
20 #include <ufs/ufs/ufsmount.h>
21 #include <ufs/ufs/ufs_extern.h>
22 
23 #include <ufs/lfs/lfs.h>
24 #include <ufs/lfs/lfs_extern.h>
/* Bump the finished-FINFO count in the segment summary of segment SP. */
#define INC_FINFO(SP) \
	++((SEGSUM *)((SP)->segsum))->ss_nfinfo
27 
28 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
29 
30 /*
31  * lfs_markv:
32  *
33  * This will mark inodes and blocks dirty, so they are written into the log.
34  * It will block until all the blocks have been written.  The segment create
35  * time passed in the block_info and inode_info structures is used to decide
36  * if the data is valid for each block (in case some process dirtied a block
37  * or inode that is being cleaned between the determination that a block is
38  * live and the lfs_markv call).
39  *
40  *  0 on success
41  * -1/errno is returned on error.
42  */
43 int
44 lfs_markv(p, uap, retval)
45 	struct proc *p;
46 	struct args {
47 		fsid_t fsid;		/* file system */
48 		BLOCK_INFO *blkiov;	/* block array */
49 		int blkcnt;		/* count of block array entries */
50 	} *uap;
51 	int *retval;
52 {
53 	struct segment *sp;
54 	BLOCK_INFO *blkp;
55 	IFILE *ifp;
56 	struct buf *bp, **bpp;
57 	struct inode *ip;
58 	struct lfs *fs;
59 	struct mount *mntp;
60 	struct vnode *vp;
61 	void *start;
62 	ino_t lastino;
63 	daddr_t b_daddr, v_daddr;
64 	u_long bsize;
65 	int cnt, error;
66 
67 	if (error = suser(p->p_ucred, &p->p_acflag))
68 		return (error);
69 	if ((mntp = getvfs(&uap->fsid)) == NULL)
70 		return (EINVAL);
71 	/* Initialize a segment. */
72 	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
73 	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
74 	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
75 	sp->seg_flags = SEGM_CKP;
76 	sp->vp = NULL;
77 
78 	cnt = uap->blkcnt;
79 	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
80 	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
81 		goto err1;
82 
83 	/* Mark blocks/inodes dirty.  */
84 	fs = VFSTOUFS(mntp)->um_lfs;
85 	bsize = fs->lfs_bsize;
86 	error = 0;
87 
88 	lfs_seglock(fs);
89 	lfs_initseg(fs, sp);
90 	sp->seg_flags |= SEGM_CLEAN;
91 	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
92 	    blkp = start; cnt--; ++blkp) {
93 		/*
94 		 * Get the IFILE entry (only once) and see if the file still
95 		 * exists.
96 		 */
97 		if (lastino != blkp->bi_inode) {
98 			if (lastino != LFS_UNUSED_INUM) {
99 				lfs_updatemeta(sp);
100 				lfs_writeinode(fs, sp, ip);
101 				vput(vp);
102 				if (sp->fip->fi_nblocks) {
103 					INC_FINFO(sp);
104 					sp->fip =
105 					(FINFO *) (&sp->fip->fi_blocks[sp->fip->fi_nblocks]);
106 				}
107 				sp->start_lbp = &sp->fip->fi_blocks[0];
108 				sp->fip->fi_version = blkp->bi_version;
109 				sp->fip->fi_nblocks = 0;
110 				sp->fip->fi_ino = blkp->bi_inode;
111 				sp->vp = NULL;
112 			}
113 			lastino = blkp->bi_inode;
114 			if (blkp->bi_inode == LFS_IFILE_INUM)
115 				v_daddr = fs->lfs_idaddr;
116 			else {
117 				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
118 				v_daddr = ifp->if_daddr;
119 				brelse(bp);
120 			}
121 			if (v_daddr == LFS_UNUSED_DADDR)
122 				continue;
123 			/* Get the vnode/inode. */
124 			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
125 			    blkp->bi_lbn == LFS_UNUSED_LBN ? NULL :
126 			    blkp->bi_bp)) {
127 #ifdef DIAGNOSTIC
128 				printf("lfs_markv: VFS_VGET failed (%d)\n",
129 				    blkp->bi_inode);
130 #endif
131 				lastino = LFS_UNUSED_INUM;
132 				v_daddr == LFS_UNUSED_DADDR;
133 				continue;
134 			}
135 			sp->vp = vp;
136 			ip = VTOI(vp);
137 		} else if (v_daddr == LFS_UNUSED_DADDR)
138 			continue;
139 
140 		/* If this BLOCK_INFO didn't contain a block, keep going. */
141 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
142 			continue;
143 		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr) ||
144 		    b_daddr != blkp->bi_daddr)
145 			continue;
146 		/*
147 		 * If we got to here, then we are keeping the block.  If it
148 		 * is an indirect block, we want to actually put it in the
149 		 * buffer cache so that it can be updated in the finish_meta
150 		 * section.  If it's not, we need to allocate a fake buffer
151 		 * so that writeseg can perform the copyin and write the buffer.
152 		 */
153 		if (blkp->bi_lbn >= 0)	/* Data Block */
154 			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
155 			    blkp->bi_bp);
156 		else {
157 			bp = getblk(vp, blkp->bi_lbn, bsize);
158 			if (!(bp->b_flags & B_CACHE) &&
159 			    (error = copyin(blkp->bi_bp, bp->b_un.b_addr,
160 			    bsize)))
161 				goto err2;
162 			if (error = VOP_BWRITE(bp))
163 				goto err2;
164 		}
165 		while (lfs_gatherblock(sp, bp, NULL));
166 	}
167 	if (sp->vp) {
168 		if (sp->fip->fi_nblocks)
169 			INC_FINFO(sp);
170 		lfs_updatemeta(sp);
171 		lfs_writeinode(fs, sp, ip);
172 		vput(vp);
173 	}
174 	(void) lfs_writeseg(fs, sp);
175 	lfs_segunlock(fs);
176 	free(start, M_SEGMENT);
177 	free(sp->bpp, M_SEGMENT);
178 	free(sp, M_SEGMENT);
179 	return (error);
180 /*
181  * XXX If we come in to error 2, we might have indirect blocks that were
182  * updated and now have bad block pointers.  I don't know what to do
183  * about this.
184  */
185 
186 err2:	vput(vp);
187 	/* Free up fakebuffers */
188 	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
189 		if ((*bpp)->b_flags & B_CALL) {
190 			brelvp(*bpp);
191 			free(*bpp, M_SEGMENT);
192 		} else
193 			brelse(*bpp);
194 	lfs_segunlock(fs);
195 err1:
196 	free(sp->bpp, M_SEGMENT);
197 	free(sp, M_SEGMENT);
198 	free(start, M_SEGMENT);
199 	return(error);
200 }
201 
202 /*
203  * lfs_bmapv:
204  *
205  * This will fill in the current disk address for arrays of blocks.
206  *
207  *  0 on success
208  * -1/errno is returned on error.
209  */
210 int
211 lfs_bmapv(p, uap, retval)
212 	struct proc *p;
213 	struct args {
214 		fsid_t fsid;		/* file system */
215 		BLOCK_INFO *blkiov;	/* block array */
216 		int blkcnt;		/* count of block array entries */
217 	} *uap;
218 	int *retval;
219 {
220 	BLOCK_INFO *blkp;
221 	struct mount *mntp;
222 	struct vnode *vp;
223 	void *start;
224 	daddr_t daddr;
225 	int cnt, error, step;
226 
227 	if (error = suser(p->p_ucred, &p->p_acflag))
228 		return (error);
229 	if ((mntp = getvfs(&uap->fsid)) == NULL)
230 		return (EINVAL);
231 
232 	cnt = uap->blkcnt;
233 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
234 	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
235 		free(blkp, M_SEGMENT);
236 		return (error);
237 	}
238 
239 	for (step = cnt; step--; ++blkp) {
240 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
241 			continue;
242 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
243 			daddr = LFS_UNUSED_DADDR;
244 		else {
245 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr))
246 				daddr = LFS_UNUSED_DADDR;
247 			vput(vp);
248 		}
249 		blkp->bi_daddr = daddr;
250         }
251 	copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
252 	free(start, M_SEGMENT);
253 	return (0);
254 }
255 
256 /*
257  * lfs_segclean:
258  *
259  * Mark the segment clean.
260  *
261  *  0 on success
262  * -1/errno is returned on error.
263  */
int
lfs_segclean(p, uap, retval)
	struct proc *p;
	struct args {
		fsid_t fsid;		/* file system */
		u_long segment;		/* segment number */
	} *uap;
	int *retval;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	struct mount *mntp;
	struct lfs *fs;
	int error;

	/* Only the superuser (i.e. the cleaner) may mark a segment clean. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	fs = VFSTOUFS(mntp)->um_lfs;

	/*
	 * Credit the reclaimed segment's space back to the file system:
	 * lfs_avail gets the whole segment (less one block), lfs_bfree
	 * gets back the space the summaries and inode blocks occupied,
	 * per the segment-usage entry.  Then clear the dirty flag and
	 * write the segment-usage entry back through the ifile.
	 */
	LFS_SEGENTRY(sup, fs, uap->segment, bp);
	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
	    sup->su_ninos * btodb(fs->lfs_bsize);
	sup->su_flags &= ~SEGUSE_DIRTY;
	(void) VOP_BWRITE(bp);

	/* Update the cleaner info: one more clean segment, one less dirty. */
	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	(void) VOP_BWRITE(bp);
	/* Wake anyone sleeping on free space (see lfs_avail waiters). */
	wakeup(&fs->lfs_avail);
	return (0);
}
301 
302 /*
303  * lfs_segwait:
304  *
305  * This will block until a segment in file system fsid is written.  A timeout
306  * in milliseconds may be specified which will awake the cleaner automatically.
307  * An fsid of -1 means any file system, and a timeout of 0 means forever.
308  *
309  *  0 on success
310  *  1 on timeout
311  * -1/errno is returned on error.
312  */
313 int
314 lfs_segwait(p, uap, retval)
315 	struct proc *p;
316 	struct args {
317 		fsid_t fsid;		/* file system */
318 		struct timeval *tv;	/* timeout */
319 	} *uap;
320 	int *retval;
321 {
322 	extern int lfs_allclean_wakeup;
323 	struct mount *mntp;
324 	struct timeval atv;
325 	void *addr;
326 	u_long timeout;
327 	int error, s;
328 
329 	if (error = suser(p->p_ucred, &p->p_acflag)) {
330 		return (error);
331 }
332 #ifdef WHEN_QUADS_WORK
333 	if (uap->fsid == (fsid_t)-1)
334 		addr = &lfs_allclean_wakeup;
335 	else {
336 		if ((mntp = getvfs(&uap->fsid)) == NULL)
337 			return (EINVAL);
338 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
339 	}
340 #else
341 	if ((mntp = getvfs(&uap->fsid)) == NULL)
342 		addr = &lfs_allclean_wakeup;
343 	else
344 		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
345 #endif
346 
347 	if (uap->tv) {
348 		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
349 			return (error);
350 		if (itimerfix(&atv))
351 			return (EINVAL);
352 		s = splclock();
353 		timevaladd(&atv, (struct timeval *)&time);
354 		timeout = hzto(&atv);
355 		splx(s);
356 	} else
357 		timeout = 0;
358 
359 	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
360 	return (error == ERESTART ? EINTR : 0);
361 }
362 
363 /*
364  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
365  * daddr from the ifile, so don't look it up again.  If the cleaner is
366  * processing IINFO structures, it may have the ondisk inode already, so
367  * don't go retrieving it again.
368  */
369 int
370 lfs_fastvget(mp, ino, daddr, vpp, dinp)
371 	struct mount *mp;
372 	ino_t ino;
373 	daddr_t daddr;
374 	struct vnode **vpp;
375 	struct dinode *dinp;
376 {
377 	register struct inode *ip;
378 	struct vnode *vp;
379 	struct ufsmount *ump;
380 	struct buf *bp;
381 	dev_t dev;
382 	int error;
383 
384 	ump = VFSTOUFS(mp);
385 	dev = ump->um_dev;
386 	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
387 		ip = VTOI(*vpp);
388 		ip->i_flag |= IMOD;
389 		return (0);
390 	}
391 
392 	/* Allocate new vnode/inode. */
393 	if (error = lfs_vcreate(mp, ino, &vp)) {
394 		*vpp = NULL;
395 		return (error);
396 	}
397 
398 	/*
399 	 * Put it onto its hash chain and lock it so that other requests for
400 	 * this inode will block if they arrive while we are sleeping waiting
401 	 * for old data structures to be purged or for the contents of the
402 	 * disk portion of this inode to be read.
403 	 */
404 	ip = VTOI(vp);
405 	ufs_ihashins(ip);
406 
407 	/*
408 	 * XXX
409 	 * This may not need to be here, logically it should go down with
410 	 * the i_devvp initialization.
411 	 * Ask Kirk.
412 	 */
413 	ip->i_lfs = ump->um_lfs;
414 
415 	/* Read in the disk contents for the inode, copy into the inode. */
416 	if (dinp)
417 		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
418 			return (error);
419 	else {
420 		if (error = bread(ump->um_devvp, daddr,
421 		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
422 			/*
423 			 * The inode does not contain anything useful, so it
424 			 * would be misleading to leave it on its hash chain.
425 			 * Iput() will return it to the free list.
426 			 */
427 			ufs_ihashrem(ip);
428 
429 			/* Unlock and discard unneeded inode. */
430 			ufs_iput(ip);
431 			brelse(bp);
432 			*vpp = NULL;
433 			return (error);
434 		}
435 		ip->i_din = *lfs_ifind(ump->um_lfs, ino, bp->b_un.b_dino);
436 		brelse(bp);
437 	}
438 
439 	/* Inode was just read from user space or disk, make sure it's locked */
440 	ip->i_flag |= ILOCKED;
441 
442 	/*
443 	 * Initialize the vnode from the inode, check for aliases.  In all
444 	 * cases re-init ip, the underlying vnode/inode may have changed.
445 	 */
446 	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
447 		ufs_iput(ip);
448 		*vpp = NULL;
449 		return (error);
450 	}
451 	/*
452 	 * Finish inode initialization now that aliasing has been resolved.
453 	 */
454 	ip->i_devvp = ump->um_devvp;
455 	ip->i_flag |= IMOD;
456 	++ump->um_lfs->lfs_uinodes;
457 	VREF(ip->i_devvp);
458 	*vpp = vp;
459 	return (0);
460 }
461 struct buf *
462 lfs_fakebuf(vp, lbn, size, uaddr)
463 	struct vnode *vp;
464 	int lbn;
465 	size_t size;
466 	caddr_t uaddr;
467 {
468 	struct buf *bp;
469 
470 	bp = lfs_newbuf(vp, lbn, 0);
471 	bp->b_saveaddr = uaddr;
472 	bp->b_bufsize = size;
473 	bp->b_bcount = size;
474 	bp->b_flags |= B_INVAL;
475 	return(bp);
476 }
477