xref: /original-bsd/sys/ufs/lfs/lfs_syscalls.c (revision b3c06cab)
1 /*-
2  * Copyright (c) 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)lfs_syscalls.c	8.9 (Berkeley) 05/08/95
8  */
9 
10 #include <sys/param.h>
11 #include <sys/proc.h>
12 #include <sys/buf.h>
13 #include <sys/mount.h>
14 #include <sys/vnode.h>
15 #include <sys/malloc.h>
16 #include <sys/kernel.h>
17 
18 #include <ufs/ufs/quota.h>
19 #include <ufs/ufs/inode.h>
20 #include <ufs/ufs/ufsmount.h>
21 #include <ufs/ufs/ufs_extern.h>
22 
23 #include <ufs/lfs/lfs.h>
24 #include <ufs/lfs/lfs_extern.h>
25 #define BUMP_FIP(SP) \
26 	(SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])
27 
28 #define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
29 #define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo
30 
31 /*
32  * Before committing to add something to a segment summary, make sure there
33  * is enough room.  S is the bytes added to the summary.
34  */
35 #define	CHECK_SEG(s)			\
36 if (sp->sum_bytes_left < (s)) {		\
37 	(void) lfs_writeseg(fs, sp);	\
38 }
39 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
40 
41 int debug_cleaner = 0;
42 int clean_vnlocked = 0;
43 int clean_inlocked = 0;
44 
/*
 * lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is return on error.
 */
struct lfs_markv_args {
	fsid_t *fsidp;		/* file system */
	BLOCK_INFO *blkiov;	/* block array */
	int blkcnt;		/* count of block array entries */
};
int
lfs_markv(p, uap, retval)
	struct proc *p;
	struct lfs_markv_args *uap;
	int *retval;
{
	struct segment *sp;
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp, **bpp;
	struct inode *ip;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
	fsid_t fsid;
	void *start;		/* base of the copied-in BLOCK_INFO array */
	ino_t lastino;		/* inode of the previous entry, for grouping */
	ufs_daddr_t b_daddr, v_daddr;
	u_long bsize;
	int cnt, error;

	/* Rewriting segment contents is restricted to the superuser. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);

	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (EINVAL);

	/* Copy the cleaner's BLOCK_INFO array in from user space. */
	cnt = uap->blkcnt;
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
		goto err1;

	/* Mark blocks/inodes dirty.  */
	fs = VFSTOUFS(mntp)->um_lfs;
	bsize = fs->lfs_bsize;
	error = 0;

	/*
	 * Take the segment lock and gather the still-live blocks into the
	 * current partial segment.  Per-file state (FINFO slot, vnode,
	 * inode) is set up once per run of entries sharing bi_inode — the
	 * cleaner apparently passes the array grouped by inode; entries
	 * for the same inode split across runs would get separate FINFOs.
	 */
	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
	sp = fs->lfs_sp;
	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
	    blkp = start; cnt--; ++blkp) {
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			if (lastino != LFS_UNUSED_INUM) {
				/* Finish up last file */
				if (sp->fip->fi_nblocks == 0) {
					/*
					 * No blocks were gathered for it:
					 * give back the FINFO slot and the
					 * summary bytes reserved below.
					 */
					DEC_FINFO(sp);
					sp->sum_bytes_left +=
					    sizeof(FINFO) - sizeof(ufs_daddr_t);
				} else {
					lfs_updatemeta(sp);
					BUMP_FIP(sp);
				}

				lfs_writeinode(fs, sp, ip);
				lfs_vunref(vp);
			}

			/* Start a new file */
			CHECK_SEG(sizeof(FINFO));
			/*
			 * Reserve summary space for the FINFO header; the
			 * first block slot is part of the struct itself,
			 * hence the subtracted ufs_daddr_t.
			 */
			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(ufs_daddr_t);
			INC_FINFO(sp);
			sp->start_lbp = &sp->fip->fi_blocks[0];
			sp->vp = NULL;
			sp->fip->fi_version = blkp->bi_version;
			sp->fip->fi_nblocks = 0;
			sp->fip->fi_ino = blkp->bi_inode;
			lastino = blkp->bi_inode;
			/*
			 * The ifile's own disk address lives in the
			 * superblock, not in the ifile itself.
			 */
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			/* File was deleted; skip all of its entries. */
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/* Get the vnode/inode. */
			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
			    blkp->bi_lbn == LFS_UNUSED_LBN ?
			    blkp->bi_bp : NULL)) {
#ifdef DIAGNOSTIC
				printf("lfs_markv: VFS_VGET failed (%d)\n",
				    blkp->bi_inode);
				panic("lfs_markv VFS_VGET FAILED");
#endif
				lastino = LFS_UNUSED_INUM;
				v_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			sp->vp = vp;
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR)
			continue;

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
		/*
		 * The block's address changed since the cleaner sampled it:
		 * a newer copy exists elsewhere, so this one is dead — skip.
		 */
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    b_daddr != blkp->bi_daddr)
			continue;
		/*
		 * If we got to here, then we are keeping the block.  If it
		 * is an indirect block, we want to actually put it in the
		 * buffer cache so that it can be updated in the finish_meta
		 * section.  If it's not, we need to allocate a fake buffer
		 * so that writeseg can perform the copyin and write the buffer.
		 */
		if (blkp->bi_lbn >= 0)	/* Data Block */
			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
			    blkp->bi_bp);
		else {
			/* Negative lbn: indirect block, cached for real. */
			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
			    (error = copyin(blkp->bi_bp, bp->b_data,
			    blkp->bi_size)))
				goto err2;
			if (error = VOP_BWRITE(bp))
				goto err2;
		}
		/*
		 * Loop until gathered; presumably lfs_gatherblock returns
		 * nonzero when a full segment forced a flush and the block
		 * must be retried in the new segment — TODO confirm.
		 */
		while (lfs_gatherblock(sp, bp, NULL));
	}
	/* Finish up the final file, mirroring the loop epilogue above. */
	if (sp->vp) {
		if (sp->fip->fi_nblocks == 0) {
			DEC_FINFO(sp);
			sp->sum_bytes_left +=
			    sizeof(FINFO) - sizeof(ufs_daddr_t);
		} else
			lfs_updatemeta(sp);

		lfs_writeinode(fs, sp, ip);
		lfs_vunref(vp);
	}
	/* Push the gathered partial segment and release the segment lock. */
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);
	free(start, M_SEGMENT);
	return (error);

/*
 * XXX
 * If we come in to error 2, we might have indirect blocks that were
 * updated and now have bad block pointers.  I don't know what to do
 * about this.
 */

err2:	lfs_vunref(vp);
	/* Free up fakebuffers */
	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
		if ((*bpp)->b_flags & B_CALL) {
			brelvp(*bpp);
			free(*bpp, M_SEGMENT);
		} else
			brelse(*bpp);
	lfs_segunlock(fs);
err1:
	free(start, M_SEGMENT);
	return (error);
}
227 
228 /*
229  * lfs_bmapv:
230  *
231  * This will fill in the current disk address for arrays of blocks.
232  *
233  *  0 on success
234  * -1/errno is return on error.
235  */
236 struct lfs_bmapv_args {
237 	fsid_t *fsidp;		/* file system */
238 	BLOCK_INFO *blkiov;	/* block array */
239 	int blkcnt;		/* count of block array entries */
240 };
241 int
242 lfs_bmapv(p, uap, retval)
243 	struct proc *p;
244 	struct lfs_bmapv_args *uap;
245 	int *retval;
246 {
247 	BLOCK_INFO *blkp;
248 	struct mount *mntp;
249 	struct ufsmount *ump;
250 	struct vnode *vp;
251 	fsid_t fsid;
252 	void *start;
253 	ufs_daddr_t daddr;
254 	int cnt, error, step;
255 
256 	if (error = suser(p->p_ucred, &p->p_acflag))
257 		return (error);
258 
259 	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
260 		return (error);
261 	if ((mntp = vfs_getvfs(&fsid)) == NULL)
262 		return (EINVAL);
263 
264 	cnt = uap->blkcnt;
265 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
266 	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
267 		free(blkp, M_SEGMENT);
268 		return (error);
269 	}
270 
271 	for (step = cnt; step--; ++blkp) {
272 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
273 			continue;
274 		/*
275 		 * A regular call to VFS_VGET could deadlock
276 		 * here.  Instead, we try an unlocked access.
277 		 */
278 		ump = VFSTOUFS(mntp);
279 		if ((vp =
280 		    ufs_ihashlookup(ump->um_dev, blkp->bi_inode)) != NULL) {
281 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
282 				daddr = LFS_UNUSED_DADDR;
283 		} else if (VFS_VGET(mntp, blkp->bi_inode, &vp))
284 			daddr = LFS_UNUSED_DADDR;
285 		else  {
286 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
287 				daddr = LFS_UNUSED_DADDR;
288 			vput(vp);
289 		}
290 		blkp->bi_daddr = daddr;
291         }
292 	copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
293 	free(start, M_SEGMENT);
294 	return (0);
295 }
296 
297 /*
298  * lfs_segclean:
299  *
300  * Mark the segment clean.
301  *
302  *  0 on success
303  * -1/errno is return on error.
304  */
305 struct lfs_segclean_args {
306 	fsid_t *fsidp;		/* file system */
307 	u_long segment;		/* segment number */
308 };
309 int
310 lfs_segclean(p, uap, retval)
311 	struct proc *p;
312 	struct lfs_segclean_args *uap;
313 	int *retval;
314 {
315 	CLEANERINFO *cip;
316 	SEGUSE *sup;
317 	struct buf *bp;
318 	struct mount *mntp;
319 	struct lfs *fs;
320 	fsid_t fsid;
321 	int error;
322 
323 	if (error = suser(p->p_ucred, &p->p_acflag))
324 		return (error);
325 
326 	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
327 		return (error);
328 	if ((mntp = vfs_getvfs(&fsid)) == NULL)
329 		return (EINVAL);
330 
331 	fs = VFSTOUFS(mntp)->um_lfs;
332 
333 	if (datosn(fs, fs->lfs_curseg) == uap->segment)
334 		return (EBUSY);
335 
336 	LFS_SEGENTRY(sup, fs, uap->segment, bp);
337 	if (sup->su_flags & SEGUSE_ACTIVE) {
338 		brelse(bp);
339 		return (EBUSY);
340 	}
341 	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
342 	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
343 	    sup->su_ninos * btodb(fs->lfs_bsize);
344 	sup->su_flags &= ~SEGUSE_DIRTY;
345 	(void) VOP_BWRITE(bp);
346 
347 	LFS_CLEANERINFO(cip, fs, bp);
348 	++cip->clean;
349 	--cip->dirty;
350 	(void) VOP_BWRITE(bp);
351 	wakeup(&fs->lfs_avail);
352 	return (0);
353 }
354 
/*
 * lfs_segwait:
 *
 * This will block until a segment in file system fsid is written.  A timeout
 * in milliseconds may be specified which will awake the cleaner automatically.
 * An fsid of -1 means any file system, and a timeout of 0 means forever.
 *
 *  0 on success
 *  1 on timeout
 * -1/errno is return on error.
 */
struct lfs_segwait_args {
	fsid_t *fsidp;		/* file system */
	struct timeval *tv;	/* timeout */
};
int
lfs_segwait(p, uap, retval)
	struct proc *p;
	struct lfs_segwait_args *uap;
	int *retval;
{
	extern int lfs_allclean_wakeup;
	struct mount *mntp;
	struct timeval atv;
	fsid_t fsid;
	void *addr;		/* wait channel to sleep on */
	u_long timeout;		/* tsleep timeout in ticks; 0 = forever */
	int error, s;

	if (error = suser(p->p_ucred, &p->p_acflag)) {
		return (error);
}
#ifdef WHEN_QUADS_WORK
	/* Intended form: an fsid of -1 explicitly selects the global
	 * wakeup channel; any other unresolvable fsid is an error. */
	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if (fsid == (fsid_t)-1)
		addr = &lfs_allclean_wakeup;
	else {
		if ((mntp = vfs_getvfs(&fsid)) == NULL)
			return (EINVAL);
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
	}
#else
	/*
	 * Workaround while quad comparisons don't work: any fsid that
	 * does not resolve to a mounted file system is treated as "wait
	 * for any file system" rather than rejected with EINVAL.
	 */
	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
#endif

	if (uap->tv) {
		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
		/* Convert the relative timeout to an absolute time and
		 * then to ticks; splclock guards the read of "time". */
		s = splclock();
		timevaladd(&atv, (struct timeval *)&time);
		timeout = hzto(&atv);
		splx(s);
	} else
		timeout = 0;

	/*
	 * NOTE(review): a timed-out tsleep presumably returns EWOULDBLOCK,
	 * which this maps to 0, not the "1 on timeout" documented above;
	 * only ERESTART is distinguished (as EINTR).  Confirm intended.
	 */
	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
	return (error == ERESTART ? EINTR : 0);
}
421 
422 /*
423  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
424  * daddr from the ifile, so don't look it up again.  If the cleaner is
425  * processing IINFO structures, it may have the ondisk inode already, so
426  * don't go retrieving it again.
427  */
428 int
429 lfs_fastvget(mp, ino, daddr, vpp, dinp)
430 	struct mount *mp;
431 	ino_t ino;
432 	ufs_daddr_t daddr;
433 	struct vnode **vpp;
434 	struct dinode *dinp;
435 {
436 	register struct inode *ip;
437 	struct vnode *vp;
438 	struct ufsmount *ump;
439 	struct buf *bp;
440 	dev_t dev;
441 	int error;
442 
443 	ump = VFSTOUFS(mp);
444 	dev = ump->um_dev;
445 	/*
446 	 * This is playing fast and loose.  Someone may have the inode
447 	 * locked, in which case they are going to be distinctly unhappy
448 	 * if we trash something.
449 	 */
450 	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
451 		lfs_vref(*vpp);
452 		if ((*vpp)->v_flag & VXLOCK)
453 			clean_vnlocked++;
454 		ip = VTOI(*vpp);
455 		if (ip->i_flag & IN_LOCKED)
456 			clean_inlocked++;
457 		if (!(ip->i_flag & IN_MODIFIED))
458 			++ump->um_lfs->lfs_uinodes;
459 		ip->i_flag |= IN_MODIFIED;
460 		return (0);
461 	}
462 
463 	/* Allocate new vnode/inode. */
464 	if (error = lfs_vcreate(mp, ino, &vp)) {
465 		*vpp = NULL;
466 		return (error);
467 	}
468 
469 	/*
470 	 * Put it onto its hash chain and lock it so that other requests for
471 	 * this inode will block if they arrive while we are sleeping waiting
472 	 * for old data structures to be purged or for the contents of the
473 	 * disk portion of this inode to be read.
474 	 */
475 	ip = VTOI(vp);
476 	ufs_ihashins(ip);
477 
478 	/*
479 	 * XXX
480 	 * This may not need to be here, logically it should go down with
481 	 * the i_devvp initialization.
482 	 * Ask Kirk.
483 	 */
484 	ip->i_lfs = ump->um_lfs;
485 
486 	/* Read in the disk contents for the inode, copy into the inode. */
487 	if (dinp)
488 		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
489 			return (error);
490 	else {
491 		if (error = bread(ump->um_devvp, daddr,
492 		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
493 			/*
494 			 * The inode does not contain anything useful, so it
495 			 * would be misleading to leave it on its hash chain.
496 			 * Iput() will return it to the free list.
497 			 */
498 			ufs_ihashrem(ip);
499 
500 			/* Unlock and discard unneeded inode. */
501 			lfs_vunref(vp);
502 			brelse(bp);
503 			*vpp = NULL;
504 			return (error);
505 		}
506 		ip->i_din =
507 		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
508 		brelse(bp);
509 	}
510 
511 	/* Inode was just read from user space or disk, make sure it's locked */
512 	ip->i_flag |= IN_LOCKED;
513 
514 	/*
515 	 * Initialize the vnode from the inode, check for aliases.  In all
516 	 * cases re-init ip, the underlying vnode/inode may have changed.
517 	 */
518 	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
519 		lfs_vunref(vp);
520 		*vpp = NULL;
521 		return (error);
522 	}
523 	/*
524 	 * Finish inode initialization now that aliasing has been resolved.
525 	 */
526 	ip->i_devvp = ump->um_devvp;
527 	ip->i_flag |= IN_MODIFIED;
528 	++ump->um_lfs->lfs_uinodes;
529 	VREF(ip->i_devvp);
530 	*vpp = vp;
531 	return (0);
532 }
533 struct buf *
534 lfs_fakebuf(vp, lbn, size, uaddr)
535 	struct vnode *vp;
536 	int lbn;
537 	size_t size;
538 	caddr_t uaddr;
539 {
540 	struct buf *bp;
541 
542 	bp = lfs_newbuf(vp, lbn, 0);
543 	bp->b_saveaddr = uaddr;
544 	bp->b_bufsize = size;
545 	bp->b_bcount = size;
546 	bp->b_flags |= B_INVAL;
547 	return (bp);
548 }
549