xref: /original-bsd/sys/ufs/lfs/lfs_syscalls.c (revision 95ecee29)
1 /*-
2  * Copyright (c) 1991, 1993
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)lfs_syscalls.c	8.3 (Berkeley) 09/23/93
8  */
9 
10 #include <sys/param.h>
11 #include <sys/proc.h>
12 #include <sys/buf.h>
13 #include <sys/mount.h>
14 #include <sys/vnode.h>
15 #include <sys/malloc.h>
16 #include <sys/kernel.h>
17 
18 #include <ufs/ufs/quota.h>
19 #include <ufs/ufs/inode.h>
20 #include <ufs/ufs/ufsmount.h>
21 #include <ufs/ufs/ufs_extern.h>
22 
23 #include <ufs/lfs/lfs.h>
24 #include <ufs/lfs/lfs_extern.h>
/*
 * Advance the segment's FINFO pointer past the current (completed)
 * FINFO and its trailing block-number array.
 */
#define BUMP_FIP(SP) \
	(SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])

/* Adjust the FINFO count in the segment summary under construction. */
#define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
#define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo

/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  S is the bytes added to the summary.
 *
 * Wrapped in do/while (0) so the macro expands to exactly one statement
 * and the embedded if cannot capture a following else (dangling-else
 * hazard).  Relies on the caller's fs and sp variables being in scope.
 */
#define	CHECK_SEG(s) do {		\
	if (sp->sum_bytes_left < (s))	\
		(void) lfs_writeseg(fs, sp); \
} while (0)
39 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
40 
41 /*
42  * lfs_markv:
43  *
44  * This will mark inodes and blocks dirty, so they are written into the log.
45  * It will block until all the blocks have been written.  The segment create
46  * time passed in the block_info and inode_info structures is used to decide
47  * if the data is valid for each block (in case some process dirtied a block
48  * or inode that is being cleaned between the determination that a block is
49  * live and the lfs_markv call).
50  *
51  *  0 on success
52  * -1/errno is returned on error.
53  */
/*
 * System call argument structure for lfs_markv; laid down by the
 * user-level cleaner and copied in with copyin() below.
 */
struct lfs_markv_args {
	fsid_t fsid;		/* file system */
	BLOCK_INFO *blkiov;	/* block array (user address; blkcnt entries) */
	int blkcnt;		/* count of block array entries */
};
int
lfs_markv(p, uap, retval)
	struct proc *p;
	struct lfs_markv_args *uap;
	int *retval;
{
	struct segment *sp;
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp, **bpp;
	struct inode *ip;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
	void *start;		/* base of the copied-in BLOCK_INFO array */
	ino_t lastino;		/* inode of previous entry; detects file runs */
	daddr_t b_daddr, v_daddr;
	u_long bsize;
	int cnt, error;

	/* Only the superuser (the cleaner) may call this. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	/*
	 * Copy the cleaner's BLOCK_INFO array into kernel space.
	 * NOTE(review): cnt comes straight from user space, so
	 * cnt * sizeof(BLOCK_INFO) can overflow or demand a huge
	 * allocation -- worth bounding before the malloc.
	 */
	cnt = uap->blkcnt;
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
		goto err1;

	/* Mark blocks/inodes dirty.  */
	fs = VFSTOUFS(mntp)->um_lfs;
	bsize = fs->lfs_bsize;
	error = 0;

	/*
	 * Take the segment lock and walk the array.  Entries for the same
	 * inode are expected to be adjacent, so per-file setup (FINFO,
	 * vget) and teardown (updatemeta, writeinode) happen once per run
	 * of entries sharing bi_inode.
	 */
	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
	sp = fs->lfs_sp;
	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
	    blkp = start; cnt--; ++blkp) {
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			if (lastino != LFS_UNUSED_INUM) {
				/* Finish up last file */
				lfs_updatemeta(sp);
				lfs_writeinode(fs, sp, ip);
				lfs_vunref(vp);
				/*
				 * If no blocks were actually gathered for
				 * the file, back its FINFO entry out of the
				 * summary and reclaim the summary bytes.
				 */
				if (sp->fip->fi_nblocks)
					BUMP_FIP(sp);
				else  {
					DEC_FINFO(sp);
					sp->sum_bytes_left +=
						sizeof(FINFO) - sizeof(daddr_t);

				}
			}

			/* Start a new file */
			CHECK_SEG(sizeof(FINFO));
			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
			INC_FINFO(sp);
			sp->start_lbp = &sp->fip->fi_blocks[0];
			sp->vp = NULL;
			sp->fip->fi_version = blkp->bi_version;
			sp->fip->fi_nblocks = 0;
			sp->fip->fi_ino = blkp->bi_inode;
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				/* Look the inode's disk address up in the ifile. */
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			/* File has been deleted since the cleaner saw it. */
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/* Get the vnode/inode. */
			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
			    blkp->bi_lbn == LFS_UNUSED_LBN ?
			    blkp->bi_bp : NULL)) {
#ifdef DIAGNOSTIC
				printf("lfs_markv: VFS_VGET failed (%d)\n",
				    blkp->bi_inode);
#endif
				lastino = LFS_UNUSED_INUM;
				v_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			sp->vp = vp;
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR)
			continue;

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
		/*
		 * Skip blocks that have moved since the cleaner examined
		 * them: the cleaner's copy is stale, so it must not be
		 * rewritten over the live data.
		 */
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    b_daddr != blkp->bi_daddr)
			continue;
		/*
		 * If we got to here, then we are keeping the block.  If it
		 * is an indirect block, we want to actually put it in the
		 * buffer cache so that it can be updated in the finish_meta
		 * section.  If it's not, we need to allocate a fake buffer
		 * so that writeseg can perform the copyin and write the buffer.
		 */
		if (blkp->bi_lbn >= 0)	/* Data Block */
			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
			    blkp->bi_bp);
		else {
			/* Indirect block (negative lbn by convention). */
			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
			/*
			 * NOTE(review): on a copyin failure here, bp from
			 * getblk() is neither written nor released before
			 * the jump to err2, and err2's sweep only covers
			 * buffers already gathered -- looks like a locked
			 * buffer leak; confirm.
			 */
			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
			    (error = copyin(blkp->bi_bp, bp->b_data,
			    bsize)))
				goto err2;
			if (error = VOP_BWRITE(bp))
				goto err2;
		}
		/* Gathering may force a segment write; retry until it takes. */
		while (lfs_gatherblock(sp, bp, NULL));
	}
	/* Finish up the final file, if any entries named one. */
	if (sp->vp) {
		lfs_updatemeta(sp);
		lfs_writeinode(fs, sp, ip);
		lfs_vunref(vp);
		if (!sp->fip->fi_nblocks) {
			DEC_FINFO(sp);
			sp->sum_bytes_left += sizeof(FINFO) - sizeof(daddr_t);
		}
	}
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);
	free(start, M_SEGMENT);
	return (error);
/*
 * XXX If we come in to error 2, we might have indirect blocks that were
 * updated and now have bad block pointers.  I don't know what to do
 * about this.
 */

err2:	lfs_vunref(vp);
	/* Free up fakebuffers */
	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
		if ((*bpp)->b_flags & B_CALL) {
			/* B_CALL marks the fake buffers built above. */
			brelvp(*bpp);
			free(*bpp, M_SEGMENT);
		} else
			brelse(*bpp);
	lfs_segunlock(fs);
err1:
	free(start, M_SEGMENT);
	return(error);
}
214 
215 /*
216  * lfs_bmapv:
217  *
218  * This will fill in the current disk address for arrays of blocks.
219  *
220  *  0 on success
221  * -1/errno is returned on error.
222  */
/*
 * System call argument structure for lfs_bmapv; the block array is
 * copied in, updated with current disk addresses, and copied back out.
 */
struct lfs_bmapv_args {
	fsid_t fsid;		/* file system */
	BLOCK_INFO *blkiov;	/* block array (user address; blkcnt entries) */
	int blkcnt;		/* count of block array entries */
};
228 int
229 lfs_bmapv(p, uap, retval)
230 	struct proc *p;
231 	struct lfs_bmapv_args *uap;
232 	int *retval;
233 {
234 	BLOCK_INFO *blkp;
235 	struct mount *mntp;
236 	struct vnode *vp;
237 	void *start;
238 	daddr_t daddr;
239 	int cnt, error, step;
240 
241 	if (error = suser(p->p_ucred, &p->p_acflag))
242 		return (error);
243 	if ((mntp = getvfs(&uap->fsid)) == NULL)
244 		return (EINVAL);
245 
246 	cnt = uap->blkcnt;
247 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
248 	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
249 		free(blkp, M_SEGMENT);
250 		return (error);
251 	}
252 
253 	for (step = cnt; step--; ++blkp) {
254 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
255 			continue;
256 		/* Could be a deadlock ? */
257 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
258 			daddr = LFS_UNUSED_DADDR;
259 		else {
260 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
261 				daddr = LFS_UNUSED_DADDR;
262 			vput(vp);
263 		}
264 		blkp->bi_daddr = daddr;
265         }
266 	copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
267 	free(start, M_SEGMENT);
268 	return (0);
269 }
270 
271 /*
272  * lfs_segclean:
273  *
274  * Mark the segment clean.
275  *
276  *  0 on success
277  * -1/errno is returned on error.
278  */
/*
 * System call argument structure for lfs_segclean.
 */
struct lfs_segclean_args {
	fsid_t fsid;		/* file system */
	u_long segment;		/* segment number to mark clean */
};
int
lfs_segclean(p, uap, retval)
	struct proc *p;
	struct lfs_segclean_args *uap;
	int *retval;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	struct mount *mntp;
	struct lfs *fs;
	int error;

	/* Only the superuser (the cleaner) may mark segments clean. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	fs = VFSTOUFS(mntp)->um_lfs;

	/* The segment currently being written can never be cleaned. */
	if (datosn(fs, fs->lfs_curseg) == uap->segment)
		return (EBUSY);

	/* LFS_SEGENTRY reads the SEGUSE entry into sup, buffer in bp. */
	LFS_SEGENTRY(sup, fs, uap->segment, bp);
	if (sup->su_flags & SEGUSE_ACTIVE) {
		brelse(bp);
		return(EBUSY);
	}
	/*
	 * Credit the reclaimed space back to the file system: the
	 * segment's disk blocks (less one -- NOTE(review): the -1 looks
	 * like a deliberate fudge; confirm), plus the blocks that were
	 * consumed by segment summaries and inode blocks.
	 */
	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
	    sup->su_ninos * btodb(fs->lfs_bsize);
	sup->su_flags &= ~SEGUSE_DIRTY;
	(void) VOP_BWRITE(bp);

	/* Update the cleaner info's clean/dirty segment counts; bp is
	 * reused by the macro for the cleanerinfo block. */
	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	(void) VOP_BWRITE(bp);
	/* Wake writers sleeping on lfs_avail waiting for free space. */
	wakeup(&fs->lfs_avail);
	return (0);
}
324 
325 /*
326  * lfs_segwait:
327  *
328  * This will block until a segment in file system fsid is written.  A timeout
329  * in milliseconds may be specified which will awake the cleaner automatically.
330  * An fsid of -1 means any file system, and a timeout of 0 means forever.
331  *
332  *  0 on success
333  *  1 on timeout
334  * -1/errno is returned on error.
335  */
/*
 * System call argument structure for lfs_segwait.
 */
struct lfs_segwait_args {
	fsid_t fsid;		/* file system (-1 means any) */
	struct timeval *tv;	/* timeout (user address; NULL or 0 = forever) */
};
int
lfs_segwait(p, uap, retval)
	struct proc *p;
	struct lfs_segwait_args *uap;
	int *retval;
{
	extern int lfs_allclean_wakeup;
	struct mount *mntp;
	struct timeval atv;
	void *addr;		/* wait channel handed to tsleep */
	u_long timeout;
	int error, s;

	if (error = suser(p->p_ucred, &p->p_acflag)) {
		return (error);
	}
#ifdef WHEN_QUADS_WORK
	if (uap->fsid == (fsid_t)-1)
		addr = &lfs_allclean_wakeup;
	else {
		if ((mntp = getvfs(&uap->fsid)) == NULL)
			return (EINVAL);
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
	}
#else
	/*
	 * Until fsid_t comparisons work, any unresolvable fsid is
	 * treated as "wait on any file system" instead of EINVAL.
	 */
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
#endif

	/* Convert the user timeout to an absolute tick count for tsleep;
	 * splclock guards the read-modify of the global time. */
	if (uap->tv) {
		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
		s = splclock();
		timevaladd(&atv, (struct timeval *)&time);
		timeout = hzto(&atv);
		splx(s);
	} else
		timeout = 0;	/* 0 == sleep forever */

	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
	/*
	 * NOTE(review): the header comment promises 1 on timeout, but a
	 * tsleep timeout (EWOULDBLOCK) maps to 0 here and only a signal
	 * restart becomes EINTR -- confirm which is intended.
	 */
	return (error == ERESTART ? EINTR : 0);
}
386 
387 /*
388  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
389  * daddr from the ifile, so don't look it up again.  If the cleaner is
390  * processing IINFO structures, it may have the ondisk inode already, so
391  * don't go retrieving it again.
392  */
393 int
394 lfs_fastvget(mp, ino, daddr, vpp, dinp)
395 	struct mount *mp;
396 	ino_t ino;
397 	daddr_t daddr;
398 	struct vnode **vpp;
399 	struct dinode *dinp;
400 {
401 	register struct inode *ip;
402 	struct vnode *vp;
403 	struct ufsmount *ump;
404 	struct buf *bp;
405 	dev_t dev;
406 	int error;
407 
408 	ump = VFSTOUFS(mp);
409 	dev = ump->um_dev;
410 	/*
411 	 * This is playing fast and loose.  Someone may have the inode
412 	 * locked, in which case they are going to be distinctly unhappy
413 	 * if we trash something.
414 	 */
415 	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
416 		lfs_vref(*vpp);
417 		if ((*vpp)->v_flag & VXLOCK)
418 			printf ("Cleaned vnode VXLOCKED\n");
419 		ip = VTOI(*vpp);
420 		if (ip->i_flags & IN_LOCKED)
421 			printf("cleaned vnode locked\n");
422 		if (!(ip->i_flag & IN_MODIFIED)) {
423 			++ump->um_lfs->lfs_uinodes;
424 			ip->i_flag |= IN_MODIFIED;
425 		}
426 		ip->i_flag |= IN_MODIFIED;
427 		return (0);
428 	}
429 
430 	/* Allocate new vnode/inode. */
431 	if (error = lfs_vcreate(mp, ino, &vp)) {
432 		*vpp = NULL;
433 		return (error);
434 	}
435 
436 	/*
437 	 * Put it onto its hash chain and lock it so that other requests for
438 	 * this inode will block if they arrive while we are sleeping waiting
439 	 * for old data structures to be purged or for the contents of the
440 	 * disk portion of this inode to be read.
441 	 */
442 	ip = VTOI(vp);
443 	ufs_ihashins(ip);
444 
445 	/*
446 	 * XXX
447 	 * This may not need to be here, logically it should go down with
448 	 * the i_devvp initialization.
449 	 * Ask Kirk.
450 	 */
451 	ip->i_lfs = ump->um_lfs;
452 
453 	/* Read in the disk contents for the inode, copy into the inode. */
454 	if (dinp)
455 		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
456 			return (error);
457 	else {
458 		if (error = bread(ump->um_devvp, daddr,
459 		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
460 			/*
461 			 * The inode does not contain anything useful, so it
462 			 * would be misleading to leave it on its hash chain.
463 			 * Iput() will return it to the free list.
464 			 */
465 			ufs_ihashrem(ip);
466 
467 			/* Unlock and discard unneeded inode. */
468 			lfs_vunref(vp);
469 			brelse(bp);
470 			*vpp = NULL;
471 			return (error);
472 		}
473 		ip->i_din =
474 		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
475 		brelse(bp);
476 	}
477 
478 	/* Inode was just read from user space or disk, make sure it's locked */
479 	ip->i_flag |= IN_LOCKED;
480 
481 	/*
482 	 * Initialize the vnode from the inode, check for aliases.  In all
483 	 * cases re-init ip, the underlying vnode/inode may have changed.
484 	 */
485 	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
486 		lfs_vunref(vp);
487 		*vpp = NULL;
488 		return (error);
489 	}
490 	/*
491 	 * Finish inode initialization now that aliasing has been resolved.
492 	 */
493 	ip->i_devvp = ump->um_devvp;
494 	ip->i_flag |= IN_MODIFIED;
495 	++ump->um_lfs->lfs_uinodes;
496 	VREF(ip->i_devvp);
497 	*vpp = vp;
498 	return (0);
499 }
500 struct buf *
501 lfs_fakebuf(vp, lbn, size, uaddr)
502 	struct vnode *vp;
503 	int lbn;
504 	size_t size;
505 	caddr_t uaddr;
506 {
507 	struct buf *bp;
508 
509 	bp = lfs_newbuf(vp, lbn, 0);
510 	bp->b_saveaddr = uaddr;
511 	bp->b_bufsize = size;
512 	bp->b_bcount = size;
513 	bp->b_flags |= B_INVAL;
514 	return(bp);
515 }
516