/*-
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_syscalls.c	7.28 (Berkeley) 12/10/92
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/kernel.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
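
/*
 * Advance the segment's FINFO pointer past the current file's block
 * list.  A FINFO is immediately followed by its fi_blocks array, so
 * &fi_blocks[fi_nblocks] is where the next FINFO begins.
 */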
#define BUMP_FIP(SP) \
	(SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])

#define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
#define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo

/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  s is the number of bytes to be added to the summary.
 */
#define	CHECK_SEG(s)			\
if (sp->sum_bytes_left < (s)) {		\
	(void) lfs_writeseg(fs, sp);	\
}
struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));

/*
 * lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is returned on error.
 */
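/*
 * A rough sketch of the cleaner-side call, for orientation only (the
 * real caller is the user-level cleaner; only the fields this routine
 * actually reads are shown):
 *
 *	BLOCK_INFO bi;
 *	bi.bi_inode = ino;		file the block belongs to
 *	bi.bi_lbn = lbn;		logical block, or LFS_UNUSED_LBN
 *	bi.bi_daddr = daddr;		where the cleaner found the block
 *	bi.bi_version = version;	version from the ifile entry
 *	bi.bi_bp = ubuf;		user-space copy of the contents
 *	error = lfs_markv(&fsid, &bi, 1);
 */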
int
lfs_markv(p, uap, retval)
	struct proc *p;
	struct args {
		fsid_t fsid;		/* file system */
		BLOCK_INFO *blkiov;	/* block array */
		int blkcnt;		/* count of block array entries */
	} *uap;
	int *retval;
{
	struct segment *sp;
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp, **bpp;
	struct inode *ip;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
	void *start;
	ino_t lastino;
	daddr_t b_daddr, v_daddr;
	u_long bsize;
	int cnt, error;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	cnt = uap->blkcnt;
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
		goto err1;

	/* Mark blocks/inodes dirty.  */
	fs = VFSTOUFS(mntp)->um_lfs;
	bsize = fs->lfs_bsize;
	error = 0;

	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
	sp = fs->lfs_sp;
	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
	    blkp = start; cnt--; ++blkp) {
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			if (lastino != LFS_UNUSED_INUM) {
				/* Finish up last file */
				lfs_updatemeta(sp);
				lfs_writeinode(fs, sp, ip);
				lfs_vunref(vp);
				if (sp->fip->fi_nblocks)
					BUMP_FIP(sp);
				else {
					DEC_FINFO(sp);
					sp->sum_bytes_left +=
					    sizeof(FINFO) - sizeof(daddr_t);
				}
			}

			/* Start a new file */
			CHECK_SEG(sizeof(FINFO));
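			/*
			 * sizeof(FINFO) already counts one fi_blocks slot,
			 * so only the net header size is charged here; each
			 * block gathered later pays for its own daddr_t.
			 */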
			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
			INC_FINFO(sp);
			sp->start_lbp = &sp->fip->fi_blocks[0];
			sp->vp = NULL;
			sp->fip->fi_version = blkp->bi_version;
			sp->fip->fi_nblocks = 0;
			sp->fip->fi_ino = blkp->bi_inode;
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/* Get the vnode/inode. */
			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
			    blkp->bi_lbn == LFS_UNUSED_LBN ?
			    blkp->bi_bp : NULL)) {
#ifdef DIAGNOSTIC
				printf("lfs_markv: VFS_VGET failed (%d)\n",
				    blkp->bi_inode);
#endif
				lastino = LFS_UNUSED_INUM;
				v_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			sp->vp = vp;
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR)
			continue;

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
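		/*
		 * Make sure the block still lives where the cleaner found
		 * it.  If bmap now reports a different disk address, the
		 * block was rewritten after the cleaner copied it, so this
		 * copy is stale and is skipped.
		 */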
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    b_daddr != blkp->bi_daddr)
			continue;
		/*
		 * If we got to here, then we are keeping the block.  If it
		 * is an indirect block, we want to actually put it in the
		 * buffer cache so that it can be updated in the finish_meta
		 * section.  If it's not, we need to allocate a fake buffer
		 * so that writeseg can perform the copyin and write the buffer.
		 */
		if (blkp->bi_lbn >= 0)	/* Data Block */
			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
			    blkp->bi_bp);
		else {
			bp = getblk(vp, blkp->bi_lbn, bsize);
			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
			    (error = copyin(blkp->bi_bp, bp->b_un.b_addr,
			    bsize)))
				goto err2;
			if (error = VOP_BWRITE(bp))
				goto err2;
		}
		while (lfs_gatherblock(sp, bp, NULL));
	}
	if (sp->vp) {
		lfs_updatemeta(sp);
		lfs_writeinode(fs, sp, ip);
		lfs_vunref(vp);
		if (!sp->fip->fi_nblocks) {
			DEC_FINFO(sp);
			sp->sum_bytes_left += sizeof(FINFO) - sizeof(daddr_t);
		}
	}
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);
	free(start, M_SEGMENT);
	return (error);
/*
 * XXX If we arrive at err2, we might have indirect blocks that were
 * updated and now have bad block pointers.  I don't know what to do
 * about this.
 */

err2:	lfs_vunref(vp);
	/* Free up fake buffers. */
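	/*
	 * Buffers marked B_CALL came from lfs_fakebuf/lfs_newbuf rather
	 * than the buffer cache, so they are torn down by hand; genuine
	 * cache buffers are simply released.
	 */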
	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
		if ((*bpp)->b_flags & B_CALL) {
			brelvp(*bpp);
			free(*bpp, M_SEGMENT);
		} else
			brelse(*bpp);
	lfs_segunlock(fs);
err1:
	free(start, M_SEGMENT);
	return (error);
}

/*
 * lfs_bmapv:
 *
 * This will fill in the current disk address for arrays of blocks.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
lfs_bmapv(p, uap, retval)
	struct proc *p;
	struct args {
		fsid_t fsid;		/* file system */
		BLOCK_INFO *blkiov;	/* block array */
		int blkcnt;		/* count of block array entries */
	} *uap;
	int *retval;
{
	BLOCK_INFO *blkp;
	struct mount *mntp;
	struct vnode *vp;
	void *start;
	daddr_t daddr;
	int cnt, error, step;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	cnt = uap->blkcnt;
	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
		free(blkp, M_SEGMENT);
		return (error);
	}

	for (step = cnt; step--; ++blkp) {
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
		/* XXX Could be a deadlock? */
		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
			daddr = LFS_UNUSED_DADDR;
		else {
			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
				daddr = LFS_UNUSED_DADDR;
			vput(vp);
		}
		blkp->bi_daddr = daddr;
	}
	error = copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
	free(start, M_SEGMENT);
	return (error);
}

/*
 * lfs_segclean:
 *
 * Mark the segment clean.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
lfs_segclean(p, uap, retval)
	struct proc *p;
	struct args {
		fsid_t fsid;		/* file system */
		u_long segment;		/* segment number */
	} *uap;
	int *retval;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	struct mount *mntp;
	struct lfs *fs;
	int error;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	fs = VFSTOUFS(mntp)->um_lfs;

	if (datosn(fs, fs->lfs_curseg) == uap->segment)
		return (EBUSY);

	LFS_SEGENTRY(sup, fs, uap->segment, bp);
	if (sup->su_flags & SEGUSE_ACTIVE) {
		brelse(bp);
		return (EBUSY);
	}
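	/*
	 * Credit the cleaned segment back to the free-space counters:
	 * lfs_avail gets the segment's disk blocks, and lfs_bfree gets
	 * back the blocks its summaries and inode blocks had consumed.
	 */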
	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
	    sup->su_ninos * btodb(fs->lfs_bsize);
	sup->su_flags &= ~SEGUSE_DIRTY;
	(void) VOP_BWRITE(bp);

	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	(void) VOP_BWRITE(bp);
	wakeup(&fs->lfs_avail);
	return (0);
}

/*
 * lfs_segwait:
 *
 * This will block until a segment in file system fsid is written.  A timeout
 * may be specified, after which the caller is awakened automatically.  An
 * fsid of -1 means any file system, and a timeout of 0 means forever.
 *
 *  0 on success or timeout
 * -1/errno is returned on error.
 */
int
lfs_segwait(p, uap, retval)
	struct proc *p;
	struct args {
		fsid_t fsid;		/* file system */
		struct timeval *tv;	/* timeout */
	} *uap;
	int *retval;
{
	extern int lfs_allclean_wakeup;
	struct mount *mntp;
	struct timeval atv;
	void *addr;
	u_long timeout;
	int error, s;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#ifdef WHEN_QUADS_WORK
	if (uap->fsid == (fsid_t)-1)
		addr = &lfs_allclean_wakeup;
	else {
		if ((mntp = getvfs(&uap->fsid)) == NULL)
			return (EINVAL);
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
	}
#else
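	/*
	 * fsid_t is a quad, and quad comparisons don't work yet (see the
	 * WHEN_QUADS_WORK branch above), so a failed lookup is treated as
	 * "any file system" rather than as an error.
	 */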
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
#endif

	if (uap->tv) {
		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
		s = splclock();
		timevaladd(&atv, (struct timeval *)&time);
		timeout = hzto(&atv);
		splx(s);
	} else
		timeout = 0;

	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
	return (error == ERESTART ? EINTR : 0);
}

/*
 * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
 * daddr from the ifile, so don't look it up again.  If the cleaner is
 * processing IINFO structures, it may have the on-disk inode already, so
 * don't go retrieving it again.
 */
int
lfs_fastvget(mp, ino, daddr, vpp, dinp)
	struct mount *mp;
	ino_t ino;
	daddr_t daddr;
	struct vnode **vpp;
	struct dinode *dinp;
{
	register struct inode *ip;
	struct vnode *vp;
	struct ufsmount *ump;
	struct buf *bp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
	/*
	 * This is playing fast and loose.  Someone may have the inode
	 * locked, in which case they are going to be distinctly unhappy
	 * if we trash something.
	 */
	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
		lfs_vref(*vpp);
		if ((*vpp)->v_flag & VXLOCK)
			printf("Cleaned vnode VXLOCKED\n");
		ip = VTOI(*vpp);
		if (ip->i_flag & ILOCKED)
			printf("Cleaned vnode ILOCKED\n");
		if (!(ip->i_flag & IMOD)) {
			++ump->um_lfs->lfs_uinodes;
			ip->i_flag |= IMOD;
		}
		return (0);
	}

	/* Allocate new vnode/inode. */
	if (error = lfs_vcreate(mp, ino, &vp)) {
		*vpp = NULL;
		return (error);
	}

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip = VTOI(vp);
	ufs_ihashins(ip);

	/*
	 * XXX
	 * This may not need to be here, logically it should go down with
	 * the i_devvp initialization.
	 * Ask Kirk.
	 */
	ip->i_lfs = ump->um_lfs;

	/* Read in the disk contents for the inode, copy into the inode. */
	if (dinp) {
		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
			return (error);
	} else {
		if (error = bread(ump->um_devvp, daddr,
		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
			/*
			 * The inode does not contain anything useful, so it
			 * would be misleading to leave it on its hash chain.
			 * Iput() will return it to the free list.
			 */
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			lfs_vunref(vp);
			brelse(bp);
			*vpp = NULL;
			return (error);
		}
		ip->i_din = *lfs_ifind(ump->um_lfs, ino, bp->b_un.b_dino);
		brelse(bp);
	}

	/* Inode was just read from user space or disk, make sure it's locked. */
	ip->i_flag |= ILOCKED;

	/*
	 * Initialize the vnode from the inode, check for aliases.  In all
	 * cases re-init ip, the underlying vnode/inode may have changed.
	 */
	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
		lfs_vunref(vp);
		*vpp = NULL;
		return (error);
	}
	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	ip->i_flag |= IMOD;
	++ump->um_lfs->lfs_uinodes;
	VREF(ip->i_devvp);
	*vpp = vp;
	return (0);
}
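
/*
 * Build a throwaway buffer header for a block whose contents are still
 * sitting in the cleaner's address space.  b_saveaddr carries the user
 * address so that lfs_writeseg can perform the copyin at write time
 * (see lfs_markv above); B_INVAL marks the in-kernel contents as not
 * yet valid.
 */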
struct buf *
lfs_fakebuf(vp, lbn, size, uaddr)
	struct vnode *vp;
	int lbn;
	size_t size;
	caddr_t uaddr;
{
	struct buf *bp;

	bp = lfs_newbuf(vp, lbn, 0);
	bp->b_saveaddr = uaddr;
	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_flags |= B_INVAL;
	return (bp);
}