/*-
 * Copyright (c) 1991 The Regents of the University of California.
 * All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_syscalls.c	7.25 (Berkeley) 10/08/92
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/kernel.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>
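
/*
 * BUMP_FIP advances the segment's FINFO pointer past the block list of the
 * FINFO just completed; INC_FINFO and DEC_FINFO adjust the count of FINFO
 * structures recorded in the current segment summary.
 */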
#define BUMP_FIP(SP) \
	(SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])

#define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
#define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo

/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  S is the bytes added to the summary.
 */
#define	CHECK_SEG(s)			\
if (sp->sum_bytes_left < (s)) {		\
	(void) lfs_writeseg(fs, sp);	\
	lfs_initseg(fs, sp);		\
}

struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));

/*
 * lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
lfs_markv(p, uap, retval)
	struct proc *p;
	struct args {
		fsid_t fsid;		/* file system */
		BLOCK_INFO *blkiov;	/* block array */
		int blkcnt;		/* count of block array entries */
	} *uap;
	int *retval;
{
	struct segment *sp;
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp, **bpp;
	struct inode *ip;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
	void *start;
	ino_t lastino;
	daddr_t b_daddr, v_daddr;
	u_long bsize;
	int cnt, error;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);
	/* Initialize a segment. */
	sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK);
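	/* One buffer pointer for each block a single segment summary can hold. */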
	sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) /
	    sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK);
	sp->seg_flags = SEGM_CKP;
	sp->vp = NULL;

	cnt = uap->blkcnt;
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
		goto err1;

	/* Mark blocks/inodes dirty.  */
	fs = VFSTOUFS(mntp)->um_lfs;
	bsize = fs->lfs_bsize;
	error = 0;

	lfs_seglock(fs);
	lfs_initseg(fs, sp);
	sp->seg_flags |= SEGM_CLEAN;
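	/*
	 * For each block the cleaner handed us, locate its file (starting a
	 * new FINFO entry whenever the inode number changes) and, if the
	 * block is still live, gather it into the current partial segment.
	 */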
	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
	    blkp = start; cnt--; ++blkp) {
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			if (lastino != LFS_UNUSED_INUM) {
				/* Finish up last file */
				lfs_updatemeta(sp);
				lfs_writeinode(fs, sp, ip);
				vput(vp);
				if (sp->fip->fi_nblocks)
					BUMP_FIP(sp);
				else {
					DEC_FINFO(sp);
					sp->sum_bytes_left +=
						sizeof(FINFO) - sizeof(daddr_t);
				}
			}

			/* Start a new file */
			CHECK_SEG(sizeof(FINFO));
			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
			INC_FINFO(sp);
			sp->start_lbp = &sp->fip->fi_blocks[0];
			sp->vp = NULL;
			sp->fip->fi_version = blkp->bi_version;
			sp->fip->fi_nblocks = 0;
			sp->fip->fi_ino = blkp->bi_inode;
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/* Get the vnode/inode. */
			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
			    blkp->bi_lbn == LFS_UNUSED_LBN ?
			    blkp->bi_bp : NULL)) {
#ifdef DIAGNOSTIC
				printf("lfs_markv: VFS_VGET failed (%d)\n",
				    blkp->bi_inode);
#endif
				lastino = LFS_UNUSED_INUM;
				v_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			sp->vp = vp;
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR)
			continue;

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    b_daddr != blkp->bi_daddr)
			continue;
		/*
		 * If we got to here, then we are keeping the block.  If it
		 * is an indirect block, we want to actually put it in the
		 * buffer cache so that it can be updated in the finish_meta
		 * section.  If it's not, we need to allocate a fake buffer
		 * so that writeseg can perform the copyin and write the buffer.
		 */
		if (blkp->bi_lbn >= 0)	/* Data Block */
			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
			    blkp->bi_bp);
		else {
			bp = getblk(vp, blkp->bi_lbn, bsize);
			if (!(bp->b_flags & B_CACHE) &&
			    (error = copyin(blkp->bi_bp, bp->b_un.b_addr,
			    bsize)))
				goto err2;
			if (error = VOP_BWRITE(bp))
				goto err2;
		}
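		/*
		 * Add the block to the segment; lfs_gatherblock returns
		 * nonzero as long as the block still needs to be gathered
		 * (e.g. the partial segment filled and had to be written).
		 */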
		while (lfs_gatherblock(sp, bp, NULL));
	}
	if (sp->vp) {
		lfs_updatemeta(sp);
		lfs_writeinode(fs, sp, ip);
		vput(vp);
		if (!sp->fip->fi_nblocks) {
			DEC_FINFO(sp);
			sp->sum_bytes_left += sizeof(FINFO) - sizeof(daddr_t);
		}
	}
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);
	free(start, M_SEGMENT);
	free(sp->bpp, M_SEGMENT);
	free(sp, M_SEGMENT);
	return (error);
/*
 * XXX If we come in to error 2, we might have indirect blocks that were
 * updated and now have bad block pointers.  I don't know what to do
 * about this.
 */

err2:	vput(vp);
	/* Free up fakebuffers */
	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
		if ((*bpp)->b_flags & B_CALL) {
			brelvp(*bpp);
			free(*bpp, M_SEGMENT);
		} else
			brelse(*bpp);
	lfs_segunlock(fs);
err1:
	free(sp->bpp, M_SEGMENT);
	free(sp, M_SEGMENT);
	free(start, M_SEGMENT);
	return(error);
}

/*
 * lfs_bmapv:
 *
 * This will fill in the current disk address for an array of blocks.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
lfs_bmapv(p, uap, retval)
	struct proc *p;
	struct args {
		fsid_t fsid;		/* file system */
		BLOCK_INFO *blkiov;	/* block array */
		int blkcnt;		/* count of block array entries */
	} *uap;
	int *retval;
{
	BLOCK_INFO *blkp;
	struct mount *mntp;
	struct vnode *vp;
	void *start;
	daddr_t daddr;
	int cnt, error, step;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	cnt = uap->blkcnt;
	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
		free(blkp, M_SEGMENT);
		return (error);
	}

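	/*
	 * For each block in the array, look up its vnode and translate the
	 * logical block number into its current disk address; blocks whose
	 * inodes cannot be fetched, or which are no longer mapped, are
	 * reported as LFS_UNUSED_DADDR.
	 */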
	for (step = cnt; step--; ++blkp) {
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
			daddr = LFS_UNUSED_DADDR;
		else {
			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
				daddr = LFS_UNUSED_DADDR;
			vput(vp);
		}
		blkp->bi_daddr = daddr;
	}
	copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
	free(start, M_SEGMENT);
	return (0);
}

/*
 * lfs_segclean:
 *
 * Mark the segment clean.
 *
 *  0 on success
 * -1/errno is returned on error.
 */
int
lfs_segclean(p, uap, retval)
	struct proc *p;
	struct args {
		fsid_t fsid;		/* file system */
		u_long segment;		/* segment number */
	} *uap;
	int *retval;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	struct mount *mntp;
	struct lfs *fs;
	int error;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	fs = VFSTOUFS(mntp)->um_lfs;

	if (datosn(fs, fs->lfs_curseg) == uap->segment)
		return (EBUSY);

	LFS_SEGENTRY(sup, fs, uap->segment, bp);
	if (sup->su_flags & SEGUSE_ACTIVE) {
		brelse(bp);
		return(EBUSY);
	}
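	/* Credit the reclaimed segment's space back to the free counts. */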
	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
	    sup->su_ninos * btodb(fs->lfs_bsize);
	sup->su_flags &= ~SEGUSE_DIRTY;
	(void) VOP_BWRITE(bp);

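	/* Update the cleaner info block's clean/dirty segment counts. */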
	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	(void) VOP_BWRITE(bp);
	wakeup(&fs->lfs_avail);
	return (0);
}

/*
 * lfs_segwait:
 *
 * This will block until a segment in file system fsid is written.  A timeout
 * in milliseconds may be specified, which will wake the cleaner automatically.
 * An fsid of -1 means any file system, and a timeout of 0 means forever.
 *
 *  0 on success
 *  1 on timeout
 * -1/errno is returned on error.
 */
int
lfs_segwait(p, uap, retval)
	struct proc *p;
	struct args {
		fsid_t fsid;		/* file system */
		struct timeval *tv;	/* timeout */
	} *uap;
	int *retval;
{
	extern int lfs_allclean_wakeup;
	struct mount *mntp;
	struct timeval atv;
	void *addr;
	u_long timeout;
	int error, s;

	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
#ifdef WHEN_QUADS_WORK
	if (uap->fsid == (fsid_t)-1)
		addr = &lfs_allclean_wakeup;
	else {
		if ((mntp = getvfs(&uap->fsid)) == NULL)
			return (EINVAL);
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
	}
#else
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
#endif

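	/*
	 * Convert the caller-supplied timeval into an absolute time and then
	 * into a tick count for tsleep(); a null timeval means wait forever.
	 */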
	if (uap->tv) {
		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
		s = splclock();
		timevaladd(&atv, (struct timeval *)&time);
		timeout = hzto(&atv);
		splx(s);
	} else
		timeout = 0;

	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
	return (error == ERESTART ? EINTR : 0);
}

/*
 * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
 * daddr from the ifile, so don't look it up again.  If the cleaner is
 * processing IINFO structures, it may have the on-disk inode already, so
 * don't go retrieving it again.
 */
int
lfs_fastvget(mp, ino, daddr, vpp, dinp)
	struct mount *mp;
	ino_t ino;
	daddr_t daddr;
	struct vnode **vpp;
	struct dinode *dinp;
{
	register struct inode *ip;
	struct vnode *vp;
	struct ufsmount *ump;
	struct buf *bp;
	dev_t dev;
	int error;

	ump = VFSTOUFS(mp);
	dev = ump->um_dev;
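
	/*
	 * If the inode is already in core, mark it dirty (counting it in
	 * lfs_uinodes only the first time) and return the existing vnode.
	 */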
	if ((*vpp = ufs_ihashget(dev, ino)) != NULL) {
		ip = VTOI(*vpp);
		if (!(ip->i_flag & IMOD)) {
			++ump->um_lfs->lfs_uinodes;
			ip->i_flag |= IMOD;
		}
		ip->i_flag |= IMOD;
		return (0);
	}

	/* Allocate new vnode/inode. */
	if (error = lfs_vcreate(mp, ino, &vp)) {
		*vpp = NULL;
		return (error);
	}

	/*
	 * Put it onto its hash chain and lock it so that other requests for
	 * this inode will block if they arrive while we are sleeping waiting
	 * for old data structures to be purged or for the contents of the
	 * disk portion of this inode to be read.
	 */
	ip = VTOI(vp);
	ufs_ihashins(ip);

	/*
	 * XXX
	 * This may not need to be here, logically it should go down with
	 * the i_devvp initialization.
	 * Ask Kirk.
	 */
	ip->i_lfs = ump->um_lfs;

	/* Read in the disk contents for the inode, copy into the inode. */
	if (dinp) {
		/* The cleaner supplied the on-disk inode; copy it in. */
		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
			return (error);
	} else {
		if (error = bread(ump->um_devvp, daddr,
		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
			/*
			 * The inode does not contain anything useful, so it
			 * would be misleading to leave it on its hash chain.
			 * Iput() will return it to the free list.
			 */
			ufs_ihashrem(ip);

			/* Unlock and discard unneeded inode. */
			ufs_iput(ip);
			brelse(bp);
			*vpp = NULL;
			return (error);
		}
		ip->i_din = *lfs_ifind(ump->um_lfs, ino, bp->b_un.b_dino);
		brelse(bp);
	}

	/* Inode was just read from user space or disk, make sure it's locked */
	ip->i_flag |= ILOCKED;

	/*
	 * Initialize the vnode from the inode, check for aliases.  In all
	 * cases re-init ip, the underlying vnode/inode may have changed.
	 */
	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
		ufs_iput(ip);
		*vpp = NULL;
		return (error);
	}
	/*
	 * Finish inode initialization now that aliasing has been resolved.
	 */
	ip->i_devvp = ump->um_devvp;
	ip->i_flag |= IMOD;
	++ump->um_lfs->lfs_uinodes;
	VREF(ip->i_devvp);
	*vpp = vp;
	return (0);
}
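
/*
 * Build a "fake" buffer for a data block being cleaned: it carries the
 * cleaner's user-space address of the block so that lfs_writeseg can copy
 * the data in when the segment is actually written.
 */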
struct buf *
lfs_fakebuf(vp, lbn, size, uaddr)
	struct vnode *vp;
	int lbn;
	size_t size;
	caddr_t uaddr;
{
	struct buf *bp;

	bp = lfs_newbuf(vp, lbn, 0);
	bp->b_saveaddr = uaddr;
	bp->b_bufsize = size;
	bp->b_bcount = size;
	bp->b_flags |= B_INVAL;
	return(bp);
}