xref: /original-bsd/sys/ufs/lfs/lfs_syscalls.c (revision 27393bdf)
1 /*-
2  * Copyright (c) 1991, 1993, 1994
3  *	The Regents of the University of California.  All rights reserved.
4  *
5  * %sccs.include.redist.c%
6  *
7  *	@(#)lfs_syscalls.c	8.8 (Berkeley) 03/28/95
8  */
9 
10 #include <sys/param.h>
11 #include <sys/proc.h>
12 #include <sys/buf.h>
13 #include <sys/mount.h>
14 #include <sys/vnode.h>
15 #include <sys/malloc.h>
16 #include <sys/kernel.h>
17 
18 #include <ufs/ufs/quota.h>
19 #include <ufs/ufs/inode.h>
20 #include <ufs/ufs/ufsmount.h>
21 #include <ufs/ufs/ufs_extern.h>
22 
23 #include <ufs/lfs/lfs.h>
24 #include <ufs/lfs/lfs_extern.h>
25 #define BUMP_FIP(SP) \
26 	(SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])
27 
28 #define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
29 #define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo
30 
31 /*
32  * Before committing to add something to a segment summary, make sure there
33  * is enough room.  S is the bytes added to the summary.
34  */
35 #define	CHECK_SEG(s)			\
36 if (sp->sum_bytes_left < (s)) {		\
37 	(void) lfs_writeseg(fs, sp);	\
38 }
39 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));
40 
/*
 * lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 *  0 on success
 * -1/errno is return on error.
 */
struct lfs_markv_args {
	fsid_t *fsidp;		/* file system */
	BLOCK_INFO *blkiov;	/* block array */
	int blkcnt;		/* count of block array entries */
};
int
lfs_markv(p, uap, retval)
	struct proc *p;
	struct lfs_markv_args *uap;
	int *retval;
{
	struct segment *sp;
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp, **bpp;
	struct inode *ip;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
	fsid_t fsid;
	void *start;		/* base of the copied-in BLOCK_INFO array */
	ino_t lastino;		/* inode of the FINFO currently being built */
	ufs_daddr_t b_daddr, v_daddr;
	u_long bsize;
	int cnt, error;

	/* Only the superuser (i.e. the cleaner) may call this. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);

	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (EINVAL);

	/*
	 * Copy the block descriptions in from user space.
	 * NOTE(review): uap->blkcnt sizes the allocation unvalidated; a
	 * huge or negative count from the (root-only) caller would
	 * misbehave here -- confirm the cleaner bounds it.
	 */
	cnt = uap->blkcnt;
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
		goto err1;

	/* Mark blocks/inodes dirty.  */
	fs = VFSTOUFS(mntp)->um_lfs;
	bsize = fs->lfs_bsize;
	error = 0;

	/*
	 * Take the segment lock and walk the BLOCK_INFO array.  The loop
	 * assumes entries for the same inode are contiguous: a FINFO is
	 * opened when the inode number changes and closed on the next
	 * change (or, for the final file, after the loop).
	 */
	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
	sp = fs->lfs_sp;
	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
	    blkp = start; cnt--; ++blkp) {
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			if (lastino != LFS_UNUSED_INUM) {
				/* Finish up last file */
				if (sp->fip->fi_nblocks == 0) {
					/* Empty FINFO: give its summary space back. */
					DEC_FINFO(sp);
					sp->sum_bytes_left +=
					    sizeof(FINFO) - sizeof(ufs_daddr_t);
				} else {
					lfs_updatemeta(sp);
					BUMP_FIP(sp);
				}

				lfs_writeinode(fs, sp, ip);
				lfs_vunref(vp);
			}

			/* Start a new file */
			CHECK_SEG(sizeof(FINFO));
			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(ufs_daddr_t);
			INC_FINFO(sp);
			sp->start_lbp = &sp->fip->fi_blocks[0];
			sp->vp = NULL;
			sp->fip->fi_version = blkp->bi_version;
			sp->fip->fi_nblocks = 0;
			sp->fip->fi_ino = blkp->bi_inode;
			lastino = blkp->bi_inode;
			/* The ifile's own address lives in the superblock. */
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			/* File was deleted after the cleaner sampled it. */
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/* Get the vnode/inode. */
			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
			    blkp->bi_lbn == LFS_UNUSED_LBN ?
			    blkp->bi_bp : NULL)) {
#ifdef DIAGNOSTIC
				printf("lfs_markv: VFS_VGET failed (%d)\n",
				    blkp->bi_inode);
#endif
				lastino = LFS_UNUSED_INUM;
				v_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			sp->vp = vp;
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR)
			continue;

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
		/*
		 * Skip the block if it has moved since the cleaner looked:
		 * the live copy is elsewhere, so this copy is stale.
		 */
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    b_daddr != blkp->bi_daddr)
			continue;
		/*
		 * If we got to here, then we are keeping the block.  If it
		 * is an indirect block, we want to actually put it in the
		 * buffer cache so that it can be updated in the finish_meta
		 * section.  If it's not, we need to allocate a fake buffer
		 * so that writeseg can perform the copyin and write the buffer.
		 */
		if (blkp->bi_lbn >= 0)	/* Data Block */
			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
			    blkp->bi_bp);
		else {
			/* Indirect block (negative lbn). */
			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
			    (error = copyin(blkp->bi_bp, bp->b_data,
			    bsize)))
				goto err2;
			if (error = VOP_BWRITE(bp))
				goto err2;
		}
		/* Retry until the block has been gathered into the segment. */
		while (lfs_gatherblock(sp, bp, NULL));
	}
	/* Close out the final file, mirroring the finish-up code above. */
	if (sp->vp) {
		if (sp->fip->fi_nblocks == 0) {
			DEC_FINFO(sp);
			sp->sum_bytes_left +=
			    sizeof(FINFO) - sizeof(ufs_daddr_t);
		} else
			lfs_updatemeta(sp);

		lfs_writeinode(fs, sp, ip);
		lfs_vunref(vp);
	}
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);
	free(start, M_SEGMENT);
	return (error);

/*
 * XXX
 * If we come in to error 2, we might have indirect blocks that were
 * updated and now have bad block pointers.  I don't know what to do
 * about this.
 */

err2:	lfs_vunref(vp);
	/* Free up fakebuffers */
	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
		if ((*bpp)->b_flags & B_CALL) {
			brelvp(*bpp);
			free(*bpp, M_SEGMENT);
		} else
			brelse(*bpp);
	lfs_segunlock(fs);
err1:
	free(start, M_SEGMENT);
	return (error);
}
222 
223 /*
224  * lfs_bmapv:
225  *
226  * This will fill in the current disk address for arrays of blocks.
227  *
228  *  0 on success
229  * -1/errno is return on error.
230  */
231 struct lfs_bmapv_args {
232 	fsid_t *fsidp;		/* file system */
233 	BLOCK_INFO *blkiov;	/* block array */
234 	int blkcnt;		/* count of block array entries */
235 };
236 int
237 lfs_bmapv(p, uap, retval)
238 	struct proc *p;
239 	struct lfs_bmapv_args *uap;
240 	int *retval;
241 {
242 	BLOCK_INFO *blkp;
243 	struct mount *mntp;
244 	struct vnode *vp;
245 	fsid_t fsid;
246 	void *start;
247 	ufs_daddr_t daddr;
248 	int cnt, error, step;
249 
250 	if (error = suser(p->p_ucred, &p->p_acflag))
251 		return (error);
252 
253 	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
254 		return (error);
255 	if ((mntp = vfs_getvfs(&fsid)) == NULL)
256 		return (EINVAL);
257 
258 	cnt = uap->blkcnt;
259 	start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
260 	if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) {
261 		free(blkp, M_SEGMENT);
262 		return (error);
263 	}
264 
265 	for (step = cnt; step--; ++blkp) {
266 		if (blkp->bi_lbn == LFS_UNUSED_LBN)
267 			continue;
268 		/* Could be a deadlock ? */
269 		if (VFS_VGET(mntp, blkp->bi_inode, &vp))
270 			daddr = LFS_UNUSED_DADDR;
271 		else {
272 			if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL))
273 				daddr = LFS_UNUSED_DADDR;
274 			vput(vp);
275 		}
276 		blkp->bi_daddr = daddr;
277         }
278 	copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO));
279 	free(start, M_SEGMENT);
280 	return (0);
281 }
282 
/*
 * lfs_segclean:
 *
 * Mark the segment clean.
 *
 *  0 on success
 * -1/errno is return on error.
 */
struct lfs_segclean_args {
	fsid_t *fsidp;		/* file system */
	u_long segment;		/* segment number */
};
int
lfs_segclean(p, uap, retval)
	struct proc *p;
	struct lfs_segclean_args *uap;
	int *retval;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	struct mount *mntp;
	struct lfs *fs;
	fsid_t fsid;
	int error;

	/* Only the superuser (i.e. the cleaner) may call this. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);

	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		return (EINVAL);

	fs = VFSTOUFS(mntp)->um_lfs;

	/* The segment currently being written can never be cleaned. */
	if (datosn(fs, fs->lfs_curseg) == uap->segment)
		return (EBUSY);

	/* Nor can a segment still marked active in its SEGUSE entry. */
	LFS_SEGENTRY(sup, fs, uap->segment, bp);
	if (sup->su_flags & SEGUSE_ACTIVE) {
		brelse(bp);
		return (EBUSY);
	}
	/*
	 * Credit the reclaimed space back to the file system: the segment
	 * becomes available, and the summary and inode blocks it held are
	 * returned to the free-block count.
	 * NOTE(review): the "- 1" presumably accounts for per-segment
	 * overhead -- confirm against the allocation code in lfs_newseg.
	 */
	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
	    sup->su_ninos * btodb(fs->lfs_bsize);
	sup->su_flags &= ~SEGUSE_DIRTY;
	(void) VOP_BWRITE(bp);

	/* Update the cleaner info block: one more clean, one fewer dirty. */
	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	(void) VOP_BWRITE(bp);
	/* Wake any writer sleeping on lfs_avail for free space. */
	wakeup(&fs->lfs_avail);
	return (0);
}
340 
/*
 * lfs_segwait:
 *
 * This will block until a segment in file system fsid is written.  A timeout
 * in milliseconds may be specified which will awake the cleaner automatically.
 * An fsid of -1 means any file system, and a timeout of 0 means forever.
 *
 *  0 on success
 *  1 on timeout
 * -1/errno is return on error.
 */
struct lfs_segwait_args {
	fsid_t *fsidp;		/* file system */
	struct timeval *tv;	/* timeout */
};
int
lfs_segwait(p, uap, retval)
	struct proc *p;
	struct lfs_segwait_args *uap;
	int *retval;
{
	extern int lfs_allclean_wakeup;
	struct mount *mntp;
	struct timeval atv;
	fsid_t fsid;
	void *addr;		/* wait channel to sleep on */
	u_long timeout;		/* tsleep timeout in ticks (0 = forever) */
	int error, s;

	/* Only the superuser (i.e. the cleaner) may call this. */
	if (error = suser(p->p_ucred, &p->p_acflag)) {
		return (error);
	}
#ifdef WHEN_QUADS_WORK
	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if (fsid == (fsid_t)-1)
		addr = &lfs_allclean_wakeup;
	else {
		if ((mntp = vfs_getvfs(&fsid)) == NULL)
			return (EINVAL);
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
	}
#else
	/*
	 * fsid_t can't be compared directly yet, so an unresolvable fsid
	 * is treated as "any file system" instead of being rejected.
	 */
	if (error = copyin(uap->fsidp, &fsid, sizeof(fsid_t)))
		return (error);
	if ((mntp = vfs_getvfs(&fsid)) == NULL)
		addr = &lfs_allclean_wakeup;
	else
		addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg;
#endif

	/* Convert the user-supplied timeout to an absolute tick count. */
	if (uap->tv) {
		if (error = copyin(uap->tv, &atv, sizeof(struct timeval)))
			return (error);
		if (itimerfix(&atv))
			return (EINVAL);
		/* Read the global clock at splclock to get a stable time. */
		s = splclock();
		timevaladd(&atv, (struct timeval *)&time);
		timeout = hzto(&atv);
		splx(s);
	} else
		timeout = 0;

	/*
	 * NOTE(review): a tsleep timeout (EWOULDBLOCK) is folded into the
	 * 0 return here, not the documented 1 -- confirm against the
	 * cleaner's expectations before relying on the header comment.
	 */
	error = tsleep(addr, PCATCH | PUSER, "segment", timeout);
	return (error == ERESTART ? EINTR : 0);
}
407 
408 /*
409  * VFS_VGET call specialized for the cleaner.  The cleaner already knows the
410  * daddr from the ifile, so don't look it up again.  If the cleaner is
411  * processing IINFO structures, it may have the ondisk inode already, so
412  * don't go retrieving it again.
413  */
414 int
415 lfs_fastvget(mp, ino, daddr, vpp, dinp)
416 	struct mount *mp;
417 	ino_t ino;
418 	ufs_daddr_t daddr;
419 	struct vnode **vpp;
420 	struct dinode *dinp;
421 {
422 	register struct inode *ip;
423 	struct vnode *vp;
424 	struct ufsmount *ump;
425 	struct buf *bp;
426 	dev_t dev;
427 	int error;
428 
429 	ump = VFSTOUFS(mp);
430 	dev = ump->um_dev;
431 	/*
432 	 * This is playing fast and loose.  Someone may have the inode
433 	 * locked, in which case they are going to be distinctly unhappy
434 	 * if we trash something.
435 	 */
436 	if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) {
437 		lfs_vref(*vpp);
438 		if ((*vpp)->v_flag & VXLOCK)
439 			printf ("Cleaned vnode VXLOCKED\n");
440 		ip = VTOI(*vpp);
441 		if (ip->i_flag & IN_LOCKED)
442 			printf("cleaned vnode locked\n");
443 		if (!(ip->i_flag & IN_MODIFIED)) {
444 			++ump->um_lfs->lfs_uinodes;
445 			ip->i_flag |= IN_MODIFIED;
446 		}
447 		ip->i_flag |= IN_MODIFIED;
448 		return (0);
449 	}
450 
451 	/* Allocate new vnode/inode. */
452 	if (error = lfs_vcreate(mp, ino, &vp)) {
453 		*vpp = NULL;
454 		return (error);
455 	}
456 
457 	/*
458 	 * Put it onto its hash chain and lock it so that other requests for
459 	 * this inode will block if they arrive while we are sleeping waiting
460 	 * for old data structures to be purged or for the contents of the
461 	 * disk portion of this inode to be read.
462 	 */
463 	ip = VTOI(vp);
464 	ufs_ihashins(ip);
465 
466 	/*
467 	 * XXX
468 	 * This may not need to be here, logically it should go down with
469 	 * the i_devvp initialization.
470 	 * Ask Kirk.
471 	 */
472 	ip->i_lfs = ump->um_lfs;
473 
474 	/* Read in the disk contents for the inode, copy into the inode. */
475 	if (dinp)
476 		if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode)))
477 			return (error);
478 	else {
479 		if (error = bread(ump->um_devvp, daddr,
480 		    (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) {
481 			/*
482 			 * The inode does not contain anything useful, so it
483 			 * would be misleading to leave it on its hash chain.
484 			 * Iput() will return it to the free list.
485 			 */
486 			ufs_ihashrem(ip);
487 
488 			/* Unlock and discard unneeded inode. */
489 			lfs_vunref(vp);
490 			brelse(bp);
491 			*vpp = NULL;
492 			return (error);
493 		}
494 		ip->i_din =
495 		    *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data);
496 		brelse(bp);
497 	}
498 
499 	/* Inode was just read from user space or disk, make sure it's locked */
500 	ip->i_flag |= IN_LOCKED;
501 
502 	/*
503 	 * Initialize the vnode from the inode, check for aliases.  In all
504 	 * cases re-init ip, the underlying vnode/inode may have changed.
505 	 */
506 	if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) {
507 		lfs_vunref(vp);
508 		*vpp = NULL;
509 		return (error);
510 	}
511 	/*
512 	 * Finish inode initialization now that aliasing has been resolved.
513 	 */
514 	ip->i_devvp = ump->um_devvp;
515 	ip->i_flag |= IN_MODIFIED;
516 	++ump->um_lfs->lfs_uinodes;
517 	VREF(ip->i_devvp);
518 	*vpp = vp;
519 	return (0);
520 }
521 struct buf *
522 lfs_fakebuf(vp, lbn, size, uaddr)
523 	struct vnode *vp;
524 	int lbn;
525 	size_t size;
526 	caddr_t uaddr;
527 {
528 	struct buf *bp;
529 
530 	bp = lfs_newbuf(vp, lbn, 0);
531 	bp->b_saveaddr = uaddr;
532 	bp->b_bufsize = size;
533 	bp->b_bcount = size;
534 	bp->b_flags |= B_INVAL;
535 	return (bp);
536 }
537