1 /*- 2 * Copyright (c) 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)lfs_syscalls.c 7.22 (Berkeley) 09/02/92 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/buf.h> 13 #include <sys/mount.h> 14 #include <sys/vnode.h> 15 #include <sys/malloc.h> 16 #include <sys/kernel.h> 17 18 #include <ufs/ufs/quota.h> 19 #include <ufs/ufs/inode.h> 20 #include <ufs/ufs/ufsmount.h> 21 #include <ufs/ufs/ufs_extern.h> 22 23 #include <ufs/lfs/lfs.h> 24 #include <ufs/lfs/lfs_extern.h> 25 26 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t)); 27 28 /* 29 * lfs_markv: 30 * 31 * This will mark inodes and blocks dirty, so they are written into the log. 32 * It will block until all the blocks have been written. The segment create 33 * time passed in the block_info and inode_info structures is used to decide 34 * if the data is valid for each block (in case some process dirtied a block 35 * or inode that is being cleaned between the determination that a block is 36 * live and the lfs_markv call). 37 * 38 * 0 on success 39 * -1/errno is return on error. 40 */ 41 int 42 lfs_markv(p, uap, retval) 43 struct proc *p; 44 struct args { 45 fsid_t fsid; /* file system */ 46 BLOCK_INFO *blkiov; /* block array */ 47 int blkcnt; /* count of block array entries */ 48 } *uap; 49 int *retval; 50 { 51 struct segment *sp; 52 BLOCK_INFO *blkp; 53 IFILE *ifp; 54 struct buf *bp, **bpp; 55 struct inode *ip; 56 struct lfs *fs; 57 struct mount *mntp; 58 struct vnode *vp; 59 void *start; 60 ino_t lastino; 61 daddr_t b_daddr, v_daddr; 62 u_long bsize; 63 int cnt, error; 64 65 if (error = suser(p->p_ucred, &p->p_acflag)) 66 return (error); 67 if ((mntp = getvfs(&uap->fsid)) == NULL) 68 return (EINVAL); 69 /* Initialize a segment. */ 70 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 71 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 72 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 73 sp->seg_flags = SEGM_CKP; 74 sp->vp = NULL; 75 76 cnt = uap->blkcnt; 77 start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK); 78 if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO))) 79 goto err1; 80 81 /* Mark blocks/inodes dirty. */ 82 fs = VFSTOUFS(mntp)->um_lfs; 83 bsize = fs->lfs_bsize; 84 error = 0; 85 86 lfs_seglock(fs); 87 lfs_initseg(fs, sp); 88 sp->seg_flags |= SEGM_CLEAN; 89 for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM, 90 blkp = start; cnt--; ++blkp) { 91 /* 92 * Get the IFILE entry (only once) and see if the file still 93 * exists. 94 */ 95 if (lastino != blkp->bi_inode) { 96 if (lastino != LFS_UNUSED_INUM) { 97 lfs_updatemeta(sp); 98 lfs_writeinode(fs, sp, ip); 99 vput(vp); 100 sp->vp = NULL; 101 } 102 lastino = blkp->bi_inode; 103 if (blkp->bi_inode == LFS_IFILE_INUM) 104 v_daddr = fs->lfs_idaddr; 105 else { 106 LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); 107 v_daddr = ifp->if_daddr; 108 brelse(bp); 109 } 110 if (v_daddr == LFS_UNUSED_DADDR) 111 continue; 112 /* Get the vnode/inode. */ 113 if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp, 114 blkp->bi_lbn == LFS_UNUSED_LBN ? NULL : 115 blkp->bi_bp)) { 116 #ifdef DIAGNOSTIC 117 printf("lfs_markv: VFS_VGET failed (%d)\n", 118 blkp->bi_inode); 119 #endif 120 lastino = LFS_UNUSED_INUM; 121 v_daddr == LFS_UNUSED_DADDR; 122 continue; 123 } 124 sp->vp = vp; 125 ip = VTOI(vp); 126 } else if (v_daddr == LFS_UNUSED_DADDR) 127 continue; 128 129 /* If this BLOCK_INFO didn't contain a block, keep going. */ 130 if (blkp->bi_lbn == LFS_UNUSED_LBN) 131 continue; 132 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr) || 133 b_daddr != blkp->bi_daddr) 134 continue; 135 /* 136 * If we got to here, then we are keeping the block. If it 137 * is an indirect block, we want to actually put it in the 138 * buffer cache so that it can be updated in the finish_meta 139 * section. If it's not, we need to allocate a fake buffer 140 * so that writeseg can perform the copyin and write the buffer. 141 */ 142 if (blkp->bi_lbn >= 0) /* Data Block */ 143 bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize, 144 blkp->bi_bp); 145 else { 146 bp = getblk(vp, blkp->bi_lbn, bsize); 147 if (!(bp->b_flags & B_CACHE) && 148 (error = copyin(blkp->bi_bp, bp->b_un.b_addr, 149 bsize))) 150 goto err2; 151 if (error = VOP_BWRITE(bp)) 152 goto err2; 153 } 154 while (lfs_gatherblock(sp, bp, NULL)); 155 } 156 if (sp->vp) { 157 lfs_updatemeta(sp); 158 lfs_writeinode(fs, sp, ip); 159 vput(vp); 160 } 161 (void) lfs_writeseg(fs, sp); 162 lfs_segunlock(fs); 163 free(start, M_SEGMENT); 164 free(sp->bpp, M_SEGMENT); 165 free(sp, M_SEGMENT); 166 return (error); 167 /* 168 * XXX If we come in to error 2, we might have indirect blocks that were 169 * updated and now have bad block pointers. I don't know what to do 170 * about this. 171 */ 172 173 err2: vput(vp); 174 /* Free up fakebuffers */ 175 for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp) 176 if ((*bpp)->b_flags & B_CALL) { 177 brelvp(*bpp); 178 free(*bpp, M_SEGMENT); 179 } else 180 brelse(*bpp); 181 lfs_segunlock(fs); 182 err1: 183 free(sp->bpp, M_SEGMENT); 184 free(sp, M_SEGMENT); 185 free(start, M_SEGMENT); 186 return(error); 187 } 188 189 /* 190 * lfs_bmapv: 191 * 192 * This will fill in the current disk address for arrays of blocks. 193 * 194 * 0 on success 195 * -1/errno is return on error. 196 */ 197 int 198 lfs_bmapv(p, uap, retval) 199 struct proc *p; 200 struct args { 201 fsid_t fsid; /* file system */ 202 BLOCK_INFO *blkiov; /* block array */ 203 int blkcnt; /* count of block array entries */ 204 } *uap; 205 int *retval; 206 { 207 BLOCK_INFO *blkp; 208 struct mount *mntp; 209 struct vnode *vp; 210 void *start; 211 daddr_t daddr; 212 int cnt, error, step; 213 214 if (error = suser(p->p_ucred, &p->p_acflag)) 215 return (error); 216 if ((mntp = getvfs(&uap->fsid)) == NULL) 217 return (EINVAL); 218 219 cnt = uap->blkcnt; 220 start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK); 221 if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) { 222 free(blkp, M_SEGMENT); 223 return (error); 224 } 225 226 for (step = cnt; step--; ++blkp) { 227 if (blkp->bi_lbn == LFS_UNUSED_LBN) 228 continue; 229 if (VFS_VGET(mntp, blkp->bi_inode, &vp)) 230 daddr = LFS_UNUSED_DADDR; 231 else { 232 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr)) 233 daddr = LFS_UNUSED_DADDR; 234 vput(vp); 235 } 236 blkp->bi_daddr = daddr; 237 } 238 copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO)); 239 free(start, M_SEGMENT); 240 return (0); 241 } 242 243 /* 244 * lfs_segclean: 245 * 246 * Mark the segment clean. 247 * 248 * 0 on success 249 * -1/errno is return on error. 250 */ 251 int 252 lfs_segclean(p, uap, retval) 253 struct proc *p; 254 struct args { 255 fsid_t fsid; /* file system */ 256 u_long segment; /* segment number */ 257 } *uap; 258 int *retval; 259 { 260 CLEANERINFO *cip; 261 SEGUSE *sup; 262 struct buf *bp; 263 struct mount *mntp; 264 struct lfs *fs; 265 int error; 266 267 if (error = suser(p->p_ucred, &p->p_acflag)) 268 return (error); 269 if ((mntp = getvfs(&uap->fsid)) == NULL) 270 return (EINVAL); 271 272 fs = VFSTOUFS(mntp)->um_lfs; 273 274 LFS_SEGENTRY(sup, fs, uap->segment, bp); 275 fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1; 276 fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) + 277 sup->su_ninos * btodb(fs->lfs_bsize); 278 sup->su_flags &= ~SEGUSE_DIRTY; 279 (void) VOP_BWRITE(bp); 280 281 LFS_CLEANERINFO(cip, fs, bp); 282 ++cip->clean; 283 --cip->dirty; 284 (void) VOP_BWRITE(bp); 285 wakeup(&fs->lfs_avail); 286 return (0); 287 } 288 289 /* 290 * lfs_segwait: 291 * 292 * This will block until a segment in file system fsid is written. A timeout 293 * in milliseconds may be specified which will awake the cleaner automatically. 294 * An fsid of -1 means any file system, and a timeout of 0 means forever. 295 * 296 * 0 on success 297 * 1 on timeout 298 * -1/errno is return on error. 299 */ 300 int 301 lfs_segwait(p, uap, retval) 302 struct proc *p; 303 struct args { 304 fsid_t fsid; /* file system */ 305 struct timeval *tv; /* timeout */ 306 } *uap; 307 int *retval; 308 { 309 extern int lfs_allclean_wakeup; 310 struct mount *mntp; 311 struct timeval atv; 312 void *addr; 313 u_long timeout; 314 int error, s; 315 316 if (error = suser(p->p_ucred, &p->p_acflag)) { 317 return (error); 318 } 319 #ifdef WHEN_QUADS_WORK 320 if (uap->fsid == (fsid_t)-1) 321 addr = &lfs_allclean_wakeup; 322 else { 323 if ((mntp = getvfs(&uap->fsid)) == NULL) 324 return (EINVAL); 325 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; 326 } 327 #else 328 if ((mntp = getvfs(&uap->fsid)) == NULL) 329 addr = &lfs_allclean_wakeup; 330 else 331 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; 332 #endif 333 334 if (uap->tv) { 335 if (error = copyin(uap->tv, &atv, sizeof(struct timeval))) 336 return (error); 337 if (itimerfix(&atv)) 338 return (EINVAL); 339 s = splclock(); 340 timevaladd(&atv, (struct timeval *)&time); 341 timeout = hzto(&atv); 342 splx(s); 343 } else 344 timeout = 0; 345 346 error = tsleep(addr, PCATCH | PUSER, "segment", timeout); 347 return (error == ERESTART ? EINTR : 0); 348 } 349 350 /* 351 * VFS_VGET call specialized for the cleaner. The cleaner already knows the 352 * daddr from the ifile, so don't look it up again. If the cleaner is 353 * processing IINFO structures, it may have the ondisk inode already, so 354 * don't go retrieving it again. 355 */ 356 int 357 lfs_fastvget(mp, ino, daddr, vpp, dinp) 358 struct mount *mp; 359 ino_t ino; 360 daddr_t daddr; 361 struct vnode **vpp; 362 struct dinode *dinp; 363 { 364 register struct inode *ip; 365 struct vnode *vp; 366 struct ufsmount *ump; 367 struct buf *bp; 368 dev_t dev; 369 int error; 370 371 ump = VFSTOUFS(mp); 372 dev = ump->um_dev; 373 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) 374 return (0); 375 376 /* Allocate new vnode/inode. */ 377 if (error = lfs_vcreate(mp, ino, &vp)) { 378 *vpp = NULL; 379 return (error); 380 } 381 382 /* 383 * Put it onto its hash chain and lock it so that other requests for 384 * this inode will block if they arrive while we are sleeping waiting 385 * for old data structures to be purged or for the contents of the 386 * disk portion of this inode to be read. 387 */ 388 ip = VTOI(vp); 389 ufs_ihashins(ip); 390 391 /* 392 * XXX 393 * This may not need to be here, logically it should go down with 394 * the i_devvp initialization. 395 * Ask Kirk. 396 */ 397 ip->i_lfs = ump->um_lfs; 398 399 /* Read in the disk contents for the inode, copy into the inode. */ 400 if (dinp) 401 if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode))) 402 return (error); 403 else { 404 if (error = bread(ump->um_devvp, daddr, 405 (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) { 406 /* 407 * The inode does not contain anything useful, so it 408 * would be misleading to leave it on its hash chain. 409 * Iput() will return it to the free list. 410 */ 411 ufs_ihashrem(ip); 412 413 /* Unlock and discard unneeded inode. */ 414 ufs_iput(ip); 415 brelse(bp); 416 *vpp = NULL; 417 return (error); 418 } 419 ip->i_din = *lfs_ifind(ump->um_lfs, ino, bp->b_un.b_dino); 420 brelse(bp); 421 } 422 423 /* Inode was just read from user space or disk, make sure it's locked */ 424 ip->i_flag |= ILOCKED; 425 426 /* 427 * Initialize the vnode from the inode, check for aliases. In all 428 * cases re-init ip, the underlying vnode/inode may have changed. 429 */ 430 if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) { 431 ufs_iput(ip); 432 *vpp = NULL; 433 return (error); 434 } 435 /* 436 * Finish inode initialization now that aliasing has been resolved. 437 */ 438 ip->i_devvp = ump->um_devvp; 439 ip->i_flag |= IMOD; 440 ++ump->um_lfs->lfs_uinodes; 441 VREF(ip->i_devvp); 442 *vpp = vp; 443 return (0); 444 } 445 struct buf * 446 lfs_fakebuf(vp, lbn, size, uaddr) 447 struct vnode *vp; 448 int lbn; 449 size_t size; 450 caddr_t uaddr; 451 { 452 struct buf *bp; 453 454 bp = lfs_newbuf(vp, lbn, 0); 455 bp->b_saveaddr = uaddr; 456 bp->b_bufsize = size; 457 bp->b_bcount = size; 458 bp->b_flags |= B_INVAL; 459 return(bp); 460 } 461