/*-
 * Copyright (c) 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * %sccs.include.redist.c%
 *
 *	@(#)lfs_syscalls.c	8.3 (Berkeley) 09/23/93
 */

#include <sys/param.h>
#include <sys/proc.h>
#include <sys/buf.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/kernel.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

#include <ufs/lfs/lfs.h>
#include <ufs/lfs/lfs_extern.h>

/*
 * Advance the segment's FINFO pointer past the current entry's block
 * array, so the next file's FINFO is built immediately after it.
 */
#define BUMP_FIP(SP) \
	(SP)->fip = (FINFO *) (&(SP)->fip->fi_blocks[(SP)->fip->fi_nblocks])

/* Adjust the FINFO count kept in the in-memory segment summary. */
#define INC_FINFO(SP) ++((SEGSUM *)((SP)->segsum))->ss_nfinfo
#define DEC_FINFO(SP) --((SEGSUM *)((SP)->segsum))->ss_nfinfo

/*
 * Before committing to add something to a segment summary, make sure there
 * is enough room.  S is the bytes added to the summary.  If there is not
 * enough room, push the current partial segment out, which starts a fresh
 * summary block.  Note the macro relies on `fs' and `sp' being in scope.
 */
#define	CHECK_SEG(s)			\
if (sp->sum_bytes_left < (s)) {		\
	(void) lfs_writeseg(fs, sp);	\
}

struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t));

/*
 * lfs_markv:
 *
 * This will mark inodes and blocks dirty, so they are written into the log.
 * It will block until all the blocks have been written.  The segment create
 * time passed in the block_info and inode_info structures is used to decide
 * if the data is valid for each block (in case some process dirtied a block
 * or inode that is being cleaned between the determination that a block is
 * live and the lfs_markv call).
 *
 * 0 on success
 * -1/errno is returned on error.
 */
struct lfs_markv_args {
	fsid_t fsid;		/* file system */
	BLOCK_INFO *blkiov;	/* block array */
	int blkcnt;		/* count of block array entries */
};
int
lfs_markv(p, uap, retval)
	struct proc *p;
	struct lfs_markv_args *uap;
	int *retval;
{
	struct segment *sp;
	BLOCK_INFO *blkp;
	IFILE *ifp;
	struct buf *bp, **bpp;
	struct inode *ip;
	struct lfs *fs;
	struct mount *mntp;
	struct vnode *vp;
	void *start;		/* base of the copied-in BLOCK_INFO array */
	ino_t lastino;		/* inode of the previous iteration's entry */
	daddr_t b_daddr, v_daddr;
	u_long bsize;
	int cnt, error;

	/* Only the superuser (i.e. the cleaner) may call this. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	/* Copy the cleaner's BLOCK_INFO array into kernel space. */
	cnt = uap->blkcnt;
	start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK);
	if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO)))
		goto err1;

	/* Mark blocks/inodes dirty.  */
	fs = VFSTOUFS(mntp)->um_lfs;
	bsize = fs->lfs_bsize;
	error = 0;

	/*
	 * Take the segment lock in synchronous cleaning mode and walk the
	 * BLOCK_INFO array.  Entries for the same inode are expected to be
	 * adjacent: a change in bi_inode finishes the previous file and
	 * starts a new FINFO.
	 */
	lfs_seglock(fs, SEGM_SYNC | SEGM_CLEAN);
	sp = fs->lfs_sp;
	for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM,
	    blkp = start; cnt--; ++blkp) {
		/*
		 * Get the IFILE entry (only once) and see if the file still
		 * exists.
		 */
		if (lastino != blkp->bi_inode) {
			if (lastino != LFS_UNUSED_INUM) {
				/* Finish up last file */
				lfs_updatemeta(sp);
				lfs_writeinode(fs, sp, ip);
				lfs_vunref(vp);
				if (sp->fip->fi_nblocks)
					BUMP_FIP(sp);
				else {
					/*
					 * No blocks were gathered for the
					 * file; back out its FINFO and give
					 * the summary bytes back.
					 */
					DEC_FINFO(sp);
					sp->sum_bytes_left +=
					    sizeof(FINFO) - sizeof(daddr_t);
				}
			}

			/* Start a new file */
			CHECK_SEG(sizeof(FINFO));
			sp->sum_bytes_left -= sizeof(FINFO) - sizeof(daddr_t);
			INC_FINFO(sp);
			sp->start_lbp = &sp->fip->fi_blocks[0];
			sp->vp = NULL;
			sp->fip->fi_version = blkp->bi_version;
			sp->fip->fi_nblocks = 0;
			sp->fip->fi_ino = blkp->bi_inode;
			lastino = blkp->bi_inode;
			if (blkp->bi_inode == LFS_IFILE_INUM)
				v_daddr = fs->lfs_idaddr;
			else {
				LFS_IENTRY(ifp, fs, blkp->bi_inode, bp);
				v_daddr = ifp->if_daddr;
				brelse(bp);
			}
			/* File has been deleted; skip all its entries. */
			if (v_daddr == LFS_UNUSED_DADDR)
				continue;

			/*
			 * Get the vnode/inode.  If this BLOCK_INFO carries
			 * the on-disk inode itself (bi_lbn == UNUSED), hand
			 * the user's copy to lfs_fastvget so it need not go
			 * back to disk.
			 */
			if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp,
			    blkp->bi_lbn == LFS_UNUSED_LBN ?
			    blkp->bi_bp : NULL)) {
#ifdef DIAGNOSTIC
				printf("lfs_markv: VFS_VGET failed (%d)\n",
				    blkp->bi_inode);
#endif
				lastino = LFS_UNUSED_INUM;
				v_daddr = LFS_UNUSED_DADDR;
				continue;
			}
			sp->vp = vp;
			ip = VTOI(vp);
		} else if (v_daddr == LFS_UNUSED_DADDR)
			continue;

		/* If this BLOCK_INFO didn't contain a block, keep going. */
		if (blkp->bi_lbn == LFS_UNUSED_LBN)
			continue;
		/*
		 * If the block has moved since the cleaner looked (or BMAP
		 * fails), the cleaner's copy is stale: skip it.
		 */
		if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr, NULL) ||
		    b_daddr != blkp->bi_daddr)
			continue;
		/*
		 * If we got to here, then we are keeping the block.  If it
		 * is an indirect block, we want to actually put it in the
		 * buffer cache so that it can be updated in the finish_meta
		 * section.  If it's not, we need to allocate a fake buffer
		 * so that writeseg can perform the copyin and write the
		 * buffer.
		 */
		if (blkp->bi_lbn >= 0)	/* Data Block */
			bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize,
			    blkp->bi_bp);
		else {
			/* Indirect block (negative lbn): real buffer. */
			bp = getblk(vp, blkp->bi_lbn, bsize, 0, 0);
			if (!(bp->b_flags & (B_DELWRI | B_DONE | B_CACHE)) &&
			    (error = copyin(blkp->bi_bp, bp->b_data,
			    bsize)))
				goto err2;
			if (error = VOP_BWRITE(bp))
				goto err2;
		}
		/* Loops until the block fits in the current segment. */
		while (lfs_gatherblock(sp, bp, NULL));
	}
	/* Finish the last file, mirroring the per-file epilogue above. */
	if (sp->vp) {
		lfs_updatemeta(sp);
		lfs_writeinode(fs, sp, ip);
		lfs_vunref(vp);
		if (!sp->fip->fi_nblocks) {
			DEC_FINFO(sp);
			sp->sum_bytes_left += sizeof(FINFO) - sizeof(daddr_t);
		}
	}
	(void) lfs_writeseg(fs, sp);
	lfs_segunlock(fs);
	free(start, M_SEGMENT);
	return (error);

/*
 * XXX If we come in to error 2, we might have indirect blocks that were
 * updated and now have bad block pointers.  I don't know what to do
 * about this.
 */

err2:	lfs_vunref(vp);
	/* Free up fakebuffers -- have to take these from the buffer list. */
	for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp)
		if ((*bpp)->b_flags & B_CALL) {
			brelvp(*bpp);
			free(*bpp, M_SEGMENT);
		} else
			brelse(*bpp);
	lfs_segunlock(fs);
err1:
	free(start, M_SEGMENT);
	return(error);
}

/*
 * lfs_bmapv:
 *
 * This will fill in the current disk address for arrays of blocks.
 *
 * 0 on success
 * -1/errno is return on error.
222 */ 223 struct lfs_bmapv_args { 224 fsid_t fsid; /* file system */ 225 BLOCK_INFO *blkiov; /* block array */ 226 int blkcnt; /* count of block array entries */ 227 }; 228 int 229 lfs_bmapv(p, uap, retval) 230 struct proc *p; 231 struct lfs_bmapv_args *uap; 232 int *retval; 233 { 234 BLOCK_INFO *blkp; 235 struct mount *mntp; 236 struct vnode *vp; 237 void *start; 238 daddr_t daddr; 239 int cnt, error, step; 240 241 if (error = suser(p->p_ucred, &p->p_acflag)) 242 return (error); 243 if ((mntp = getvfs(&uap->fsid)) == NULL) 244 return (EINVAL); 245 246 cnt = uap->blkcnt; 247 start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK); 248 if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) { 249 free(blkp, M_SEGMENT); 250 return (error); 251 } 252 253 for (step = cnt; step--; ++blkp) { 254 if (blkp->bi_lbn == LFS_UNUSED_LBN) 255 continue; 256 /* Could be a deadlock ? */ 257 if (VFS_VGET(mntp, blkp->bi_inode, &vp)) 258 daddr = LFS_UNUSED_DADDR; 259 else { 260 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr, NULL)) 261 daddr = LFS_UNUSED_DADDR; 262 vput(vp); 263 } 264 blkp->bi_daddr = daddr; 265 } 266 copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO)); 267 free(start, M_SEGMENT); 268 return (0); 269 } 270 271 /* 272 * lfs_segclean: 273 * 274 * Mark the segment clean. 275 * 276 * 0 on success 277 * -1/errno is return on error. 
 */
struct lfs_segclean_args {
	fsid_t fsid;		/* file system */
	u_long segment;		/* segment number */
};
int
lfs_segclean(p, uap, retval)
	struct proc *p;
	struct lfs_segclean_args *uap;
	int *retval;
{
	CLEANERINFO *cip;
	SEGUSE *sup;
	struct buf *bp;
	struct mount *mntp;
	struct lfs *fs;
	int error;

	/* Only the superuser (i.e. the cleaner) may call this. */
	if (error = suser(p->p_ucred, &p->p_acflag))
		return (error);
	if ((mntp = getvfs(&uap->fsid)) == NULL)
		return (EINVAL);

	fs = VFSTOUFS(mntp)->um_lfs;

	/* The segment being written to can never be marked clean. */
	if (datosn(fs, fs->lfs_curseg) == uap->segment)
		return (EBUSY);

	/* Likewise for a segment whose SEGUSE entry says it is active. */
	LFS_SEGENTRY(sup, fs, uap->segment, bp);
	if (sup->su_flags & SEGUSE_ACTIVE) {
		brelse(bp);
		return (EBUSY);
	}
	/*
	 * Credit the freed space back to the file system counters:
	 * the whole segment (minus one -- presumably reserved for the
	 * superblock/summary; confirm against lfs_newseg) to lfs_avail,
	 * and the summaries plus inode blocks it held to lfs_bfree.
	 */
	fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1;
	fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) +
	    sup->su_ninos * btodb(fs->lfs_bsize);
	sup->su_flags &= ~SEGUSE_DIRTY;
	(void) VOP_BWRITE(bp);

	/* Move the segment from the dirty count to the clean count. */
	LFS_CLEANERINFO(cip, fs, bp);
	++cip->clean;
	--cip->dirty;
	(void) VOP_BWRITE(bp);
	/* Wake anyone sleeping for free space in lfs_write/lfs_newseg. */
	wakeup(&fs->lfs_avail);
	return (0);
}

/*
 * lfs_segwait:
 *
 * This will block until a segment in file system fsid is written.  A timeout
 * in milliseconds may be specified which will awake the cleaner automatically.
 * An fsid of -1 means any file system, and a timeout of 0 means forever.
 *
 * 0 on success
 * 1 on timeout
 * -1/errno is return on error.
335 */ 336 struct lfs_segwait_args { 337 fsid_t fsid; /* file system */ 338 struct timeval *tv; /* timeout */ 339 }; 340 int 341 lfs_segwait(p, uap, retval) 342 struct proc *p; 343 struct lfs_segwait_args *uap; 344 int *retval; 345 { 346 extern int lfs_allclean_wakeup; 347 struct mount *mntp; 348 struct timeval atv; 349 void *addr; 350 u_long timeout; 351 int error, s; 352 353 if (error = suser(p->p_ucred, &p->p_acflag)) { 354 return (error); 355 } 356 #ifdef WHEN_QUADS_WORK 357 if (uap->fsid == (fsid_t)-1) 358 addr = &lfs_allclean_wakeup; 359 else { 360 if ((mntp = getvfs(&uap->fsid)) == NULL) 361 return (EINVAL); 362 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; 363 } 364 #else 365 if ((mntp = getvfs(&uap->fsid)) == NULL) 366 addr = &lfs_allclean_wakeup; 367 else 368 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; 369 #endif 370 371 if (uap->tv) { 372 if (error = copyin(uap->tv, &atv, sizeof(struct timeval))) 373 return (error); 374 if (itimerfix(&atv)) 375 return (EINVAL); 376 s = splclock(); 377 timevaladd(&atv, (struct timeval *)&time); 378 timeout = hzto(&atv); 379 splx(s); 380 } else 381 timeout = 0; 382 383 error = tsleep(addr, PCATCH | PUSER, "segment", timeout); 384 return (error == ERESTART ? EINTR : 0); 385 } 386 387 /* 388 * VFS_VGET call specialized for the cleaner. The cleaner already knows the 389 * daddr from the ifile, so don't look it up again. If the cleaner is 390 * processing IINFO structures, it may have the ondisk inode already, so 391 * don't go retrieving it again. 392 */ 393 int 394 lfs_fastvget(mp, ino, daddr, vpp, dinp) 395 struct mount *mp; 396 ino_t ino; 397 daddr_t daddr; 398 struct vnode **vpp; 399 struct dinode *dinp; 400 { 401 register struct inode *ip; 402 struct vnode *vp; 403 struct ufsmount *ump; 404 struct buf *bp; 405 dev_t dev; 406 int error; 407 408 ump = VFSTOUFS(mp); 409 dev = ump->um_dev; 410 /* 411 * This is playing fast and loose. 
Someone may have the inode 412 * locked, in which case they are going to be distinctly unhappy 413 * if we trash something. 414 */ 415 if ((*vpp = ufs_ihashlookup(dev, ino)) != NULL) { 416 lfs_vref(*vpp); 417 if ((*vpp)->v_flag & VXLOCK) 418 printf ("Cleaned vnode VXLOCKED\n"); 419 ip = VTOI(*vpp); 420 if (ip->i_flags & IN_LOCKED) 421 printf("cleaned vnode locked\n"); 422 if (!(ip->i_flag & IN_MODIFIED)) { 423 ++ump->um_lfs->lfs_uinodes; 424 ip->i_flag |= IN_MODIFIED; 425 } 426 ip->i_flag |= IN_MODIFIED; 427 return (0); 428 } 429 430 /* Allocate new vnode/inode. */ 431 if (error = lfs_vcreate(mp, ino, &vp)) { 432 *vpp = NULL; 433 return (error); 434 } 435 436 /* 437 * Put it onto its hash chain and lock it so that other requests for 438 * this inode will block if they arrive while we are sleeping waiting 439 * for old data structures to be purged or for the contents of the 440 * disk portion of this inode to be read. 441 */ 442 ip = VTOI(vp); 443 ufs_ihashins(ip); 444 445 /* 446 * XXX 447 * This may not need to be here, logically it should go down with 448 * the i_devvp initialization. 449 * Ask Kirk. 450 */ 451 ip->i_lfs = ump->um_lfs; 452 453 /* Read in the disk contents for the inode, copy into the inode. */ 454 if (dinp) 455 if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode))) 456 return (error); 457 else { 458 if (error = bread(ump->um_devvp, daddr, 459 (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) { 460 /* 461 * The inode does not contain anything useful, so it 462 * would be misleading to leave it on its hash chain. 463 * Iput() will return it to the free list. 464 */ 465 ufs_ihashrem(ip); 466 467 /* Unlock and discard unneeded inode. 
*/ 468 lfs_vunref(vp); 469 brelse(bp); 470 *vpp = NULL; 471 return (error); 472 } 473 ip->i_din = 474 *lfs_ifind(ump->um_lfs, ino, (struct dinode *)bp->b_data); 475 brelse(bp); 476 } 477 478 /* Inode was just read from user space or disk, make sure it's locked */ 479 ip->i_flag |= IN_LOCKED; 480 481 /* 482 * Initialize the vnode from the inode, check for aliases. In all 483 * cases re-init ip, the underlying vnode/inode may have changed. 484 */ 485 if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) { 486 lfs_vunref(vp); 487 *vpp = NULL; 488 return (error); 489 } 490 /* 491 * Finish inode initialization now that aliasing has been resolved. 492 */ 493 ip->i_devvp = ump->um_devvp; 494 ip->i_flag |= IN_MODIFIED; 495 ++ump->um_lfs->lfs_uinodes; 496 VREF(ip->i_devvp); 497 *vpp = vp; 498 return (0); 499 } 500 struct buf * 501 lfs_fakebuf(vp, lbn, size, uaddr) 502 struct vnode *vp; 503 int lbn; 504 size_t size; 505 caddr_t uaddr; 506 { 507 struct buf *bp; 508 509 bp = lfs_newbuf(vp, lbn, 0); 510 bp->b_saveaddr = uaddr; 511 bp->b_bufsize = size; 512 bp->b_bcount = size; 513 bp->b_flags |= B_INVAL; 514 return(bp); 515 } 516