1 /*- 2 * Copyright (c) 1991 The Regents of the University of California. 3 * All rights reserved. 4 * 5 * %sccs.include.redist.c% 6 * 7 * @(#)lfs_syscalls.c 7.23 (Berkeley) 09/03/92 8 */ 9 10 #include <sys/param.h> 11 #include <sys/proc.h> 12 #include <sys/buf.h> 13 #include <sys/mount.h> 14 #include <sys/vnode.h> 15 #include <sys/malloc.h> 16 #include <sys/kernel.h> 17 18 #include <ufs/ufs/quota.h> 19 #include <ufs/ufs/inode.h> 20 #include <ufs/ufs/ufsmount.h> 21 #include <ufs/ufs/ufs_extern.h> 22 23 #include <ufs/lfs/lfs.h> 24 #include <ufs/lfs/lfs_extern.h> 25 #define INC_FINFO(SP) \ 26 ++((SEGSUM *)((SP)->segsum))->ss_nfinfo 27 28 struct buf *lfs_fakebuf __P((struct vnode *, int, size_t, caddr_t)); 29 30 /* 31 * lfs_markv: 32 * 33 * This will mark inodes and blocks dirty, so they are written into the log. 34 * It will block until all the blocks have been written. The segment create 35 * time passed in the block_info and inode_info structures is used to decide 36 * if the data is valid for each block (in case some process dirtied a block 37 * or inode that is being cleaned between the determination that a block is 38 * live and the lfs_markv call). 39 * 40 * 0 on success 41 * -1/errno is return on error. 42 */ 43 int 44 lfs_markv(p, uap, retval) 45 struct proc *p; 46 struct args { 47 fsid_t fsid; /* file system */ 48 BLOCK_INFO *blkiov; /* block array */ 49 int blkcnt; /* count of block array entries */ 50 } *uap; 51 int *retval; 52 { 53 struct segment *sp; 54 BLOCK_INFO *blkp; 55 IFILE *ifp; 56 struct buf *bp, **bpp; 57 struct inode *ip; 58 struct lfs *fs; 59 struct mount *mntp; 60 struct vnode *vp; 61 void *start; 62 ino_t lastino; 63 daddr_t b_daddr, v_daddr; 64 u_long bsize; 65 int cnt, error; 66 67 if (error = suser(p->p_ucred, &p->p_acflag)) 68 return (error); 69 if ((mntp = getvfs(&uap->fsid)) == NULL) 70 return (EINVAL); 71 /* Initialize a segment. */ 72 sp = malloc(sizeof(struct segment), M_SEGMENT, M_WAITOK); 73 sp->bpp = malloc(((LFS_SUMMARY_SIZE - sizeof(SEGSUM)) / 74 sizeof(daddr_t) + 1) * sizeof(struct buf *), M_SEGMENT, M_WAITOK); 75 sp->seg_flags = SEGM_CKP; 76 sp->vp = NULL; 77 78 cnt = uap->blkcnt; 79 start = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK); 80 if (error = copyin(uap->blkiov, start, cnt * sizeof(BLOCK_INFO))) 81 goto err1; 82 83 /* Mark blocks/inodes dirty. */ 84 fs = VFSTOUFS(mntp)->um_lfs; 85 bsize = fs->lfs_bsize; 86 error = 0; 87 88 lfs_seglock(fs); 89 lfs_initseg(fs, sp); 90 sp->seg_flags |= SEGM_CLEAN; 91 for (v_daddr = LFS_UNUSED_DADDR, lastino = LFS_UNUSED_INUM, 92 blkp = start; cnt--; ++blkp) { 93 /* 94 * Get the IFILE entry (only once) and see if the file still 95 * exists. 96 */ 97 if (lastino != blkp->bi_inode) { 98 if (lastino != LFS_UNUSED_INUM) { 99 lfs_updatemeta(sp); 100 lfs_writeinode(fs, sp, ip); 101 vput(vp); 102 if (sp->fip->fi_nblocks) { 103 INC_FINFO(sp); 104 sp->fip = 105 (FINFO *) (&sp->fip->fi_blocks[sp->fip->fi_nblocks]); 106 } 107 sp->start_lbp = &sp->fip->fi_blocks[0]; 108 sp->fip->fi_version = blkp->bi_version; 109 sp->fip->fi_nblocks = 0; 110 sp->fip->fi_ino = blkp->bi_inode; 111 sp->vp = NULL; 112 } 113 lastino = blkp->bi_inode; 114 if (blkp->bi_inode == LFS_IFILE_INUM) 115 v_daddr = fs->lfs_idaddr; 116 else { 117 LFS_IENTRY(ifp, fs, blkp->bi_inode, bp); 118 v_daddr = ifp->if_daddr; 119 brelse(bp); 120 } 121 if (v_daddr == LFS_UNUSED_DADDR) 122 continue; 123 /* Get the vnode/inode. */ 124 if (lfs_fastvget(mntp, blkp->bi_inode, v_daddr, &vp, 125 blkp->bi_lbn == LFS_UNUSED_LBN ? NULL : 126 blkp->bi_bp)) { 127 #ifdef DIAGNOSTIC 128 printf("lfs_markv: VFS_VGET failed (%d)\n", 129 blkp->bi_inode); 130 #endif 131 lastino = LFS_UNUSED_INUM; 132 v_daddr == LFS_UNUSED_DADDR; 133 continue; 134 } 135 sp->vp = vp; 136 ip = VTOI(vp); 137 } else if (v_daddr == LFS_UNUSED_DADDR) 138 continue; 139 140 /* If this BLOCK_INFO didn't contain a block, keep going. */ 141 if (blkp->bi_lbn == LFS_UNUSED_LBN) 142 continue; 143 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &b_daddr) || 144 b_daddr != blkp->bi_daddr) 145 continue; 146 /* 147 * If we got to here, then we are keeping the block. If it 148 * is an indirect block, we want to actually put it in the 149 * buffer cache so that it can be updated in the finish_meta 150 * section. If it's not, we need to allocate a fake buffer 151 * so that writeseg can perform the copyin and write the buffer. 152 */ 153 if (blkp->bi_lbn >= 0) /* Data Block */ 154 bp = lfs_fakebuf(vp, blkp->bi_lbn, bsize, 155 blkp->bi_bp); 156 else { 157 bp = getblk(vp, blkp->bi_lbn, bsize); 158 if (!(bp->b_flags & B_CACHE) && 159 (error = copyin(blkp->bi_bp, bp->b_un.b_addr, 160 bsize))) 161 goto err2; 162 if (error = VOP_BWRITE(bp)) 163 goto err2; 164 } 165 while (lfs_gatherblock(sp, bp, NULL)); 166 } 167 if (sp->vp) { 168 if (sp->fip->fi_nblocks) 169 INC_FINFO(sp); 170 lfs_updatemeta(sp); 171 lfs_writeinode(fs, sp, ip); 172 vput(vp); 173 } 174 (void) lfs_writeseg(fs, sp); 175 lfs_segunlock(fs); 176 free(start, M_SEGMENT); 177 free(sp->bpp, M_SEGMENT); 178 free(sp, M_SEGMENT); 179 return (error); 180 /* 181 * XXX If we come in to error 2, we might have indirect blocks that were 182 * updated and now have bad block pointers. I don't know what to do 183 * about this. 184 */ 185 186 err2: vput(vp); 187 /* Free up fakebuffers */ 188 for (bpp = --sp->cbpp; bpp >= sp->bpp; --bpp) 189 if ((*bpp)->b_flags & B_CALL) { 190 brelvp(*bpp); 191 free(*bpp, M_SEGMENT); 192 } else 193 brelse(*bpp); 194 lfs_segunlock(fs); 195 err1: 196 free(sp->bpp, M_SEGMENT); 197 free(sp, M_SEGMENT); 198 free(start, M_SEGMENT); 199 return(error); 200 } 201 202 /* 203 * lfs_bmapv: 204 * 205 * This will fill in the current disk address for arrays of blocks. 206 * 207 * 0 on success 208 * -1/errno is return on error. 209 */ 210 int 211 lfs_bmapv(p, uap, retval) 212 struct proc *p; 213 struct args { 214 fsid_t fsid; /* file system */ 215 BLOCK_INFO *blkiov; /* block array */ 216 int blkcnt; /* count of block array entries */ 217 } *uap; 218 int *retval; 219 { 220 BLOCK_INFO *blkp; 221 struct mount *mntp; 222 struct vnode *vp; 223 void *start; 224 daddr_t daddr; 225 int cnt, error, step; 226 227 if (error = suser(p->p_ucred, &p->p_acflag)) 228 return (error); 229 if ((mntp = getvfs(&uap->fsid)) == NULL) 230 return (EINVAL); 231 232 cnt = uap->blkcnt; 233 start = blkp = malloc(cnt * sizeof(BLOCK_INFO), M_SEGMENT, M_WAITOK); 234 if (error = copyin(uap->blkiov, blkp, cnt * sizeof(BLOCK_INFO))) { 235 free(blkp, M_SEGMENT); 236 return (error); 237 } 238 239 for (step = cnt; step--; ++blkp) { 240 if (blkp->bi_lbn == LFS_UNUSED_LBN) 241 continue; 242 if (VFS_VGET(mntp, blkp->bi_inode, &vp)) 243 daddr = LFS_UNUSED_DADDR; 244 else { 245 if (VOP_BMAP(vp, blkp->bi_lbn, NULL, &daddr)) 246 daddr = LFS_UNUSED_DADDR; 247 vput(vp); 248 } 249 blkp->bi_daddr = daddr; 250 } 251 copyout(start, uap->blkiov, cnt * sizeof(BLOCK_INFO)); 252 free(start, M_SEGMENT); 253 return (0); 254 } 255 256 /* 257 * lfs_segclean: 258 * 259 * Mark the segment clean. 260 * 261 * 0 on success 262 * -1/errno is return on error. 263 */ 264 int 265 lfs_segclean(p, uap, retval) 266 struct proc *p; 267 struct args { 268 fsid_t fsid; /* file system */ 269 u_long segment; /* segment number */ 270 } *uap; 271 int *retval; 272 { 273 CLEANERINFO *cip; 274 SEGUSE *sup; 275 struct buf *bp; 276 struct mount *mntp; 277 struct lfs *fs; 278 int error; 279 280 if (error = suser(p->p_ucred, &p->p_acflag)) 281 return (error); 282 if ((mntp = getvfs(&uap->fsid)) == NULL) 283 return (EINVAL); 284 285 fs = VFSTOUFS(mntp)->um_lfs; 286 287 LFS_SEGENTRY(sup, fs, uap->segment, bp); 288 fs->lfs_avail += fsbtodb(fs, fs->lfs_ssize) - 1; 289 fs->lfs_bfree += (sup->su_nsums * LFS_SUMMARY_SIZE / DEV_BSIZE) + 290 sup->su_ninos * btodb(fs->lfs_bsize); 291 sup->su_flags &= ~SEGUSE_DIRTY; 292 (void) VOP_BWRITE(bp); 293 294 LFS_CLEANERINFO(cip, fs, bp); 295 ++cip->clean; 296 --cip->dirty; 297 (void) VOP_BWRITE(bp); 298 wakeup(&fs->lfs_avail); 299 return (0); 300 } 301 302 /* 303 * lfs_segwait: 304 * 305 * This will block until a segment in file system fsid is written. A timeout 306 * in milliseconds may be specified which will awake the cleaner automatically. 307 * An fsid of -1 means any file system, and a timeout of 0 means forever. 308 * 309 * 0 on success 310 * 1 on timeout 311 * -1/errno is return on error. 312 */ 313 int 314 lfs_segwait(p, uap, retval) 315 struct proc *p; 316 struct args { 317 fsid_t fsid; /* file system */ 318 struct timeval *tv; /* timeout */ 319 } *uap; 320 int *retval; 321 { 322 extern int lfs_allclean_wakeup; 323 struct mount *mntp; 324 struct timeval atv; 325 void *addr; 326 u_long timeout; 327 int error, s; 328 329 if (error = suser(p->p_ucred, &p->p_acflag)) { 330 return (error); 331 } 332 #ifdef WHEN_QUADS_WORK 333 if (uap->fsid == (fsid_t)-1) 334 addr = &lfs_allclean_wakeup; 335 else { 336 if ((mntp = getvfs(&uap->fsid)) == NULL) 337 return (EINVAL); 338 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; 339 } 340 #else 341 if ((mntp = getvfs(&uap->fsid)) == NULL) 342 addr = &lfs_allclean_wakeup; 343 else 344 addr = &VFSTOUFS(mntp)->um_lfs->lfs_nextseg; 345 #endif 346 347 if (uap->tv) { 348 if (error = copyin(uap->tv, &atv, sizeof(struct timeval))) 349 return (error); 350 if (itimerfix(&atv)) 351 return (EINVAL); 352 s = splclock(); 353 timevaladd(&atv, (struct timeval *)&time); 354 timeout = hzto(&atv); 355 splx(s); 356 } else 357 timeout = 0; 358 359 error = tsleep(addr, PCATCH | PUSER, "segment", timeout); 360 return (error == ERESTART ? EINTR : 0); 361 } 362 363 /* 364 * VFS_VGET call specialized for the cleaner. The cleaner already knows the 365 * daddr from the ifile, so don't look it up again. If the cleaner is 366 * processing IINFO structures, it may have the ondisk inode already, so 367 * don't go retrieving it again. 368 */ 369 int 370 lfs_fastvget(mp, ino, daddr, vpp, dinp) 371 struct mount *mp; 372 ino_t ino; 373 daddr_t daddr; 374 struct vnode **vpp; 375 struct dinode *dinp; 376 { 377 register struct inode *ip; 378 struct vnode *vp; 379 struct ufsmount *ump; 380 struct buf *bp; 381 dev_t dev; 382 int error; 383 384 ump = VFSTOUFS(mp); 385 dev = ump->um_dev; 386 if ((*vpp = ufs_ihashget(dev, ino)) != NULL) { 387 ip = VTOI(*vpp); 388 ip->i_flag |= IMOD; 389 return (0); 390 } 391 392 /* Allocate new vnode/inode. */ 393 if (error = lfs_vcreate(mp, ino, &vp)) { 394 *vpp = NULL; 395 return (error); 396 } 397 398 /* 399 * Put it onto its hash chain and lock it so that other requests for 400 * this inode will block if they arrive while we are sleeping waiting 401 * for old data structures to be purged or for the contents of the 402 * disk portion of this inode to be read. 403 */ 404 ip = VTOI(vp); 405 ufs_ihashins(ip); 406 407 /* 408 * XXX 409 * This may not need to be here, logically it should go down with 410 * the i_devvp initialization. 411 * Ask Kirk. 412 */ 413 ip->i_lfs = ump->um_lfs; 414 415 /* Read in the disk contents for the inode, copy into the inode. */ 416 if (dinp) 417 if (error = copyin(dinp, &ip->i_din, sizeof(struct dinode))) 418 return (error); 419 else { 420 if (error = bread(ump->um_devvp, daddr, 421 (int)ump->um_lfs->lfs_bsize, NOCRED, &bp)) { 422 /* 423 * The inode does not contain anything useful, so it 424 * would be misleading to leave it on its hash chain. 425 * Iput() will return it to the free list. 426 */ 427 ufs_ihashrem(ip); 428 429 /* Unlock and discard unneeded inode. */ 430 ufs_iput(ip); 431 brelse(bp); 432 *vpp = NULL; 433 return (error); 434 } 435 ip->i_din = *lfs_ifind(ump->um_lfs, ino, bp->b_un.b_dino); 436 brelse(bp); 437 } 438 439 /* Inode was just read from user space or disk, make sure it's locked */ 440 ip->i_flag |= ILOCKED; 441 442 /* 443 * Initialize the vnode from the inode, check for aliases. In all 444 * cases re-init ip, the underlying vnode/inode may have changed. 445 */ 446 if (error = ufs_vinit(mp, lfs_specop_p, LFS_FIFOOPS, &vp)) { 447 ufs_iput(ip); 448 *vpp = NULL; 449 return (error); 450 } 451 /* 452 * Finish inode initialization now that aliasing has been resolved. 453 */ 454 ip->i_devvp = ump->um_devvp; 455 ip->i_flag |= IMOD; 456 ++ump->um_lfs->lfs_uinodes; 457 VREF(ip->i_devvp); 458 *vpp = vp; 459 return (0); 460 } 461 struct buf * 462 lfs_fakebuf(vp, lbn, size, uaddr) 463 struct vnode *vp; 464 int lbn; 465 size_t size; 466 caddr_t uaddr; 467 { 468 struct buf *bp; 469 470 bp = lfs_newbuf(vp, lbn, 0); 471 bp->b_saveaddr = uaddr; 472 bp->b_bufsize = size; 473 bp->b_bcount = size; 474 bp->b_flags |= B_INVAL; 475 return(bp); 476 } 477