1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/rwlock.h>
#include <sys/vnode.h>
#include <sys/mount.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/stat.h>

#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vnode_pager.h>

#include <ufs/ufs/extattr.h>
#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

static ufs_lbn_t lbn_count(struct ufsmount *, int);
static int readindir(struct vnode *, ufs_lbn_t, ufs2_daddr_t, struct buf **);

/*
 * Bmap converts the logical block number of a file to its physical block
 * number on the disk. The conversion is done by using the logical block
 * number to index into the array of block pointers described by the dinode.
 */
int
ufs_bmap(
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct bufobj **a_bop;
		daddr_t *a_bnp;
		int *a_runp;
		int *a_runb;
	} */ *ap)
{
	ufs2_daddr_t blkno;
	int error;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bop != NULL)
		*ap->a_bop = &VFSTOUFS(ap->a_vp->v_mount)->um_devvp->v_bufobj;
	if (ap->a_bnp == NULL)
		return (0);

	/* Delegate the real work; run-length results go straight through. */
	error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno, NULL,
	    ap->a_runp, ap->a_runb);
	*ap->a_bnp = blkno;
	return (error);
}

/*
 * Fetch the indirect block at logical block number "lbn" whose disk address
 * is "daddr", returning the locked buffer in *bpp on success.  If the block
 * is already valid in the buffer cache (B_CACHE set), no I/O is issued;
 * otherwise the block is read synchronously from "daddr" and charged to the
 * current thread's resource accounting.  On I/O failure the buffer is
 * released and the bufwait() error is returned; *bpp is left untouched.
 */
static int
readindir(struct vnode *vp,
	ufs_lbn_t lbn,
	ufs2_daddr_t daddr,
	struct buf **bpp)
{
	struct buf *bp;
	struct mount *mp;
	struct ufsmount *ump;
	int error;

	mp = vp->v_mount;
	ump = VFSTOUFS(mp);

	bp = getblk(vp, lbn, mp->mnt_stat.f_iosize, 0, 0, 0);
	if ((bp->b_flags & B_CACHE) == 0) {
		/* Caller guarantees an on-disk address when not cached. */
		KASSERT(daddr != 0,
		    ("readindir: indirect block not in cache"));

		bp->b_blkno = blkptrtodb(ump, daddr);
		bp->b_iocmd = BIO_READ;
		bp->b_flags &= ~B_INVAL;
		bp->b_ioflags &= ~BIO_ERROR;
		vfs_busy_pages(bp, 0);
		bp->b_iooffset = dbtob(bp->b_blkno);
		bstrategy(bp);
#ifdef RACCT
		if (racct_enable) {
			PROC_LOCK(curproc);
			racct_add_buf(curproc, bp, 0);
			PROC_UNLOCK(curproc);
		}
#endif /* RACCT */
		curthread->td_ru.ru_inblock++;
		/* Synchronous read: wait for completion before returning. */
		error = bufwait(bp);
		if (error != 0) {
			brelse(bp);
			return (error);
		}
	}
	*bpp = bp;
	return (0);
}

/*
 * Indirect blocks are now on the vnode for the file.  They are given negative
 * logical block numbers.  Indirect blocks are addressed by the negative
 * address of the first data block to which they point.  Double indirect blocks
 * are addressed by one less than the address of the first indirect block to
 * which they point.  Triple indirect blocks are addressed by one less than
 * the address of the first double indirect block to which they point.
 *
 * ufs_bmaparray does the bmap conversion, and if requested returns the
 * array of logical blocks which must be traversed to get to a block.
150 * Each entry contains the offset into that block that gets you to the 151 * next block and the disk address of the block (if it is assigned). 152 */ 153 154 int 155 ufs_bmaparray(struct vnode *vp, 156 ufs2_daddr_t bn, 157 ufs2_daddr_t *bnp, 158 struct buf *nbp, 159 int *runp, 160 int *runb) 161 { 162 struct inode *ip; 163 struct buf *bp; 164 struct ufsmount *ump; 165 struct mount *mp; 166 struct indir a[UFS_NIADDR+1], *ap; 167 ufs2_daddr_t daddr; 168 ufs_lbn_t metalbn; 169 int error, num, maxrun = 0; 170 int *nump; 171 172 ap = NULL; 173 ip = VTOI(vp); 174 mp = vp->v_mount; 175 ump = VFSTOUFS(mp); 176 177 if (runp) { 178 maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; 179 *runp = 0; 180 } 181 182 if (runb) { 183 *runb = 0; 184 } 185 186 ap = a; 187 nump = # 188 error = ufs_getlbns(vp, bn, ap, nump); 189 if (error) 190 return (error); 191 192 num = *nump; 193 if (num == 0) { 194 if (bn >= 0 && bn < UFS_NDADDR) { 195 *bnp = blkptrtodb(ump, DIP(ip, i_db[bn])); 196 } else if (bn < 0 && bn >= -UFS_NXADDR) { 197 *bnp = blkptrtodb(ump, ip->i_din2->di_extb[-1 - bn]); 198 if (*bnp == 0) 199 *bnp = -1; 200 if (nbp == NULL) { 201 /* indirect block not found */ 202 return (EINVAL); 203 } 204 nbp->b_xflags |= BX_ALTDATA; 205 return (0); 206 } else { 207 /* blkno out of range */ 208 return (EINVAL); 209 } 210 /* 211 * Since this is FFS independent code, we are out of 212 * scope for the definitions of BLK_NOCOPY and 213 * BLK_SNAP, but we do know that they will fall in 214 * the range 1..um_seqinc, so we use that test and 215 * return a request for a zeroed out buffer if attempts 216 * are made to read a BLK_NOCOPY or BLK_SNAP block. 217 */ 218 if (IS_SNAPSHOT(ip) && DIP(ip, i_db[bn]) > 0 && 219 DIP(ip, i_db[bn]) < ump->um_seqinc) { 220 *bnp = -1; 221 } else if (*bnp == 0) { 222 *bnp = IS_SNAPSHOT(ip) ? 
blkptrtodb(ump, 223 bn * ump->um_seqinc) : -1; 224 } else if (runp) { 225 ufs2_daddr_t bnb = bn; 226 for (++bn; bn < UFS_NDADDR && *runp < maxrun && 227 is_sequential(ump, DIP(ip, i_db[bn - 1]), 228 DIP(ip, i_db[bn])); 229 ++bn, ++*runp); 230 bn = bnb; 231 if (runb && (bn > 0)) { 232 for (--bn; (bn >= 0) && (*runb < maxrun) && 233 is_sequential(ump, DIP(ip, i_db[bn]), 234 DIP(ip, i_db[bn+1])); 235 --bn, ++*runb); 236 } 237 } 238 return (0); 239 } 240 241 /* Get disk address out of indirect block array */ 242 daddr = DIP(ip, i_ib[ap->in_off]); 243 244 for (bp = NULL, ++ap; --num; ++ap) { 245 /* 246 * Exit the loop if there is no disk address assigned yet and 247 * the indirect block isn't in the cache, or if we were 248 * looking for an indirect block and we've found it. 249 */ 250 251 metalbn = ap->in_lbn; 252 if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn) 253 break; 254 /* 255 * If we get here, we've either got the block in the cache 256 * or we have a disk address for it, go fetch it. 
257 */ 258 if (bp) 259 bqrelse(bp); 260 error = readindir(vp, metalbn, daddr, &bp); 261 if (error != 0) 262 return (error); 263 264 if (I_IS_UFS1(ip)) 265 daddr = ((ufs1_daddr_t *)bp->b_data)[ap->in_off]; 266 else 267 daddr = ((ufs2_daddr_t *)bp->b_data)[ap->in_off]; 268 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, daddr, 269 mp->mnt_stat.f_iosize)) != 0) { 270 bqrelse(bp); 271 return (error); 272 } 273 if (I_IS_UFS1(ip)) { 274 if (num == 1 && daddr && runp) { 275 for (bn = ap->in_off + 1; 276 bn < MNINDIR(ump) && *runp < maxrun && 277 is_sequential(ump, 278 ((ufs1_daddr_t *)bp->b_data)[bn - 1], 279 ((ufs1_daddr_t *)bp->b_data)[bn]); 280 ++bn, ++*runp); 281 bn = ap->in_off; 282 if (runb && bn) { 283 for (--bn; bn >= 0 && *runb < maxrun && 284 is_sequential(ump, 285 ((ufs1_daddr_t *)bp->b_data)[bn], 286 ((ufs1_daddr_t *)bp->b_data)[bn+1]); 287 --bn, ++*runb); 288 } 289 } 290 continue; 291 } 292 if (num == 1 && daddr && runp) { 293 for (bn = ap->in_off + 1; 294 bn < MNINDIR(ump) && *runp < maxrun && 295 is_sequential(ump, 296 ((ufs2_daddr_t *)bp->b_data)[bn - 1], 297 ((ufs2_daddr_t *)bp->b_data)[bn]); 298 ++bn, ++*runp); 299 bn = ap->in_off; 300 if (runb && bn) { 301 for (--bn; bn >= 0 && *runb < maxrun && 302 is_sequential(ump, 303 ((ufs2_daddr_t *)bp->b_data)[bn], 304 ((ufs2_daddr_t *)bp->b_data)[bn + 1]); 305 --bn, ++*runb); 306 } 307 } 308 } 309 if (bp) 310 bqrelse(bp); 311 312 /* 313 * Since this is FFS independent code, we are out of scope for the 314 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they 315 * will fall in the range 1..um_seqinc, so we use that test and 316 * return a request for a zeroed out buffer if attempts are made 317 * to read a BLK_NOCOPY or BLK_SNAP block. 
318 */ 319 if (IS_SNAPSHOT(ip) && daddr > 0 && daddr < ump->um_seqinc){ 320 *bnp = -1; 321 return (0); 322 } 323 *bnp = blkptrtodb(ump, daddr); 324 if (*bnp == 0) { 325 if (IS_SNAPSHOT(ip)) 326 *bnp = blkptrtodb(ump, bn * ump->um_seqinc); 327 else 328 *bnp = -1; 329 } 330 return (0); 331 } 332 333 static ufs_lbn_t 334 lbn_count(struct ufsmount *ump, int level) 335 { 336 ufs_lbn_t blockcnt; 337 338 for (blockcnt = 1; level > 0; level--) 339 blockcnt *= MNINDIR(ump); 340 return (blockcnt); 341 } 342 343 int 344 ufs_bmap_seekdata(struct vnode *vp, off_t *offp) 345 { 346 struct buf *bp; 347 struct indir a[UFS_NIADDR + 1], *ap; 348 struct inode *ip; 349 struct mount *mp; 350 struct ufsmount *ump; 351 ufs2_daddr_t bn, daddr, nextbn; 352 uint64_t bsize; 353 off_t numblks; 354 int error, num, num1, off; 355 356 bp = NULL; 357 error = 0; 358 ip = VTOI(vp); 359 mp = vp->v_mount; 360 ump = VFSTOUFS(mp); 361 362 if (vp->v_type != VREG || IS_SNAPSHOT(ip)) 363 return (EINVAL); 364 if (*offp < 0 || *offp >= ip->i_size) 365 return (ENXIO); 366 367 /* 368 * We could have pages on the vnode' object queue which still 369 * do not have the data blocks allocated. Convert all dirty 370 * pages into buffer writes to ensure that we see all 371 * allocated data. 
372 */ 373 vnode_pager_clean_sync(vp); 374 375 bsize = mp->mnt_stat.f_iosize; 376 for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize); 377 bn < numblks; bn = nextbn) { 378 if (bn < UFS_NDADDR) { 379 daddr = DIP(ip, i_db[bn]); 380 if (daddr != 0) 381 break; 382 nextbn = bn + 1; 383 continue; 384 } 385 386 ap = a; 387 error = ufs_getlbns(vp, bn, ap, &num); 388 if (error != 0) 389 break; 390 MPASS(num >= 2); 391 daddr = DIP(ip, i_ib[ap->in_off]); 392 ap++, num--; 393 for (nextbn = UFS_NDADDR, num1 = num - 1; num1 > 0; num1--) 394 nextbn += lbn_count(ump, num1); 395 if (daddr == 0) { 396 nextbn += lbn_count(ump, num); 397 continue; 398 } 399 400 for (; daddr != 0 && num > 0; ap++, num--) { 401 if (bp != NULL) 402 bqrelse(bp); 403 error = readindir(vp, ap->in_lbn, daddr, &bp); 404 if (error != 0) 405 return (error); 406 407 /* 408 * Scan the indirect block until we find a non-zero 409 * pointer. 410 */ 411 off = ap->in_off; 412 do { 413 daddr = I_IS_UFS1(ip) ? 414 ((ufs1_daddr_t *)bp->b_data)[off] : 415 ((ufs2_daddr_t *)bp->b_data)[off]; 416 } while (daddr == 0 && ++off < MNINDIR(ump)); 417 nextbn += off * lbn_count(ump, num - 1); 418 419 /* 420 * We need to recompute the LBNs of indirect 421 * blocks, so restart with the updated block offset. 422 */ 423 if (off != ap->in_off) 424 break; 425 } 426 if (num == 0) { 427 /* 428 * We found a data block. 429 */ 430 bn = nextbn; 431 break; 432 } 433 } 434 if (bp != NULL) 435 bqrelse(bp); 436 if (bn >= numblks) 437 error = ENXIO; 438 if (error == 0 && *offp < bn * bsize) 439 *offp = bn * bsize; 440 return (error); 441 } 442 443 /* 444 * Create an array of logical block number/offset pairs which represent the 445 * path of indirect blocks required to access a data block. The first "pair" 446 * contains the logical block number of the appropriate single, double or 447 * triple indirect block and the offset into the inode indirect block array. 
 * Note, the logical block number of the inode single/double/triple indirect
 * block appears twice in the array, once with the offset into the i_ib and
 * once with the offset into the page itself.
 */
int
ufs_getlbns(struct vnode *vp,
	ufs2_daddr_t bn,
	struct indir *ap,
	int *nump)
{
	ufs2_daddr_t blockcnt;
	ufs_lbn_t metalbn, realbn;
	struct ufsmount *ump;
	int i, numlevels, off;

	ump = VFSTOUFS(vp->v_mount);
	if (nump)
		*nump = 0;
	numlevels = 0;
	/* Remember the caller's (possibly negative) lbn; work on |bn|. */
	realbn = bn;
	if (bn < 0)
		bn = -bn;

	/* The first UFS_NDADDR blocks are direct blocks. */
	if (bn < UFS_NDADDR)
		return (0);

	/*
	 * Determine the number of levels of indirection.  After this loop
	 * is done, blockcnt indicates the number of data blocks possible
	 * at the previous level of indirection, and UFS_NIADDR - i is the
	 * number of levels of indirection needed to locate the requested block.
	 */
	for (blockcnt = 1, i = UFS_NIADDR, bn -= UFS_NDADDR; ;
	    i--, bn -= blockcnt) {
		if (i == 0)
			return (EFBIG);
		blockcnt *= MNINDIR(ump);
		if (bn < blockcnt)
			break;
	}

	/* Calculate the address of the first meta-block. */
	if (realbn >= 0)
		metalbn = -(realbn - bn + UFS_NIADDR - i);
	else
		metalbn = -(-realbn - bn + UFS_NIADDR - i);

	/*
	 * At each iteration, off is the offset into the bap array which is
	 * an array of disk addresses at the current level of indirection.
	 * The logical block number and the offset in that block are stored
	 * into the argument array.
	 */
	ap->in_lbn = metalbn;
	ap->in_off = off = UFS_NIADDR - i;
	ap++;
	for (++numlevels; i <= UFS_NIADDR; i++) {
		/* If searching for a meta-data block, quit when found. */
		if (metalbn == realbn)
			break;

		blockcnt /= MNINDIR(ump);
		off = (bn / blockcnt) % MNINDIR(ump);

		++numlevels;
		ap->in_lbn = metalbn;
		ap->in_off = off;
		++ap;

		/* Step to the indirect block one level further down. */
		metalbn -= -1 + off * blockcnt;
	}
	if (nump)
		*nump = numlevels;
	return (0);
}