1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/bio.h> 45 #include <sys/buf.h> 46 #include <sys/proc.h> 47 #include <sys/rwlock.h> 48 #include <sys/vnode.h> 49 #include <sys/mount.h> 50 #include <sys/racct.h> 51 #include <sys/resourcevar.h> 52 #include <sys/stat.h> 53 54 #include <vm/vm.h> 55 #include <vm/vm_object.h> 56 57 #include <ufs/ufs/extattr.h> 58 #include <ufs/ufs/quota.h> 59 #include <ufs/ufs/inode.h> 60 #include <ufs/ufs/ufsmount.h> 61 #include <ufs/ufs/ufs_extern.h> 62 63 static ufs_lbn_t lbn_count(struct ufsmount *, int); 64 static int readindir(struct vnode *, ufs_lbn_t, ufs2_daddr_t, struct buf **); 65 66 /* 67 * Bmap converts the logical block number of a file to its physical block 68 * number on the disk. The conversion is done by using the logical block 69 * number to index into the array of block pointers described by the dinode. 70 */ 71 int 72 ufs_bmap( 73 struct vop_bmap_args /* { 74 struct vnode *a_vp; 75 daddr_t a_bn; 76 struct bufobj **a_bop; 77 daddr_t *a_bnp; 78 int *a_runp; 79 int *a_runb; 80 } */ *ap) 81 { 82 ufs2_daddr_t blkno; 83 int error; 84 85 /* 86 * Check for underlying vnode requests and ensure that logical 87 * to physical mapping is requested. 88 */ 89 if (ap->a_bop != NULL) 90 *ap->a_bop = &VFSTOUFS(ap->a_vp->v_mount)->um_devvp->v_bufobj; 91 if (ap->a_bnp == NULL) 92 return (0); 93 94 error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno, NULL, 95 ap->a_runp, ap->a_runb); 96 *ap->a_bnp = blkno; 97 return (error); 98 } 99 100 static int 101 readindir(struct vnode *vp, 102 ufs_lbn_t lbn, 103 ufs2_daddr_t daddr, 104 struct buf **bpp) 105 { 106 struct buf *bp; 107 struct mount *mp; 108 struct ufsmount *ump; 109 int error; 110 111 mp = vp->v_mount; 112 ump = VFSTOUFS(mp); 113 114 bp = getblk(vp, lbn, mp->mnt_stat.f_iosize, 0, 0, 0); 115 if ((bp->b_flags & B_CACHE) == 0) { 116 KASSERT(daddr != 0, 117 ("readindir: indirect block not in cache")); 118 119 bp->b_blkno = blkptrtodb(ump, daddr); 120 bp->b_iocmd = BIO_READ; 121 bp->b_flags &= ~B_INVAL; 122 bp->b_ioflags &= ~BIO_ERROR; 123 vfs_busy_pages(bp, 0); 124 bp->b_iooffset = dbtob(bp->b_blkno); 125 bstrategy(bp); 126 #ifdef RACCT 127 if (racct_enable) { 128 PROC_LOCK(curproc); 129 racct_add_buf(curproc, bp, 0); 130 PROC_UNLOCK(curproc); 131 } 132 #endif 133 curthread->td_ru.ru_inblock++; 134 error = bufwait(bp); 135 if (error != 0) { 136 brelse(bp); 137 return (error); 138 } 139 } 140 *bpp = bp; 141 return (0); 142 } 143 144 /* 145 * Indirect blocks are now on the vnode for the file. They are given negative 146 * logical block numbers. Indirect blocks are addressed by the negative 147 * address of the first data block to which they point. Double indirect blocks 148 * are addressed by one less than the address of the first indirect block to 149 * which they point. Triple indirect blocks are addressed by one less than 150 * the address of the first double indirect block to which they point. 151 * 152 * ufs_bmaparray does the bmap conversion, and if requested returns the 153 * array of logical blocks which must be traversed to get to a block. 154 * Each entry contains the offset into that block that gets you to the 155 * next block and the disk address of the block (if it is assigned). 156 */ 157 158 int 159 ufs_bmaparray(struct vnode *vp, 160 ufs2_daddr_t bn, 161 ufs2_daddr_t *bnp, 162 struct buf *nbp, 163 int *runp, 164 int *runb) 165 { 166 struct inode *ip; 167 struct buf *bp; 168 struct ufsmount *ump; 169 struct mount *mp; 170 struct indir a[UFS_NIADDR+1], *ap; 171 ufs2_daddr_t daddr; 172 ufs_lbn_t metalbn; 173 int error, num, maxrun = 0; 174 int *nump; 175 176 ap = NULL; 177 ip = VTOI(vp); 178 mp = vp->v_mount; 179 ump = VFSTOUFS(mp); 180 181 if (runp) { 182 maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; 183 *runp = 0; 184 } 185 186 if (runb) { 187 *runb = 0; 188 } 189 190 ap = a; 191 nump = # 192 error = ufs_getlbns(vp, bn, ap, nump); 193 if (error) 194 return (error); 195 196 num = *nump; 197 if (num == 0) { 198 if (bn >= 0 && bn < UFS_NDADDR) { 199 *bnp = blkptrtodb(ump, DIP(ip, i_db[bn])); 200 } else if (bn < 0 && bn >= -UFS_NXADDR) { 201 *bnp = blkptrtodb(ump, ip->i_din2->di_extb[-1 - bn]); 202 if (*bnp == 0) 203 *bnp = -1; 204 if (nbp == NULL) { 205 /* indirect block not found */ 206 return (EINVAL); 207 } 208 nbp->b_xflags |= BX_ALTDATA; 209 return (0); 210 } else { 211 /* blkno out of range */ 212 return (EINVAL); 213 } 214 /* 215 * Since this is FFS independent code, we are out of 216 * scope for the definitions of BLK_NOCOPY and 217 * BLK_SNAP, but we do know that they will fall in 218 * the range 1..um_seqinc, so we use that test and 219 * return a request for a zeroed out buffer if attempts 220 * are made to read a BLK_NOCOPY or BLK_SNAP block. 221 */ 222 if (IS_SNAPSHOT(ip) && DIP(ip, i_db[bn]) > 0 && 223 DIP(ip, i_db[bn]) < ump->um_seqinc) { 224 *bnp = -1; 225 } else if (*bnp == 0) { 226 *bnp = IS_SNAPSHOT(ip) ? blkptrtodb(ump, 227 bn * ump->um_seqinc) : -1; 228 } else if (runp) { 229 ufs2_daddr_t bnb = bn; 230 for (++bn; bn < UFS_NDADDR && *runp < maxrun && 231 is_sequential(ump, DIP(ip, i_db[bn - 1]), 232 DIP(ip, i_db[bn])); 233 ++bn, ++*runp); 234 bn = bnb; 235 if (runb && (bn > 0)) { 236 for (--bn; (bn >= 0) && (*runb < maxrun) && 237 is_sequential(ump, DIP(ip, i_db[bn]), 238 DIP(ip, i_db[bn+1])); 239 --bn, ++*runb); 240 } 241 } 242 return (0); 243 } 244 245 /* Get disk address out of indirect block array */ 246 daddr = DIP(ip, i_ib[ap->in_off]); 247 248 for (bp = NULL, ++ap; --num; ++ap) { 249 /* 250 * Exit the loop if there is no disk address assigned yet and 251 * the indirect block isn't in the cache, or if we were 252 * looking for an indirect block and we've found it. 253 */ 254 255 metalbn = ap->in_lbn; 256 if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn) 257 break; 258 /* 259 * If we get here, we've either got the block in the cache 260 * or we have a disk address for it, go fetch it. 261 */ 262 if (bp) 263 bqrelse(bp); 264 error = readindir(vp, metalbn, daddr, &bp); 265 if (error != 0) 266 return (error); 267 268 if (I_IS_UFS1(ip)) 269 daddr = ((ufs1_daddr_t *)bp->b_data)[ap->in_off]; 270 else 271 daddr = ((ufs2_daddr_t *)bp->b_data)[ap->in_off]; 272 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, daddr, 273 mp->mnt_stat.f_iosize)) != 0) { 274 bqrelse(bp); 275 return (error); 276 } 277 if (I_IS_UFS1(ip)) { 278 if (num == 1 && daddr && runp) { 279 for (bn = ap->in_off + 1; 280 bn < MNINDIR(ump) && *runp < maxrun && 281 is_sequential(ump, 282 ((ufs1_daddr_t *)bp->b_data)[bn - 1], 283 ((ufs1_daddr_t *)bp->b_data)[bn]); 284 ++bn, ++*runp); 285 bn = ap->in_off; 286 if (runb && bn) { 287 for (--bn; bn >= 0 && *runb < maxrun && 288 is_sequential(ump, 289 ((ufs1_daddr_t *)bp->b_data)[bn], 290 ((ufs1_daddr_t *)bp->b_data)[bn+1]); 291 --bn, ++*runb); 292 } 293 } 294 continue; 295 } 296 if (num == 1 && daddr && runp) { 297 for (bn = ap->in_off + 1; 298 bn < MNINDIR(ump) && *runp < maxrun && 299 is_sequential(ump, 300 ((ufs2_daddr_t *)bp->b_data)[bn - 1], 301 ((ufs2_daddr_t *)bp->b_data)[bn]); 302 ++bn, ++*runp); 303 bn = ap->in_off; 304 if (runb && bn) { 305 for (--bn; bn >= 0 && *runb < maxrun && 306 is_sequential(ump, 307 ((ufs2_daddr_t *)bp->b_data)[bn], 308 ((ufs2_daddr_t *)bp->b_data)[bn + 1]); 309 --bn, ++*runb); 310 } 311 } 312 } 313 if (bp) 314 bqrelse(bp); 315 316 /* 317 * Since this is FFS independent code, we are out of scope for the 318 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they 319 * will fall in the range 1..um_seqinc, so we use that test and 320 * return a request for a zeroed out buffer if attempts are made 321 * to read a BLK_NOCOPY or BLK_SNAP block. 322 */ 323 if (IS_SNAPSHOT(ip) && daddr > 0 && daddr < ump->um_seqinc){ 324 *bnp = -1; 325 return (0); 326 } 327 *bnp = blkptrtodb(ump, daddr); 328 if (*bnp == 0) { 329 if (IS_SNAPSHOT(ip)) 330 *bnp = blkptrtodb(ump, bn * ump->um_seqinc); 331 else 332 *bnp = -1; 333 } 334 return (0); 335 } 336 337 static ufs_lbn_t 338 lbn_count(struct ufsmount *ump, int level) 339 { 340 ufs_lbn_t blockcnt; 341 342 for (blockcnt = 1; level > 0; level--) 343 blockcnt *= MNINDIR(ump); 344 return (blockcnt); 345 } 346 347 int 348 ufs_bmap_seekdata(struct vnode *vp, off_t *offp) 349 { 350 struct buf *bp; 351 struct indir a[UFS_NIADDR + 1], *ap; 352 struct inode *ip; 353 struct mount *mp; 354 struct ufsmount *ump; 355 vm_object_t obj; 356 ufs2_daddr_t bn, daddr, nextbn; 357 uint64_t bsize; 358 off_t numblks; 359 int error, num, num1, off; 360 361 bp = NULL; 362 error = 0; 363 ip = VTOI(vp); 364 mp = vp->v_mount; 365 ump = VFSTOUFS(mp); 366 367 if (vp->v_type != VREG || IS_SNAPSHOT(ip)) 368 return (EINVAL); 369 if (*offp < 0 || *offp >= ip->i_size) 370 return (ENXIO); 371 372 /* 373 * We could have pages on the vnode' object queue which still 374 * do not have the data blocks allocated. Convert all dirty 375 * pages into buffer writes to ensure that we see all 376 * allocated data. 377 */ 378 obj = vp->v_object; 379 if (obj != NULL) { 380 VM_OBJECT_WLOCK(obj); 381 vm_object_page_clean(obj, 0, 0, OBJPC_SYNC); 382 VM_OBJECT_WUNLOCK(obj); 383 } 384 385 bsize = mp->mnt_stat.f_iosize; 386 for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize); 387 bn < numblks; bn = nextbn) { 388 if (bn < UFS_NDADDR) { 389 daddr = DIP(ip, i_db[bn]); 390 if (daddr != 0) 391 break; 392 nextbn = bn + 1; 393 continue; 394 } 395 396 ap = a; 397 error = ufs_getlbns(vp, bn, ap, &num); 398 if (error != 0) 399 break; 400 MPASS(num >= 2); 401 daddr = DIP(ip, i_ib[ap->in_off]); 402 ap++, num--; 403 for (nextbn = UFS_NDADDR, num1 = num - 1; num1 > 0; num1--) 404 nextbn += lbn_count(ump, num1); 405 if (daddr == 0) { 406 nextbn += lbn_count(ump, num); 407 continue; 408 } 409 410 for (; daddr != 0 && num > 0; ap++, num--) { 411 if (bp != NULL) 412 bqrelse(bp); 413 error = readindir(vp, ap->in_lbn, daddr, &bp); 414 if (error != 0) 415 return (error); 416 417 /* 418 * Scan the indirect block until we find a non-zero 419 * pointer. 420 */ 421 off = ap->in_off; 422 do { 423 daddr = I_IS_UFS1(ip) ? 424 ((ufs1_daddr_t *)bp->b_data)[off] : 425 ((ufs2_daddr_t *)bp->b_data)[off]; 426 } while (daddr == 0 && ++off < MNINDIR(ump)); 427 nextbn += off * lbn_count(ump, num - 1); 428 429 /* 430 * We need to recompute the LBNs of indirect 431 * blocks, so restart with the updated block offset. 432 */ 433 if (off != ap->in_off) 434 break; 435 } 436 if (num == 0) { 437 /* 438 * We found a data block. 439 */ 440 bn = nextbn; 441 break; 442 } 443 } 444 if (bp != NULL) 445 bqrelse(bp); 446 if (bn >= numblks) 447 error = ENXIO; 448 if (error == 0 && *offp < bn * bsize) 449 *offp = bn * bsize; 450 return (error); 451 } 452 453 /* 454 * Create an array of logical block number/offset pairs which represent the 455 * path of indirect blocks required to access a data block. The first "pair" 456 * contains the logical block number of the appropriate single, double or 457 * triple indirect block and the offset into the inode indirect block array. 458 * Note, the logical block number of the inode single/double/triple indirect 459 * block appears twice in the array, once with the offset into the i_ib and 460 * once with the offset into the page itself. 461 */ 462 int 463 ufs_getlbns(struct vnode *vp, 464 ufs2_daddr_t bn, 465 struct indir *ap, 466 int *nump) 467 { 468 ufs2_daddr_t blockcnt; 469 ufs_lbn_t metalbn, realbn; 470 struct ufsmount *ump; 471 int i, numlevels, off; 472 473 ump = VFSTOUFS(vp->v_mount); 474 if (nump) 475 *nump = 0; 476 numlevels = 0; 477 realbn = bn; 478 if (bn < 0) 479 bn = -bn; 480 481 /* The first UFS_NDADDR blocks are direct blocks. */ 482 if (bn < UFS_NDADDR) 483 return (0); 484 485 /* 486 * Determine the number of levels of indirection. After this loop 487 * is done, blockcnt indicates the number of data blocks possible 488 * at the previous level of indirection, and UFS_NIADDR - i is the 489 * number of levels of indirection needed to locate the requested block. 490 */ 491 for (blockcnt = 1, i = UFS_NIADDR, bn -= UFS_NDADDR; ; 492 i--, bn -= blockcnt) { 493 if (i == 0) 494 return (EFBIG); 495 blockcnt *= MNINDIR(ump); 496 if (bn < blockcnt) 497 break; 498 } 499 500 /* Calculate the address of the first meta-block. */ 501 if (realbn >= 0) 502 metalbn = -(realbn - bn + UFS_NIADDR - i); 503 else 504 metalbn = -(-realbn - bn + UFS_NIADDR - i); 505 506 /* 507 * At each iteration, off is the offset into the bap array which is 508 * an array of disk addresses at the current level of indirection. 509 * The logical block number and the offset in that block are stored 510 * into the argument array. 511 */ 512 ap->in_lbn = metalbn; 513 ap->in_off = off = UFS_NIADDR - i; 514 ap++; 515 for (++numlevels; i <= UFS_NIADDR; i++) { 516 /* If searching for a meta-data block, quit when found. */ 517 if (metalbn == realbn) 518 break; 519 520 blockcnt /= MNINDIR(ump); 521 off = (bn / blockcnt) % MNINDIR(ump); 522 523 ++numlevels; 524 ap->in_lbn = metalbn; 525 ap->in_off = off; 526 ++ap; 527 528 metalbn -= -1 + off * blockcnt; 529 } 530 if (nump) 531 *nump = numlevels; 532 return (0); 533 } 534