1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1991, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)ufs_bmap.c 8.7 (Berkeley) 3/21/95 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include <sys/param.h> 43 #include <sys/systm.h> 44 #include <sys/bio.h> 45 #include <sys/buf.h> 46 #include <sys/proc.h> 47 #include <sys/vnode.h> 48 #include <sys/mount.h> 49 #include <sys/racct.h> 50 #include <sys/resourcevar.h> 51 #include <sys/stat.h> 52 53 #include <ufs/ufs/extattr.h> 54 #include <ufs/ufs/quota.h> 55 #include <ufs/ufs/inode.h> 56 #include <ufs/ufs/ufsmount.h> 57 #include <ufs/ufs/ufs_extern.h> 58 59 static ufs_lbn_t lbn_count(struct ufsmount *, int); 60 static int readindir(struct vnode *, ufs_lbn_t, ufs2_daddr_t, struct buf **); 61 62 /* 63 * Bmap converts the logical block number of a file to its physical block 64 * number on the disk. The conversion is done by using the logical block 65 * number to index into the array of block pointers described by the dinode. 66 */ 67 int 68 ufs_bmap( 69 struct vop_bmap_args /* { 70 struct vnode *a_vp; 71 daddr_t a_bn; 72 struct bufobj **a_bop; 73 daddr_t *a_bnp; 74 int *a_runp; 75 int *a_runb; 76 } */ *ap) 77 { 78 ufs2_daddr_t blkno; 79 int error; 80 81 /* 82 * Check for underlying vnode requests and ensure that logical 83 * to physical mapping is requested. 84 */ 85 if (ap->a_bop != NULL) 86 *ap->a_bop = &VFSTOUFS(ap->a_vp->v_mount)->um_devvp->v_bufobj; 87 if (ap->a_bnp == NULL) 88 return (0); 89 90 error = ufs_bmaparray(ap->a_vp, ap->a_bn, &blkno, NULL, 91 ap->a_runp, ap->a_runb); 92 *ap->a_bnp = blkno; 93 return (error); 94 } 95 96 static int 97 readindir(struct vnode *vp, 98 ufs_lbn_t lbn, 99 ufs2_daddr_t daddr, 100 struct buf **bpp) 101 { 102 struct buf *bp; 103 struct mount *mp; 104 struct ufsmount *ump; 105 int error; 106 107 mp = vp->v_mount; 108 ump = VFSTOUFS(mp); 109 110 bp = getblk(vp, lbn, mp->mnt_stat.f_iosize, 0, 0, 0); 111 if ((bp->b_flags & B_CACHE) == 0) { 112 KASSERT(daddr != 0, 113 ("readindir: indirect block not in cache")); 114 115 bp->b_blkno = blkptrtodb(ump, daddr); 116 bp->b_iocmd = BIO_READ; 117 bp->b_flags &= ~B_INVAL; 118 bp->b_ioflags &= ~BIO_ERROR; 119 vfs_busy_pages(bp, 0); 120 bp->b_iooffset = dbtob(bp->b_blkno); 121 bstrategy(bp); 122 #ifdef RACCT 123 if (racct_enable) { 124 PROC_LOCK(curproc); 125 racct_add_buf(curproc, bp, 0); 126 PROC_UNLOCK(curproc); 127 } 128 #endif 129 curthread->td_ru.ru_inblock++; 130 error = bufwait(bp); 131 if (error != 0) { 132 brelse(bp); 133 return (error); 134 } 135 } 136 *bpp = bp; 137 return (0); 138 } 139 140 /* 141 * Indirect blocks are now on the vnode for the file. They are given negative 142 * logical block numbers. Indirect blocks are addressed by the negative 143 * address of the first data block to which they point. Double indirect blocks 144 * are addressed by one less than the address of the first indirect block to 145 * which they point. Triple indirect blocks are addressed by one less than 146 * the address of the first double indirect block to which they point. 147 * 148 * ufs_bmaparray does the bmap conversion, and if requested returns the 149 * array of logical blocks which must be traversed to get to a block. 150 * Each entry contains the offset into that block that gets you to the 151 * next block and the disk address of the block (if it is assigned). 152 */ 153 154 int 155 ufs_bmaparray(struct vnode *vp, 156 ufs2_daddr_t bn, 157 ufs2_daddr_t *bnp, 158 struct buf *nbp, 159 int *runp, 160 int *runb) 161 { 162 struct inode *ip; 163 struct buf *bp; 164 struct ufsmount *ump; 165 struct mount *mp; 166 struct indir a[UFS_NIADDR+1], *ap; 167 ufs2_daddr_t daddr; 168 ufs_lbn_t metalbn; 169 int error, num, maxrun = 0; 170 int *nump; 171 172 ap = NULL; 173 ip = VTOI(vp); 174 mp = vp->v_mount; 175 ump = VFSTOUFS(mp); 176 177 if (runp) { 178 maxrun = mp->mnt_iosize_max / mp->mnt_stat.f_iosize - 1; 179 *runp = 0; 180 } 181 182 if (runb) { 183 *runb = 0; 184 } 185 186 ap = a; 187 nump = # 188 error = ufs_getlbns(vp, bn, ap, nump); 189 if (error) 190 return (error); 191 192 num = *nump; 193 if (num == 0) { 194 if (bn >= 0 && bn < UFS_NDADDR) { 195 *bnp = blkptrtodb(ump, DIP(ip, i_db[bn])); 196 } else if (bn < 0 && bn >= -UFS_NXADDR) { 197 *bnp = blkptrtodb(ump, ip->i_din2->di_extb[-1 - bn]); 198 if (*bnp == 0) 199 *bnp = -1; 200 if (nbp == NULL) { 201 /* indirect block not found */ 202 return (EINVAL); 203 } 204 nbp->b_xflags |= BX_ALTDATA; 205 return (0); 206 } else { 207 /* blkno out of range */ 208 return (EINVAL); 209 } 210 /* 211 * Since this is FFS independent code, we are out of 212 * scope for the definitions of BLK_NOCOPY and 213 * BLK_SNAP, but we do know that they will fall in 214 * the range 1..um_seqinc, so we use that test and 215 * return a request for a zeroed out buffer if attempts 216 * are made to read a BLK_NOCOPY or BLK_SNAP block. 217 */ 218 if (IS_SNAPSHOT(ip) && DIP(ip, i_db[bn]) > 0 && 219 DIP(ip, i_db[bn]) < ump->um_seqinc) { 220 *bnp = -1; 221 } else if (*bnp == 0) { 222 *bnp = IS_SNAPSHOT(ip) ? blkptrtodb(ump, 223 bn * ump->um_seqinc) : -1; 224 } else if (runp) { 225 ufs2_daddr_t bnb = bn; 226 for (++bn; bn < UFS_NDADDR && *runp < maxrun && 227 is_sequential(ump, DIP(ip, i_db[bn - 1]), 228 DIP(ip, i_db[bn])); 229 ++bn, ++*runp); 230 bn = bnb; 231 if (runb && (bn > 0)) { 232 for (--bn; (bn >= 0) && (*runb < maxrun) && 233 is_sequential(ump, DIP(ip, i_db[bn]), 234 DIP(ip, i_db[bn+1])); 235 --bn, ++*runb); 236 } 237 } 238 return (0); 239 } 240 241 /* Get disk address out of indirect block array */ 242 daddr = DIP(ip, i_ib[ap->in_off]); 243 244 for (bp = NULL, ++ap; --num; ++ap) { 245 /* 246 * Exit the loop if there is no disk address assigned yet and 247 * the indirect block isn't in the cache, or if we were 248 * looking for an indirect block and we've found it. 249 */ 250 251 metalbn = ap->in_lbn; 252 if ((daddr == 0 && !incore(&vp->v_bufobj, metalbn)) || metalbn == bn) 253 break; 254 /* 255 * If we get here, we've either got the block in the cache 256 * or we have a disk address for it, go fetch it. 257 */ 258 if (bp) 259 bqrelse(bp); 260 error = readindir(vp, metalbn, daddr, &bp); 261 if (error != 0) 262 return (error); 263 264 if (I_IS_UFS1(ip)) 265 daddr = ((ufs1_daddr_t *)bp->b_data)[ap->in_off]; 266 else 267 daddr = ((ufs2_daddr_t *)bp->b_data)[ap->in_off]; 268 if ((error = UFS_CHECK_BLKNO(mp, ip->i_number, daddr, 269 mp->mnt_stat.f_iosize)) != 0) { 270 bqrelse(bp); 271 return (error); 272 } 273 if (I_IS_UFS1(ip)) { 274 if (num == 1 && daddr && runp) { 275 for (bn = ap->in_off + 1; 276 bn < MNINDIR(ump) && *runp < maxrun && 277 is_sequential(ump, 278 ((ufs1_daddr_t *)bp->b_data)[bn - 1], 279 ((ufs1_daddr_t *)bp->b_data)[bn]); 280 ++bn, ++*runp); 281 bn = ap->in_off; 282 if (runb && bn) { 283 for (--bn; bn >= 0 && *runb < maxrun && 284 is_sequential(ump, 285 ((ufs1_daddr_t *)bp->b_data)[bn], 286 ((ufs1_daddr_t *)bp->b_data)[bn+1]); 287 --bn, ++*runb); 288 } 289 } 290 continue; 291 } 292 if (num == 1 && daddr && runp) { 293 for (bn = ap->in_off + 1; 294 bn < MNINDIR(ump) && *runp < maxrun && 295 is_sequential(ump, 296 ((ufs2_daddr_t *)bp->b_data)[bn - 1], 297 ((ufs2_daddr_t *)bp->b_data)[bn]); 298 ++bn, ++*runp); 299 bn = ap->in_off; 300 if (runb && bn) { 301 for (--bn; bn >= 0 && *runb < maxrun && 302 is_sequential(ump, 303 ((ufs2_daddr_t *)bp->b_data)[bn], 304 ((ufs2_daddr_t *)bp->b_data)[bn + 1]); 305 --bn, ++*runb); 306 } 307 } 308 } 309 if (bp) 310 bqrelse(bp); 311 312 /* 313 * Since this is FFS independent code, we are out of scope for the 314 * definitions of BLK_NOCOPY and BLK_SNAP, but we do know that they 315 * will fall in the range 1..um_seqinc, so we use that test and 316 * return a request for a zeroed out buffer if attempts are made 317 * to read a BLK_NOCOPY or BLK_SNAP block. 318 */ 319 if (IS_SNAPSHOT(ip) && daddr > 0 && daddr < ump->um_seqinc){ 320 *bnp = -1; 321 return (0); 322 } 323 *bnp = blkptrtodb(ump, daddr); 324 if (*bnp == 0) { 325 if (IS_SNAPSHOT(ip)) 326 *bnp = blkptrtodb(ump, bn * ump->um_seqinc); 327 else 328 *bnp = -1; 329 } 330 return (0); 331 } 332 333 static ufs_lbn_t 334 lbn_count(struct ufsmount *ump, int level) 335 { 336 ufs_lbn_t blockcnt; 337 338 for (blockcnt = 1; level > 0; level--) 339 blockcnt *= MNINDIR(ump); 340 return (blockcnt); 341 } 342 343 int 344 ufs_bmap_seekdata(struct vnode *vp, off_t *offp) 345 { 346 struct buf *bp; 347 struct indir a[UFS_NIADDR + 1], *ap; 348 struct inode *ip; 349 struct mount *mp; 350 struct ufsmount *ump; 351 ufs2_daddr_t bn, daddr, nextbn; 352 uint64_t bsize; 353 off_t numblks; 354 int error, num, num1, off; 355 356 bp = NULL; 357 error = 0; 358 ip = VTOI(vp); 359 mp = vp->v_mount; 360 ump = VFSTOUFS(mp); 361 362 if (vp->v_type != VREG || IS_SNAPSHOT(ip)) 363 return (EINVAL); 364 if (*offp < 0 || *offp >= ip->i_size) 365 return (ENXIO); 366 367 bsize = mp->mnt_stat.f_iosize; 368 for (bn = *offp / bsize, numblks = howmany(ip->i_size, bsize); 369 bn < numblks; bn = nextbn) { 370 if (bn < UFS_NDADDR) { 371 daddr = DIP(ip, i_db[bn]); 372 if (daddr != 0) 373 break; 374 nextbn = bn + 1; 375 continue; 376 } 377 378 ap = a; 379 error = ufs_getlbns(vp, bn, ap, &num); 380 if (error != 0) 381 break; 382 MPASS(num >= 2); 383 daddr = DIP(ip, i_ib[ap->in_off]); 384 ap++, num--; 385 for (nextbn = UFS_NDADDR, num1 = num - 1; num1 > 0; num1--) 386 nextbn += lbn_count(ump, num1); 387 if (daddr == 0) { 388 nextbn += lbn_count(ump, num); 389 continue; 390 } 391 392 for (; daddr != 0 && num > 0; ap++, num--) { 393 if (bp != NULL) 394 bqrelse(bp); 395 error = readindir(vp, ap->in_lbn, daddr, &bp); 396 if (error != 0) 397 return (error); 398 399 /* 400 * Scan the indirect block until we find a non-zero 401 * pointer. 402 */ 403 off = ap->in_off; 404 do { 405 daddr = I_IS_UFS1(ip) ? 406 ((ufs1_daddr_t *)bp->b_data)[off] : 407 ((ufs2_daddr_t *)bp->b_data)[off]; 408 } while (daddr == 0 && ++off < MNINDIR(ump)); 409 nextbn += off * lbn_count(ump, num - 1); 410 411 /* 412 * We need to recompute the LBNs of indirect 413 * blocks, so restart with the updated block offset. 414 */ 415 if (off != ap->in_off) 416 break; 417 } 418 if (num == 0) { 419 /* 420 * We found a data block. 421 */ 422 bn = nextbn; 423 break; 424 } 425 } 426 if (bp != NULL) 427 bqrelse(bp); 428 if (bn >= numblks) 429 error = ENXIO; 430 if (error == 0 && *offp < bn * bsize) 431 *offp = bn * bsize; 432 return (error); 433 } 434 435 /* 436 * Create an array of logical block number/offset pairs which represent the 437 * path of indirect blocks required to access a data block. The first "pair" 438 * contains the logical block number of the appropriate single, double or 439 * triple indirect block and the offset into the inode indirect block array. 440 * Note, the logical block number of the inode single/double/triple indirect 441 * block appears twice in the array, once with the offset into the i_ib and 442 * once with the offset into the page itself. 443 */ 444 int 445 ufs_getlbns(struct vnode *vp, 446 ufs2_daddr_t bn, 447 struct indir *ap, 448 int *nump) 449 { 450 ufs2_daddr_t blockcnt; 451 ufs_lbn_t metalbn, realbn; 452 struct ufsmount *ump; 453 int i, numlevels, off; 454 455 ump = VFSTOUFS(vp->v_mount); 456 if (nump) 457 *nump = 0; 458 numlevels = 0; 459 realbn = bn; 460 if (bn < 0) 461 bn = -bn; 462 463 /* The first UFS_NDADDR blocks are direct blocks. */ 464 if (bn < UFS_NDADDR) 465 return (0); 466 467 /* 468 * Determine the number of levels of indirection. After this loop 469 * is done, blockcnt indicates the number of data blocks possible 470 * at the previous level of indirection, and UFS_NIADDR - i is the 471 * number of levels of indirection needed to locate the requested block. 472 */ 473 for (blockcnt = 1, i = UFS_NIADDR, bn -= UFS_NDADDR; ; 474 i--, bn -= blockcnt) { 475 if (i == 0) 476 return (EFBIG); 477 blockcnt *= MNINDIR(ump); 478 if (bn < blockcnt) 479 break; 480 } 481 482 /* Calculate the address of the first meta-block. */ 483 if (realbn >= 0) 484 metalbn = -(realbn - bn + UFS_NIADDR - i); 485 else 486 metalbn = -(-realbn - bn + UFS_NIADDR - i); 487 488 /* 489 * At each iteration, off is the offset into the bap array which is 490 * an array of disk addresses at the current level of indirection. 491 * The logical block number and the offset in that block are stored 492 * into the argument array. 493 */ 494 ap->in_lbn = metalbn; 495 ap->in_off = off = UFS_NIADDR - i; 496 ap++; 497 for (++numlevels; i <= UFS_NIADDR; i++) { 498 /* If searching for a meta-data block, quit when found. */ 499 if (metalbn == realbn) 500 break; 501 502 blockcnt /= MNINDIR(ump); 503 off = (bn / blockcnt) % MNINDIR(ump); 504 505 ++numlevels; 506 ap->in_lbn = metalbn; 507 ap->in_off = off; 508 ++ap; 509 510 metalbn -= -1 + off * blockcnt; 511 } 512 if (nump) 513 *nump = numlevels; 514 return (0); 515 } 516