1 /* 2 * Copyright (c) 1982, 1986, 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 3. All advertising materials mentioning features or use of this software 14 * must display the following acknowledgement: 15 * This product includes software developed by the University of 16 * California, Berkeley and its contributors. 17 * 4. Neither the name of the University nor the names of its contributors 18 * may be used to endorse or promote products derived from this software 19 * without specific prior written permission. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 31 * SUCH DAMAGE. 32 * 33 * @(#)ffs_balloc.c 8.8 (Berkeley) 6/16/95 34 * $FreeBSD: src/sys/ufs/ffs/ffs_balloc.c,v 1.26.2.1 2002/10/10 19:48:20 dillon Exp $ 35 * $DragonFly: src/sys/vfs/ufs/ffs_balloc.c,v 1.6 2003/08/07 21:17:44 dillon Exp $ 36 */ 37 38 #include <sys/param.h> 39 #include <sys/systm.h> 40 #include <sys/proc.h> 41 #include <sys/buf.h> 42 #include <sys/lock.h> 43 #include <sys/mount.h> 44 #include <sys/vnode.h> 45 46 #include "quota.h" 47 #include "inode.h" 48 #include "ufs_extern.h" 49 50 #include "fs.h" 51 #include "ffs_extern.h" 52 53 /* 54 * Balloc defines the structure of file system storage 55 * by allocating the physical blocks on a device given 56 * the inode and the logical block number in a file. 57 */ 58 int 59 ffs_balloc(ap) 60 struct vop_balloc_args /* { 61 struct vnode *a_vp; 62 ufs_daddr_t a_lbn; 63 int a_size; 64 struct ucred *a_cred; 65 int a_flags; 66 struct buf *a_bpp; 67 } */ *ap; 68 { 69 struct inode *ip; 70 ufs_daddr_t lbn; 71 int size; 72 struct ucred *cred; 73 int flags; 74 struct fs *fs; 75 ufs_daddr_t nb; 76 struct buf *bp, *nbp; 77 struct vnode *vp; 78 struct indir indirs[NIADDR + 2]; 79 ufs_daddr_t newb, *bap, pref; 80 int deallocated, osize, nsize, num, i, error; 81 ufs_daddr_t *allocib, *blkp, *allocblk, allociblk[NIADDR + 1]; 82 int unwindidx = -1; 83 struct thread *td = curthread; /* XXX */ 84 85 vp = ap->a_vp; 86 ip = VTOI(vp); 87 fs = ip->i_fs; 88 lbn = lblkno(fs, ap->a_startoffset); 89 size = blkoff(fs, ap->a_startoffset) + ap->a_size; 90 if (size > fs->fs_bsize) 91 panic("ffs_balloc: blk too big"); 92 *ap->a_bpp = NULL; 93 if (lbn < 0) 94 return (EFBIG); 95 cred = ap->a_cred; 96 flags = ap->a_flags; 97 98 /* 99 * If the next write will extend the file into a new block, 100 * and the file is currently composed of a fragment 101 * this fragment has to be extended to be a full block. 102 */ 103 nb = lblkno(fs, ip->i_size); 104 if (nb < NDADDR && nb < lbn) { 105 osize = blksize(fs, ip, nb); 106 if (osize < fs->fs_bsize && osize > 0) { 107 error = ffs_realloccg(ip, nb, 108 ffs_blkpref(ip, nb, (int)nb, &ip->i_db[0]), 109 osize, (int)fs->fs_bsize, cred, &bp); 110 if (error) 111 return (error); 112 if (DOINGSOFTDEP(vp)) 113 softdep_setup_allocdirect(ip, nb, 114 dbtofsb(fs, bp->b_blkno), ip->i_db[nb], 115 fs->fs_bsize, osize, bp); 116 ip->i_size = smalllblktosize(fs, nb + 1); 117 ip->i_db[nb] = dbtofsb(fs, bp->b_blkno); 118 ip->i_flag |= IN_CHANGE | IN_UPDATE; 119 if (flags & B_SYNC) 120 bwrite(bp); 121 else 122 bawrite(bp); 123 } 124 } 125 /* 126 * The first NDADDR blocks are direct blocks 127 */ 128 if (lbn < NDADDR) { 129 nb = ip->i_db[lbn]; 130 if (nb != 0 && ip->i_size >= smalllblktosize(fs, lbn + 1)) { 131 error = bread(vp, lbn, fs->fs_bsize, &bp); 132 if (error) { 133 brelse(bp); 134 return (error); 135 } 136 bp->b_blkno = fsbtodb(fs, nb); 137 *ap->a_bpp = bp; 138 return (0); 139 } 140 if (nb != 0) { 141 /* 142 * Consider need to reallocate a fragment. 143 */ 144 osize = fragroundup(fs, blkoff(fs, ip->i_size)); 145 nsize = fragroundup(fs, size); 146 if (nsize <= osize) { 147 error = bread(vp, lbn, osize, &bp); 148 if (error) { 149 brelse(bp); 150 return (error); 151 } 152 bp->b_blkno = fsbtodb(fs, nb); 153 } else { 154 error = ffs_realloccg(ip, lbn, 155 ffs_blkpref(ip, lbn, (int)lbn, 156 &ip->i_db[0]), osize, nsize, cred, &bp); 157 if (error) 158 return (error); 159 if (DOINGSOFTDEP(vp)) 160 softdep_setup_allocdirect(ip, lbn, 161 dbtofsb(fs, bp->b_blkno), nb, 162 nsize, osize, bp); 163 } 164 } else { 165 if (ip->i_size < smalllblktosize(fs, lbn + 1)) 166 nsize = fragroundup(fs, size); 167 else 168 nsize = fs->fs_bsize; 169 error = ffs_alloc(ip, lbn, 170 ffs_blkpref(ip, lbn, (int)lbn, &ip->i_db[0]), 171 nsize, cred, &newb); 172 if (error) 173 return (error); 174 bp = getblk(vp, lbn, nsize, 0, 0); 175 bp->b_blkno = fsbtodb(fs, newb); 176 if (flags & B_CLRBUF) 177 vfs_bio_clrbuf(bp); 178 if (DOINGSOFTDEP(vp)) 179 softdep_setup_allocdirect(ip, lbn, newb, 0, 180 nsize, 0, bp); 181 } 182 ip->i_db[lbn] = dbtofsb(fs, bp->b_blkno); 183 ip->i_flag |= IN_CHANGE | IN_UPDATE; 184 *ap->a_bpp = bp; 185 return (0); 186 } 187 /* 188 * Determine the number of levels of indirection. 189 */ 190 pref = 0; 191 if ((error = ufs_getlbns(vp, lbn, indirs, &num)) != 0) 192 return(error); 193 #ifdef DIAGNOSTIC 194 if (num < 1) 195 panic ("ffs_balloc: ufs_bmaparray returned indirect block"); 196 #endif 197 /* 198 * Fetch the first indirect block allocating if necessary. 199 */ 200 --num; 201 nb = ip->i_ib[indirs[0].in_off]; 202 allocib = NULL; 203 allocblk = allociblk; 204 if (nb == 0) { 205 pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); 206 if ((error = ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, 207 cred, &newb)) != 0) 208 return (error); 209 nb = newb; 210 *allocblk++ = nb; 211 bp = getblk(vp, indirs[1].in_lbn, fs->fs_bsize, 0, 0); 212 bp->b_blkno = fsbtodb(fs, nb); 213 vfs_bio_clrbuf(bp); 214 if (DOINGSOFTDEP(vp)) { 215 softdep_setup_allocdirect(ip, NDADDR + indirs[0].in_off, 216 newb, 0, fs->fs_bsize, 0, bp); 217 bdwrite(bp); 218 } else { 219 /* 220 * Write synchronously so that indirect blocks 221 * never point at garbage. 222 */ 223 if (DOINGASYNC(vp)) 224 bdwrite(bp); 225 else if ((error = bwrite(bp)) != 0) 226 goto fail; 227 } 228 allocib = &ip->i_ib[indirs[0].in_off]; 229 *allocib = nb; 230 ip->i_flag |= IN_CHANGE | IN_UPDATE; 231 } 232 /* 233 * Fetch through the indirect blocks, allocating as necessary. 234 */ 235 for (i = 1;;) { 236 error = bread(vp, indirs[i].in_lbn, (int)fs->fs_bsize, &bp); 237 if (error) { 238 brelse(bp); 239 goto fail; 240 } 241 bap = (ufs_daddr_t *)bp->b_data; 242 nb = bap[indirs[i].in_off]; 243 if (i == num) 244 break; 245 i += 1; 246 if (nb != 0) { 247 bqrelse(bp); 248 continue; 249 } 250 if (pref == 0) 251 pref = ffs_blkpref(ip, lbn, 0, (ufs_daddr_t *)0); 252 if ((error = 253 ffs_alloc(ip, lbn, pref, (int)fs->fs_bsize, cred, &newb)) != 0) { 254 brelse(bp); 255 goto fail; 256 } 257 nb = newb; 258 *allocblk++ = nb; 259 nbp = getblk(vp, indirs[i].in_lbn, fs->fs_bsize, 0, 0); 260 nbp->b_blkno = fsbtodb(fs, nb); 261 vfs_bio_clrbuf(nbp); 262 if (DOINGSOFTDEP(vp)) { 263 softdep_setup_allocindir_meta(nbp, ip, bp, 264 indirs[i - 1].in_off, nb); 265 bdwrite(nbp); 266 } else { 267 /* 268 * Write synchronously so that indirect blocks 269 * never point at garbage. 270 */ 271 if ((error = bwrite(nbp)) != 0) { 272 brelse(bp); 273 goto fail; 274 } 275 } 276 bap[indirs[i - 1].in_off] = nb; 277 if (allocib == NULL && unwindidx < 0) 278 unwindidx = i - 1; 279 /* 280 * If required, write synchronously, otherwise use 281 * delayed write. 282 */ 283 if (flags & B_SYNC) { 284 bwrite(bp); 285 } else { 286 if (bp->b_bufsize == fs->fs_bsize) 287 bp->b_flags |= B_CLUSTEROK; 288 bdwrite(bp); 289 } 290 } 291 /* 292 * Get the data block, allocating if necessary. 293 */ 294 if (nb == 0) { 295 pref = ffs_blkpref(ip, lbn, indirs[i].in_off, &bap[0]); 296 error = ffs_alloc(ip, 297 lbn, pref, (int)fs->fs_bsize, cred, &newb); 298 if (error) { 299 brelse(bp); 300 goto fail; 301 } 302 nb = newb; 303 *allocblk++ = nb; 304 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 305 nbp->b_blkno = fsbtodb(fs, nb); 306 if (flags & B_CLRBUF) 307 vfs_bio_clrbuf(nbp); 308 if (DOINGSOFTDEP(vp)) 309 softdep_setup_allocindir_page(ip, lbn, bp, 310 indirs[i].in_off, nb, 0, nbp); 311 bap[indirs[i].in_off] = nb; 312 /* 313 * If required, write synchronously, otherwise use 314 * delayed write. 315 */ 316 if (flags & B_SYNC) { 317 bwrite(bp); 318 } else { 319 if (bp->b_bufsize == fs->fs_bsize) 320 bp->b_flags |= B_CLUSTEROK; 321 bdwrite(bp); 322 } 323 *ap->a_bpp = nbp; 324 return (0); 325 } 326 brelse(bp); 327 /* 328 * If requested clear invalid portions of the buffer. If we 329 * have to do a read-before-write (typical if B_CLRBUF is set), 330 * try to do some read-ahead in the sequential case to reduce 331 * the number of I/O transactions. 332 */ 333 if (flags & B_CLRBUF) { 334 int seqcount = (flags & B_SEQMASK) >> B_SEQSHIFT; 335 if (seqcount && 336 (vp->v_mount->mnt_flag & MNT_NOCLUSTERR) == 0) { 337 error = cluster_read(vp, ip->i_size, lbn, 338 (int)fs->fs_bsize, 339 MAXBSIZE, seqcount, &nbp); 340 } else { 341 error = bread(vp, lbn, (int)fs->fs_bsize, &nbp); 342 } 343 if (error) { 344 brelse(nbp); 345 goto fail; 346 } 347 } else { 348 nbp = getblk(vp, lbn, fs->fs_bsize, 0, 0); 349 nbp->b_blkno = fsbtodb(fs, nb); 350 } 351 *ap->a_bpp = nbp; 352 return (0); 353 fail: 354 /* 355 * If we have failed part way through block allocation, we 356 * have to deallocate any indirect blocks that we have allocated. 357 * We have to fsync the file before we start to get rid of all 358 * of its dependencies so that we do not leave them dangling. 359 * We have to sync it at the end so that the soft updates code 360 * does not find any untracked changes. Although this is really 361 * slow, running out of disk space is not expected to be a common 362 * occurence. The error return from fsync is ignored as we already 363 * have an error to return to the user. 364 */ 365 (void) VOP_FSYNC(vp, MNT_WAIT, td); 366 for (deallocated = 0, blkp = allociblk; blkp < allocblk; blkp++) { 367 ffs_blkfree(ip, *blkp, fs->fs_bsize); 368 deallocated += fs->fs_bsize; 369 } 370 if (allocib != NULL) { 371 *allocib = 0; 372 } else if (unwindidx >= 0) { 373 int r; 374 375 r = bread(vp, indirs[unwindidx].in_lbn, (int)fs->fs_bsize, &bp); 376 if (r) { 377 panic("Could not unwind indirect block, error %d", r); 378 brelse(bp); 379 } else { 380 bap = (ufs_daddr_t *)bp->b_data; 381 bap[indirs[unwindidx].in_off] = 0; 382 if (flags & B_SYNC) { 383 bwrite(bp); 384 } else { 385 if (bp->b_bufsize == fs->fs_bsize) 386 bp->b_flags |= B_CLUSTEROK; 387 bdwrite(bp); 388 } 389 } 390 } 391 if (deallocated) { 392 #ifdef QUOTA 393 /* 394 * Restore user's disk quota because allocation failed. 395 */ 396 (void) chkdq(ip, (long)-btodb(deallocated), cred, FORCE); 397 #endif 398 ip->i_blocks -= btodb(deallocated); 399 ip->i_flag |= IN_CHANGE | IN_UPDATE; 400 } 401 (void) VOP_FSYNC(vp, MNT_WAIT, td); 402 return (error); 403 } 404