1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 39 * $FreeBSD: src/sys/ufs/ufs/ufs_lookup.c,v 1.33.2.7 2001/09/22 19:22:13 iedowse Exp $ 40 * $DragonFly: src/sys/vfs/ufs/ufs_lookup.c,v 1.18 2005/09/14 01:13:48 dillon Exp $ 41 */ 42 43 #include "opt_ufs.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kernel.h> 48 #include <sys/buf.h> 49 #include <sys/proc.h> 50 #include <sys/namei.h> 51 #include <sys/stat.h> 52 #include <sys/mount.h> 53 #include <sys/vnode.h> 54 #include <sys/sysctl.h> 55 56 #include <vm/vm.h> 57 #include <vm/vm_extern.h> 58 59 #include "quota.h" 60 #include "inode.h" 61 #include "dir.h" 62 #ifdef UFS_DIRHASH 63 #include "dirhash.h" 64 #endif 65 #include "ufsmount.h" 66 #include "ufs_extern.h" 67 68 #ifdef DIAGNOSTIC 69 int dirchk = 1; 70 #else 71 int dirchk = 0; 72 #endif 73 74 SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, ""); 75 76 /* true if old FS format...*/ 77 #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 78 79 /* 80 * Convert a component of a pathname into a pointer to a locked inode. 81 * This is a very central and rather complicated routine. 82 * If the filesystem is not maintained in a strict tree hierarchy, 83 * this can result in a deadlock situation (see comments in code below). 84 * 85 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending 86 * on whether the name is to be looked up, created, renamed, or deleted. 87 * When CREATE, RENAME, or DELETE is specified, information usable in 88 * creating, renaming, or deleting a directory entry may be calculated. 89 * If flag has LOCKPARENT or'ed into it and the target of the pathname 90 * exists, lookup returns both the target and its parent directory locked. 91 * When creating or renaming and LOCKPARENT is specified, the target may 92 * not be ".". When deleting and LOCKPARENT is specified, the target may 93 * be "."., but the caller must check to ensure it does an vrele and vput 94 * instead of two vputs. 95 * 96 * Overall outline of ufs_lookup: 97 * 98 * search for name in directory, to found or notfound 99 * notfound: 100 * if creating, return locked directory, leaving info on available slots 101 * else return error 102 * found: 103 * if at end of path and deleting, return information to allow delete 104 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target 105 * inode and return info to allow rewrite 106 * if not at end, add name to cache; if at end and neither creating 107 * nor deleting, add name to cache 108 * 109 * ufs_lookup(struct vnode *a_dvp, struct vnode **a_vpp, 110 * struct componentname *a_cnp) 111 */ 112 int 113 ufs_lookup(struct vop_old_lookup_args *ap) 114 { 115 struct vnode *vdp; /* vnode for directory being searched */ 116 struct inode *dp; /* inode for directory being searched */ 117 struct buf *bp; /* a buffer of directory entries */ 118 struct direct *ep; /* the current directory entry */ 119 int entryoffsetinblock; /* offset of ep in bp's buffer */ 120 enum {NONE, COMPACT, FOUND} slotstatus; 121 doff_t slotoffset; /* offset of area with free space */ 122 int slotsize; /* size of area at slotoffset */ 123 int slotfreespace; /* amount of space free in slot */ 124 int slotneeded; /* size of the entry we're seeking */ 125 int numdirpasses; /* strategy for directory search */ 126 doff_t endsearch; /* offset to end directory search */ 127 doff_t prevoff; /* prev entry dp->i_offset */ 128 struct vnode *pdp; /* saved dp during symlink work */ 129 struct vnode *tdp; /* returned by VFS_VGET */ 130 doff_t enduseful; /* pointer past last used dir slot */ 131 u_long bmask; /* block offset mask */ 132 int lockparent; /* 1 => lockparent flag is set */ 133 int wantparent; /* 1 => wantparent or lockparent flag */ 134 int namlen, error; 135 struct vnode **vpp = ap->a_vpp; 136 struct componentname *cnp = ap->a_cnp; 137 struct ucred *cred = cnp->cn_cred; 138 int flags = cnp->cn_flags; 139 int nameiop = cnp->cn_nameiop; 140 struct thread *td = cnp->cn_td; 141 globaldata_t gd = mycpu; 142 143 bp = NULL; 144 slotoffset = -1; 145 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 146 /* 147 * XXX there was a soft-update diff about this I couldn't merge. 148 * I think this was the equiv. 149 */ 150 *vpp = NULL; 151 152 vdp = ap->a_dvp; 153 dp = VTOI(vdp); 154 lockparent = flags & CNP_LOCKPARENT; 155 wantparent = flags & (CNP_LOCKPARENT|CNP_WANTPARENT); 156 157 /* 158 * We now have a segment name to search for, and a directory to search. 159 * 160 * Suppress search for slots unless creating 161 * file and at end of pathname, in which case 162 * we watch for a place to put the new file in 163 * case it doesn't already exist. 164 */ 165 slotstatus = FOUND; 166 slotfreespace = slotsize = slotneeded = 0; 167 if (nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) { 168 slotstatus = NONE; 169 slotneeded = DIRECTSIZ(cnp->cn_namelen); 170 } 171 bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 172 173 #ifdef UFS_DIRHASH 174 /* 175 * Use dirhash for fast operations on large directories. The logic 176 * to determine whether to hash the directory is contained within 177 * ufsdirhash_build(); a zero return means that it decided to hash 178 * this directory and it successfully built up the hash table. 179 */ 180 if (ufsdirhash_build(dp) == 0) { 181 /* Look for a free slot if needed. */ 182 enduseful = dp->i_size; 183 if (slotstatus != FOUND) { 184 slotoffset = ufsdirhash_findfree(dp, slotneeded, 185 &slotsize); 186 if (slotoffset >= 0) { 187 slotstatus = COMPACT; 188 enduseful = ufsdirhash_enduseful(dp); 189 if (enduseful < 0) 190 enduseful = dp->i_size; 191 } 192 } 193 /* Look up the component. */ 194 numdirpasses = 1; 195 entryoffsetinblock = 0; /* silence compiler warning */ 196 switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, 197 &dp->i_offset, &bp, nameiop == NAMEI_DELETE ? &prevoff : NULL)) { 198 case 0: 199 ep = (struct direct *)((char *)bp->b_data + 200 (dp->i_offset & bmask)); 201 goto foundentry; 202 case ENOENT: 203 dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); 204 goto notfound; 205 default: 206 /* Something failed; just do a linear search. */ 207 break; 208 } 209 } 210 #endif /* UFS_DIRHASH */ 211 /* 212 * If there is cached information on a previous search of 213 * this directory, pick up where we last left off. 214 * We cache only lookups as these are the most common 215 * and have the greatest payoff. Caching CREATE has little 216 * benefit as it usually must search the entire directory 217 * to determine that the entry does not exist. Caching the 218 * location of the last DELETE or RENAME has not reduced 219 * profiling time and hence has been removed in the interest 220 * of simplicity. 221 */ 222 if (nameiop != NAMEI_LOOKUP || dp->i_diroff == 0 || 223 dp->i_diroff >= dp->i_size) { 224 entryoffsetinblock = 0; 225 dp->i_offset = 0; 226 numdirpasses = 1; 227 } else { 228 dp->i_offset = dp->i_diroff; 229 if ((entryoffsetinblock = dp->i_offset & bmask) && 230 (error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) 231 return (error); 232 numdirpasses = 2; 233 gd->gd_nchstats->ncs_2passes++; 234 } 235 prevoff = dp->i_offset; 236 endsearch = roundup2(dp->i_size, DIRBLKSIZ); 237 enduseful = 0; 238 239 searchloop: 240 while (dp->i_offset < endsearch) { 241 /* 242 * If necessary, get the next directory block. 243 */ 244 if ((dp->i_offset & bmask) == 0) { 245 if (bp != NULL) 246 brelse(bp); 247 error = 248 UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp); 249 if (error) 250 return (error); 251 entryoffsetinblock = 0; 252 } 253 /* 254 * If still looking for a slot, and at a DIRBLKSIZE 255 * boundary, have to start looking for free space again. 256 */ 257 if (slotstatus == NONE && 258 (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { 259 slotoffset = -1; 260 slotfreespace = 0; 261 } 262 /* 263 * Get pointer to next entry. 264 * Full validation checks are slow, so we only check 265 * enough to insure forward progress through the 266 * directory. Complete checks can be run by patching 267 * "dirchk" to be true. 268 */ 269 ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); 270 if (ep->d_reclen == 0 || ep->d_reclen > 271 DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 272 (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { 273 int i; 274 275 ufs_dirbad(dp, dp->i_offset, "mangled entry"); 276 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 277 dp->i_offset += i; 278 entryoffsetinblock += i; 279 continue; 280 } 281 282 /* 283 * If an appropriate sized slot has not yet been found, 284 * check to see if one is available. Also accumulate space 285 * in the current block so that we can determine if 286 * compaction is viable. 287 */ 288 if (slotstatus != FOUND) { 289 int size = ep->d_reclen; 290 291 if (ep->d_ino != 0) 292 size -= DIRSIZ(OFSFMT(vdp), ep); 293 if (size > 0) { 294 if (size >= slotneeded) { 295 slotstatus = FOUND; 296 slotoffset = dp->i_offset; 297 slotsize = ep->d_reclen; 298 } else if (slotstatus == NONE) { 299 slotfreespace += size; 300 if (slotoffset == -1) 301 slotoffset = dp->i_offset; 302 if (slotfreespace >= slotneeded) { 303 slotstatus = COMPACT; 304 slotsize = dp->i_offset + 305 ep->d_reclen - slotoffset; 306 } 307 } 308 } 309 } 310 311 /* 312 * Check for a name match. 313 */ 314 if (ep->d_ino) { 315 # if (BYTE_ORDER == LITTLE_ENDIAN) 316 if (OFSFMT(vdp)) 317 namlen = ep->d_type; 318 else 319 namlen = ep->d_namlen; 320 # else 321 namlen = ep->d_namlen; 322 # endif 323 if (namlen == cnp->cn_namelen && 324 (cnp->cn_nameptr[0] == ep->d_name[0]) && 325 !bcmp(cnp->cn_nameptr, ep->d_name, 326 (unsigned)namlen)) { 327 #ifdef UFS_DIRHASH 328 foundentry: 329 #endif 330 /* 331 * Save directory entry's inode number and 332 * reclen in ndp->ni_ufs area, and release 333 * directory buffer. 334 */ 335 if (vdp->v_mount->mnt_maxsymlinklen > 0 && 336 ep->d_type == DT_WHT) { 337 slotstatus = FOUND; 338 slotoffset = dp->i_offset; 339 slotsize = ep->d_reclen; 340 dp->i_reclen = slotsize; 341 enduseful = dp->i_size; 342 ap->a_cnp->cn_flags |= CNP_ISWHITEOUT; 343 numdirpasses--; 344 goto notfound; 345 } 346 dp->i_ino = ep->d_ino; 347 dp->i_reclen = ep->d_reclen; 348 goto found; 349 } 350 } 351 prevoff = dp->i_offset; 352 dp->i_offset += ep->d_reclen; 353 entryoffsetinblock += ep->d_reclen; 354 if (ep->d_ino) 355 enduseful = dp->i_offset; 356 } 357 notfound: 358 /* 359 * If we started in the middle of the directory and failed 360 * to find our target, we must check the beginning as well. 361 */ 362 if (numdirpasses == 2) { 363 numdirpasses--; 364 dp->i_offset = 0; 365 endsearch = dp->i_diroff; 366 goto searchloop; 367 } 368 if (bp != NULL) 369 brelse(bp); 370 /* 371 * If creating, and at end of pathname and current 372 * directory has not been removed, then can consider 373 * allowing file to be created. 374 */ 375 if ((nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME || 376 (nameiop == NAMEI_DELETE && 377 (ap->a_cnp->cn_flags & CNP_DOWHITEOUT) && 378 (ap->a_cnp->cn_flags & CNP_ISWHITEOUT))) && 379 dp->i_effnlink != 0) { 380 /* 381 * Access for write is interpreted as allowing 382 * creation of files in the directory. 383 */ 384 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_td); 385 if (error) 386 return (error); 387 /* 388 * Return an indication of where the new directory 389 * entry should be put. If we didn't find a slot, 390 * then set dp->i_count to 0 indicating 391 * that the new slot belongs at the end of the 392 * directory. If we found a slot, then the new entry 393 * can be put in the range from dp->i_offset to 394 * dp->i_offset + dp->i_count. 395 */ 396 if (slotstatus == NONE) { 397 dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); 398 dp->i_count = 0; 399 enduseful = dp->i_offset; 400 } else if (nameiop == NAMEI_DELETE) { 401 dp->i_offset = slotoffset; 402 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 403 dp->i_count = 0; 404 else 405 dp->i_count = dp->i_offset - prevoff; 406 } else { 407 dp->i_offset = slotoffset; 408 dp->i_count = slotsize; 409 if (enduseful < slotoffset + slotsize) 410 enduseful = slotoffset + slotsize; 411 } 412 dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); 413 dp->i_flag |= IN_CHANGE | IN_UPDATE; 414 /* 415 * We return with the directory locked, so that 416 * the parameters we set up above will still be 417 * valid if we actually decide to do a direnter(). 418 * We return ni_vp == NULL to indicate that the entry 419 * does not currently exist; we leave a pointer to 420 * the (locked) directory inode in ndp->ni_dvp. 421 * The pathname buffer is saved so that the name 422 * can be obtained later. 423 * 424 * NB - if the directory is unlocked, then this 425 * information cannot be used. 426 */ 427 if (!lockparent) { 428 VOP_UNLOCK(vdp, 0, td); 429 cnp->cn_flags |= CNP_PDIRUNLOCK; 430 } 431 return (EJUSTRETURN); 432 } 433 return (ENOENT); 434 435 found: 436 if (numdirpasses == 2) 437 gd->gd_nchstats->ncs_pass2++; 438 /* 439 * Check that directory length properly reflects presence 440 * of this entry. 441 */ 442 if (dp->i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { 443 ufs_dirbad(dp, dp->i_offset, "i_size too small"); 444 dp->i_size = dp->i_offset + DIRSIZ(OFSFMT(vdp), ep); 445 dp->i_flag |= IN_CHANGE | IN_UPDATE; 446 } 447 brelse(bp); 448 449 /* 450 * Found component in pathname. 451 * If the final component of path name, save information 452 * in the cache as to where the entry was found. 453 */ 454 if (nameiop == NAMEI_LOOKUP) 455 dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); 456 457 /* 458 * If deleting, and at end of pathname, return 459 * parameters which can be used to remove file. 460 * If the wantparent flag isn't set, we return only 461 * the directory (in ndp->ni_dvp), otherwise we go 462 * on and lock the inode, being careful with ".". 463 */ 464 if (nameiop == NAMEI_DELETE) { 465 /* 466 * Write access to directory required to delete files. 467 */ 468 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_td); 469 if (error) 470 return (error); 471 /* 472 * Return pointer to current entry in dp->i_offset, 473 * and distance past previous entry (if there 474 * is a previous entry in this block) in dp->i_count. 475 * Save directory inode pointer in ndp->ni_dvp for dirremove(). 476 */ 477 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 478 dp->i_count = 0; 479 else 480 dp->i_count = dp->i_offset - prevoff; 481 if (dp->i_number == dp->i_ino) { 482 vref(vdp); 483 *vpp = vdp; 484 return (0); 485 } 486 if (flags & CNP_ISDOTDOT) 487 VOP_UNLOCK(vdp, 0, td);/* race to get the inode */ 488 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 489 if (flags & CNP_ISDOTDOT) { 490 if (vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY, td) != 0) 491 cnp->cn_flags |= CNP_PDIRUNLOCK; 492 } 493 if (error) 494 return (error); 495 /* 496 * If directory is "sticky", then user must own 497 * the directory, or the file in it, else she 498 * may not delete it (unless she's root). This 499 * implements append-only directories. 500 */ 501 if ((dp->i_mode & ISVTX) && 502 cred->cr_uid != 0 && 503 cred->cr_uid != dp->i_uid && 504 VTOI(tdp)->i_uid != cred->cr_uid) { 505 vput(tdp); 506 return (EPERM); 507 } 508 *vpp = tdp; 509 if (!lockparent) { 510 VOP_UNLOCK(vdp, 0, td); 511 cnp->cn_flags |= CNP_PDIRUNLOCK; 512 } 513 return (0); 514 } 515 516 /* 517 * If rewriting (RENAME), return the inode and the 518 * information required to rewrite the present directory 519 * Must get inode of directory entry to verify it's a 520 * regular file, or empty directory. 521 */ 522 if (nameiop == NAMEI_RENAME && wantparent) { 523 if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_td)) != 0) 524 return (error); 525 /* 526 * Careful about locking second inode. 527 * This can only occur if the target is ".". 528 */ 529 if (dp->i_number == dp->i_ino) 530 return (EISDIR); 531 if (flags & CNP_ISDOTDOT) 532 VOP_UNLOCK(vdp, 0, td); /* race to get the inode */ 533 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 534 if (flags & CNP_ISDOTDOT) { 535 if (vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY, td) != 0) 536 cnp->cn_flags |= CNP_PDIRUNLOCK; 537 } 538 if (error) 539 return (error); 540 *vpp = tdp; 541 if (!lockparent) { 542 VOP_UNLOCK(vdp, 0, td); 543 cnp->cn_flags |= CNP_PDIRUNLOCK; 544 } 545 return (0); 546 } 547 548 /* 549 * Step through the translation in the name. We do not `vput' the 550 * directory because we may need it again if a symbolic link 551 * is relative to the current directory. Instead we save it 552 * unlocked as "pdp". We must get the target inode before unlocking 553 * the directory to insure that the inode will not be removed 554 * before we get it. We prevent deadlock by always fetching 555 * inodes from the root, moving down the directory tree. Thus 556 * when following backward pointers ".." we must unlock the 557 * parent directory before getting the requested directory. 558 * There is a potential race condition here if both the current 559 * and parent directories are removed before the VFS_VGET for the 560 * inode associated with ".." returns. We hope that this occurs 561 * infrequently since we cannot avoid this race condition without 562 * implementing a sophisticated deadlock detection algorithm. 563 * Note also that this simple deadlock detection scheme will not 564 * work if the filesystem has any hard links other than ".." 565 * that point backwards in the directory structure. 566 */ 567 pdp = vdp; 568 if (flags & CNP_ISDOTDOT) { 569 VOP_UNLOCK(pdp, 0, td); /* race to get the inode */ 570 cnp->cn_flags |= CNP_PDIRUNLOCK; 571 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) { 572 if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, td) == 0) 573 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 574 return (error); 575 } 576 if (lockparent) { 577 if ((error = vn_lock(pdp, LK_EXCLUSIVE, td)) != 0) { 578 vput(tdp); 579 return (error); 580 } 581 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 582 } 583 *vpp = tdp; 584 } else if (dp->i_number == dp->i_ino) { 585 vref(vdp); /* we want ourself, ie "." */ 586 *vpp = vdp; 587 } else { 588 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 589 if (error) 590 return (error); 591 if (!lockparent) { 592 VOP_UNLOCK(pdp, 0, td); 593 cnp->cn_flags |= CNP_PDIRUNLOCK; 594 } 595 *vpp = tdp; 596 } 597 return (0); 598 } 599 600 void 601 ufs_dirbad(struct inode *ip, doff_t offset, char *how) 602 { 603 struct mount *mp; 604 605 mp = ITOV(ip)->v_mount; 606 (void)printf("%s: bad dir ino %lu at offset %ld: %s\n", 607 mp->mnt_stat.f_mntfromname, (u_long)ip->i_number, (long)offset, how); 608 if ((mp->mnt_flag & MNT_RDONLY) == 0) 609 panic("ufs_dirbad: bad dir"); 610 } 611 612 /* 613 * Do consistency checking on a directory entry: 614 * record length must be multiple of 4 615 * entry must fit in rest of its DIRBLKSIZ block 616 * record must be large enough to contain entry 617 * name is not longer than MAXNAMLEN 618 * name must be as long as advertised, and null terminated 619 */ 620 int 621 ufs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock) 622 { 623 int i; 624 int namlen; 625 626 # if (BYTE_ORDER == LITTLE_ENDIAN) 627 if (OFSFMT(dp)) 628 namlen = ep->d_type; 629 else 630 namlen = ep->d_namlen; 631 # else 632 namlen = ep->d_namlen; 633 # endif 634 if ((ep->d_reclen & 0x3) != 0 || 635 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 636 ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { 637 /*return (1); */ 638 printf("First bad\n"); 639 goto bad; 640 } 641 if (ep->d_ino == 0) 642 return (0); 643 for (i = 0; i < namlen; i++) 644 if (ep->d_name[i] == '\0') { 645 /*return (1); */ 646 printf("Second bad\n"); 647 goto bad; 648 } 649 if (ep->d_name[i]) 650 goto bad; 651 return (0); 652 bad: 653 return (1); 654 } 655 656 /* 657 * Construct a new directory entry after a call to namei, using the 658 * parameters that it left in the componentname argument cnp. The 659 * argument ip is the inode to which the new directory entry will refer. 660 */ 661 void 662 ufs_makedirentry(struct inode *ip, struct componentname *cnp, 663 struct direct *newdirp) 664 { 665 666 newdirp->d_ino = ip->i_number; 667 newdirp->d_namlen = cnp->cn_namelen; 668 bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); 669 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) 670 newdirp->d_type = IFTODT(ip->i_mode); 671 else { 672 newdirp->d_type = 0; 673 # if (BYTE_ORDER == LITTLE_ENDIAN) 674 { u_char tmp = newdirp->d_namlen; 675 newdirp->d_namlen = newdirp->d_type; 676 newdirp->d_type = tmp; } 677 # endif 678 } 679 } 680 681 /* 682 * Write a directory entry after a call to namei, using the parameters 683 * that it left in the directory inode. The argument dirp is the new directory 684 * entry contents. Dvp is a pointer to the directory to be written, 685 * which was left locked by namei. Remaining parameters (dp->i_offset, 686 * dp->i_count) indicate how the space for the new entry is to be obtained. 687 * Non-null bp indicates that a directory is being created (for the 688 * soft dependency code). 689 */ 690 int 691 ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp, 692 struct componentname *cnp, struct buf *newdirbp) 693 { 694 struct ucred *cred; 695 struct thread *td = curthread; /* XXX */ 696 int newentrysize; 697 struct inode *dp; 698 struct buf *bp; 699 uint dsize; 700 struct direct *ep, *nep; 701 int error, ret, blkoff, loc, spacefree, flags; 702 char *dirbuf; 703 704 KKASSERT(td->td_proc); /* YYY use/require cred passed in cnp? */ 705 cred = td->td_proc->p_ucred; 706 707 dp = VTOI(dvp); 708 newentrysize = DIRSIZ(OFSFMT(dvp), dirp); 709 710 if (dp->i_count == 0) { 711 /* 712 * If dp->i_count is 0, then namei could find no 713 * space in the directory. Here, dp->i_offset will 714 * be on a directory block boundary and we will write the 715 * new entry into a fresh block. 716 */ 717 if (dp->i_offset & (DIRBLKSIZ - 1)) 718 panic("ufs_direnter: newblk"); 719 flags = B_CLRBUF; 720 if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) 721 flags |= B_SYNC; 722 if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, 723 cred, flags, &bp)) != 0) { 724 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 725 bdwrite(newdirbp); 726 return (error); 727 } 728 dp->i_size = dp->i_offset + DIRBLKSIZ; 729 dp->i_flag |= IN_CHANGE | IN_UPDATE; 730 vnode_pager_setsize(dvp, (u_long)dp->i_size); 731 dirp->d_reclen = DIRBLKSIZ; 732 blkoff = dp->i_offset & 733 (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); 734 bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); 735 #ifdef UFS_DIRHASH 736 if (dp->i_dirhash != NULL) { 737 ufsdirhash_newblk(dp, dp->i_offset); 738 ufsdirhash_add(dp, dirp, dp->i_offset); 739 ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, 740 dp->i_offset); 741 } 742 #endif 743 if (DOINGSOFTDEP(dvp)) { 744 /* 745 * Ensure that the entire newly allocated block is a 746 * valid directory so that future growth within the 747 * block does not have to ensure that the block is 748 * written before the inode. 749 */ 750 blkoff += DIRBLKSIZ; 751 while (blkoff < bp->b_bcount) { 752 ((struct direct *) 753 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 754 blkoff += DIRBLKSIZ; 755 } 756 softdep_setup_directory_add(bp, dp, dp->i_offset, 757 dirp->d_ino, newdirbp); 758 bdwrite(bp); 759 return (UFS_UPDATE(dvp, 0)); 760 } 761 if (DOINGASYNC(dvp)) { 762 bdwrite(bp); 763 return (UFS_UPDATE(dvp, 0)); 764 } 765 error = VOP_BWRITE(bp->b_vp, bp); 766 ret = UFS_UPDATE(dvp, 1); 767 if (error == 0) 768 return (ret); 769 return (error); 770 } 771 772 /* 773 * If dp->i_count is non-zero, then namei found space for the new 774 * entry in the range dp->i_offset to dp->i_offset + dp->i_count 775 * in the directory. To use this space, we may have to compact 776 * the entries located there, by copying them together towards the 777 * beginning of the block, leaving the free space in one usable 778 * chunk at the end. 779 */ 780 781 /* 782 * Increase size of directory if entry eats into new space. 783 * This should never push the size past a new multiple of 784 * DIRBLKSIZE. 785 * 786 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 787 */ 788 if (dp->i_offset + dp->i_count > dp->i_size) 789 dp->i_size = dp->i_offset + dp->i_count; 790 /* 791 * Get the block containing the space for the new directory entry. 792 */ 793 error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); 794 if (error) { 795 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 796 bdwrite(newdirbp); 797 return (error); 798 } 799 /* 800 * Find space for the new entry. In the simple case, the entry at 801 * offset base will have the space. If it does not, then namei 802 * arranged that compacting the region dp->i_offset to 803 * dp->i_offset + dp->i_count would yield the space. 804 */ 805 ep = (struct direct *)dirbuf; 806 dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0; 807 spacefree = ep->d_reclen - dsize; 808 for (loc = ep->d_reclen; loc < dp->i_count; ) { 809 nep = (struct direct *)(dirbuf + loc); 810 811 /* Trim the existing slot (NB: dsize may be zero). */ 812 ep->d_reclen = dsize; 813 ep = (struct direct *)((char *)ep + dsize); 814 815 /* Read nep->d_reclen now as the bcopy() may clobber it. */ 816 loc += nep->d_reclen; 817 if (nep->d_ino == 0) { 818 /* 819 * A mid-block unused entry. Such entries are 820 * never created by the kernel, but fsck_ffs 821 * can create them (and it doesn't fix them). 822 * 823 * Add up the free space, and initialise the 824 * relocated entry since we don't bcopy it. 825 */ 826 spacefree += nep->d_reclen; 827 ep->d_ino = 0; 828 dsize = 0; 829 continue; 830 } 831 dsize = DIRSIZ(OFSFMT(dvp), nep); 832 spacefree += nep->d_reclen - dsize; 833 #ifdef UFS_DIRHASH 834 if (dp->i_dirhash != NULL) 835 ufsdirhash_move(dp, nep, 836 dp->i_offset + ((char *)nep - dirbuf), 837 dp->i_offset + ((char *)ep - dirbuf)); 838 #endif 839 if (DOINGSOFTDEP(dvp)) 840 softdep_change_directoryentry_offset(dp, dirbuf, 841 (caddr_t)nep, (caddr_t)ep, dsize); 842 else 843 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 844 } 845 /* 846 * Here, `ep' points to a directory entry containing `dsize' in-use 847 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 848 * then the entry is completely unused (dsize == 0). The value 849 * of ep->d_reclen is always indeterminate. 850 * 851 * Update the pointer fields in the previous entry (if any), 852 * copy in the new entry, and write out the block. 853 */ 854 if (ep->d_ino == 0 || 855 (ep->d_ino == WINO && 856 bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 857 if (spacefree + dsize < newentrysize) 858 panic("ufs_direnter: compact1"); 859 dirp->d_reclen = spacefree + dsize; 860 } else { 861 if (spacefree < newentrysize) 862 panic("ufs_direnter: compact2"); 863 dirp->d_reclen = spacefree; 864 ep->d_reclen = dsize; 865 ep = (struct direct *)((char *)ep + dsize); 866 } 867 #ifdef UFS_DIRHASH 868 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 869 dirp->d_reclen == spacefree)) 870 ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); 871 #endif 872 bcopy((caddr_t)dirp, (caddr_t)ep, (uint)newentrysize); 873 #ifdef UFS_DIRHASH 874 if (dp->i_dirhash != NULL) 875 ufsdirhash_checkblock(dp, dirbuf - 876 (dp->i_offset & (DIRBLKSIZ - 1)), 877 dp->i_offset & ~(DIRBLKSIZ - 1)); 878 #endif 879 880 if (DOINGSOFTDEP(dvp)) { 881 softdep_setup_directory_add(bp, dp, 882 dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); 883 bdwrite(bp); 884 } else { 885 if (DOINGASYNC(dvp)) { 886 bdwrite(bp); 887 error = 0; 888 } else { 889 error = bowrite(bp); 890 } 891 } 892 dp->i_flag |= IN_CHANGE | IN_UPDATE; 893 /* 894 * If all went well, and the directory can be shortened, proceed 895 * with the truncation. Note that we have to unlock the inode for 896 * the entry that we just entered, as the truncation may need to 897 * lock other inodes which can lead to deadlock if we also hold a 898 * lock on the newly entered node. 899 */ 900 if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { 901 if (tvp != NULL) 902 VOP_UNLOCK(tvp, 0, td); 903 #ifdef UFS_DIRHASH 904 if (dp->i_dirhash != NULL) 905 ufsdirhash_dirtrunc(dp, dp->i_endoff); 906 #endif 907 (void)UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cred, td); 908 if (tvp != NULL) 909 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, td); 910 } 911 return (error); 912 } 913 914 /* 915 * Remove a directory entry after a call to namei, using 916 * the parameters which it left in the directory inode. The entry 917 * dp->i_offset contains the offset into the directory of the 918 * entry to be eliminated. The dp->i_count field contains the 919 * size of the previous record in the directory. If this 920 * is 0, the first entry is being deleted, so we need only 921 * zero the inode number to mark the entry as free. If the 922 * entry is not the first in the directory, we must reclaim 923 * the space of the now empty record by adding the record size 924 * to the size of the previous entry. 925 */ 926 int 927 ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir) 928 { 929 struct inode *dp; 930 struct direct *ep; 931 struct buf *bp; 932 int error; 933 934 dp = VTOI(dvp); 935 936 if (flags & CNP_DOWHITEOUT) { 937 /* 938 * Whiteout entry: set d_ino to WINO. 939 */ 940 if ((error = 941 UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0) 942 return (error); 943 ep->d_ino = WINO; 944 ep->d_type = DT_WHT; 945 goto out; 946 } 947 948 if ((error = UFS_BLKATOFF(dvp, 949 (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) 950 return (error); 951 #ifdef UFS_DIRHASH 952 /* 953 * Remove the dirhash entry. This is complicated by the fact 954 * that `ep' is the previous entry when dp->i_count != 0. 955 */ 956 if (dp->i_dirhash != NULL) 957 ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : 958 (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); 959 #endif 960 if (dp->i_count == 0) { 961 /* 962 * First entry in block: set d_ino to zero. 963 */ 964 ep->d_ino = 0; 965 } else { 966 /* 967 * Collapse new free space into previous entry. 968 */ 969 ep->d_reclen += dp->i_reclen; 970 } 971 #ifdef UFS_DIRHASH 972 if (dp->i_dirhash != NULL) 973 ufsdirhash_checkblock(dp, (char *)ep - 974 ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), 975 dp->i_offset & ~(DIRBLKSIZ - 1)); 976 #endif 977 out: 978 if (DOINGSOFTDEP(dvp)) { 979 if (ip) { 980 ip->i_effnlink--; 981 softdep_change_linkcnt(ip); 982 softdep_setup_remove(bp, dp, ip, isrmdir); 983 } 984 if (softdep_slowdown(dvp)) { 985 error = VOP_BWRITE(bp->b_vp, bp); 986 } else { 987 bdwrite(bp); 988 error = 0; 989 } 990 } else { 991 if (ip) { 992 ip->i_effnlink--; 993 ip->i_nlink--; 994 ip->i_flag |= IN_CHANGE; 995 } 996 if (flags & CNP_DOWHITEOUT) 997 error = VOP_BWRITE(bp->b_vp, bp); 998 else if (DOINGASYNC(dvp) && dp->i_count != 0) { 999 bdwrite(bp); 1000 error = 0; 1001 } else 1002 error = bowrite(bp); 1003 } 1004 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1005 return (error); 1006 } 1007 1008 /* 1009 * Rewrite an existing directory entry to point at the inode 1010 * supplied. The parameters describing the directory entry are 1011 * set up by a call to namei. 1012 */ 1013 int 1014 ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype, 1015 int isrmdir) 1016 { 1017 struct buf *bp; 1018 struct direct *ep; 1019 struct vnode *vdp = ITOV(dp); 1020 int error; 1021 1022 error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); 1023 if (error) 1024 return (error); 1025 ep->d_ino = newinum; 1026 if (!OFSFMT(vdp)) 1027 ep->d_type = newtype; 1028 oip->i_effnlink--; 1029 if (DOINGSOFTDEP(vdp)) { 1030 softdep_change_linkcnt(oip); 1031 softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); 1032 bdwrite(bp); 1033 } else { 1034 oip->i_nlink--; 1035 oip->i_flag |= IN_CHANGE; 1036 if (DOINGASYNC(vdp)) { 1037 bdwrite(bp); 1038 error = 0; 1039 } else { 1040 error = bowrite(bp); 1041 } 1042 } 1043 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1044 return (error); 1045 } 1046 1047 /* 1048 * Check if a directory is empty or not. 1049 * Inode supplied must be locked. 1050 * 1051 * Using a struct dirtemplate here is not precisely 1052 * what we want, but better than using a struct direct. 1053 * 1054 * NB: does not handle corrupted directories. 1055 */ 1056 int 1057 ufs_dirempty(struct inode *ip, ino_t parentino, struct ucred *cred) 1058 { 1059 off_t off; 1060 struct dirtemplate dbuf; 1061 struct direct *dp = (struct direct *)&dbuf; 1062 int error, count, namlen; 1063 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1064 1065 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1066 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, 1067 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL); 1068 /* 1069 * Since we read MINDIRSIZ, residual must 1070 * be 0 unless we're at end of file. 1071 */ 1072 if (error || count != 0) 1073 return (0); 1074 /* avoid infinite loops */ 1075 if (dp->d_reclen == 0) 1076 return (0); 1077 /* skip empty entries */ 1078 if (dp->d_ino == 0 || dp->d_ino == WINO) 1079 continue; 1080 /* accept only "." and ".." */ 1081 # if (BYTE_ORDER == LITTLE_ENDIAN) 1082 if (OFSFMT(ITOV(ip))) 1083 namlen = dp->d_type; 1084 else 1085 namlen = dp->d_namlen; 1086 # else 1087 namlen = dp->d_namlen; 1088 # endif 1089 if (namlen > 2) 1090 return (0); 1091 if (dp->d_name[0] != '.') 1092 return (0); 1093 /* 1094 * At this point namlen must be 1 or 2. 1095 * 1 implies ".", 2 implies ".." if second 1096 * char is also "." 1097 */ 1098 if (namlen == 1 && dp->d_ino == ip->i_number) 1099 continue; 1100 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1101 continue; 1102 return (0); 1103 } 1104 return (1); 1105 } 1106 1107 /* 1108 * Check if source directory is in the path of the target directory. 1109 * Target is supplied locked, source is unlocked. 1110 * The target is always vput before returning. 1111 */ 1112 int 1113 ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred) 1114 { 1115 struct vnode *vp; 1116 int error, rootino, namlen; 1117 struct dirtemplate dirbuf; 1118 1119 vp = ITOV(target); 1120 if (target->i_number == source->i_number) { 1121 error = EEXIST; 1122 goto out; 1123 } 1124 rootino = ROOTINO; 1125 error = 0; 1126 if (target->i_number == rootino) 1127 goto out; 1128 1129 for (;;) { 1130 if (vp->v_type != VDIR) { 1131 error = ENOTDIR; 1132 break; 1133 } 1134 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, 1135 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1136 IO_NODELOCKED, cred, (int *)0, NULL); 1137 if (error != 0) 1138 break; 1139 # if (BYTE_ORDER == LITTLE_ENDIAN) 1140 if (OFSFMT(vp)) 1141 namlen = dirbuf.dotdot_type; 1142 else 1143 namlen = dirbuf.dotdot_namlen; 1144 # else 1145 namlen = dirbuf.dotdot_namlen; 1146 # endif 1147 if (namlen != 2 || 1148 dirbuf.dotdot_name[0] != '.' || 1149 dirbuf.dotdot_name[1] != '.') { 1150 error = ENOTDIR; 1151 break; 1152 } 1153 if (dirbuf.dotdot_ino == source->i_number) { 1154 error = EINVAL; 1155 break; 1156 } 1157 if (dirbuf.dotdot_ino == rootino) 1158 break; 1159 vput(vp); 1160 error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp); 1161 if (error) { 1162 vp = NULL; 1163 break; 1164 } 1165 } 1166 1167 out: 1168 if (error == ENOTDIR) 1169 printf("checkpath: .. not a directory\n"); 1170 if (vp != NULL) 1171 vput(vp); 1172 return (error); 1173 } 1174