1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 36 * SUCH DAMAGE. 37 * 38 * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 39 * $FreeBSD: src/sys/ufs/ufs/ufs_lookup.c,v 1.33.2.7 2001/09/22 19:22:13 iedowse Exp $ 40 * $DragonFly: src/sys/vfs/ufs/ufs_lookup.c,v 1.13 2004/07/18 19:43:48 drhodus Exp $ 41 */ 42 43 #include "opt_ufs.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/kernel.h> 48 #include <sys/buf.h> 49 #include <sys/proc.h> 50 #include <sys/namei.h> 51 #include <sys/stat.h> 52 #include <sys/mount.h> 53 #include <sys/vnode.h> 54 #include <sys/sysctl.h> 55 56 #include <vm/vm.h> 57 #include <vm/vm_extern.h> 58 59 #include "quota.h" 60 #include "inode.h" 61 #include "dir.h" 62 #ifdef UFS_DIRHASH 63 #include "dirhash.h" 64 #endif 65 #include "ufsmount.h" 66 #include "ufs_extern.h" 67 68 #ifdef DIAGNOSTIC 69 int dirchk = 1; 70 #else 71 int dirchk = 0; 72 #endif 73 74 SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, ""); 75 76 /* true if old FS format...*/ 77 #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 78 79 /* 80 * Convert a component of a pathname into a pointer to a locked inode. 81 * This is a very central and rather complicated routine. 82 * If the filesystem is not maintained in a strict tree hierarchy, 83 * this can result in a deadlock situation (see comments in code below). 84 * 85 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending 86 * on whether the name is to be looked up, created, renamed, or deleted. 
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and a vput
 * instead of two vputs.
 *
 * This routine is actually used as the VOP_CACHEDLOOKUP method, and the
 * filesystem employs the generic vfs_cache_lookup() as the VOP_LOOKUP
 * method.
 *
 * vfs_cache_lookup() performs the following for us:
 *	check that it is a directory
 *	check accessibility of directory
 *	check for modification attempts on read-only mounts
 *	if name found in cache
 *	    if at end of path and deleting or creating
 *		drop it
 *	     else
 *		return name.
109 * return VOP_CACHEDLOOKUP() 110 * 111 * Overall outline of ufs_lookup: 112 * 113 * search for name in directory, to found or notfound 114 * notfound: 115 * if creating, return locked directory, leaving info on available slots 116 * else return error 117 * found: 118 * if at end of path and deleting, return information to allow delete 119 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target 120 * inode and return info to allow rewrite 121 * if not at end, add name to cache; if at end and neither creating 122 * nor deleting, add name to cache 123 * 124 * ufs_lookup(struct vnode *a_dvp, struct vnode **a_vpp, 125 * struct componentname *a_cnp) 126 */ 127 int 128 ufs_lookup(struct vop_cachedlookup_args *ap) 129 { 130 struct vnode *vdp; /* vnode for directory being searched */ 131 struct inode *dp; /* inode for directory being searched */ 132 struct buf *bp; /* a buffer of directory entries */ 133 struct direct *ep; /* the current directory entry */ 134 int entryoffsetinblock; /* offset of ep in bp's buffer */ 135 enum {NONE, COMPACT, FOUND} slotstatus; 136 doff_t slotoffset; /* offset of area with free space */ 137 int slotsize; /* size of area at slotoffset */ 138 int slotfreespace; /* amount of space free in slot */ 139 int slotneeded; /* size of the entry we're seeking */ 140 int numdirpasses; /* strategy for directory search */ 141 doff_t endsearch; /* offset to end directory search */ 142 doff_t prevoff; /* prev entry dp->i_offset */ 143 struct vnode *pdp; /* saved dp during symlink work */ 144 struct vnode *tdp; /* returned by VFS_VGET */ 145 doff_t enduseful; /* pointer past last used dir slot */ 146 u_long bmask; /* block offset mask */ 147 int lockparent; /* 1 => lockparent flag is set */ 148 int wantparent; /* 1 => wantparent or lockparent flag */ 149 int namlen, error; 150 struct vnode **vpp = ap->a_vpp; 151 struct componentname *cnp = ap->a_cnp; 152 struct ucred *cred = cnp->cn_cred; 153 int flags = cnp->cn_flags; 154 int nameiop = 
cnp->cn_nameiop; 155 struct thread *td = cnp->cn_td; 156 globaldata_t gd = mycpu; 157 158 bp = NULL; 159 slotoffset = -1; 160 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 161 /* 162 * XXX there was a soft-update diff about this I couldn't merge. 163 * I think this was the equiv. 164 */ 165 *vpp = NULL; 166 167 vdp = ap->a_dvp; 168 dp = VTOI(vdp); 169 lockparent = flags & CNP_LOCKPARENT; 170 wantparent = flags & (CNP_LOCKPARENT|CNP_WANTPARENT); 171 172 /* 173 * We now have a segment name to search for, and a directory to search. 174 * 175 * Suppress search for slots unless creating 176 * file and at end of pathname, in which case 177 * we watch for a place to put the new file in 178 * case it doesn't already exist. 179 */ 180 slotstatus = FOUND; 181 slotfreespace = slotsize = slotneeded = 0; 182 if ((nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) && 183 (flags & CNP_ISLASTCN)) { 184 slotstatus = NONE; 185 slotneeded = DIRECTSIZ(cnp->cn_namelen); 186 } 187 bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 188 189 #ifdef UFS_DIRHASH 190 /* 191 * Use dirhash for fast operations on large directories. The logic 192 * to determine whether to hash the directory is contained within 193 * ufsdirhash_build(); a zero return means that it decided to hash 194 * this directory and it successfully built up the hash table. 195 */ 196 if (ufsdirhash_build(dp) == 0) { 197 /* Look for a free slot if needed. */ 198 enduseful = dp->i_size; 199 if (slotstatus != FOUND) { 200 slotoffset = ufsdirhash_findfree(dp, slotneeded, 201 &slotsize); 202 if (slotoffset >= 0) { 203 slotstatus = COMPACT; 204 enduseful = ufsdirhash_enduseful(dp); 205 if (enduseful < 0) 206 enduseful = dp->i_size; 207 } 208 } 209 /* Look up the component. */ 210 numdirpasses = 1; 211 entryoffsetinblock = 0; /* silence compiler warning */ 212 switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, 213 &dp->i_offset, &bp, nameiop == NAMEI_DELETE ? 
&prevoff : NULL)) { 214 case 0: 215 ep = (struct direct *)((char *)bp->b_data + 216 (dp->i_offset & bmask)); 217 goto foundentry; 218 case ENOENT: 219 dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); 220 goto notfound; 221 default: 222 /* Something failed; just do a linear search. */ 223 break; 224 } 225 } 226 #endif /* UFS_DIRHASH */ 227 /* 228 * If there is cached information on a previous search of 229 * this directory, pick up where we last left off. 230 * We cache only lookups as these are the most common 231 * and have the greatest payoff. Caching CREATE has little 232 * benefit as it usually must search the entire directory 233 * to determine that the entry does not exist. Caching the 234 * location of the last DELETE or RENAME has not reduced 235 * profiling time and hence has been removed in the interest 236 * of simplicity. 237 */ 238 if (nameiop != NAMEI_LOOKUP || dp->i_diroff == 0 || 239 dp->i_diroff >= dp->i_size) { 240 entryoffsetinblock = 0; 241 dp->i_offset = 0; 242 numdirpasses = 1; 243 } else { 244 dp->i_offset = dp->i_diroff; 245 if ((entryoffsetinblock = dp->i_offset & bmask) && 246 (error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) 247 return (error); 248 numdirpasses = 2; 249 gd->gd_nchstats->ncs_2passes++; 250 } 251 prevoff = dp->i_offset; 252 endsearch = roundup2(dp->i_size, DIRBLKSIZ); 253 enduseful = 0; 254 255 searchloop: 256 while (dp->i_offset < endsearch) { 257 /* 258 * If necessary, get the next directory block. 259 */ 260 if ((dp->i_offset & bmask) == 0) { 261 if (bp != NULL) 262 brelse(bp); 263 error = 264 UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp); 265 if (error) 266 return (error); 267 entryoffsetinblock = 0; 268 } 269 /* 270 * If still looking for a slot, and at a DIRBLKSIZE 271 * boundary, have to start looking for free space again. 272 */ 273 if (slotstatus == NONE && 274 (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { 275 slotoffset = -1; 276 slotfreespace = 0; 277 } 278 /* 279 * Get pointer to next entry. 
280 * Full validation checks are slow, so we only check 281 * enough to insure forward progress through the 282 * directory. Complete checks can be run by patching 283 * "dirchk" to be true. 284 */ 285 ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); 286 if (ep->d_reclen == 0 || ep->d_reclen > 287 DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 288 (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { 289 int i; 290 291 ufs_dirbad(dp, dp->i_offset, "mangled entry"); 292 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 293 dp->i_offset += i; 294 entryoffsetinblock += i; 295 continue; 296 } 297 298 /* 299 * If an appropriate sized slot has not yet been found, 300 * check to see if one is available. Also accumulate space 301 * in the current block so that we can determine if 302 * compaction is viable. 303 */ 304 if (slotstatus != FOUND) { 305 int size = ep->d_reclen; 306 307 if (ep->d_ino != 0) 308 size -= DIRSIZ(OFSFMT(vdp), ep); 309 if (size > 0) { 310 if (size >= slotneeded) { 311 slotstatus = FOUND; 312 slotoffset = dp->i_offset; 313 slotsize = ep->d_reclen; 314 } else if (slotstatus == NONE) { 315 slotfreespace += size; 316 if (slotoffset == -1) 317 slotoffset = dp->i_offset; 318 if (slotfreespace >= slotneeded) { 319 slotstatus = COMPACT; 320 slotsize = dp->i_offset + 321 ep->d_reclen - slotoffset; 322 } 323 } 324 } 325 } 326 327 /* 328 * Check for a name match. 329 */ 330 if (ep->d_ino) { 331 # if (BYTE_ORDER == LITTLE_ENDIAN) 332 if (OFSFMT(vdp)) 333 namlen = ep->d_type; 334 else 335 namlen = ep->d_namlen; 336 # else 337 namlen = ep->d_namlen; 338 # endif 339 if (namlen == cnp->cn_namelen && 340 (cnp->cn_nameptr[0] == ep->d_name[0]) && 341 !bcmp(cnp->cn_nameptr, ep->d_name, 342 (unsigned)namlen)) { 343 #ifdef UFS_DIRHASH 344 foundentry: 345 #endif 346 /* 347 * Save directory entry's inode number and 348 * reclen in ndp->ni_ufs area, and release 349 * directory buffer. 
350 */ 351 if (vdp->v_mount->mnt_maxsymlinklen > 0 && 352 ep->d_type == DT_WHT) { 353 slotstatus = FOUND; 354 slotoffset = dp->i_offset; 355 slotsize = ep->d_reclen; 356 dp->i_reclen = slotsize; 357 enduseful = dp->i_size; 358 ap->a_cnp->cn_flags |= CNP_ISWHITEOUT; 359 numdirpasses--; 360 goto notfound; 361 } 362 dp->i_ino = ep->d_ino; 363 dp->i_reclen = ep->d_reclen; 364 goto found; 365 } 366 } 367 prevoff = dp->i_offset; 368 dp->i_offset += ep->d_reclen; 369 entryoffsetinblock += ep->d_reclen; 370 if (ep->d_ino) 371 enduseful = dp->i_offset; 372 } 373 notfound: 374 /* 375 * If we started in the middle of the directory and failed 376 * to find our target, we must check the beginning as well. 377 */ 378 if (numdirpasses == 2) { 379 numdirpasses--; 380 dp->i_offset = 0; 381 endsearch = dp->i_diroff; 382 goto searchloop; 383 } 384 if (bp != NULL) 385 brelse(bp); 386 /* 387 * If creating, and at end of pathname and current 388 * directory has not been removed, then can consider 389 * allowing file to be created. 390 */ 391 if ((nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME || 392 (nameiop == NAMEI_DELETE && 393 (ap->a_cnp->cn_flags & CNP_DOWHITEOUT) && 394 (ap->a_cnp->cn_flags & CNP_ISWHITEOUT))) && 395 (flags & CNP_ISLASTCN) && dp->i_effnlink != 0) { 396 /* 397 * Access for write is interpreted as allowing 398 * creation of files in the directory. 399 */ 400 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_td); 401 if (error) 402 return (error); 403 /* 404 * Return an indication of where the new directory 405 * entry should be put. If we didn't find a slot, 406 * then set dp->i_count to 0 indicating 407 * that the new slot belongs at the end of the 408 * directory. If we found a slot, then the new entry 409 * can be put in the range from dp->i_offset to 410 * dp->i_offset + dp->i_count. 
411 */ 412 if (slotstatus == NONE) { 413 dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); 414 dp->i_count = 0; 415 enduseful = dp->i_offset; 416 } else if (nameiop == NAMEI_DELETE) { 417 dp->i_offset = slotoffset; 418 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 419 dp->i_count = 0; 420 else 421 dp->i_count = dp->i_offset - prevoff; 422 } else { 423 dp->i_offset = slotoffset; 424 dp->i_count = slotsize; 425 if (enduseful < slotoffset + slotsize) 426 enduseful = slotoffset + slotsize; 427 } 428 dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); 429 dp->i_flag |= IN_CHANGE | IN_UPDATE; 430 /* 431 * We return with the directory locked, so that 432 * the parameters we set up above will still be 433 * valid if we actually decide to do a direnter(). 434 * We return ni_vp == NULL to indicate that the entry 435 * does not currently exist; we leave a pointer to 436 * the (locked) directory inode in ndp->ni_dvp. 437 * The pathname buffer is saved so that the name 438 * can be obtained later. 439 * 440 * NB - if the directory is unlocked, then this 441 * information cannot be used. 442 */ 443 cnp->cn_flags |= CNP_SAVENAME; 444 if (!lockparent) { 445 VOP_UNLOCK(vdp, NULL, 0, td); 446 cnp->cn_flags |= CNP_PDIRUNLOCK; 447 } 448 return (EJUSTRETURN); 449 } 450 /* 451 * Insert name into cache (as non-existent) if appropriate. 452 */ 453 if ((cnp->cn_flags & CNP_MAKEENTRY) && nameiop != NAMEI_CREATE) 454 cache_enter(vdp, NCPNULL, *vpp, cnp); 455 return (ENOENT); 456 457 found: 458 if (numdirpasses == 2) 459 gd->gd_nchstats->ncs_pass2++; 460 /* 461 * Check that directory length properly reflects presence 462 * of this entry. 463 */ 464 if (dp->i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { 465 ufs_dirbad(dp, dp->i_offset, "i_size too small"); 466 dp->i_size = dp->i_offset + DIRSIZ(OFSFMT(vdp), ep); 467 dp->i_flag |= IN_CHANGE | IN_UPDATE; 468 } 469 brelse(bp); 470 471 /* 472 * Found component in pathname. 
473 * If the final component of path name, save information 474 * in the cache as to where the entry was found. 475 */ 476 if ((flags & CNP_ISLASTCN) && nameiop == NAMEI_LOOKUP) 477 dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); 478 479 /* 480 * If deleting, and at end of pathname, return 481 * parameters which can be used to remove file. 482 * If the wantparent flag isn't set, we return only 483 * the directory (in ndp->ni_dvp), otherwise we go 484 * on and lock the inode, being careful with ".". 485 */ 486 if (nameiop == NAMEI_DELETE && (flags & CNP_ISLASTCN)) { 487 /* 488 * Write access to directory required to delete files. 489 */ 490 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_td); 491 if (error) 492 return (error); 493 /* 494 * Return pointer to current entry in dp->i_offset, 495 * and distance past previous entry (if there 496 * is a previous entry in this block) in dp->i_count. 497 * Save directory inode pointer in ndp->ni_dvp for dirremove(). 498 */ 499 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 500 dp->i_count = 0; 501 else 502 dp->i_count = dp->i_offset - prevoff; 503 if (dp->i_number == dp->i_ino) { 504 vref(vdp); 505 *vpp = vdp; 506 return (0); 507 } 508 if (flags & CNP_ISDOTDOT) 509 VOP_UNLOCK(vdp, NULL, 0, td);/* race to get the inode */ 510 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 511 if (flags & CNP_ISDOTDOT) { 512 if (vn_lock(vdp, NULL, LK_EXCLUSIVE | LK_RETRY, td) != 0) 513 cnp->cn_flags |= CNP_PDIRUNLOCK; 514 } 515 if (error) 516 return (error); 517 /* 518 * If directory is "sticky", then user must own 519 * the directory, or the file in it, else she 520 * may not delete it (unless she's root). This 521 * implements append-only directories. 
522 */ 523 if ((dp->i_mode & ISVTX) && 524 cred->cr_uid != 0 && 525 cred->cr_uid != dp->i_uid && 526 VTOI(tdp)->i_uid != cred->cr_uid) { 527 vput(tdp); 528 return (EPERM); 529 } 530 *vpp = tdp; 531 if (!lockparent) { 532 VOP_UNLOCK(vdp, NULL, 0, td); 533 cnp->cn_flags |= CNP_PDIRUNLOCK; 534 } 535 return (0); 536 } 537 538 /* 539 * If rewriting (RENAME), return the inode and the 540 * information required to rewrite the present directory 541 * Must get inode of directory entry to verify it's a 542 * regular file, or empty directory. 543 */ 544 if (nameiop == NAMEI_RENAME && wantparent && (flags & CNP_ISLASTCN)) { 545 if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_td)) != 0) 546 return (error); 547 /* 548 * Careful about locking second inode. 549 * This can only occur if the target is ".". 550 */ 551 if (dp->i_number == dp->i_ino) 552 return (EISDIR); 553 if (flags & CNP_ISDOTDOT) 554 VOP_UNLOCK(vdp, NULL, 0, td); /* race to get the inode */ 555 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 556 if (flags & CNP_ISDOTDOT) { 557 if (vn_lock(vdp, NULL, LK_EXCLUSIVE | LK_RETRY, td) != 0) 558 cnp->cn_flags |= CNP_PDIRUNLOCK; 559 } 560 if (error) 561 return (error); 562 *vpp = tdp; 563 cnp->cn_flags |= CNP_SAVENAME; 564 if (!lockparent) { 565 VOP_UNLOCK(vdp, NULL, 0, td); 566 cnp->cn_flags |= CNP_PDIRUNLOCK; 567 } 568 return (0); 569 } 570 571 /* 572 * Step through the translation in the name. We do not `vput' the 573 * directory because we may need it again if a symbolic link 574 * is relative to the current directory. Instead we save it 575 * unlocked as "pdp". We must get the target inode before unlocking 576 * the directory to insure that the inode will not be removed 577 * before we get it. We prevent deadlock by always fetching 578 * inodes from the root, moving down the directory tree. Thus 579 * when following backward pointers ".." we must unlock the 580 * parent directory before getting the requested directory. 
581 * There is a potential race condition here if both the current 582 * and parent directories are removed before the VFS_VGET for the 583 * inode associated with ".." returns. We hope that this occurs 584 * infrequently since we cannot avoid this race condition without 585 * implementing a sophisticated deadlock detection algorithm. 586 * Note also that this simple deadlock detection scheme will not 587 * work if the filesystem has any hard links other than ".." 588 * that point backwards in the directory structure. 589 */ 590 pdp = vdp; 591 if (flags & CNP_ISDOTDOT) { 592 VOP_UNLOCK(pdp, NULL, 0, td); /* race to get the inode */ 593 cnp->cn_flags |= CNP_PDIRUNLOCK; 594 if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) { 595 if (vn_lock(pdp, NULL, LK_EXCLUSIVE | LK_RETRY, td) == 0) 596 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 597 return (error); 598 } 599 if (lockparent && (flags & CNP_ISLASTCN)) { 600 if ((error = vn_lock(pdp, NULL, LK_EXCLUSIVE, td)) != 0) { 601 vput(tdp); 602 return (error); 603 } 604 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 605 } 606 *vpp = tdp; 607 } else if (dp->i_number == dp->i_ino) { 608 vref(vdp); /* we want ourself, ie "." */ 609 *vpp = vdp; 610 } else { 611 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 612 if (error) 613 return (error); 614 if (!lockparent || !(flags & CNP_ISLASTCN)) { 615 VOP_UNLOCK(pdp, NULL, 0, td); 616 cnp->cn_flags |= CNP_PDIRUNLOCK; 617 } 618 *vpp = tdp; 619 } 620 621 /* 622 * Insert name into cache if appropriate. 
623 */ 624 if (cnp->cn_flags & CNP_MAKEENTRY) 625 cache_enter(vdp, NCPNULL, *vpp, cnp); 626 return (0); 627 } 628 629 void 630 ufs_dirbad(struct inode *ip, doff_t offset, char *how) 631 { 632 struct mount *mp; 633 634 mp = ITOV(ip)->v_mount; 635 (void)printf("%s: bad dir ino %lu at offset %ld: %s\n", 636 mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how); 637 if ((mp->mnt_flag & MNT_RDONLY) == 0) 638 panic("ufs_dirbad: bad dir"); 639 } 640 641 /* 642 * Do consistency checking on a directory entry: 643 * record length must be multiple of 4 644 * entry must fit in rest of its DIRBLKSIZ block 645 * record must be large enough to contain entry 646 * name is not longer than MAXNAMLEN 647 * name must be as long as advertised, and null terminated 648 */ 649 int 650 ufs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock) 651 { 652 int i; 653 int namlen; 654 655 # if (BYTE_ORDER == LITTLE_ENDIAN) 656 if (OFSFMT(dp)) 657 namlen = ep->d_type; 658 else 659 namlen = ep->d_namlen; 660 # else 661 namlen = ep->d_namlen; 662 # endif 663 if ((ep->d_reclen & 0x3) != 0 || 664 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 665 ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { 666 /*return (1); */ 667 printf("First bad\n"); 668 goto bad; 669 } 670 if (ep->d_ino == 0) 671 return (0); 672 for (i = 0; i < namlen; i++) 673 if (ep->d_name[i] == '\0') { 674 /*return (1); */ 675 printf("Second bad\n"); 676 goto bad; 677 } 678 if (ep->d_name[i]) 679 goto bad; 680 return (0); 681 bad: 682 return (1); 683 } 684 685 /* 686 * Construct a new directory entry after a call to namei, using the 687 * parameters that it left in the componentname argument cnp. The 688 * argument ip is the inode to which the new directory entry will refer. 
689 */ 690 void 691 ufs_makedirentry(struct inode *ip, struct componentname *cnp, 692 struct direct *newdirp) 693 { 694 695 #ifdef DIAGNOSTIC 696 if ((cnp->cn_flags & CNP_SAVENAME) == 0) 697 panic("ufs_makedirentry: missing name"); 698 #endif 699 newdirp->d_ino = ip->i_number; 700 newdirp->d_namlen = cnp->cn_namelen; 701 bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); 702 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) 703 newdirp->d_type = IFTODT(ip->i_mode); 704 else { 705 newdirp->d_type = 0; 706 # if (BYTE_ORDER == LITTLE_ENDIAN) 707 { u_char tmp = newdirp->d_namlen; 708 newdirp->d_namlen = newdirp->d_type; 709 newdirp->d_type = tmp; } 710 # endif 711 } 712 } 713 714 /* 715 * Write a directory entry after a call to namei, using the parameters 716 * that it left in nameidata. The argument dirp is the new directory 717 * entry contents. Dvp is a pointer to the directory to be written, 718 * which was left locked by namei. Remaining parameters (dp->i_offset, 719 * dp->i_count) indicate how the space for the new entry is to be obtained. 720 * Non-null bp indicates that a directory is being created (for the 721 * soft dependency code). 722 */ 723 int 724 ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp, 725 struct componentname *cnp, struct buf *newdirbp) 726 { 727 struct ucred *cred; 728 struct thread *td = curthread; /* XXX */ 729 int newentrysize; 730 struct inode *dp; 731 struct buf *bp; 732 uint dsize; 733 struct direct *ep, *nep; 734 int error, ret, blkoff, loc, spacefree, flags; 735 char *dirbuf; 736 737 KKASSERT(td->td_proc); /* YYY use/require cred passed in cnp? */ 738 cred = td->td_proc->p_ucred; 739 740 dp = VTOI(dvp); 741 newentrysize = DIRSIZ(OFSFMT(dvp), dirp); 742 743 if (dp->i_count == 0) { 744 /* 745 * If dp->i_count is 0, then namei could find no 746 * space in the directory. Here, dp->i_offset will 747 * be on a directory block boundary and we will write the 748 * new entry into a fresh block. 
749 */ 750 if (dp->i_offset & (DIRBLKSIZ - 1)) 751 panic("ufs_direnter: newblk"); 752 flags = B_CLRBUF; 753 if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) 754 flags |= B_SYNC; 755 if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, 756 cred, flags, &bp)) != 0) { 757 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 758 bdwrite(newdirbp); 759 return (error); 760 } 761 dp->i_size = dp->i_offset + DIRBLKSIZ; 762 dp->i_flag |= IN_CHANGE | IN_UPDATE; 763 vnode_pager_setsize(dvp, (u_long)dp->i_size); 764 dirp->d_reclen = DIRBLKSIZ; 765 blkoff = dp->i_offset & 766 (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); 767 bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); 768 #ifdef UFS_DIRHASH 769 if (dp->i_dirhash != NULL) { 770 ufsdirhash_newblk(dp, dp->i_offset); 771 ufsdirhash_add(dp, dirp, dp->i_offset); 772 ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, 773 dp->i_offset); 774 } 775 #endif 776 if (DOINGSOFTDEP(dvp)) { 777 /* 778 * Ensure that the entire newly allocated block is a 779 * valid directory so that future growth within the 780 * block does not have to ensure that the block is 781 * written before the inode. 782 */ 783 blkoff += DIRBLKSIZ; 784 while (blkoff < bp->b_bcount) { 785 ((struct direct *) 786 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 787 blkoff += DIRBLKSIZ; 788 } 789 softdep_setup_directory_add(bp, dp, dp->i_offset, 790 dirp->d_ino, newdirbp); 791 bdwrite(bp); 792 return (UFS_UPDATE(dvp, 0)); 793 } 794 if (DOINGASYNC(dvp)) { 795 bdwrite(bp); 796 return (UFS_UPDATE(dvp, 0)); 797 } 798 error = VOP_BWRITE(bp->b_vp, bp); 799 ret = UFS_UPDATE(dvp, 1); 800 if (error == 0) 801 return (ret); 802 return (error); 803 } 804 805 /* 806 * If dp->i_count is non-zero, then namei found space for the new 807 * entry in the range dp->i_offset to dp->i_offset + dp->i_count 808 * in the directory. 
To use this space, we may have to compact 809 * the entries located there, by copying them together towards the 810 * beginning of the block, leaving the free space in one usable 811 * chunk at the end. 812 */ 813 814 /* 815 * Increase size of directory if entry eats into new space. 816 * This should never push the size past a new multiple of 817 * DIRBLKSIZE. 818 * 819 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 820 */ 821 if (dp->i_offset + dp->i_count > dp->i_size) 822 dp->i_size = dp->i_offset + dp->i_count; 823 /* 824 * Get the block containing the space for the new directory entry. 825 */ 826 error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); 827 if (error) { 828 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 829 bdwrite(newdirbp); 830 return (error); 831 } 832 /* 833 * Find space for the new entry. In the simple case, the entry at 834 * offset base will have the space. If it does not, then namei 835 * arranged that compacting the region dp->i_offset to 836 * dp->i_offset + dp->i_count would yield the space. 837 */ 838 ep = (struct direct *)dirbuf; 839 dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0; 840 spacefree = ep->d_reclen - dsize; 841 for (loc = ep->d_reclen; loc < dp->i_count; ) { 842 nep = (struct direct *)(dirbuf + loc); 843 844 /* Trim the existing slot (NB: dsize may be zero). */ 845 ep->d_reclen = dsize; 846 ep = (struct direct *)((char *)ep + dsize); 847 848 /* Read nep->d_reclen now as the bcopy() may clobber it. */ 849 loc += nep->d_reclen; 850 if (nep->d_ino == 0) { 851 /* 852 * A mid-block unused entry. Such entries are 853 * never created by the kernel, but fsck_ffs 854 * can create them (and it doesn't fix them). 855 * 856 * Add up the free space, and initialise the 857 * relocated entry since we don't bcopy it. 
858 */ 859 spacefree += nep->d_reclen; 860 ep->d_ino = 0; 861 dsize = 0; 862 continue; 863 } 864 dsize = DIRSIZ(OFSFMT(dvp), nep); 865 spacefree += nep->d_reclen - dsize; 866 #ifdef UFS_DIRHASH 867 if (dp->i_dirhash != NULL) 868 ufsdirhash_move(dp, nep, 869 dp->i_offset + ((char *)nep - dirbuf), 870 dp->i_offset + ((char *)ep - dirbuf)); 871 #endif 872 if (DOINGSOFTDEP(dvp)) 873 softdep_change_directoryentry_offset(dp, dirbuf, 874 (caddr_t)nep, (caddr_t)ep, dsize); 875 else 876 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 877 } 878 /* 879 * Here, `ep' points to a directory entry containing `dsize' in-use 880 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 881 * then the entry is completely unused (dsize == 0). The value 882 * of ep->d_reclen is always indeterminate. 883 * 884 * Update the pointer fields in the previous entry (if any), 885 * copy in the new entry, and write out the block. 886 */ 887 if (ep->d_ino == 0 || 888 (ep->d_ino == WINO && 889 bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 890 if (spacefree + dsize < newentrysize) 891 panic("ufs_direnter: compact1"); 892 dirp->d_reclen = spacefree + dsize; 893 } else { 894 if (spacefree < newentrysize) 895 panic("ufs_direnter: compact2"); 896 dirp->d_reclen = spacefree; 897 ep->d_reclen = dsize; 898 ep = (struct direct *)((char *)ep + dsize); 899 } 900 #ifdef UFS_DIRHASH 901 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 902 dirp->d_reclen == spacefree)) 903 ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); 904 #endif 905 bcopy((caddr_t)dirp, (caddr_t)ep, (uint)newentrysize); 906 #ifdef UFS_DIRHASH 907 if (dp->i_dirhash != NULL) 908 ufsdirhash_checkblock(dp, dirbuf - 909 (dp->i_offset & (DIRBLKSIZ - 1)), 910 dp->i_offset & ~(DIRBLKSIZ - 1)); 911 #endif 912 913 if (DOINGSOFTDEP(dvp)) { 914 softdep_setup_directory_add(bp, dp, 915 dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); 916 bdwrite(bp); 917 } else { 918 if (DOINGASYNC(dvp)) { 919 bdwrite(bp); 920 error = 
0; 921 } else { 922 error = bowrite(bp); 923 } 924 } 925 dp->i_flag |= IN_CHANGE | IN_UPDATE; 926 /* 927 * If all went well, and the directory can be shortened, proceed 928 * with the truncation. Note that we have to unlock the inode for 929 * the entry that we just entered, as the truncation may need to 930 * lock other inodes which can lead to deadlock if we also hold a 931 * lock on the newly entered node. 932 */ 933 if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { 934 if (tvp != NULL) 935 VOP_UNLOCK(tvp, NULL, 0, td); 936 #ifdef UFS_DIRHASH 937 if (dp->i_dirhash != NULL) 938 ufsdirhash_dirtrunc(dp, dp->i_endoff); 939 #endif 940 (void)UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cred, td); 941 if (tvp != NULL) 942 vn_lock(tvp, NULL, LK_EXCLUSIVE | LK_RETRY, td); 943 } 944 return (error); 945 } 946 947 /* 948 * Remove a directory entry after a call to namei, using 949 * the parameters which it left in nameidata. The entry 950 * dp->i_offset contains the offset into the directory of the 951 * entry to be eliminated. The dp->i_count field contains the 952 * size of the previous record in the directory. If this 953 * is 0, the first entry is being deleted, so we need only 954 * zero the inode number to mark the entry as free. If the 955 * entry is not the first in the directory, we must reclaim 956 * the space of the now empty record by adding the record size 957 * to the size of the previous entry. 958 */ 959 int 960 ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir) 961 { 962 struct inode *dp; 963 struct direct *ep; 964 struct buf *bp; 965 int error; 966 967 dp = VTOI(dvp); 968 969 if (flags & CNP_DOWHITEOUT) { 970 /* 971 * Whiteout entry: set d_ino to WINO. 
972 */ 973 if ((error = 974 UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0) 975 return (error); 976 ep->d_ino = WINO; 977 ep->d_type = DT_WHT; 978 goto out; 979 } 980 981 if ((error = UFS_BLKATOFF(dvp, 982 (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) 983 return (error); 984 #ifdef UFS_DIRHASH 985 /* 986 * Remove the dirhash entry. This is complicated by the fact 987 * that `ep' is the previous entry when dp->i_count != 0. 988 */ 989 if (dp->i_dirhash != NULL) 990 ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : 991 (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); 992 #endif 993 if (dp->i_count == 0) { 994 /* 995 * First entry in block: set d_ino to zero. 996 */ 997 ep->d_ino = 0; 998 } else { 999 /* 1000 * Collapse new free space into previous entry. 1001 */ 1002 ep->d_reclen += dp->i_reclen; 1003 } 1004 #ifdef UFS_DIRHASH 1005 if (dp->i_dirhash != NULL) 1006 ufsdirhash_checkblock(dp, (char *)ep - 1007 ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), 1008 dp->i_offset & ~(DIRBLKSIZ - 1)); 1009 #endif 1010 out: 1011 if (DOINGSOFTDEP(dvp)) { 1012 if (ip) { 1013 ip->i_effnlink--; 1014 softdep_change_linkcnt(ip); 1015 softdep_setup_remove(bp, dp, ip, isrmdir); 1016 } 1017 if (softdep_slowdown(dvp)) { 1018 error = VOP_BWRITE(bp->b_vp, bp); 1019 } else { 1020 bdwrite(bp); 1021 error = 0; 1022 } 1023 } else { 1024 if (ip) { 1025 ip->i_effnlink--; 1026 ip->i_nlink--; 1027 ip->i_flag |= IN_CHANGE; 1028 } 1029 if (flags & CNP_DOWHITEOUT) 1030 error = VOP_BWRITE(bp->b_vp, bp); 1031 else if (DOINGASYNC(dvp) && dp->i_count != 0) { 1032 bdwrite(bp); 1033 error = 0; 1034 } else 1035 error = bowrite(bp); 1036 } 1037 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1038 return (error); 1039 } 1040 1041 /* 1042 * Rewrite an existing directory entry to point at the inode 1043 * supplied. The parameters describing the directory entry are 1044 * set up by a call to namei. 
1045 */ 1046 int 1047 ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype, 1048 int isrmdir) 1049 { 1050 struct buf *bp; 1051 struct direct *ep; 1052 struct vnode *vdp = ITOV(dp); 1053 int error; 1054 1055 error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); 1056 if (error) 1057 return (error); 1058 ep->d_ino = newinum; 1059 if (!OFSFMT(vdp)) 1060 ep->d_type = newtype; 1061 oip->i_effnlink--; 1062 if (DOINGSOFTDEP(vdp)) { 1063 softdep_change_linkcnt(oip); 1064 softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); 1065 bdwrite(bp); 1066 } else { 1067 oip->i_nlink--; 1068 oip->i_flag |= IN_CHANGE; 1069 if (DOINGASYNC(vdp)) { 1070 bdwrite(bp); 1071 error = 0; 1072 } else { 1073 error = bowrite(bp); 1074 } 1075 } 1076 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1077 return (error); 1078 } 1079 1080 /* 1081 * Check if a directory is empty or not. 1082 * Inode supplied must be locked. 1083 * 1084 * Using a struct dirtemplate here is not precisely 1085 * what we want, but better than using a struct direct. 1086 * 1087 * NB: does not handle corrupted directories. 1088 */ 1089 int 1090 ufs_dirempty(struct inode *ip, ino_t parentino, struct ucred *cred) 1091 { 1092 off_t off; 1093 struct dirtemplate dbuf; 1094 struct direct *dp = (struct direct *)&dbuf; 1095 int error, count, namlen; 1096 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1097 1098 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1099 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, 1100 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, NULL); 1101 /* 1102 * Since we read MINDIRSIZ, residual must 1103 * be 0 unless we're at end of file. 1104 */ 1105 if (error || count != 0) 1106 return (0); 1107 /* avoid infinite loops */ 1108 if (dp->d_reclen == 0) 1109 return (0); 1110 /* skip empty entries */ 1111 if (dp->d_ino == 0 || dp->d_ino == WINO) 1112 continue; 1113 /* accept only "." and ".." 
*/ 1114 # if (BYTE_ORDER == LITTLE_ENDIAN) 1115 if (OFSFMT(ITOV(ip))) 1116 namlen = dp->d_type; 1117 else 1118 namlen = dp->d_namlen; 1119 # else 1120 namlen = dp->d_namlen; 1121 # endif 1122 if (namlen > 2) 1123 return (0); 1124 if (dp->d_name[0] != '.') 1125 return (0); 1126 /* 1127 * At this point namlen must be 1 or 2. 1128 * 1 implies ".", 2 implies ".." if second 1129 * char is also "." 1130 */ 1131 if (namlen == 1 && dp->d_ino == ip->i_number) 1132 continue; 1133 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1134 continue; 1135 return (0); 1136 } 1137 return (1); 1138 } 1139 1140 /* 1141 * Check if source directory is in the path of the target directory. 1142 * Target is supplied locked, source is unlocked. 1143 * The target is always vput before returning. 1144 */ 1145 int 1146 ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred) 1147 { 1148 struct vnode *vp; 1149 int error, rootino, namlen; 1150 struct dirtemplate dirbuf; 1151 1152 vp = ITOV(target); 1153 if (target->i_number == source->i_number) { 1154 error = EEXIST; 1155 goto out; 1156 } 1157 rootino = ROOTINO; 1158 error = 0; 1159 if (target->i_number == rootino) 1160 goto out; 1161 1162 for (;;) { 1163 if (vp->v_type != VDIR) { 1164 error = ENOTDIR; 1165 break; 1166 } 1167 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, 1168 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1169 IO_NODELOCKED, cred, (int *)0, NULL); 1170 if (error != 0) 1171 break; 1172 # if (BYTE_ORDER == LITTLE_ENDIAN) 1173 if (OFSFMT(vp)) 1174 namlen = dirbuf.dotdot_type; 1175 else 1176 namlen = dirbuf.dotdot_namlen; 1177 # else 1178 namlen = dirbuf.dotdot_namlen; 1179 # endif 1180 if (namlen != 2 || 1181 dirbuf.dotdot_name[0] != '.' 
|| 1182 dirbuf.dotdot_name[1] != '.') { 1183 error = ENOTDIR; 1184 break; 1185 } 1186 if (dirbuf.dotdot_ino == source->i_number) { 1187 error = EINVAL; 1188 break; 1189 } 1190 if (dirbuf.dotdot_ino == rootino) 1191 break; 1192 vput(vp); 1193 error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp); 1194 if (error) { 1195 vp = NULL; 1196 break; 1197 } 1198 } 1199 1200 out: 1201 if (error == ENOTDIR) 1202 printf("checkpath: .. not a directory\n"); 1203 if (vp != NULL) 1204 vput(vp); 1205 return (error); 1206 } 1207