1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. All advertising materials mentioning features or use of this software 19 * must display the following acknowledgement: 20 * This product includes software developed by the University of 21 * California, Berkeley and its contributors. 22 * 4. Neither the name of the University nor the names of its contributors 23 * may be used to endorse or promote products derived from this software 24 * without specific prior written permission. 25 * 26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 29 * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lookup.c	8.15 (Berkeley) 6/16/95
 * $FreeBSD: src/sys/ufs/ufs/ufs_lookup.c,v 1.33.2.7 2001/09/22 19:22:13 iedowse Exp $
 */

#include "opt_ufs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/namei.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

#include <ufs/ufs/quota.h>
#include <ufs/ufs/inode.h>
#include <ufs/ufs/dir.h>
#ifdef UFS_DIRHASH
#include <ufs/ufs/dirhash.h>
#endif
#include <ufs/ufs/ufsmount.h>
#include <ufs/ufs/ufs_extern.h>

/*
 * Run-time switch for full directory entry validation: when non-zero,
 * ufs_lookup() passes every entry it scans to ufs_dirbadentry().
 * It defaults to 1 when the kernel is built with DIAGNOSTIC, else 0.
 */
#ifdef DIAGNOSTIC
int	dirchk = 1;
#else
int	dirchk = 0;
#endif

/* Expose dirchk as a read/write integer sysctl named "dircheck" under _debug. */
SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, "");

/*
 * True if the file system that vp lives on uses the old directory
 * format, which is recognised by a non-positive mnt_maxsymlinklen.
 */
#define OFSFMT(vp)	((vp)->v_mount->mnt_maxsymlinklen <= 0)

/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the file system is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and a vput
 * instead of two vputs.
 *
 * This routine is actually used as the VOP_CACHEDLOOKUP method, and the
 * filesystem employs the generic vfs_cache_lookup() as its VOP_LOOKUP
 * method.
 *
 * vfs_cache_lookup() performs the following for us:
 *	check that it is a directory
 *	check accessibility of directory
 *	check for modification attempts on read-only mounts
 *	if name found in cache
 *	    if at end of path and deleting or creating
 *		drop it
 *	     else
 *		return name.
 *	return VOP_CACHEDLOOKUP()
 *
 * Overall outline of ufs_lookup:
 *
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 *
 * Return values:
 *	0		name found; *ap->a_vpp holds the target vnode
 *	EJUSTRETURN	name absent, but it is the last component of a
 *			CREATE or RENAME (or a DELETE with DOWHITEOUT that hit
 *			a whiteout), the directory is still linked and is
 *			writable; slot information is left in the directory
 *			inode (i_offset, i_count, i_endoff)
 *	ENOENT		name absent in every other case
 *	EPERM		DELETE refused by the sticky-directory check
 *	EISDIR		RENAME whose target is "."
 *	other		error passed through from UFS_BLKATOFF, VOP_ACCESS,
 *			VFS_VGET or vn_lock
 *
 * PDIRUNLOCK is set in cnp->cn_flags on the paths that return with the
 * directory unlocked.
 */
int
ufs_lookup(ap)
	struct vop_cachedlookup_args /* {
		struct vnode *a_dvp;
		struct vnode **a_vpp;
		struct componentname *a_cnp;
	} */ *ap;
{
	register struct vnode *vdp;	/* vnode for directory being searched */
	register struct inode *dp;	/* inode for directory being searched */
	struct buf *bp;			/* a buffer of directory entries */
	struct direct *ep;		/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	enum {NONE, COMPACT, FOUND} slotstatus;
	doff_t slotoffset;		/* offset of area with free space */
	int slotsize;			/* size of area at slotoffset */
	int slotfreespace;		/* amount of space free in slot */
	int slotneeded;			/* size of the entry we're seeking */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	doff_t prevoff;			/* prev entry dp->i_offset */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by VFS_VGET */
	doff_t enduseful;		/* pointer past last used dir slot */
	u_long bmask;			/* block offset mask */
	int lockparent;			/* 1 => lockparent flag is set */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int namlen, error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	int nameiop = cnp->cn_nameiop;
	struct proc *p = cnp->cn_proc;

	bp = NULL;
	slotoffset = -1;
	/* Start from "directory is locked"; the unlock paths below set it. */
	cnp->cn_flags &= ~PDIRUNLOCK;
/*
 *  XXX there was a soft-update diff about this I couldn't merge.
 * I think this was the equiv.
 */
	*vpp = NULL;

	vdp = ap->a_dvp;
	dp = VTOI(vdp);
	lockparent = flags & LOCKPARENT;
	wantparent = flags & (LOCKPARENT|WANTPARENT);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 *
	 * Suppress search for slots unless creating
	 * file and at end of pathname, in which case
	 * we watch for a place to put the new file in
	 * case it doesn't already exist.
	 *
	 * Note that slotstatus == FOUND doubles as "not looking for a slot":
	 * every slot-search test below is guarded by slotstatus != FOUND.
	 */
	slotstatus = FOUND;
	slotfreespace = slotsize = slotneeded = 0;
	if ((nameiop == CREATE || nameiop == RENAME) &&
	    (flags & ISLASTCN)) {
		slotstatus = NONE;
		slotneeded = DIRECTSIZ(cnp->cn_namelen);
	}
	/* Mask that yields the offset within an f_iosize-sized block. */
	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;

#ifdef UFS_DIRHASH
	/*
	 * Use dirhash for fast operations on large directories. The logic
	 * to determine whether to hash the directory is contained within
	 * ufsdirhash_build(); a zero return means that it decided to hash
	 * this directory and it successfully built up the hash table.
	 */
	if (ufsdirhash_build(dp) == 0) {
		/* Look for a free slot if needed. */
		enduseful = dp->i_size;
		if (slotstatus != FOUND) {
			slotoffset = ufsdirhash_findfree(dp, slotneeded,
			    &slotsize);
			if (slotoffset >= 0) {
				slotstatus = COMPACT;
				enduseful = ufsdirhash_enduseful(dp);
				if (enduseful < 0)
					enduseful = dp->i_size;
			}
		}
		/* Look up the component. */
		numdirpasses = 1;
		entryoffsetinblock = 0; /* silence compiler warning */
		/* prevoff is only requested (and later used) for DELETE. */
		switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
		    &dp->i_offset, &bp, nameiop == DELETE ? &prevoff : NULL)) {
		case 0:
			ep = (struct direct *)((char *)bp->b_data +
			    (dp->i_offset & bmask));
			goto foundentry;
		case ENOENT:
			dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
			goto notfound;
		default:
			/* Something failed; just do a linear search. */
			break;
		}
	}
#endif /* UFS_DIRHASH */
	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	if (nameiop != LOOKUP || dp->i_diroff == 0 ||
	    dp->i_diroff >= dp->i_size) {
		entryoffsetinblock = 0;
		dp->i_offset = 0;
		numdirpasses = 1;
	} else {
		dp->i_offset = dp->i_diroff;
		/*
		 * Starting in mid-block: the block must be mapped here because
		 * the search loop only fetches a block at a block boundary.
		 */
		if ((entryoffsetinblock = dp->i_offset & bmask) &&
		    (error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp)))
			return (error);
		numdirpasses = 2;
		nchstats.ncs_2passes++;
	}
	prevoff = dp->i_offset;
	endsearch = roundup2(dp->i_size, DIRBLKSIZ);
	enduseful = 0;

searchloop:
	while (dp->i_offset < endsearch) {
		/*
		 * If necessary, get the next directory block.
		 */
		if ((dp->i_offset & bmask) == 0) {
			if (bp != NULL)
				brelse(bp);
			error =
			    UFS_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp);
			if (error)
				return (error);
			entryoffsetinblock = 0;
		}
		/*
		 * If still looking for a slot, and at a DIRBLKSIZE
		 * boundary, have to start looking for free space again.
		 */
		if (slotstatus == NONE &&
		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
			slotoffset = -1;
			slotfreespace = 0;
		}
		/*
		 * Get pointer to next entry.
		 * Full validation checks are slow, so we only check
		 * enough to ensure forward progress through the
		 * directory. Complete checks can be run by patching
		 * "dirchk" to be true.
		 */
		ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
		if (ep->d_reclen == 0 || ep->d_reclen >
		    DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
		    (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) {
			int i;

			/* Report it, then skip the rest of this DIRBLKSIZ chunk. */
			ufs_dirbad(dp, dp->i_offset, "mangled entry");
			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
			dp->i_offset += i;
			entryoffsetinblock += i;
			continue;
		}

		/*
		 * If an appropriate sized slot has not yet been found,
		 * check to see if one is available. Also accumulate space
		 * in the current block so that we can determine if
		 * compaction is viable.
		 */
		if (slotstatus != FOUND) {
			int size = ep->d_reclen;

			if (ep->d_ino != 0)
				size -= DIRSIZ(OFSFMT(vdp), ep);
			if (size > 0) {
				if (size >= slotneeded) {
					slotstatus = FOUND;
					slotoffset = dp->i_offset;
					slotsize = ep->d_reclen;
				} else if (slotstatus == NONE) {
					slotfreespace += size;
					if (slotoffset == -1)
						slotoffset = dp->i_offset;
					if (slotfreespace >= slotneeded) {
						slotstatus = COMPACT;
						slotsize = dp->i_offset +
						      ep->d_reclen - slotoffset;
					}
				}
			}
		}

		/*
		 * Check for a name match.
		 */
		if (ep->d_ino) {
			/*
			 * On little-endian hosts an old-format entry keeps
			 * its name length in the d_type field.
			 */
#			if (BYTE_ORDER == LITTLE_ENDIAN)
				if (OFSFMT(vdp))
					namlen = ep->d_type;
				else
					namlen = ep->d_namlen;
#			else
				namlen = ep->d_namlen;
#			endif
			if (namlen == cnp->cn_namelen &&
				(cnp->cn_nameptr[0] == ep->d_name[0]) &&
			    !bcmp(cnp->cn_nameptr, ep->d_name,
				(unsigned)namlen)) {
#ifdef UFS_DIRHASH
foundentry:
#endif
				/*
				 * Save directory entry's inode number and
				 * reclen in ndp->ni_ufs area, and release
				 * directory buffer.
				 */
				/*
				 * A whiteout entry (new format only) is
				 * reported as "not found": its slot is
				 * remembered for reuse, ISWHITEOUT is set,
				 * and numdirpasses is decremented so that
				 * notfound does not start a second pass.
				 */
				if (vdp->v_mount->mnt_maxsymlinklen > 0 &&
				    ep->d_type == DT_WHT) {
					slotstatus = FOUND;
					slotoffset = dp->i_offset;
					slotsize = ep->d_reclen;
					dp->i_reclen = slotsize;
					enduseful = dp->i_size;
					ap->a_cnp->cn_flags |= ISWHITEOUT;
					numdirpasses--;
					goto notfound;
				}
				dp->i_ino = ep->d_ino;
				dp->i_reclen = ep->d_reclen;
				goto found;
			}
		}
		prevoff = dp->i_offset;
		dp->i_offset += ep->d_reclen;
		entryoffsetinblock += ep->d_reclen;
		if (ep->d_ino)
			enduseful = dp->i_offset;
	}
notfound:
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		dp->i_offset = 0;
		endsearch = dp->i_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		brelse(bp);
	/*
	 * If creating, and at end of pathname and current
	 * directory has not been removed, then can consider
	 * allowing file to be created.
	 */
	if ((nameiop == CREATE || nameiop == RENAME ||
	     (nameiop == DELETE &&
	      (ap->a_cnp->cn_flags & DOWHITEOUT) &&
	      (ap->a_cnp->cn_flags & ISWHITEOUT))) &&
	    (flags & ISLASTCN) && dp->i_effnlink != 0) {
		/*
		 * Access for write is interpreted as allowing
		 * creation of files in the directory.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
		if (error)
			return (error);
		/*
		 * Return an indication of where the new directory
		 * entry should be put.  If we didn't find a slot,
		 * then set dp->i_count to 0 indicating
		 * that the new slot belongs at the end of the
		 * directory. If we found a slot, then the new entry
		 * can be put in the range from dp->i_offset to
		 * dp->i_offset + dp->i_count.
		 */
		if (slotstatus == NONE) {
			dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
			dp->i_count = 0;
			enduseful = dp->i_offset;
		} else if (nameiop == DELETE) {
			/*
			 * DELETE reaches here only for a whiteout; i_count is
			 * set the same way as in the found/DELETE case below.
			 */
			dp->i_offset = slotoffset;
			if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
				dp->i_count = 0;
			else
				dp->i_count = dp->i_offset - prevoff;
		} else {
			dp->i_offset = slotoffset;
			dp->i_count = slotsize;
			if (enduseful < slotoffset + slotsize)
				enduseful = slotoffset + slotsize;
		}
		dp->i_endoff = roundup2(enduseful, DIRBLKSIZ);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		/*
		 * We return with the directory locked, so that
		 * the parameters we set up above will still be
		 * valid if we actually decide to do a direnter().
		 * We return ni_vp == NULL to indicate that the entry
		 * does not currently exist; we leave a pointer to
		 * the (locked) directory inode in ndp->ni_dvp.
		 * The pathname buffer is saved so that the name
		 * can be obtained later.
		 *
		 * NB - if the directory is unlocked, then this
		 * information cannot be used.
		 */
		cnp->cn_flags |= SAVENAME;
		if (!lockparent) {
			VOP_UNLOCK(vdp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (EJUSTRETURN);
	}
	/*
	 * Insert name into cache (as non-existent) if appropriate.
	 * *vpp is still NULL here.
	 */
	if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE)
		cache_enter(vdp, *vpp, cnp);
	return (ENOENT);

found:
	if (numdirpasses == 2)
		nchstats.ncs_pass2++;
	/*
	 * Check that directory length properly reflects presence
	 * of this entry.
	 */
	if (dp->i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) {
		ufs_dirbad(dp, dp->i_offset, "i_size too small");
		dp->i_size = dp->i_offset + DIRSIZ(OFSFMT(vdp), ep);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/* ep points into bp's data and must not be used past this point. */
	brelse(bp);

	/*
	 * Found component in pathname.
	 * If the final component of path name, save information
	 * in the cache as to where the entry was found.
	 */
	if ((flags & ISLASTCN) && nameiop == LOOKUP)
		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);

	/*
	 * If deleting, and at end of pathname, return
	 * parameters which can be used to remove file.
	 * If the wantparent flag isn't set, we return only
	 * the directory (in ndp->ni_dvp), otherwise we go
	 * on and lock the inode, being careful with ".".
	 */
	if (nameiop == DELETE && (flags & ISLASTCN)) {
		/*
		 * Write access to directory required to delete files.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc);
		if (error)
			return (error);
		/*
		 * Return pointer to current entry in dp->i_offset,
		 * and distance past previous entry (if there
		 * is a previous entry in this block) in dp->i_count.
		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
		 */
		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
			dp->i_count = 0;
		else
			dp->i_count = dp->i_offset - prevoff;
		/* Deleting ".": hand back the directory itself with an extra reference. */
		if (dp->i_number == dp->i_ino) {
			VREF(vdp);
			*vpp = vdp;
			return (0);
		}
		if (flags & ISDOTDOT)
			VOP_UNLOCK(vdp, 0, p);	/* race to get the inode */
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (flags & ISDOTDOT) {
			if (vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY, p) != 0)
				cnp->cn_flags |= PDIRUNLOCK;
		}
		if (error)
			return (error);
		/*
		 * If directory is "sticky", then user must own
		 * the directory, or the file in it, else she
		 * may not delete it (unless she's root). This
		 * implements append-only directories.
		 */
		if ((dp->i_mode & ISVTX) &&
		    cred->cr_uid != 0 &&
		    cred->cr_uid != dp->i_uid &&
		    VTOI(tdp)->i_uid != cred->cr_uid) {
			vput(tdp);
			return (EPERM);
		}
		*vpp = tdp;
		if (!lockparent) {
			VOP_UNLOCK(vdp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (0);
	}

	/*
	 * If rewriting (RENAME), return the inode and the
	 * information required to rewrite the present directory
	 * Must get inode of directory entry to verify it's a
	 * regular file, or empty directory.
	 */
	if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) {
		if ((error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc)) != 0)
			return (error);
		/*
		 * Careful about locking second inode.
		 * This can only occur if the target is ".".
		 */
		if (dp->i_number == dp->i_ino)
			return (EISDIR);
		if (flags & ISDOTDOT)
			VOP_UNLOCK(vdp, 0, p);	/* race to get the inode */
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (flags & ISDOTDOT) {
			if (vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY, p) != 0)
				cnp->cn_flags |= PDIRUNLOCK;
		}
		if (error)
			return (error);
		*vpp = tdp;
		cnp->cn_flags |= SAVENAME;
		if (!lockparent) {
			VOP_UNLOCK(vdp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		return (0);
	}

	/*
	 * Step through the translation in the name.  We do not `vput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to ensure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the VFS_VGET for the
	 * inode associated with ".." returns.  We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the file system has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	if (flags & ISDOTDOT) {
		VOP_UNLOCK(pdp, 0, p);	/* race to get the inode */
		cnp->cn_flags |= PDIRUNLOCK;
		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) {
			/* Try to give the caller back a locked directory. */
			if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY, p) == 0)
				cnp->cn_flags &= ~PDIRUNLOCK;
			return (error);
		}
		if (lockparent && (flags & ISLASTCN)) {
			if ((error = vn_lock(pdp, LK_EXCLUSIVE, p)) != 0) {
				vput(tdp);
				return (error);
			}
			cnp->cn_flags &= ~PDIRUNLOCK;
		}
		*vpp = tdp;
	} else if (dp->i_number == dp->i_ino) {
		VREF(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (error)
			return (error);
		if (!lockparent || !(flags & ISLASTCN)) {
			VOP_UNLOCK(pdp, 0, p);
			cnp->cn_flags |= PDIRUNLOCK;
		}
		*vpp = tdp;
	}

	/*
	 * Insert name into cache if appropriate.
623 */ 624 if (cnp->cn_flags & MAKEENTRY) 625 cache_enter(vdp, *vpp, cnp); 626 return (0); 627 } 628 629 void 630 ufs_dirbad(ip, offset, how) 631 struct inode *ip; 632 doff_t offset; 633 char *how; 634 { 635 struct mount *mp; 636 637 mp = ITOV(ip)->v_mount; 638 (void)printf("%s: bad dir ino %lu at offset %ld: %s\n", 639 mp->mnt_stat.f_mntonname, (u_long)ip->i_number, (long)offset, how); 640 if ((mp->mnt_flag & MNT_RDONLY) == 0) 641 panic("ufs_dirbad: bad dir"); 642 } 643 644 /* 645 * Do consistency checking on a directory entry: 646 * record length must be multiple of 4 647 * entry must fit in rest of its DIRBLKSIZ block 648 * record must be large enough to contain entry 649 * name is not longer than MAXNAMLEN 650 * name must be as long as advertised, and null terminated 651 */ 652 int 653 ufs_dirbadentry(dp, ep, entryoffsetinblock) 654 struct vnode *dp; 655 register struct direct *ep; 656 int entryoffsetinblock; 657 { 658 register int i; 659 int namlen; 660 661 # if (BYTE_ORDER == LITTLE_ENDIAN) 662 if (OFSFMT(dp)) 663 namlen = ep->d_type; 664 else 665 namlen = ep->d_namlen; 666 # else 667 namlen = ep->d_namlen; 668 # endif 669 if ((ep->d_reclen & 0x3) != 0 || 670 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 671 ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { 672 /*return (1); */ 673 printf("First bad\n"); 674 goto bad; 675 } 676 if (ep->d_ino == 0) 677 return (0); 678 for (i = 0; i < namlen; i++) 679 if (ep->d_name[i] == '\0') { 680 /*return (1); */ 681 printf("Second bad\n"); 682 goto bad; 683 } 684 if (ep->d_name[i]) 685 goto bad; 686 return (0); 687 bad: 688 return (1); 689 } 690 691 /* 692 * Construct a new directory entry after a call to namei, using the 693 * parameters that it left in the componentname argument cnp. The 694 * argument ip is the inode to which the new directory entry will refer. 
695 */ 696 void 697 ufs_makedirentry(ip, cnp, newdirp) 698 struct inode *ip; 699 struct componentname *cnp; 700 struct direct *newdirp; 701 { 702 703 #ifdef DIAGNOSTIC 704 if ((cnp->cn_flags & SAVENAME) == 0) 705 panic("ufs_makedirentry: missing name"); 706 #endif 707 newdirp->d_ino = ip->i_number; 708 newdirp->d_namlen = cnp->cn_namelen; 709 bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); 710 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) 711 newdirp->d_type = IFTODT(ip->i_mode); 712 else { 713 newdirp->d_type = 0; 714 # if (BYTE_ORDER == LITTLE_ENDIAN) 715 { u_char tmp = newdirp->d_namlen; 716 newdirp->d_namlen = newdirp->d_type; 717 newdirp->d_type = tmp; } 718 # endif 719 } 720 } 721 722 /* 723 * Write a directory entry after a call to namei, using the parameters 724 * that it left in nameidata. The argument dirp is the new directory 725 * entry contents. Dvp is a pointer to the directory to be written, 726 * which was left locked by namei. Remaining parameters (dp->i_offset, 727 * dp->i_count) indicate how the space for the new entry is to be obtained. 728 * Non-null bp indicates that a directory is being created (for the 729 * soft dependency code). 730 */ 731 int 732 ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) 733 struct vnode *dvp; 734 struct vnode *tvp; 735 struct direct *dirp; 736 struct componentname *cnp; 737 struct buf *newdirbp; 738 { 739 struct ucred *cr; 740 struct proc *p; 741 int newentrysize; 742 struct inode *dp; 743 struct buf *bp; 744 u_int dsize; 745 struct direct *ep, *nep; 746 int error, ret, blkoff, loc, spacefree, flags; 747 char *dirbuf; 748 749 p = curproc; /* XXX */ 750 cr = p->p_ucred; 751 752 dp = VTOI(dvp); 753 newentrysize = DIRSIZ(OFSFMT(dvp), dirp); 754 755 if (dp->i_count == 0) { 756 /* 757 * If dp->i_count is 0, then namei could find no 758 * space in the directory. Here, dp->i_offset will 759 * be on a directory block boundary and we will write the 760 * new entry into a fresh block. 
761 */ 762 if (dp->i_offset & (DIRBLKSIZ - 1)) 763 panic("ufs_direnter: newblk"); 764 flags = B_CLRBUF; 765 if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) 766 flags |= B_SYNC; 767 if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, 768 cr, flags, &bp)) != 0) { 769 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 770 bdwrite(newdirbp); 771 return (error); 772 } 773 dp->i_size = dp->i_offset + DIRBLKSIZ; 774 dp->i_flag |= IN_CHANGE | IN_UPDATE; 775 vnode_pager_setsize(dvp, (u_long)dp->i_size); 776 dirp->d_reclen = DIRBLKSIZ; 777 blkoff = dp->i_offset & 778 (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); 779 bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); 780 #ifdef UFS_DIRHASH 781 if (dp->i_dirhash != NULL) { 782 ufsdirhash_newblk(dp, dp->i_offset); 783 ufsdirhash_add(dp, dirp, dp->i_offset); 784 ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, 785 dp->i_offset); 786 } 787 #endif 788 if (DOINGSOFTDEP(dvp)) { 789 /* 790 * Ensure that the entire newly allocated block is a 791 * valid directory so that future growth within the 792 * block does not have to ensure that the block is 793 * written before the inode. 794 */ 795 blkoff += DIRBLKSIZ; 796 while (blkoff < bp->b_bcount) { 797 ((struct direct *) 798 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 799 blkoff += DIRBLKSIZ; 800 } 801 softdep_setup_directory_add(bp, dp, dp->i_offset, 802 dirp->d_ino, newdirbp); 803 bdwrite(bp); 804 return (UFS_UPDATE(dvp, 0)); 805 } 806 if (DOINGASYNC(dvp)) { 807 bdwrite(bp); 808 return (UFS_UPDATE(dvp, 0)); 809 } 810 error = VOP_BWRITE(bp->b_vp, bp); 811 ret = UFS_UPDATE(dvp, 1); 812 if (error == 0) 813 return (ret); 814 return (error); 815 } 816 817 /* 818 * If dp->i_count is non-zero, then namei found space for the new 819 * entry in the range dp->i_offset to dp->i_offset + dp->i_count 820 * in the directory. 
To use this space, we may have to compact 821 * the entries located there, by copying them together towards the 822 * beginning of the block, leaving the free space in one usable 823 * chunk at the end. 824 */ 825 826 /* 827 * Increase size of directory if entry eats into new space. 828 * This should never push the size past a new multiple of 829 * DIRBLKSIZE. 830 * 831 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 832 */ 833 if (dp->i_offset + dp->i_count > dp->i_size) 834 dp->i_size = dp->i_offset + dp->i_count; 835 /* 836 * Get the block containing the space for the new directory entry. 837 */ 838 error = UFS_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); 839 if (error) { 840 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 841 bdwrite(newdirbp); 842 return (error); 843 } 844 /* 845 * Find space for the new entry. In the simple case, the entry at 846 * offset base will have the space. If it does not, then namei 847 * arranged that compacting the region dp->i_offset to 848 * dp->i_offset + dp->i_count would yield the space. 849 */ 850 ep = (struct direct *)dirbuf; 851 dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0; 852 spacefree = ep->d_reclen - dsize; 853 for (loc = ep->d_reclen; loc < dp->i_count; ) { 854 nep = (struct direct *)(dirbuf + loc); 855 856 /* Trim the existing slot (NB: dsize may be zero). */ 857 ep->d_reclen = dsize; 858 ep = (struct direct *)((char *)ep + dsize); 859 860 /* Read nep->d_reclen now as the bcopy() may clobber it. */ 861 loc += nep->d_reclen; 862 if (nep->d_ino == 0) { 863 /* 864 * A mid-block unused entry. Such entries are 865 * never created by the kernel, but fsck_ffs 866 * can create them (and it doesn't fix them). 867 * 868 * Add up the free space, and initialise the 869 * relocated entry since we don't bcopy it. 
870 */ 871 spacefree += nep->d_reclen; 872 ep->d_ino = 0; 873 dsize = 0; 874 continue; 875 } 876 dsize = DIRSIZ(OFSFMT(dvp), nep); 877 spacefree += nep->d_reclen - dsize; 878 #ifdef UFS_DIRHASH 879 if (dp->i_dirhash != NULL) 880 ufsdirhash_move(dp, nep, 881 dp->i_offset + ((char *)nep - dirbuf), 882 dp->i_offset + ((char *)ep - dirbuf)); 883 #endif 884 if (DOINGSOFTDEP(dvp)) 885 softdep_change_directoryentry_offset(dp, dirbuf, 886 (caddr_t)nep, (caddr_t)ep, dsize); 887 else 888 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 889 } 890 /* 891 * Here, `ep' points to a directory entry containing `dsize' in-use 892 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 893 * then the entry is completely unused (dsize == 0). The value 894 * of ep->d_reclen is always indeterminate. 895 * 896 * Update the pointer fields in the previous entry (if any), 897 * copy in the new entry, and write out the block. 898 */ 899 if (ep->d_ino == 0 || 900 (ep->d_ino == WINO && 901 bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 902 if (spacefree + dsize < newentrysize) 903 panic("ufs_direnter: compact1"); 904 dirp->d_reclen = spacefree + dsize; 905 } else { 906 if (spacefree < newentrysize) 907 panic("ufs_direnter: compact2"); 908 dirp->d_reclen = spacefree; 909 ep->d_reclen = dsize; 910 ep = (struct direct *)((char *)ep + dsize); 911 } 912 #ifdef UFS_DIRHASH 913 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 914 dirp->d_reclen == spacefree)) 915 ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); 916 #endif 917 bcopy((caddr_t)dirp, (caddr_t)ep, (u_int)newentrysize); 918 #ifdef UFS_DIRHASH 919 if (dp->i_dirhash != NULL) 920 ufsdirhash_checkblock(dp, dirbuf - 921 (dp->i_offset & (DIRBLKSIZ - 1)), 922 dp->i_offset & ~(DIRBLKSIZ - 1)); 923 #endif 924 925 if (DOINGSOFTDEP(dvp)) { 926 softdep_setup_directory_add(bp, dp, 927 dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); 928 bdwrite(bp); 929 } else { 930 if (DOINGASYNC(dvp)) { 931 bdwrite(bp); 932 error = 
0; 933 } else { 934 error = bowrite(bp); 935 } 936 } 937 dp->i_flag |= IN_CHANGE | IN_UPDATE; 938 /* 939 * If all went well, and the directory can be shortened, proceed 940 * with the truncation. Note that we have to unlock the inode for 941 * the entry that we just entered, as the truncation may need to 942 * lock other inodes which can lead to deadlock if we also hold a 943 * lock on the newly entered node. 944 */ 945 if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { 946 if (tvp != NULL) 947 VOP_UNLOCK(tvp, 0, p); 948 #ifdef UFS_DIRHASH 949 if (dp->i_dirhash != NULL) 950 ufsdirhash_dirtrunc(dp, dp->i_endoff); 951 #endif 952 (void) UFS_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); 953 if (tvp != NULL) 954 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY, p); 955 } 956 return (error); 957 } 958 959 /* 960 * Remove a directory entry after a call to namei, using 961 * the parameters which it left in nameidata. The entry 962 * dp->i_offset contains the offset into the directory of the 963 * entry to be eliminated. The dp->i_count field contains the 964 * size of the previous record in the directory. If this 965 * is 0, the first entry is being deleted, so we need only 966 * zero the inode number to mark the entry as free. If the 967 * entry is not the first in the directory, we must reclaim 968 * the space of the now empty record by adding the record size 969 * to the size of the previous entry. 970 */ 971 int 972 ufs_dirremove(dvp, ip, flags, isrmdir) 973 struct vnode *dvp; 974 struct inode *ip; 975 int flags; 976 int isrmdir; 977 { 978 struct inode *dp; 979 struct direct *ep; 980 struct buf *bp; 981 int error; 982 983 dp = VTOI(dvp); 984 985 if (flags & DOWHITEOUT) { 986 /* 987 * Whiteout entry: set d_ino to WINO. 
988 */ 989 if ((error = 990 UFS_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0) 991 return (error); 992 ep->d_ino = WINO; 993 ep->d_type = DT_WHT; 994 goto out; 995 } 996 997 if ((error = UFS_BLKATOFF(dvp, 998 (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) 999 return (error); 1000 #ifdef UFS_DIRHASH 1001 /* 1002 * Remove the dirhash entry. This is complicated by the fact 1003 * that `ep' is the previous entry when dp->i_count != 0. 1004 */ 1005 if (dp->i_dirhash != NULL) 1006 ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : 1007 (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); 1008 #endif 1009 if (dp->i_count == 0) { 1010 /* 1011 * First entry in block: set d_ino to zero. 1012 */ 1013 ep->d_ino = 0; 1014 } else { 1015 /* 1016 * Collapse new free space into previous entry. 1017 */ 1018 ep->d_reclen += dp->i_reclen; 1019 } 1020 #ifdef UFS_DIRHASH 1021 if (dp->i_dirhash != NULL) 1022 ufsdirhash_checkblock(dp, (char *)ep - 1023 ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), 1024 dp->i_offset & ~(DIRBLKSIZ - 1)); 1025 #endif 1026 out: 1027 if (DOINGSOFTDEP(dvp)) { 1028 if (ip) { 1029 ip->i_effnlink--; 1030 softdep_change_linkcnt(ip); 1031 softdep_setup_remove(bp, dp, ip, isrmdir); 1032 } 1033 if (softdep_slowdown(dvp)) { 1034 error = VOP_BWRITE(bp->b_vp, bp); 1035 } else { 1036 bdwrite(bp); 1037 error = 0; 1038 } 1039 } else { 1040 if (ip) { 1041 ip->i_effnlink--; 1042 ip->i_nlink--; 1043 ip->i_flag |= IN_CHANGE; 1044 } 1045 if (flags & DOWHITEOUT) 1046 error = VOP_BWRITE(bp->b_vp, bp); 1047 else if (DOINGASYNC(dvp) && dp->i_count != 0) { 1048 bdwrite(bp); 1049 error = 0; 1050 } else 1051 error = bowrite(bp); 1052 } 1053 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1054 return (error); 1055 } 1056 1057 /* 1058 * Rewrite an existing directory entry to point at the inode 1059 * supplied. The parameters describing the directory entry are 1060 * set up by a call to namei. 
1061 */ 1062 int 1063 ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) 1064 struct inode *dp, *oip; 1065 ino_t newinum; 1066 int newtype; 1067 int isrmdir; 1068 { 1069 struct buf *bp; 1070 struct direct *ep; 1071 struct vnode *vdp = ITOV(dp); 1072 int error; 1073 1074 error = UFS_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); 1075 if (error) 1076 return (error); 1077 ep->d_ino = newinum; 1078 if (!OFSFMT(vdp)) 1079 ep->d_type = newtype; 1080 oip->i_effnlink--; 1081 if (DOINGSOFTDEP(vdp)) { 1082 softdep_change_linkcnt(oip); 1083 softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); 1084 bdwrite(bp); 1085 } else { 1086 oip->i_nlink--; 1087 oip->i_flag |= IN_CHANGE; 1088 if (DOINGASYNC(vdp)) { 1089 bdwrite(bp); 1090 error = 0; 1091 } else { 1092 error = bowrite(bp); 1093 } 1094 } 1095 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1096 return (error); 1097 } 1098 1099 /* 1100 * Check if a directory is empty or not. 1101 * Inode supplied must be locked. 1102 * 1103 * Using a struct dirtemplate here is not precisely 1104 * what we want, but better than using a struct direct. 1105 * 1106 * NB: does not handle corrupted directories. 1107 */ 1108 int 1109 ufs_dirempty(ip, parentino, cred) 1110 register struct inode *ip; 1111 ino_t parentino; 1112 struct ucred *cred; 1113 { 1114 register off_t off; 1115 struct dirtemplate dbuf; 1116 register struct direct *dp = (struct direct *)&dbuf; 1117 int error, count, namlen; 1118 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1119 1120 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1121 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, 1122 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); 1123 /* 1124 * Since we read MINDIRSIZ, residual must 1125 * be 0 unless we're at end of file. 
1126 */ 1127 if (error || count != 0) 1128 return (0); 1129 /* avoid infinite loops */ 1130 if (dp->d_reclen == 0) 1131 return (0); 1132 /* skip empty entries */ 1133 if (dp->d_ino == 0 || dp->d_ino == WINO) 1134 continue; 1135 /* accept only "." and ".." */ 1136 # if (BYTE_ORDER == LITTLE_ENDIAN) 1137 if (OFSFMT(ITOV(ip))) 1138 namlen = dp->d_type; 1139 else 1140 namlen = dp->d_namlen; 1141 # else 1142 namlen = dp->d_namlen; 1143 # endif 1144 if (namlen > 2) 1145 return (0); 1146 if (dp->d_name[0] != '.') 1147 return (0); 1148 /* 1149 * At this point namlen must be 1 or 2. 1150 * 1 implies ".", 2 implies ".." if second 1151 * char is also "." 1152 */ 1153 if (namlen == 1 && dp->d_ino == ip->i_number) 1154 continue; 1155 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1156 continue; 1157 return (0); 1158 } 1159 return (1); 1160 } 1161 1162 /* 1163 * Check if source directory is in the path of the target directory. 1164 * Target is supplied locked, source is unlocked. 1165 * The target is always vput before returning. 
1166 */ 1167 int 1168 ufs_checkpath(source, target, cred) 1169 struct inode *source, *target; 1170 struct ucred *cred; 1171 { 1172 struct vnode *vp; 1173 int error, rootino, namlen; 1174 struct dirtemplate dirbuf; 1175 1176 vp = ITOV(target); 1177 if (target->i_number == source->i_number) { 1178 error = EEXIST; 1179 goto out; 1180 } 1181 rootino = ROOTINO; 1182 error = 0; 1183 if (target->i_number == rootino) 1184 goto out; 1185 1186 for (;;) { 1187 if (vp->v_type != VDIR) { 1188 error = ENOTDIR; 1189 break; 1190 } 1191 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, 1192 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1193 IO_NODELOCKED, cred, (int *)0, (struct proc *)0); 1194 if (error != 0) 1195 break; 1196 # if (BYTE_ORDER == LITTLE_ENDIAN) 1197 if (OFSFMT(vp)) 1198 namlen = dirbuf.dotdot_type; 1199 else 1200 namlen = dirbuf.dotdot_namlen; 1201 # else 1202 namlen = dirbuf.dotdot_namlen; 1203 # endif 1204 if (namlen != 2 || 1205 dirbuf.dotdot_name[0] != '.' || 1206 dirbuf.dotdot_name[1] != '.') { 1207 error = ENOTDIR; 1208 break; 1209 } 1210 if (dirbuf.dotdot_ino == source->i_number) { 1211 error = EINVAL; 1212 break; 1213 } 1214 if (dirbuf.dotdot_ino == rootino) 1215 break; 1216 vput(vp); 1217 error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp); 1218 if (error) { 1219 vp = NULL; 1220 break; 1221 } 1222 } 1223 1224 out: 1225 if (error == ENOTDIR) 1226 printf("checkpath: .. not a directory\n"); 1227 if (vp != NULL) 1228 vput(vp); 1229 return (error); 1230 } 1231