1 /* 2 * Copyright (c) 1989, 1993 3 * The Regents of the University of California. All rights reserved. 4 * (c) UNIX System Laboratories, Inc. 5 * All or some portions of this file are derived from material licensed 6 * to the University of California by American Telephone and Telegraph 7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 8 * the permission of UNIX System Laboratories, Inc. 9 * 10 * Redistribution and use in source and binary forms, with or without 11 * modification, are permitted provided that the following conditions 12 * are met: 13 * 1. Redistributions of source code must retain the above copyright 14 * notice, this list of conditions and the following disclaimer. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. Neither the name of the University nor the names of its contributors 19 * may be used to endorse or promote products derived from this software 20 * without specific prior written permission. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * @(#)ufs_lookup.c 8.15 (Berkeley) 6/16/95 35 * $FreeBSD: src/sys/ufs/ufs/ufs_lookup.c,v 1.33.2.7 2001/09/22 19:22:13 iedowse Exp $ 36 * $DragonFly: src/sys/vfs/ufs/ufs_lookup.c,v 1.29 2008/10/15 12:12:51 aggelos Exp $ 37 */ 38 39 #include "opt_ufs.h" 40 41 #include <sys/param.h> 42 #include <sys/systm.h> 43 #include <sys/kernel.h> 44 #include <sys/buf.h> 45 #include <sys/proc.h> 46 #include <sys/namei.h> 47 #include <sys/stat.h> 48 #include <sys/mount.h> 49 #include <sys/vnode.h> 50 #include <sys/sysctl.h> 51 52 #include <vm/vm.h> 53 #include <vm/vm_extern.h> 54 55 #include "quota.h" 56 #include "inode.h" 57 #include "dir.h" 58 #ifdef UFS_DIRHASH 59 #include "dirhash.h" 60 #endif 61 #include "ufsmount.h" 62 #include "ufs_extern.h" 63 #include "ffs_extern.h" 64 65 #ifdef DIAGNOSTIC 66 int dirchk = 1; 67 #else 68 int dirchk = 0; 69 #endif 70 71 SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, 72 "Enable full validation checks of directory"); 73 74 /* true if old FS format...*/ 75 #define OFSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 76 77 /* 78 * Convert a component of a pathname into a pointer to a locked inode. 79 * This is a very central and rather complicated routine. 80 * If the filesystem is not maintained in a strict tree hierarchy, 81 * this can result in a deadlock situation (see comments in code below). 82 * 83 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending 84 * on whether the name is to be looked up, created, renamed, or deleted. 85 * When CREATE, RENAME, or DELETE is specified, information usable in 86 * creating, renaming, or deleting a directory entry may be calculated. 87 * If flag has LOCKPARENT or'ed into it and the target of the pathname 88 * exists, lookup returns both the target and its parent directory locked. 89 * When creating or renaming and LOCKPARENT is specified, the target may 90 * not be ".". When deleting and LOCKPARENT is specified, the target may 91 * be "."., but the caller must check to ensure it does an vrele and vput 92 * instead of two vputs. 93 * 94 * Overall outline of ufs_lookup: 95 * 96 * search for name in directory, to found or notfound 97 * notfound: 98 * if creating, return locked directory, leaving info on available slots 99 * else return error 100 * found: 101 * if at end of path and deleting, return information to allow delete 102 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target 103 * inode and return info to allow rewrite 104 * if not at end, add name to cache; if at end and neither creating 105 * nor deleting, add name to cache 106 * 107 * ufs_lookup(struct vnode *a_dvp, struct vnode **a_vpp, 108 * struct componentname *a_cnp) 109 */ 110 int 111 ufs_lookup(struct vop_old_lookup_args *ap) 112 { 113 struct vnode *vdp; /* vnode for directory being searched */ 114 struct inode *dp; /* inode for directory being searched */ 115 struct buf *bp; /* a buffer of directory entries */ 116 struct direct *ep; /* the current directory entry */ 117 int entryoffsetinblock; /* offset of ep in bp's buffer */ 118 enum {NONE, COMPACT, FOUND} slotstatus; 119 doff_t slotoffset; /* offset of area with free space */ 120 int slotsize; /* size of area at slotoffset */ 121 int slotfreespace; /* amount of space free in slot */ 122 int slotneeded; /* size of the entry we're seeking */ 123 int numdirpasses; /* strategy for directory search */ 124 doff_t endsearch; /* offset to end directory search */ 125 doff_t prevoff; /* prev entry dp->i_offset */ 126 struct vnode *pdp; /* saved dp during symlink work */ 127 struct vnode *tdp; /* returned by VFS_VGET */ 128 doff_t enduseful; /* pointer past last used dir slot */ 129 u_long bmask; /* block offset mask */ 130 int lockparent; /* 1 => lockparent flag is set */ 131 int wantparent; /* 1 => wantparent or lockparent flag */ 132 int namlen, error; 133 struct vnode **vpp = ap->a_vpp; 134 struct componentname *cnp = ap->a_cnp; 135 struct ucred *cred = cnp->cn_cred; 136 int flags = cnp->cn_flags; 137 int nameiop = cnp->cn_nameiop; 138 139 bp = NULL; 140 slotoffset = -1; 141 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 142 /* 143 * XXX there was a soft-update diff about this I couldn't merge. 144 * I think this was the equiv. 145 */ 146 *vpp = NULL; 147 148 vdp = ap->a_dvp; 149 dp = VTOI(vdp); 150 lockparent = flags & CNP_LOCKPARENT; 151 wantparent = flags & (CNP_LOCKPARENT|CNP_WANTPARENT); 152 153 /* 154 * We now have a segment name to search for, and a directory to search. 155 * 156 * Suppress search for slots unless creating 157 * file and at end of pathname, in which case 158 * we watch for a place to put the new file in 159 * case it doesn't already exist. 160 */ 161 slotstatus = FOUND; 162 slotfreespace = slotsize = slotneeded = 0; 163 if (nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) { 164 slotstatus = NONE; 165 slotneeded = DIRECTSIZ(cnp->cn_namelen); 166 } 167 bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 168 169 #ifdef UFS_DIRHASH 170 /* 171 * Use dirhash for fast operations on large directories. The logic 172 * to determine whether to hash the directory is contained within 173 * ufsdirhash_build(); a zero return means that it decided to hash 174 * this directory and it successfully built up the hash table. 175 */ 176 if (ufsdirhash_build(dp) == 0) { 177 /* Look for a free slot if needed. */ 178 enduseful = dp->i_size; 179 if (slotstatus != FOUND) { 180 slotoffset = ufsdirhash_findfree(dp, slotneeded, 181 &slotsize); 182 if (slotoffset >= 0) { 183 slotstatus = COMPACT; 184 enduseful = ufsdirhash_enduseful(dp); 185 if (enduseful < 0) 186 enduseful = dp->i_size; 187 } 188 } 189 /* Look up the component. */ 190 numdirpasses = 1; 191 entryoffsetinblock = 0; /* silence compiler warning */ 192 switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen, 193 &dp->i_offset, &bp, nameiop == NAMEI_DELETE ? &prevoff : NULL)) { 194 case 0: 195 ep = (struct direct *)((char *)bp->b_data + 196 (dp->i_offset & bmask)); 197 goto foundentry; 198 case ENOENT: 199 dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); 200 goto notfound; 201 default: 202 /* Something failed; just do a linear search. */ 203 break; 204 } 205 } 206 #endif /* UFS_DIRHASH */ 207 /* 208 * If there is cached information on a previous search of 209 * this directory, pick up where we last left off. 210 * We cache only lookups as these are the most common 211 * and have the greatest payoff. Caching CREATE has little 212 * benefit as it usually must search the entire directory 213 * to determine that the entry does not exist. Caching the 214 * location of the last DELETE or RENAME has not reduced 215 * profiling time and hence has been removed in the interest 216 * of simplicity. 217 */ 218 if (nameiop != NAMEI_LOOKUP || dp->i_diroff == 0 || 219 dp->i_diroff >= dp->i_size) { 220 entryoffsetinblock = 0; 221 dp->i_offset = 0; 222 numdirpasses = 1; 223 } else { 224 dp->i_offset = dp->i_diroff; 225 if ((entryoffsetinblock = dp->i_offset & bmask) && 226 (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp))) 227 return (error); 228 numdirpasses = 2; 229 } 230 prevoff = dp->i_offset; 231 endsearch = roundup2(dp->i_size, DIRBLKSIZ); 232 enduseful = 0; 233 234 searchloop: 235 while (dp->i_offset < endsearch) { 236 /* 237 * If necessary, get the next directory block. 238 */ 239 if ((dp->i_offset & bmask) == 0) { 240 if (bp != NULL) 241 brelse(bp); 242 error = 243 ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp); 244 if (error) 245 return (error); 246 entryoffsetinblock = 0; 247 } 248 /* 249 * If still looking for a slot, and at a DIRBLKSIZE 250 * boundary, have to start looking for free space again. 251 */ 252 if (slotstatus == NONE && 253 (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { 254 slotoffset = -1; 255 slotfreespace = 0; 256 } 257 /* 258 * Get pointer to next entry. 259 * Full validation checks are slow, so we only check 260 * enough to insure forward progress through the 261 * directory. Complete checks can be run by patching 262 * "dirchk" to be true. 263 */ 264 ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); 265 if (ep->d_reclen == 0 || ep->d_reclen > 266 DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 267 (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { 268 int i; 269 270 ufs_dirbad(dp, dp->i_offset, "mangled entry"); 271 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 272 dp->i_offset += i; 273 entryoffsetinblock += i; 274 continue; 275 } 276 277 /* 278 * If an appropriate sized slot has not yet been found, 279 * check to see if one is available. Also accumulate space 280 * in the current block so that we can determine if 281 * compaction is viable. 282 */ 283 if (slotstatus != FOUND) { 284 int size = ep->d_reclen; 285 286 if (ep->d_ino != 0) 287 size -= DIRSIZ(OFSFMT(vdp), ep); 288 if (size > 0) { 289 if (size >= slotneeded) { 290 slotstatus = FOUND; 291 slotoffset = dp->i_offset; 292 slotsize = ep->d_reclen; 293 } else if (slotstatus == NONE) { 294 slotfreespace += size; 295 if (slotoffset == -1) 296 slotoffset = dp->i_offset; 297 if (slotfreespace >= slotneeded) { 298 slotstatus = COMPACT; 299 slotsize = dp->i_offset + 300 ep->d_reclen - slotoffset; 301 } 302 } 303 } 304 } 305 306 /* 307 * Check for a name match. 308 */ 309 if (ep->d_ino) { 310 # if (BYTE_ORDER == LITTLE_ENDIAN) 311 if (OFSFMT(vdp)) 312 namlen = ep->d_type; 313 else 314 namlen = ep->d_namlen; 315 # else 316 namlen = ep->d_namlen; 317 # endif 318 if (namlen == cnp->cn_namelen && 319 (cnp->cn_nameptr[0] == ep->d_name[0]) && 320 !bcmp(cnp->cn_nameptr, ep->d_name, 321 (unsigned)namlen)) { 322 #ifdef UFS_DIRHASH 323 foundentry: 324 #endif 325 /* 326 * Save directory entry's inode number and 327 * reclen in ndp->ni_ufs area, and release 328 * directory buffer. 329 */ 330 if (vdp->v_mount->mnt_maxsymlinklen > 0 && 331 ep->d_type == DT_WHT) { 332 slotstatus = FOUND; 333 slotoffset = dp->i_offset; 334 slotsize = ep->d_reclen; 335 dp->i_reclen = slotsize; 336 enduseful = dp->i_size; 337 ap->a_cnp->cn_flags |= CNP_ISWHITEOUT; 338 numdirpasses--; 339 goto notfound; 340 } 341 dp->i_ino = ep->d_ino; 342 dp->i_reclen = ep->d_reclen; 343 goto found; 344 } 345 } 346 prevoff = dp->i_offset; 347 dp->i_offset += ep->d_reclen; 348 entryoffsetinblock += ep->d_reclen; 349 if (ep->d_ino) 350 enduseful = dp->i_offset; 351 } 352 notfound: 353 /* 354 * If we started in the middle of the directory and failed 355 * to find our target, we must check the beginning as well. 356 */ 357 if (numdirpasses == 2) { 358 numdirpasses--; 359 dp->i_offset = 0; 360 endsearch = dp->i_diroff; 361 goto searchloop; 362 } 363 if (bp != NULL) 364 brelse(bp); 365 /* 366 * If creating, and at end of pathname and current 367 * directory has not been removed, then can consider 368 * allowing file to be created. 369 */ 370 if ((nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME || 371 (nameiop == NAMEI_DELETE && 372 (ap->a_cnp->cn_flags & CNP_DOWHITEOUT) && 373 (ap->a_cnp->cn_flags & CNP_ISWHITEOUT))) && 374 dp->i_effnlink != 0) { 375 /* 376 * Access for write is interpreted as allowing 377 * creation of files in the directory. 378 */ 379 error = VOP_EACCESS(vdp, VWRITE, cred); 380 if (error) 381 return (error); 382 /* 383 * Return an indication of where the new directory 384 * entry should be put. If we didn't find a slot, 385 * then set dp->i_count to 0 indicating 386 * that the new slot belongs at the end of the 387 * directory. If we found a slot, then the new entry 388 * can be put in the range from dp->i_offset to 389 * dp->i_offset + dp->i_count. 390 */ 391 if (slotstatus == NONE) { 392 dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ); 393 dp->i_count = 0; 394 enduseful = dp->i_offset; 395 } else if (nameiop == NAMEI_DELETE) { 396 dp->i_offset = slotoffset; 397 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 398 dp->i_count = 0; 399 else 400 dp->i_count = dp->i_offset - prevoff; 401 } else { 402 dp->i_offset = slotoffset; 403 dp->i_count = slotsize; 404 if (enduseful < slotoffset + slotsize) 405 enduseful = slotoffset + slotsize; 406 } 407 dp->i_endoff = roundup2(enduseful, DIRBLKSIZ); 408 dp->i_flag |= IN_CHANGE | IN_UPDATE; 409 /* 410 * We return with the directory locked, so that 411 * the parameters we set up above will still be 412 * valid if we actually decide to do a direnter(). 413 * We return ni_vp == NULL to indicate that the entry 414 * does not currently exist; we leave a pointer to 415 * the (locked) directory inode in ndp->ni_dvp. 416 * The pathname buffer is saved so that the name 417 * can be obtained later. 418 * 419 * NB - if the directory is unlocked, then this 420 * information cannot be used. 421 */ 422 if (!lockparent) { 423 vn_unlock(vdp); 424 cnp->cn_flags |= CNP_PDIRUNLOCK; 425 } 426 return (EJUSTRETURN); 427 } 428 return (ENOENT); 429 430 found: 431 /* 432 * Check that directory length properly reflects presence 433 * of this entry. 434 */ 435 if (dp->i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) { 436 ufs_dirbad(dp, dp->i_offset, "i_size too small"); 437 dp->i_size = dp->i_offset + DIRSIZ(OFSFMT(vdp), ep); 438 dp->i_flag |= IN_CHANGE | IN_UPDATE; 439 } 440 brelse(bp); 441 442 /* 443 * Found component in pathname. 444 * If the final component of path name, save information 445 * in the cache as to where the entry was found. 446 */ 447 if (nameiop == NAMEI_LOOKUP) 448 dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); 449 450 /* 451 * If deleting, and at end of pathname, return 452 * parameters which can be used to remove file. 453 * If the wantparent flag isn't set, we return only 454 * the directory (in ndp->ni_dvp), otherwise we go 455 * on and lock the inode, being careful with ".". 456 */ 457 if (nameiop == NAMEI_DELETE) { 458 /* 459 * Write access to directory required to delete files. 460 */ 461 error = VOP_EACCESS(vdp, VWRITE, cred); 462 if (error) 463 return (error); 464 /* 465 * Return pointer to current entry in dp->i_offset, 466 * and distance past previous entry (if there 467 * is a previous entry in this block) in dp->i_count. 468 * Save directory inode pointer in ndp->ni_dvp for dirremove(). 469 */ 470 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 471 dp->i_count = 0; 472 else 473 dp->i_count = dp->i_offset - prevoff; 474 if (dp->i_number == dp->i_ino) { 475 vref(vdp); 476 *vpp = vdp; 477 return (0); 478 } 479 if (flags & CNP_ISDOTDOT) 480 vn_unlock(vdp); /* race to get the inode */ 481 error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp); 482 if (flags & CNP_ISDOTDOT) { 483 if (vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY | 484 LK_FAILRECLAIM) != 0) { 485 cnp->cn_flags |= CNP_PDIRUNLOCK; 486 } 487 } 488 if (error) 489 return (error); 490 /* 491 * If directory is "sticky", then user must own 492 * the directory, or the file in it, else she 493 * may not delete it (unless she's root). This 494 * implements append-only directories. 495 */ 496 if ((dp->i_mode & ISVTX) && 497 cred->cr_uid != 0 && 498 cred->cr_uid != dp->i_uid && 499 VTOI(tdp)->i_uid != cred->cr_uid) { 500 vput(tdp); 501 return (EPERM); 502 } 503 *vpp = tdp; 504 if (!lockparent) { 505 vn_unlock(vdp); 506 cnp->cn_flags |= CNP_PDIRUNLOCK; 507 } 508 return (0); 509 } 510 511 /* 512 * If rewriting (RENAME), return the inode and the 513 * information required to rewrite the present directory 514 * Must get inode of directory entry to verify it's a 515 * regular file, or empty directory. 516 */ 517 if (nameiop == NAMEI_RENAME && wantparent) { 518 if ((error = VOP_EACCESS(vdp, VWRITE, cred)) != 0) 519 return (error); 520 /* 521 * Careful about locking second inode. 522 * This can only occur if the target is ".". 523 */ 524 if (dp->i_number == dp->i_ino) 525 return (EISDIR); 526 if (flags & CNP_ISDOTDOT) 527 vn_unlock(vdp); /* race to get the inode */ 528 error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp); 529 if (flags & CNP_ISDOTDOT) { 530 if (vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY | 531 LK_FAILRECLAIM) != 0) { 532 cnp->cn_flags |= CNP_PDIRUNLOCK; 533 } 534 } 535 if (error) 536 return (error); 537 *vpp = tdp; 538 if (!lockparent) { 539 vn_unlock(vdp); 540 cnp->cn_flags |= CNP_PDIRUNLOCK; 541 } 542 return (0); 543 } 544 545 /* 546 * Step through the translation in the name. We do not `vput' the 547 * directory because we may need it again if a symbolic link 548 * is relative to the current directory. Instead we save it 549 * unlocked as "pdp". We must get the target inode before unlocking 550 * the directory to insure that the inode will not be removed 551 * before we get it. We prevent deadlock by always fetching 552 * inodes from the root, moving down the directory tree. Thus 553 * when following backward pointers ".." we must unlock the 554 * parent directory before getting the requested directory. 555 * There is a potential race condition here if both the current 556 * and parent directories are removed before the VFS_VGET for the 557 * inode associated with ".." returns. We hope that this occurs 558 * infrequently since we cannot avoid this race condition without 559 * implementing a sophisticated deadlock detection algorithm. 560 * Note also that this simple deadlock detection scheme will not 561 * work if the filesystem has any hard links other than ".." 562 * that point backwards in the directory structure. 563 */ 564 pdp = vdp; 565 if (flags & CNP_ISDOTDOT) { 566 vn_unlock(pdp); /* race to get the inode */ 567 cnp->cn_flags |= CNP_PDIRUNLOCK; 568 error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp); 569 if (error) { 570 if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY | 571 LK_FAILRECLAIM) == 0) { 572 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 573 } 574 return (error); 575 } 576 if (lockparent) { 577 error = vn_lock(pdp, LK_EXCLUSIVE | LK_FAILRECLAIM); 578 if (error) { 579 vput(tdp); 580 return (error); 581 } 582 cnp->cn_flags &= ~CNP_PDIRUNLOCK; 583 } 584 *vpp = tdp; 585 } else if (dp->i_number == dp->i_ino) { 586 vref(vdp); /* we want ourself, ie "." */ 587 *vpp = vdp; 588 } else { 589 error = VFS_VGET(vdp->v_mount, NULL, dp->i_ino, &tdp); 590 if (error) 591 return (error); 592 if (!lockparent) { 593 vn_unlock(pdp); 594 cnp->cn_flags |= CNP_PDIRUNLOCK; 595 } 596 *vpp = tdp; 597 } 598 return (0); 599 } 600 601 void 602 ufs_dirbad(struct inode *ip, doff_t offset, char *how) 603 { 604 struct mount *mp; 605 606 mp = ITOV(ip)->v_mount; 607 (void)kprintf("%s: bad dir ino %lu at offset %ld: %s\n", 608 mp->mnt_stat.f_mntfromname, (u_long)ip->i_number, (long)offset, how); 609 if ((mp->mnt_flag & MNT_RDONLY) == 0) 610 panic("ufs_dirbad: bad dir"); 611 } 612 613 /* 614 * Do consistency checking on a directory entry: 615 * record length must be multiple of 4 616 * entry must fit in rest of its DIRBLKSIZ block 617 * record must be large enough to contain entry 618 * name is not longer than MAXNAMLEN 619 * name must be as long as advertised, and null terminated 620 */ 621 int 622 ufs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock) 623 { 624 int i; 625 int namlen; 626 627 # if (BYTE_ORDER == LITTLE_ENDIAN) 628 if (OFSFMT(dp)) 629 namlen = ep->d_type; 630 else 631 namlen = ep->d_namlen; 632 # else 633 namlen = ep->d_namlen; 634 # endif 635 if ((ep->d_reclen & 0x3) != 0 || 636 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 637 ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) { 638 /*return (1); */ 639 kprintf("First bad\n"); 640 goto bad; 641 } 642 if (ep->d_ino == 0) 643 return (0); 644 for (i = 0; i < namlen; i++) 645 if (ep->d_name[i] == '\0') { 646 /*return (1); */ 647 kprintf("Second bad\n"); 648 goto bad; 649 } 650 if (ep->d_name[i]) 651 goto bad; 652 return (0); 653 bad: 654 return (1); 655 } 656 657 /* 658 * Construct a new directory entry after a call to namei, using the 659 * parameters that it left in the componentname argument cnp. The 660 * argument ip is the inode to which the new directory entry will refer. 661 */ 662 void 663 ufs_makedirentry(struct inode *ip, struct componentname *cnp, 664 struct direct *newdirp) 665 { 666 667 newdirp->d_ino = ip->i_number; 668 newdirp->d_namlen = cnp->cn_namelen; 669 bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); 670 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) 671 newdirp->d_type = IFTODT(ip->i_mode); 672 else { 673 newdirp->d_type = 0; 674 # if (BYTE_ORDER == LITTLE_ENDIAN) 675 { u_char tmp = newdirp->d_namlen; 676 newdirp->d_namlen = newdirp->d_type; 677 newdirp->d_type = tmp; } 678 # endif 679 } 680 } 681 682 /* 683 * Write a directory entry after a call to namei, using the parameters 684 * that it left in the directory inode. The argument dirp is the new directory 685 * entry contents. Dvp is a pointer to the directory to be written, 686 * which was left locked by namei. Remaining parameters (dp->i_offset, 687 * dp->i_count) indicate how the space for the new entry is to be obtained. 688 * Non-null bp indicates that a directory is being created (for the 689 * soft dependency code). 690 */ 691 int 692 ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp, 693 struct componentname *cnp, struct buf *newdirbp) 694 { 695 struct ucred *cred; 696 int newentrysize; 697 struct inode *dp; 698 struct buf *bp; 699 uint dsize; 700 struct direct *ep, *nep; 701 int error, ret, blkoff, loc, spacefree, flags; 702 char *dirbuf; 703 704 cred = cnp->cn_cred; 705 KKASSERT(cred != NULL); 706 707 dp = VTOI(dvp); 708 newentrysize = DIRSIZ(OFSFMT(dvp), dirp); 709 710 if (dp->i_count == 0) { 711 /* 712 * If dp->i_count is 0, then namei could find no 713 * space in the directory. Here, dp->i_offset will 714 * be on a directory block boundary and we will write the 715 * new entry into a fresh block. 716 */ 717 if (dp->i_offset & (DIRBLKSIZ - 1)) 718 panic("ufs_direnter: newblk"); 719 nvnode_pager_setsize(dvp, dp->i_offset + DIRBLKSIZ, 720 DIRBLKSIZ, -1); 721 flags = B_CLRBUF; 722 if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) 723 flags |= B_SYNC; 724 if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, 725 cred, flags, &bp)) != 0) { 726 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 727 bdwrite(newdirbp); 728 return (error); 729 } 730 dp->i_size = dp->i_offset + DIRBLKSIZ; 731 dp->i_flag |= IN_CHANGE | IN_UPDATE; 732 dirp->d_reclen = DIRBLKSIZ; 733 blkoff = dp->i_offset & 734 (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); 735 bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); 736 #ifdef UFS_DIRHASH 737 if (dp->i_dirhash != NULL) { 738 ufsdirhash_newblk(dp, dp->i_offset); 739 ufsdirhash_add(dp, dirp, dp->i_offset); 740 ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, 741 dp->i_offset); 742 } 743 #endif 744 if (DOINGSOFTDEP(dvp)) { 745 /* 746 * Ensure that the entire newly allocated block is a 747 * valid directory so that future growth within the 748 * block does not have to ensure that the block is 749 * written before the inode. 750 */ 751 blkoff += DIRBLKSIZ; 752 while (blkoff < bp->b_bcount) { 753 ((struct direct *) 754 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 755 blkoff += DIRBLKSIZ; 756 } 757 softdep_setup_directory_add(bp, dp, dp->i_offset, 758 dirp->d_ino, newdirbp); 759 bdwrite(bp); 760 return (ffs_update(dvp, 0)); 761 } 762 if (DOINGASYNC(dvp)) { 763 bdwrite(bp); 764 return (ffs_update(dvp, 0)); 765 } 766 error = bwrite(bp); 767 ret = ffs_update(dvp, 1); 768 if (error == 0) 769 return (ret); 770 return (error); 771 } 772 773 /* 774 * If dp->i_count is non-zero, then namei found space for the new 775 * entry in the range dp->i_offset to dp->i_offset + dp->i_count 776 * in the directory. To use this space, we may have to compact 777 * the entries located there, by copying them together towards the 778 * beginning of the block, leaving the free space in one usable 779 * chunk at the end. 780 */ 781 782 /* 783 * Increase size of directory if entry eats into new space. 784 * This should never push the size past a new multiple of 785 * DIRBLKSIZE. 786 * 787 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 788 */ 789 if (dp->i_offset + dp->i_count > dp->i_size) 790 dp->i_size = dp->i_offset + dp->i_count; 791 /* 792 * Get the block containing the space for the new directory entry. 793 */ 794 error = ffs_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf, &bp); 795 if (error) { 796 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 797 bdwrite(newdirbp); 798 return (error); 799 } 800 /* 801 * Find space for the new entry. In the simple case, the entry at 802 * offset base will have the space. If it does not, then namei 803 * arranged that compacting the region dp->i_offset to 804 * dp->i_offset + dp->i_count would yield the space. 805 */ 806 ep = (struct direct *)dirbuf; 807 dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0; 808 spacefree = ep->d_reclen - dsize; 809 for (loc = ep->d_reclen; loc < dp->i_count; ) { 810 nep = (struct direct *)(dirbuf + loc); 811 812 /* Trim the existing slot (NB: dsize may be zero). */ 813 ep->d_reclen = dsize; 814 ep = (struct direct *)((char *)ep + dsize); 815 816 /* Read nep->d_reclen now as the bcopy() may clobber it. */ 817 loc += nep->d_reclen; 818 if (nep->d_ino == 0) { 819 /* 820 * A mid-block unused entry. Such entries are 821 * never created by the kernel, but fsck_ffs 822 * can create them (and it doesn't fix them). 823 * 824 * Add up the free space, and initialise the 825 * relocated entry since we don't bcopy it. 826 */ 827 spacefree += nep->d_reclen; 828 ep->d_ino = 0; 829 dsize = 0; 830 continue; 831 } 832 dsize = DIRSIZ(OFSFMT(dvp), nep); 833 spacefree += nep->d_reclen - dsize; 834 #ifdef UFS_DIRHASH 835 if (dp->i_dirhash != NULL) 836 ufsdirhash_move(dp, nep, 837 dp->i_offset + ((char *)nep - dirbuf), 838 dp->i_offset + ((char *)ep - dirbuf)); 839 #endif 840 if (DOINGSOFTDEP(dvp)) 841 softdep_change_directoryentry_offset(dp, dirbuf, 842 (caddr_t)nep, (caddr_t)ep, dsize); 843 else 844 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 845 } 846 /* 847 * Here, `ep' points to a directory entry containing `dsize' in-use 848 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 849 * then the entry is completely unused (dsize == 0). The value 850 * of ep->d_reclen is always indeterminate. 851 * 852 * Update the pointer fields in the previous entry (if any), 853 * copy in the new entry, and write out the block. 854 */ 855 if (ep->d_ino == 0 || 856 (ep->d_ino == WINO && 857 bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 858 if (spacefree + dsize < newentrysize) 859 panic("ufs_direnter: compact1"); 860 dirp->d_reclen = spacefree + dsize; 861 } else { 862 if (spacefree < newentrysize) 863 panic("ufs_direnter: compact2"); 864 dirp->d_reclen = spacefree; 865 ep->d_reclen = dsize; 866 ep = (struct direct *)((char *)ep + dsize); 867 } 868 #ifdef UFS_DIRHASH 869 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 870 dirp->d_reclen == spacefree)) 871 ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); 872 #endif 873 bcopy((caddr_t)dirp, (caddr_t)ep, (uint)newentrysize); 874 #ifdef UFS_DIRHASH 875 if (dp->i_dirhash != NULL) 876 ufsdirhash_checkblock(dp, dirbuf - 877 (dp->i_offset & (DIRBLKSIZ - 1)), 878 dp->i_offset & ~(DIRBLKSIZ - 1)); 879 #endif 880 881 if (DOINGSOFTDEP(dvp)) { 882 softdep_setup_directory_add(bp, dp, 883 dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); 884 bdwrite(bp); 885 } else { 886 if (DOINGASYNC(dvp)) { 887 bdwrite(bp); 888 error = 0; 889 } else { 890 error = bowrite(bp); 891 } 892 } 893 dp->i_flag |= IN_CHANGE | IN_UPDATE; 894 /* 895 * If all went well, and the directory can be shortened, proceed 896 * with the truncation. Note that we have to unlock the inode for 897 * the entry that we just entered, as the truncation may need to 898 * lock other inodes which can lead to deadlock if we also hold a 899 * lock on the newly entered node. 900 */ 901 if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { 902 if (tvp != NULL) 903 vn_unlock(tvp); 904 #ifdef UFS_DIRHASH 905 if (dp->i_dirhash != NULL) 906 ufsdirhash_dirtrunc(dp, dp->i_endoff); 907 #endif 908 (void)ffs_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC, cred); 909 if (tvp != NULL) 910 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); 911 } 912 return (error); 913 } 914 915 /* 916 * Remove a directory entry after a call to namei, using 917 * the parameters which it left in the directory inode. The entry 918 * dp->i_offset contains the offset into the directory of the 919 * entry to be eliminated. The dp->i_count field contains the 920 * size of the previous record in the directory. If this 921 * is 0, the first entry is being deleted, so we need only 922 * zero the inode number to mark the entry as free. If the 923 * entry is not the first in the directory, we must reclaim 924 * the space of the now empty record by adding the record size 925 * to the size of the previous entry. 926 */ 927 int 928 ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir) 929 { 930 struct inode *dp; 931 struct direct *ep; 932 struct buf *bp; 933 int error; 934 935 dp = VTOI(dvp); 936 937 if (flags & CNP_DOWHITEOUT) { 938 /* 939 * Whiteout entry: set d_ino to WINO. 940 */ 941 if ((error = 942 ffs_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0) 943 return (error); 944 ep->d_ino = WINO; 945 ep->d_type = DT_WHT; 946 goto out; 947 } 948 949 if ((error = ffs_blkatoff(dvp, 950 (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) 951 return (error); 952 #ifdef UFS_DIRHASH 953 /* 954 * Remove the dirhash entry. This is complicated by the fact 955 * that `ep' is the previous entry when dp->i_count != 0. 956 */ 957 if (dp->i_dirhash != NULL) 958 ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : 959 (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); 960 #endif 961 if (dp->i_count == 0) { 962 /* 963 * First entry in block: set d_ino to zero. 964 */ 965 ep->d_ino = 0; 966 } else { 967 /* 968 * Collapse new free space into previous entry. 969 */ 970 ep->d_reclen += dp->i_reclen; 971 } 972 #ifdef UFS_DIRHASH 973 if (dp->i_dirhash != NULL) 974 ufsdirhash_checkblock(dp, (char *)ep - 975 ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), 976 dp->i_offset & ~(DIRBLKSIZ - 1)); 977 #endif 978 out: 979 if (DOINGSOFTDEP(dvp)) { 980 if (ip) { 981 ip->i_effnlink--; 982 softdep_change_linkcnt(ip); 983 softdep_setup_remove(bp, dp, ip, isrmdir); 984 } 985 if (softdep_slowdown(dvp)) { 986 error = bwrite(bp); 987 } else { 988 bdwrite(bp); 989 error = 0; 990 } 991 } else { 992 if (ip) { 993 ip->i_effnlink--; 994 ip->i_nlink--; 995 ip->i_flag |= IN_CHANGE; 996 } 997 if (flags & CNP_DOWHITEOUT) 998 error = bwrite(bp); 999 else if (DOINGASYNC(dvp) && dp->i_count != 0) { 1000 bdwrite(bp); 1001 error = 0; 1002 } else 1003 error = bowrite(bp); 1004 } 1005 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1006 return (error); 1007 } 1008 1009 /* 1010 * Rewrite an existing directory entry to point at the inode 1011 * supplied. The parameters describing the directory entry are 1012 * set up by a call to namei. 1013 */ 1014 int 1015 ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype, 1016 int isrmdir) 1017 { 1018 struct buf *bp; 1019 struct direct *ep; 1020 struct vnode *vdp = ITOV(dp); 1021 int error; 1022 1023 error = ffs_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); 1024 if (error) 1025 return (error); 1026 ep->d_ino = newinum; 1027 if (!OFSFMT(vdp)) 1028 ep->d_type = newtype; 1029 oip->i_effnlink--; 1030 if (DOINGSOFTDEP(vdp)) { 1031 softdep_change_linkcnt(oip); 1032 softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); 1033 bdwrite(bp); 1034 } else { 1035 oip->i_nlink--; 1036 oip->i_flag |= IN_CHANGE; 1037 if (DOINGASYNC(vdp)) { 1038 bdwrite(bp); 1039 error = 0; 1040 } else { 1041 error = bowrite(bp); 1042 } 1043 } 1044 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1045 return (error); 1046 } 1047 1048 /* 1049 * Check if a directory is empty or not. 1050 * Inode supplied must be locked. 1051 * 1052 * Using a struct dirtemplate here is not precisely 1053 * what we want, but better than using a struct direct. 1054 * 1055 * NB: does not handle corrupted directories. 1056 */ 1057 int 1058 ufs_dirempty(struct inode *ip, ino_t parentino, struct ucred *cred) 1059 { 1060 off_t off; 1061 struct dirtemplate dbuf; 1062 struct direct *dp = (struct direct *)&dbuf; 1063 int error, count, namlen; 1064 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1065 1066 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1067 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, 1068 UIO_SYSSPACE, IO_NODELOCKED, cred, &count); 1069 /* 1070 * Since we read MINDIRSIZ, residual must 1071 * be 0 unless we're at end of file. 1072 */ 1073 if (error || count != 0) 1074 return (0); 1075 /* avoid infinite loops */ 1076 if (dp->d_reclen == 0) 1077 return (0); 1078 /* skip empty entries */ 1079 if (dp->d_ino == 0 || dp->d_ino == WINO) 1080 continue; 1081 /* accept only "." and ".." */ 1082 # if (BYTE_ORDER == LITTLE_ENDIAN) 1083 if (OFSFMT(ITOV(ip))) 1084 namlen = dp->d_type; 1085 else 1086 namlen = dp->d_namlen; 1087 # else 1088 namlen = dp->d_namlen; 1089 # endif 1090 if (namlen > 2) 1091 return (0); 1092 if (dp->d_name[0] != '.') 1093 return (0); 1094 /* 1095 * At this point namlen must be 1 or 2. 1096 * 1 implies ".", 2 implies ".." if second 1097 * char is also "." 1098 */ 1099 if (namlen == 1 && dp->d_ino == ip->i_number) 1100 continue; 1101 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1102 continue; 1103 return (0); 1104 } 1105 return (1); 1106 } 1107 1108 /* 1109 * Check if source directory is in the path of the target directory. 1110 * Target is supplied locked, source is unlocked. 1111 * The target is always vput before returning. 1112 */ 1113 int 1114 ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred) 1115 { 1116 struct vnode *vp; 1117 int error, rootino, namlen; 1118 struct dirtemplate dirbuf; 1119 1120 vp = ITOV(target); 1121 if (target->i_number == source->i_number) { 1122 error = EEXIST; 1123 goto out; 1124 } 1125 rootino = ROOTINO; 1126 error = 0; 1127 if (target->i_number == rootino) 1128 goto out; 1129 1130 for (;;) { 1131 if (vp->v_type != VDIR) { 1132 error = ENOTDIR; 1133 break; 1134 } 1135 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, 1136 sizeof (struct dirtemplate), (off_t)0, 1137 UIO_SYSSPACE, IO_NODELOCKED, cred, NULL); 1138 if (error != 0) 1139 break; 1140 # if (BYTE_ORDER == LITTLE_ENDIAN) 1141 if (OFSFMT(vp)) 1142 namlen = dirbuf.dotdot_type; 1143 else 1144 namlen = dirbuf.dotdot_namlen; 1145 # else 1146 namlen = dirbuf.dotdot_namlen; 1147 # endif 1148 if (namlen != 2 || 1149 dirbuf.dotdot_name[0] != '.' || 1150 dirbuf.dotdot_name[1] != '.') { 1151 error = ENOTDIR; 1152 break; 1153 } 1154 if (dirbuf.dotdot_ino == source->i_number) { 1155 error = EINVAL; 1156 break; 1157 } 1158 if (dirbuf.dotdot_ino == rootino) 1159 break; 1160 vput(vp); 1161 error = VFS_VGET(vp->v_mount, NULL, dirbuf.dotdot_ino, &vp); 1162 if (error) { 1163 vp = NULL; 1164 break; 1165 } 1166 } 1167 1168 out: 1169 if (error == ENOTDIR) 1170 kprintf("checkpath: .. not a directory\n"); 1171 if (vp != NULL) 1172 vput(vp); 1173 return (error); 1174 } 1175