/*
 * Copyright (c) 1989, 1993
 *	The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_lookup.c	8.15 (Berkeley) 6/16/95
 * $FreeBSD: src/sys/ufs/ufs/ufs_lookup.c,v 1.33.2.7 2001/09/22 19:22:13 iedowse Exp $
 * $DragonFly: src/sys/vfs/ufs/ufs_lookup.c,v 1.29 2008/10/15 12:12:51 aggelos Exp $
 */

#include "opt_ufs.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/namei.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>

#include "quota.h"
#include "inode.h"
#include "dir.h"
#ifdef UFS_DIRHASH
#include "dirhash.h"
#endif
#include "ufsmount.h"
#include "ufs_extern.h"
#include "ffs_extern.h"

/*
 * When non-zero, ufs_lookup() runs the complete ufs_dirbadentry()
 * consistency check on every directory entry it scans instead of only the
 * cheap forward-progress checks.  Defaults to on for DIAGNOSTIC kernels
 * and is exported read/write through the sysctl declared below.
 */
#ifdef DIAGNOSTIC
int	dirchk = 1;
#else
int	dirchk = 0;
#endif

SYSCTL_INT(_debug, OID_AUTO, dircheck, CTLFLAG_RW, &dirchk, 0, "");

/*
 * True if the vnode's mount uses the old filesystem (directory) format,
 * recognised by a non-positive mnt_maxsymlinklen.  On little-endian hosts
 * code in this file reads the name length of such entries from d_type
 * rather than d_namlen.
 */
#define OFSFMT(vp)	((vp)->v_mount->mnt_maxsymlinklen <= 0)

/*
 * Convert a component of a pathname into a pointer to a locked inode.
 * This is a very central and rather complicated routine.
 * If the filesystem is not maintained in a strict tree hierarchy,
 * this can result in a deadlock situation (see comments in code below).
 *
 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending
 * on whether the name is to be looked up, created, renamed, or deleted.
 * When CREATE, RENAME, or DELETE is specified, information usable in
 * creating, renaming, or deleting a directory entry may be calculated.
 * If flag has LOCKPARENT or'ed into it and the target of the pathname
 * exists, lookup returns both the target and its parent directory locked.
 * When creating or renaming and LOCKPARENT is specified, the target may
 * not be ".".  When deleting and LOCKPARENT is specified, the target may
 * be ".", but the caller must check to ensure it does a vrele and vput
 * instead of two vputs.
 *
 * Overall outline of ufs_lookup:
 *
 *	search for name in directory, to found or notfound
 * notfound:
 *	if creating, return locked directory, leaving info on available slots
 *	else return error
 * found:
 *	if at end of path and deleting, return information to allow delete
 *	if at end of path and rewriting (RENAME and LOCKPARENT), lock target
 *	  inode and return info to allow rewrite
 *	if not at end, add name to cache; if at end and neither creating
 *	  nor deleting, add name to cache
 *
 * Results, as implemented below:
 *	0		entry found; *vpp is the target vnode (the directory
 *			itself, with an extra reference, when the target
 *			is ".")
 *	EJUSTRETURN	name absent (or a whiteout) and the operation is
 *			CREATE or RENAME, or DELETE with both DOWHITEOUT and
 *			ISWHITEOUT set; the directory is still linked and
 *			writable; dp->i_offset, dp->i_count and dp->i_endoff
 *			describe where a new entry can go
 *	ENOENT		name absent and none of the above applies
 *	EISDIR		RENAME with a parent request and the target is the
 *			directory itself
 *	EPERM		DELETE refused by the sticky-directory rule
 *	other		error passed through from ffs_blkatoff(),
 *			VOP_ACCESS(), VFS_VGET() or vn_lock()
 * Whenever the directory is left unlocked on return, CNP_PDIRUNLOCK is
 * set in cnp->cn_flags.
 *
 * ufs_lookup(struct vnode *a_dvp, struct vnode **a_vpp,
 *	      struct componentname *a_cnp)
 */
int
ufs_lookup(struct vop_old_lookup_args *ap)
{
	struct vnode *vdp;		/* vnode for directory being searched */
	struct inode *dp;		/* inode for directory being searched */
	struct buf *bp;			/* a buffer of directory entries */
	struct direct *ep;		/* the current directory entry */
	int entryoffsetinblock;		/* offset of ep in bp's buffer */
	enum {NONE, COMPACT, FOUND} slotstatus;
	doff_t slotoffset;		/* offset of area with free space */
	int slotsize;			/* size of area at slotoffset */
	int slotfreespace;		/* amount of space free in slot */
	int slotneeded;			/* size of the entry we're seeking */
	int numdirpasses;		/* strategy for directory search */
	doff_t endsearch;		/* offset to end directory search */
	doff_t prevoff;			/* prev entry dp->i_offset */
	struct vnode *pdp;		/* saved dp during symlink work */
	struct vnode *tdp;		/* returned by VFS_VGET */
	doff_t enduseful;		/* pointer past last used dir slot */
	u_long bmask;			/* block offset mask */
	int lockparent;			/* 1 => lockparent flag is set */
	int wantparent;			/* 1 => wantparent or lockparent flag */
	int namlen, error;
	struct vnode **vpp = ap->a_vpp;
	struct componentname *cnp = ap->a_cnp;
	struct ucred *cred = cnp->cn_cred;
	int flags = cnp->cn_flags;
	int nameiop = cnp->cn_nameiop;
	globaldata_t gd = mycpu;

	bp = NULL;
	slotoffset = -1;
	/* The directory arrives locked; start with the "unlocked" flag clear. */
	cnp->cn_flags &= ~CNP_PDIRUNLOCK;
	/*
	 * XXX there was a soft-update diff about this I couldn't merge.
	 * I think this was the equiv.
	 */
	*vpp = NULL;

	vdp = ap->a_dvp;
	dp = VTOI(vdp);
	lockparent = flags & CNP_LOCKPARENT;
	wantparent = flags & (CNP_LOCKPARENT|CNP_WANTPARENT);

	/*
	 * We now have a segment name to search for, and a directory to search.
	 *
	 * Suppress search for slots unless creating
	 * file and at end of pathname, in which case
	 * we watch for a place to put the new file in
	 * case it doesn't already exist.
	 */
	slotstatus = FOUND;
	slotfreespace = slotsize = slotneeded = 0;
	if (nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME) {
		slotstatus = NONE;
		slotneeded = DIRECTSIZ(cnp->cn_namelen);
	}
	bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1;

#ifdef UFS_DIRHASH
	/*
	 * Use dirhash for fast operations on large directories. The logic
	 * to determine whether to hash the directory is contained within
	 * ufsdirhash_build(); a zero return means that it decided to hash
	 * this directory and it successfully built up the hash table.
	 */
	if (ufsdirhash_build(dp) == 0) {
		/* Look for a free slot if needed. */
		enduseful = dp->i_size;
		if (slotstatus != FOUND) {
			slotoffset = ufsdirhash_findfree(dp, slotneeded,
			    &slotsize);
			if (slotoffset >= 0) {
				slotstatus = COMPACT;
				enduseful = ufsdirhash_enduseful(dp);
				if (enduseful < 0)
					enduseful = dp->i_size;
			}
		}
		/* Look up the component. */
		numdirpasses = 1;
		entryoffsetinblock = 0; /* silence compiler warning */
		switch (ufsdirhash_lookup(dp, cnp->cn_nameptr, cnp->cn_namelen,
		    &dp->i_offset, &bp, nameiop == NAMEI_DELETE ? &prevoff : NULL)) {
		case 0:
			/* Hit: join the linear-search code at the match. */
			ep = (struct direct *)((char *)bp->b_data +
			    (dp->i_offset & bmask));
			goto foundentry;
		case ENOENT:
			dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
			goto notfound;
		default:
			/* Something failed; just do a linear search. */
			break;
		}
	}
#endif /* UFS_DIRHASH */
	/*
	 * If there is cached information on a previous search of
	 * this directory, pick up where we last left off.
	 * We cache only lookups as these are the most common
	 * and have the greatest payoff. Caching CREATE has little
	 * benefit as it usually must search the entire directory
	 * to determine that the entry does not exist. Caching the
	 * location of the last DELETE or RENAME has not reduced
	 * profiling time and hence has been removed in the interest
	 * of simplicity.
	 */
	if (nameiop != NAMEI_LOOKUP || dp->i_diroff == 0 ||
	    dp->i_diroff >= dp->i_size) {
		entryoffsetinblock = 0;
		dp->i_offset = 0;
		numdirpasses = 1;
	} else {
		/*
		 * Start at the cached offset.  A block only has to be read
		 * here when that offset is not block aligned; the loop
		 * below reads aligned blocks itself.  If this pass misses,
		 * a second pass covers [0, i_diroff).
		 */
		dp->i_offset = dp->i_diroff;
		if ((entryoffsetinblock = dp->i_offset & bmask) &&
		    (error = ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp)))
			return (error);
		numdirpasses = 2;
		gd->gd_nchstats->ncs_2passes++;
	}
	prevoff = dp->i_offset;
	endsearch = roundup2(dp->i_size, DIRBLKSIZ);
	enduseful = 0;

searchloop:
	while (dp->i_offset < endsearch) {
		/*
		 * If necessary, get the next directory block.
		 */
		if ((dp->i_offset & bmask) == 0) {
			if (bp != NULL)
				brelse(bp);
			error =
			    ffs_blkatoff(vdp, (off_t)dp->i_offset, NULL, &bp);
			if (error)
				return (error);
			entryoffsetinblock = 0;
		}
		/*
		 * If still looking for a slot, and at a DIRBLKSIZE
		 * boundary, have to start looking for free space again.
		 */
		if (slotstatus == NONE &&
		    (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) {
			slotoffset = -1;
			slotfreespace = 0;
		}
		/*
		 * Get pointer to next entry.
		 * Full validation checks are slow, so we only check
		 * enough to ensure forward progress through the
		 * directory. Complete checks can be run by patching
		 * "dirchk" to be true.
		 */
		ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock);
		if (ep->d_reclen == 0 || ep->d_reclen >
		    DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
		    (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) {
			int i;

			/*
			 * Report the damage, then skip the remainder of
			 * this DIRBLKSIZ chunk and carry on with the next.
			 */
			ufs_dirbad(dp, dp->i_offset, "mangled entry");
			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
			dp->i_offset += i;
			entryoffsetinblock += i;
			continue;
		}

		/*
		 * If an appropriate sized slot has not yet been found,
		 * check to see if one is available. Also accumulate space
		 * in the current block so that we can determine if
		 * compaction is viable.
		 */
		if (slotstatus != FOUND) {
			int size = ep->d_reclen;

			/* Free bytes are whatever the live entry does not use. */
			if (ep->d_ino != 0)
				size -= DIRSIZ(OFSFMT(vdp), ep);
			if (size > 0) {
				if (size >= slotneeded) {
					slotstatus = FOUND;
					slotoffset = dp->i_offset;
					slotsize = ep->d_reclen;
				} else if (slotstatus == NONE) {
					slotfreespace += size;
					if (slotoffset == -1)
						slotoffset = dp->i_offset;
					if (slotfreespace >= slotneeded) {
						slotstatus = COMPACT;
						slotsize = dp->i_offset +
						      ep->d_reclen - slotoffset;
					}
				}
			}
		}

		/*
		 * Check for a name match.
		 */
		if (ep->d_ino) {
#			if (BYTE_ORDER == LITTLE_ENDIAN)
				if (OFSFMT(vdp))
					namlen = ep->d_type;
				else
					namlen = ep->d_namlen;
#			else
				namlen = ep->d_namlen;
#			endif
			/* Length and first byte are compared before bcmp(). */
			if (namlen == cnp->cn_namelen &&
				(cnp->cn_nameptr[0] == ep->d_name[0]) &&
			    !bcmp(cnp->cn_nameptr, ep->d_name,
				(unsigned)namlen)) {
#ifdef UFS_DIRHASH
foundentry:
#endif
				/*
				 * A whiteout with this name is reported as
				 * "not found", but its slot is remembered
				 * (and CNP_ISWHITEOUT set) so the notfound
				 * code can hand it back for reuse.
				 */
				if (vdp->v_mount->mnt_maxsymlinklen > 0 &&
				    ep->d_type == DT_WHT) {
					slotstatus = FOUND;
					slotoffset = dp->i_offset;
					slotsize = ep->d_reclen;
					dp->i_reclen = slotsize;
					enduseful = dp->i_size;
					ap->a_cnp->cn_flags |= CNP_ISWHITEOUT;
					numdirpasses--;
					goto notfound;
				}
				/*
				 * Save the directory entry's inode number
				 * and reclen in the directory inode; the
				 * buffer is released at "found".
				 */
				dp->i_ino = ep->d_ino;
				dp->i_reclen = ep->d_reclen;
				goto found;
			}
		}
		prevoff = dp->i_offset;
		dp->i_offset += ep->d_reclen;
		entryoffsetinblock += ep->d_reclen;
		if (ep->d_ino)
			enduseful = dp->i_offset;
	}
notfound:
	/*
	 * If we started in the middle of the directory and failed
	 * to find our target, we must check the beginning as well.
	 */
	if (numdirpasses == 2) {
		numdirpasses--;
		dp->i_offset = 0;
		endsearch = dp->i_diroff;
		goto searchloop;
	}
	if (bp != NULL)
		brelse(bp);
	/*
	 * If creating, and at end of pathname and current
	 * directory has not been removed, then can consider
	 * allowing file to be created.
	 */
	if ((nameiop == NAMEI_CREATE || nameiop == NAMEI_RENAME ||
	     (nameiop == NAMEI_DELETE &&
	      (ap->a_cnp->cn_flags & CNP_DOWHITEOUT) &&
	      (ap->a_cnp->cn_flags & CNP_ISWHITEOUT))) &&
	    dp->i_effnlink != 0) {
		/*
		 * Access for write is interpreted as allowing
		 * creation of files in the directory.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error)
			return (error);
		/*
		 * Return an indication of where the new directory
		 * entry should be put.  If we didn't find a slot,
		 * then set dp->i_count to 0 indicating
		 * that the new slot belongs at the end of the
		 * directory. If we found a slot, then the new entry
		 * can be put in the range from dp->i_offset to
		 * dp->i_offset + dp->i_count.
		 */
		if (slotstatus == NONE) {
			dp->i_offset = roundup2(dp->i_size, DIRBLKSIZ);
			dp->i_count = 0;
			enduseful = dp->i_offset;
		} else if (nameiop == NAMEI_DELETE) {
			/*
			 * Same offset/count convention as the DELETE case
			 * of the found path below.
			 */
			dp->i_offset = slotoffset;
			if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
				dp->i_count = 0;
			else
				dp->i_count = dp->i_offset - prevoff;
		} else {
			dp->i_offset = slotoffset;
			dp->i_count = slotsize;
			if (enduseful < slotoffset + slotsize)
				enduseful = slotoffset + slotsize;
		}
		dp->i_endoff = roundup2(enduseful, DIRBLKSIZ);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
		/*
		 * We return with the directory locked, so that
		 * the parameters we set up above will still be
		 * valid if we actually decide to do a direnter().
		 * We return ni_vp == NULL to indicate that the entry
		 * does not currently exist; we leave a pointer to
		 * the (locked) directory inode in ndp->ni_dvp.
		 * The pathname buffer is saved so that the name
		 * can be obtained later.
		 *
		 * NB - if the directory is unlocked, then this
		 * information cannot be used.
		 */
		if (!lockparent) {
			vn_unlock(vdp);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
		return (EJUSTRETURN);
	}
	return (ENOENT);

found:
	if (numdirpasses == 2)
		gd->gd_nchstats->ncs_pass2++;
	/*
	 * Check that directory length properly reflects presence
	 * of this entry.
	 */
	if (dp->i_offset + DIRSIZ(OFSFMT(vdp), ep) > dp->i_size) {
		ufs_dirbad(dp, dp->i_offset, "i_size too small");
		dp->i_size = dp->i_offset + DIRSIZ(OFSFMT(vdp), ep);
		dp->i_flag |= IN_CHANGE | IN_UPDATE;
	}
	/* ep points into bp and must not be used past this point. */
	brelse(bp);

	/*
	 * Found component in pathname.
	 * For a plain LOOKUP, remember the DIRBLKSIZ-aligned offset at
	 * which the entry was found so the next lookup can start there.
	 */
	if (nameiop == NAMEI_LOOKUP)
		dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1);

	/*
	 * If deleting, and at end of pathname, return
	 * parameters which can be used to remove file.
	 * If the wantparent flag isn't set, we return only
	 * the directory (in ndp->ni_dvp), otherwise we go
	 * on and lock the inode, being careful with ".".
	 */
	if (nameiop == NAMEI_DELETE) {
		/*
		 * Write access to directory required to delete files.
		 */
		error = VOP_ACCESS(vdp, VWRITE, cred);
		if (error)
			return (error);
		/*
		 * Return pointer to current entry in dp->i_offset,
		 * and distance past previous entry (if there
		 * is a previous entry in this block) in dp->i_count.
		 * Save directory inode pointer in ndp->ni_dvp for dirremove().
		 */
		if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0)
			dp->i_count = 0;
		else
			dp->i_count = dp->i_offset - prevoff;
		if (dp->i_number == dp->i_ino) {
			/* Target is the directory itself ("."). */
			vref(vdp);
			*vpp = vdp;
			return (0);
		}
		if (flags & CNP_ISDOTDOT)
			vn_unlock(vdp);	/* race to get the inode */
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (flags & CNP_ISDOTDOT) {
			if (vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY) != 0)
				cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
		if (error)
			return (error);
		/*
		 * If directory is "sticky", then user must own
		 * the directory, or the file in it, else she
		 * may not delete it (unless she's root). This
		 * implements append-only directories.
		 */
		if ((dp->i_mode & ISVTX) &&
		    cred->cr_uid != 0 &&
		    cred->cr_uid != dp->i_uid &&
		    VTOI(tdp)->i_uid != cred->cr_uid) {
			vput(tdp);
			return (EPERM);
		}
		*vpp = tdp;
		if (!lockparent) {
			vn_unlock(vdp);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
		return (0);
	}

	/*
	 * If rewriting (RENAME), return the inode and the
	 * information required to rewrite the present directory
	 * Must get inode of directory entry to verify it's a
	 * regular file, or empty directory.
	 */
	if (nameiop == NAMEI_RENAME && wantparent) {
		if ((error = VOP_ACCESS(vdp, VWRITE, cred)) != 0)
			return (error);
		/*
		 * Careful about locking second inode.
		 * This can only occur if the target is ".".
		 */
		if (dp->i_number == dp->i_ino)
			return (EISDIR);
		if (flags & CNP_ISDOTDOT)
			vn_unlock(vdp);	/* race to get the inode */
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (flags & CNP_ISDOTDOT) {
			if (vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY) != 0)
				cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
		if (error)
			return (error);
		*vpp = tdp;
		if (!lockparent) {
			vn_unlock(vdp);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
		return (0);
	}

	/*
	 * Step through the translation in the name.  We do not `vput' the
	 * directory because we may need it again if a symbolic link
	 * is relative to the current directory.  Instead we save it
	 * unlocked as "pdp".  We must get the target inode before unlocking
	 * the directory to ensure that the inode will not be removed
	 * before we get it.  We prevent deadlock by always fetching
	 * inodes from the root, moving down the directory tree. Thus
	 * when following backward pointers ".." we must unlock the
	 * parent directory before getting the requested directory.
	 * There is a potential race condition here if both the current
	 * and parent directories are removed before the VFS_VGET for the
	 * inode associated with ".." returns.  We hope that this occurs
	 * infrequently since we cannot avoid this race condition without
	 * implementing a sophisticated deadlock detection algorithm.
	 * Note also that this simple deadlock detection scheme will not
	 * work if the filesystem has any hard links other than ".."
	 * that point backwards in the directory structure.
	 */
	pdp = vdp;
	if (flags & CNP_ISDOTDOT) {
		vn_unlock(pdp);	/* race to get the inode */
		cnp->cn_flags |= CNP_PDIRUNLOCK;
		if ((error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp)) != 0) {
			/* Try to hand the directory back locked on failure. */
			if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY) == 0)
				cnp->cn_flags &= ~CNP_PDIRUNLOCK;
			return (error);
		}
		if (lockparent) {
			if ((error = vn_lock(pdp, LK_EXCLUSIVE)) != 0) {
				vput(tdp);
				return (error);
			}
			cnp->cn_flags &= ~CNP_PDIRUNLOCK;
		}
		*vpp = tdp;
	} else if (dp->i_number == dp->i_ino) {
		vref(vdp);	/* we want ourself, ie "." */
		*vpp = vdp;
	} else {
		error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp);
		if (error)
			return (error);
		if (!lockparent) {
			vn_unlock(pdp);
			cnp->cn_flags |= CNP_PDIRUNLOCK;
		}
		*vpp = tdp;
	}
	return (0);
}

/*
 * Report a damaged directory.
 *
 * Prints the mounted-from name, the directory's inode number, the offset
 * of the damage and the caller-supplied description `how', then panics
 * unless the filesystem is mounted read-only.  Callers therefore only get
 * control back on read-only mounts.
 */
void
ufs_dirbad(struct inode *ip, doff_t offset, char *how)
{
	struct mount *mp;

	mp = ITOV(ip)->v_mount;
	(void)kprintf("%s: bad dir ino %lu at offset %ld: %s\n",
	    mp->mnt_stat.f_mntfromname, (u_long)ip->i_number, (long)offset, how);
	if ((mp->mnt_flag & MNT_RDONLY) == 0)
		panic("ufs_dirbad: bad dir");
}

/*
 * Do consistency checking on a directory entry:
 *	record length must be multiple of 4
 *	entry must fit in rest of its DIRBLKSIZ block
 *	record must be large enough to contain entry
 *	name is not longer than MAXNAMLEN
 *	name must be as long as advertised, and null terminated
 *
 * dp is the directory's vnode (used only to detect the old directory
 * format), ep the entry to check and entryoffsetinblock its offset within
 * the buffer.  Returns 1 if the entry is bad, 0 if it looks sane.  The
 * name checks are skipped for unused entries (d_ino == 0).
 */
int
ufs_dirbadentry(struct vnode *dp, struct direct *ep, int entryoffsetinblock)
{
	int i;
	int namlen;

#	if (BYTE_ORDER == LITTLE_ENDIAN)
		if (OFSFMT(dp))
			namlen = ep->d_type;
		else
			namlen = ep->d_namlen;
#	else
		namlen = ep->d_namlen;
#	endif
	/* Size and alignment checks. */
	if ((ep->d_reclen & 0x3) != 0 ||
	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
	    ep->d_reclen < DIRSIZ(OFSFMT(dp), ep) || namlen > MAXNAMLEN) {
		kprintf("First bad\n");
		goto bad;
	}
	if (ep->d_ino == 0)
		return (0);
	/* No NUL may appear inside the advertised name length... */
	for (i = 0; i < namlen; i++)
		if (ep->d_name[i] == '\0') {
			kprintf("Second bad\n");
			goto bad;
		}
	/* ...and the byte right after the name must be the terminator. */
	if (ep->d_name[i])
		goto bad;
	return (0);
bad:
	return (1);
}

/*
 * Construct a new directory entry after a call to namei, using the
 * parameters that it left in the componentname argument cnp.  The
 * argument ip is the inode to which the new directory entry will refer.
660 */ 661 void 662 ufs_makedirentry(struct inode *ip, struct componentname *cnp, 663 struct direct *newdirp) 664 { 665 666 newdirp->d_ino = ip->i_number; 667 newdirp->d_namlen = cnp->cn_namelen; 668 bcopy(cnp->cn_nameptr, newdirp->d_name, (unsigned)cnp->cn_namelen + 1); 669 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) 670 newdirp->d_type = IFTODT(ip->i_mode); 671 else { 672 newdirp->d_type = 0; 673 # if (BYTE_ORDER == LITTLE_ENDIAN) 674 { u_char tmp = newdirp->d_namlen; 675 newdirp->d_namlen = newdirp->d_type; 676 newdirp->d_type = tmp; } 677 # endif 678 } 679 } 680 681 /* 682 * Write a directory entry after a call to namei, using the parameters 683 * that it left in the directory inode. The argument dirp is the new directory 684 * entry contents. Dvp is a pointer to the directory to be written, 685 * which was left locked by namei. Remaining parameters (dp->i_offset, 686 * dp->i_count) indicate how the space for the new entry is to be obtained. 687 * Non-null bp indicates that a directory is being created (for the 688 * soft dependency code). 689 */ 690 int 691 ufs_direnter(struct vnode *dvp, struct vnode *tvp, struct direct *dirp, 692 struct componentname *cnp, struct buf *newdirbp) 693 { 694 struct ucred *cred; 695 int newentrysize; 696 struct inode *dp; 697 struct buf *bp; 698 uint dsize; 699 struct direct *ep, *nep; 700 int error, ret, blkoff, loc, spacefree, flags; 701 char *dirbuf; 702 703 cred = cnp->cn_cred; 704 KKASSERT(cred != NULL); 705 706 dp = VTOI(dvp); 707 newentrysize = DIRSIZ(OFSFMT(dvp), dirp); 708 709 if (dp->i_count == 0) { 710 /* 711 * If dp->i_count is 0, then namei could find no 712 * space in the directory. Here, dp->i_offset will 713 * be on a directory block boundary and we will write the 714 * new entry into a fresh block. 
715 */ 716 if (dp->i_offset & (DIRBLKSIZ - 1)) 717 panic("ufs_direnter: newblk"); 718 vnode_pager_setsize(dvp, dp->i_offset + DIRBLKSIZ); 719 flags = B_CLRBUF; 720 if (!DOINGSOFTDEP(dvp) && !DOINGASYNC(dvp)) 721 flags |= B_SYNC; 722 if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, 723 cred, flags, &bp)) != 0) { 724 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 725 bdwrite(newdirbp); 726 return (error); 727 } 728 dp->i_size = dp->i_offset + DIRBLKSIZ; 729 dp->i_flag |= IN_CHANGE | IN_UPDATE; 730 dirp->d_reclen = DIRBLKSIZ; 731 blkoff = dp->i_offset & 732 (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); 733 bcopy((caddr_t)dirp, (caddr_t)bp->b_data + blkoff,newentrysize); 734 #ifdef UFS_DIRHASH 735 if (dp->i_dirhash != NULL) { 736 ufsdirhash_newblk(dp, dp->i_offset); 737 ufsdirhash_add(dp, dirp, dp->i_offset); 738 ufsdirhash_checkblock(dp, (char *)bp->b_data + blkoff, 739 dp->i_offset); 740 } 741 #endif 742 if (DOINGSOFTDEP(dvp)) { 743 /* 744 * Ensure that the entire newly allocated block is a 745 * valid directory so that future growth within the 746 * block does not have to ensure that the block is 747 * written before the inode. 748 */ 749 blkoff += DIRBLKSIZ; 750 while (blkoff < bp->b_bcount) { 751 ((struct direct *) 752 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 753 blkoff += DIRBLKSIZ; 754 } 755 softdep_setup_directory_add(bp, dp, dp->i_offset, 756 dirp->d_ino, newdirbp); 757 bdwrite(bp); 758 return (ffs_update(dvp, 0)); 759 } 760 if (DOINGASYNC(dvp)) { 761 bdwrite(bp); 762 return (ffs_update(dvp, 0)); 763 } 764 error = bwrite(bp); 765 ret = ffs_update(dvp, 1); 766 if (error == 0) 767 return (ret); 768 return (error); 769 } 770 771 /* 772 * If dp->i_count is non-zero, then namei found space for the new 773 * entry in the range dp->i_offset to dp->i_offset + dp->i_count 774 * in the directory. 
To use this space, we may have to compact 775 * the entries located there, by copying them together towards the 776 * beginning of the block, leaving the free space in one usable 777 * chunk at the end. 778 */ 779 780 /* 781 * Increase size of directory if entry eats into new space. 782 * This should never push the size past a new multiple of 783 * DIRBLKSIZE. 784 * 785 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 786 */ 787 if (dp->i_offset + dp->i_count > dp->i_size) 788 dp->i_size = dp->i_offset + dp->i_count; 789 /* 790 * Get the block containing the space for the new directory entry. 791 */ 792 error = ffs_blkatoff(dvp, (off_t)dp->i_offset, &dirbuf, &bp); 793 if (error) { 794 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 795 bdwrite(newdirbp); 796 return (error); 797 } 798 /* 799 * Find space for the new entry. In the simple case, the entry at 800 * offset base will have the space. If it does not, then namei 801 * arranged that compacting the region dp->i_offset to 802 * dp->i_offset + dp->i_count would yield the space. 803 */ 804 ep = (struct direct *)dirbuf; 805 dsize = ep->d_ino ? DIRSIZ(OFSFMT(dvp), ep) : 0; 806 spacefree = ep->d_reclen - dsize; 807 for (loc = ep->d_reclen; loc < dp->i_count; ) { 808 nep = (struct direct *)(dirbuf + loc); 809 810 /* Trim the existing slot (NB: dsize may be zero). */ 811 ep->d_reclen = dsize; 812 ep = (struct direct *)((char *)ep + dsize); 813 814 /* Read nep->d_reclen now as the bcopy() may clobber it. */ 815 loc += nep->d_reclen; 816 if (nep->d_ino == 0) { 817 /* 818 * A mid-block unused entry. Such entries are 819 * never created by the kernel, but fsck_ffs 820 * can create them (and it doesn't fix them). 821 * 822 * Add up the free space, and initialise the 823 * relocated entry since we don't bcopy it. 
824 */ 825 spacefree += nep->d_reclen; 826 ep->d_ino = 0; 827 dsize = 0; 828 continue; 829 } 830 dsize = DIRSIZ(OFSFMT(dvp), nep); 831 spacefree += nep->d_reclen - dsize; 832 #ifdef UFS_DIRHASH 833 if (dp->i_dirhash != NULL) 834 ufsdirhash_move(dp, nep, 835 dp->i_offset + ((char *)nep - dirbuf), 836 dp->i_offset + ((char *)ep - dirbuf)); 837 #endif 838 if (DOINGSOFTDEP(dvp)) 839 softdep_change_directoryentry_offset(dp, dirbuf, 840 (caddr_t)nep, (caddr_t)ep, dsize); 841 else 842 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 843 } 844 /* 845 * Here, `ep' points to a directory entry containing `dsize' in-use 846 * bytes followed by `spacefree' unused bytes. If ep->d_ino == 0, 847 * then the entry is completely unused (dsize == 0). The value 848 * of ep->d_reclen is always indeterminate. 849 * 850 * Update the pointer fields in the previous entry (if any), 851 * copy in the new entry, and write out the block. 852 */ 853 if (ep->d_ino == 0 || 854 (ep->d_ino == WINO && 855 bcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 856 if (spacefree + dsize < newentrysize) 857 panic("ufs_direnter: compact1"); 858 dirp->d_reclen = spacefree + dsize; 859 } else { 860 if (spacefree < newentrysize) 861 panic("ufs_direnter: compact2"); 862 dirp->d_reclen = spacefree; 863 ep->d_reclen = dsize; 864 ep = (struct direct *)((char *)ep + dsize); 865 } 866 #ifdef UFS_DIRHASH 867 if (dp->i_dirhash != NULL && (ep->d_ino == 0 || 868 dirp->d_reclen == spacefree)) 869 ufsdirhash_add(dp, dirp, dp->i_offset + ((char *)ep - dirbuf)); 870 #endif 871 bcopy((caddr_t)dirp, (caddr_t)ep, (uint)newentrysize); 872 #ifdef UFS_DIRHASH 873 if (dp->i_dirhash != NULL) 874 ufsdirhash_checkblock(dp, dirbuf - 875 (dp->i_offset & (DIRBLKSIZ - 1)), 876 dp->i_offset & ~(DIRBLKSIZ - 1)); 877 #endif 878 879 if (DOINGSOFTDEP(dvp)) { 880 softdep_setup_directory_add(bp, dp, 881 dp->i_offset + (caddr_t)ep - dirbuf, dirp->d_ino, newdirbp); 882 bdwrite(bp); 883 } else { 884 if (DOINGASYNC(dvp)) { 885 bdwrite(bp); 886 error = 
0; 887 } else { 888 error = bowrite(bp); 889 } 890 } 891 dp->i_flag |= IN_CHANGE | IN_UPDATE; 892 /* 893 * If all went well, and the directory can be shortened, proceed 894 * with the truncation. Note that we have to unlock the inode for 895 * the entry that we just entered, as the truncation may need to 896 * lock other inodes which can lead to deadlock if we also hold a 897 * lock on the newly entered node. 898 */ 899 if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_size) { 900 if (tvp != NULL) 901 vn_unlock(tvp); 902 #ifdef UFS_DIRHASH 903 if (dp->i_dirhash != NULL) 904 ufsdirhash_dirtrunc(dp, dp->i_endoff); 905 #endif 906 (void)ffs_truncate(dvp, (off_t)dp->i_endoff, IO_SYNC, cred); 907 if (tvp != NULL) 908 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); 909 } 910 return (error); 911 } 912 913 /* 914 * Remove a directory entry after a call to namei, using 915 * the parameters which it left in the directory inode. The entry 916 * dp->i_offset contains the offset into the directory of the 917 * entry to be eliminated. The dp->i_count field contains the 918 * size of the previous record in the directory. If this 919 * is 0, the first entry is being deleted, so we need only 920 * zero the inode number to mark the entry as free. If the 921 * entry is not the first in the directory, we must reclaim 922 * the space of the now empty record by adding the record size 923 * to the size of the previous entry. 924 */ 925 int 926 ufs_dirremove(struct vnode *dvp, struct inode *ip, int flags, int isrmdir) 927 { 928 struct inode *dp; 929 struct direct *ep; 930 struct buf *bp; 931 int error; 932 933 dp = VTOI(dvp); 934 935 if (flags & CNP_DOWHITEOUT) { 936 /* 937 * Whiteout entry: set d_ino to WINO. 
938 */ 939 if ((error = 940 ffs_blkatoff(dvp, (off_t)dp->i_offset, (char **)&ep, &bp)) != 0) 941 return (error); 942 ep->d_ino = WINO; 943 ep->d_type = DT_WHT; 944 goto out; 945 } 946 947 if ((error = ffs_blkatoff(dvp, 948 (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) 949 return (error); 950 #ifdef UFS_DIRHASH 951 /* 952 * Remove the dirhash entry. This is complicated by the fact 953 * that `ep' is the previous entry when dp->i_count != 0. 954 */ 955 if (dp->i_dirhash != NULL) 956 ufsdirhash_remove(dp, (dp->i_count == 0) ? ep : 957 (struct direct *)((char *)ep + ep->d_reclen), dp->i_offset); 958 #endif 959 if (dp->i_count == 0) { 960 /* 961 * First entry in block: set d_ino to zero. 962 */ 963 ep->d_ino = 0; 964 } else { 965 /* 966 * Collapse new free space into previous entry. 967 */ 968 ep->d_reclen += dp->i_reclen; 969 } 970 #ifdef UFS_DIRHASH 971 if (dp->i_dirhash != NULL) 972 ufsdirhash_checkblock(dp, (char *)ep - 973 ((dp->i_offset - dp->i_count) & (DIRBLKSIZ - 1)), 974 dp->i_offset & ~(DIRBLKSIZ - 1)); 975 #endif 976 out: 977 if (DOINGSOFTDEP(dvp)) { 978 if (ip) { 979 ip->i_effnlink--; 980 softdep_change_linkcnt(ip); 981 softdep_setup_remove(bp, dp, ip, isrmdir); 982 } 983 if (softdep_slowdown(dvp)) { 984 error = bwrite(bp); 985 } else { 986 bdwrite(bp); 987 error = 0; 988 } 989 } else { 990 if (ip) { 991 ip->i_effnlink--; 992 ip->i_nlink--; 993 ip->i_flag |= IN_CHANGE; 994 } 995 if (flags & CNP_DOWHITEOUT) 996 error = bwrite(bp); 997 else if (DOINGASYNC(dvp) && dp->i_count != 0) { 998 bdwrite(bp); 999 error = 0; 1000 } else 1001 error = bowrite(bp); 1002 } 1003 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1004 return (error); 1005 } 1006 1007 /* 1008 * Rewrite an existing directory entry to point at the inode 1009 * supplied. The parameters describing the directory entry are 1010 * set up by a call to namei. 
1011 */ 1012 int 1013 ufs_dirrewrite(struct inode *dp, struct inode *oip, ino_t newinum, int newtype, 1014 int isrmdir) 1015 { 1016 struct buf *bp; 1017 struct direct *ep; 1018 struct vnode *vdp = ITOV(dp); 1019 int error; 1020 1021 error = ffs_blkatoff(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); 1022 if (error) 1023 return (error); 1024 ep->d_ino = newinum; 1025 if (!OFSFMT(vdp)) 1026 ep->d_type = newtype; 1027 oip->i_effnlink--; 1028 if (DOINGSOFTDEP(vdp)) { 1029 softdep_change_linkcnt(oip); 1030 softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); 1031 bdwrite(bp); 1032 } else { 1033 oip->i_nlink--; 1034 oip->i_flag |= IN_CHANGE; 1035 if (DOINGASYNC(vdp)) { 1036 bdwrite(bp); 1037 error = 0; 1038 } else { 1039 error = bowrite(bp); 1040 } 1041 } 1042 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1043 return (error); 1044 } 1045 1046 /* 1047 * Check if a directory is empty or not. 1048 * Inode supplied must be locked. 1049 * 1050 * Using a struct dirtemplate here is not precisely 1051 * what we want, but better than using a struct direct. 1052 * 1053 * NB: does not handle corrupted directories. 1054 */ 1055 int 1056 ufs_dirempty(struct inode *ip, ino_t parentino, struct ucred *cred) 1057 { 1058 off_t off; 1059 struct dirtemplate dbuf; 1060 struct direct *dp = (struct direct *)&dbuf; 1061 int error, count, namlen; 1062 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1063 1064 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1065 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, 1066 UIO_SYSSPACE, IO_NODELOCKED, cred, &count); 1067 /* 1068 * Since we read MINDIRSIZ, residual must 1069 * be 0 unless we're at end of file. 1070 */ 1071 if (error || count != 0) 1072 return (0); 1073 /* avoid infinite loops */ 1074 if (dp->d_reclen == 0) 1075 return (0); 1076 /* skip empty entries */ 1077 if (dp->d_ino == 0 || dp->d_ino == WINO) 1078 continue; 1079 /* accept only "." and ".." 
*/ 1080 # if (BYTE_ORDER == LITTLE_ENDIAN) 1081 if (OFSFMT(ITOV(ip))) 1082 namlen = dp->d_type; 1083 else 1084 namlen = dp->d_namlen; 1085 # else 1086 namlen = dp->d_namlen; 1087 # endif 1088 if (namlen > 2) 1089 return (0); 1090 if (dp->d_name[0] != '.') 1091 return (0); 1092 /* 1093 * At this point namlen must be 1 or 2. 1094 * 1 implies ".", 2 implies ".." if second 1095 * char is also "." 1096 */ 1097 if (namlen == 1 && dp->d_ino == ip->i_number) 1098 continue; 1099 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1100 continue; 1101 return (0); 1102 } 1103 return (1); 1104 } 1105 1106 /* 1107 * Check if source directory is in the path of the target directory. 1108 * Target is supplied locked, source is unlocked. 1109 * The target is always vput before returning. 1110 */ 1111 int 1112 ufs_checkpath(struct inode *source, struct inode *target, struct ucred *cred) 1113 { 1114 struct vnode *vp; 1115 int error, rootino, namlen; 1116 struct dirtemplate dirbuf; 1117 1118 vp = ITOV(target); 1119 if (target->i_number == source->i_number) { 1120 error = EEXIST; 1121 goto out; 1122 } 1123 rootino = ROOTINO; 1124 error = 0; 1125 if (target->i_number == rootino) 1126 goto out; 1127 1128 for (;;) { 1129 if (vp->v_type != VDIR) { 1130 error = ENOTDIR; 1131 break; 1132 } 1133 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, 1134 sizeof (struct dirtemplate), (off_t)0, 1135 UIO_SYSSPACE, IO_NODELOCKED, cred, NULL); 1136 if (error != 0) 1137 break; 1138 # if (BYTE_ORDER == LITTLE_ENDIAN) 1139 if (OFSFMT(vp)) 1140 namlen = dirbuf.dotdot_type; 1141 else 1142 namlen = dirbuf.dotdot_namlen; 1143 # else 1144 namlen = dirbuf.dotdot_namlen; 1145 # endif 1146 if (namlen != 2 || 1147 dirbuf.dotdot_name[0] != '.' 
|| 1148 dirbuf.dotdot_name[1] != '.') { 1149 error = ENOTDIR; 1150 break; 1151 } 1152 if (dirbuf.dotdot_ino == source->i_number) { 1153 error = EINVAL; 1154 break; 1155 } 1156 if (dirbuf.dotdot_ino == rootino) 1157 break; 1158 vput(vp); 1159 error = VFS_VGET(vp->v_mount, dirbuf.dotdot_ino, &vp); 1160 if (error) { 1161 vp = NULL; 1162 break; 1163 } 1164 } 1165 1166 out: 1167 if (error == ENOTDIR) 1168 kprintf("checkpath: .. not a directory\n"); 1169 if (vp != NULL) 1170 vput(vp); 1171 return (error); 1172 } 1173