1 /* $NetBSD: ufs_lookup.c,v 1.39 2002/05/12 23:06:30 matt Exp $ */ 2 3 /* 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. All advertising materials mentioning features or use of this software 21 * must display the following acknowledgement: 22 * This product includes software developed by the University of 23 * California, Berkeley and its contributors. 24 * 4. Neither the name of the University nor the names of its contributors 25 * may be used to endorse or promote products derived from this software 26 * without specific prior written permission. 27 * 28 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 29 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 30 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 31 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 32 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 33 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 34 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 35 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 36 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 37 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 38 * SUCH DAMAGE. 39 * 40 * @(#)ufs_lookup.c 8.9 (Berkeley) 8/11/94 41 */ 42 43 #include <sys/cdefs.h> 44 __KERNEL_RCSID(0, "$NetBSD: ufs_lookup.c,v 1.39 2002/05/12 23:06:30 matt Exp $"); 45 46 #include <sys/param.h> 47 #include <sys/systm.h> 48 #include <sys/namei.h> 49 #include <sys/buf.h> 50 #include <sys/file.h> 51 #include <sys/stat.h> 52 #include <sys/mount.h> 53 #include <sys/vnode.h> 54 #include <sys/kernel.h> 55 56 #include <ufs/ufs/inode.h> 57 #include <ufs/ufs/dir.h> 58 #include <ufs/ufs/ufsmount.h> 59 #include <ufs/ufs/ufs_extern.h> 60 #include <ufs/ufs/ufs_bswap.h> 61 62 #ifdef DIAGNOSTIC 63 int dirchk = 1; 64 #else 65 int dirchk = 0; 66 #endif 67 68 #define FSFMT(vp) ((vp)->v_mount->mnt_maxsymlinklen <= 0) 69 70 /* 71 * Convert a component of a pathname into a pointer to a locked inode. 72 * This is a very central and rather complicated routine. 73 * If the file system is not maintained in a strict tree hierarchy, 74 * this can result in a deadlock situation (see comments in code below). 75 * 76 * The cnp->cn_nameiop argument is LOOKUP, CREATE, RENAME, or DELETE depending 77 * on whether the name is to be looked up, created, renamed, or deleted. 78 * When CREATE, RENAME, or DELETE is specified, information usable in 79 * creating, renaming, or deleting a directory entry may be calculated. 80 * If flag has LOCKPARENT or'ed into it and the target of the pathname 81 * exists, lookup returns both the target and its parent directory locked. 82 * When creating or renaming and LOCKPARENT is specified, the target may 83 * not be ".". When deleting and LOCKPARENT is specified, the target may 84 * be "."., but the caller must check to ensure it does an vrele and vput 85 * instead of two vputs. 86 * 87 * Overall outline of ufs_lookup: 88 * 89 * check accessibility of directory 90 * look for name in cache, if found, then if at end of path 91 * and deleting or creating, drop it, else return name 92 * search for name in directory, to found or notfound 93 * notfound: 94 * if creating, return locked directory, leaving info on available slots 95 * else return error 96 * found: 97 * if at end of path and deleting, return information to allow delete 98 * if at end of path and rewriting (RENAME and LOCKPARENT), lock target 99 * inode and return info to allow rewrite 100 * if not at end, add name to cache; if at end and neither creating 101 * nor deleting, add name to cache 102 */ 103 int 104 ufs_lookup(v) 105 void *v; 106 { 107 struct vop_lookup_args /* { 108 struct vnode *a_dvp; 109 struct vnode **a_vpp; 110 struct componentname *a_cnp; 111 } */ *ap = v; 112 struct vnode *vdp; /* vnode for directory being searched */ 113 struct inode *dp; /* inode for directory being searched */ 114 struct buf *bp; /* a buffer of directory entries */ 115 struct direct *ep; /* the current directory entry */ 116 int entryoffsetinblock; /* offset of ep in bp's buffer */ 117 enum {NONE, COMPACT, FOUND} slotstatus; 118 doff_t slotoffset; /* offset of area with free space */ 119 int slotsize; /* size of area at slotoffset */ 120 int slotfreespace; /* amount of space free in slot */ 121 int slotneeded; /* size of the entry we're seeking */ 122 int numdirpasses; /* strategy for directory search */ 123 doff_t endsearch; /* offset to end directory search */ 124 doff_t prevoff; /* prev entry dp->i_offset */ 125 struct vnode *pdp; /* saved dp during symlink work */ 126 struct vnode *tdp; /* returned by VFS_VGET */ 127 doff_t enduseful; /* pointer past last used dir slot */ 128 u_long bmask; /* block offset mask */ 129 int lockparent; /* 1 => lockparent flag is set */ 130 int wantparent; /* 1 => wantparent or lockparent flag */ 131 int namlen, error; 132 struct vnode **vpp = ap->a_vpp; 133 struct componentname *cnp = ap->a_cnp; 134 struct ucred *cred = cnp->cn_cred; 135 int flags; 136 int nameiop = cnp->cn_nameiop; 137 const int needswap = UFS_MPNEEDSWAP(ap->a_dvp->v_mount); 138 139 cnp->cn_flags &= ~PDIRUNLOCK; 140 flags = cnp->cn_flags; 141 142 bp = NULL; 143 slotoffset = -1; 144 *vpp = NULL; 145 vdp = ap->a_dvp; 146 dp = VTOI(vdp); 147 lockparent = flags & LOCKPARENT; 148 wantparent = flags & (LOCKPARENT|WANTPARENT); 149 150 151 /* 152 * Check accessiblity of directory. 153 */ 154 if ((error = VOP_ACCESS(vdp, VEXEC, cred, cnp->cn_proc)) != 0) 155 return (error); 156 157 if ((flags & ISLASTCN) && (vdp->v_mount->mnt_flag & MNT_RDONLY) && 158 (cnp->cn_nameiop == DELETE || cnp->cn_nameiop == RENAME)) 159 return (EROFS); 160 161 /* 162 * We now have a segment name to search for, and a directory to search. 163 * 164 * Before tediously performing a linear scan of the directory, 165 * check the name cache to see if the directory/name pair 166 * we are looking for is known already. 167 */ 168 if ((error = cache_lookup(vdp, vpp, cnp)) >= 0) 169 return (error); 170 171 /* 172 * Suppress search for slots unless creating 173 * file and at end of pathname, in which case 174 * we watch for a place to put the new file in 175 * case it doesn't already exist. 176 */ 177 slotstatus = FOUND; 178 slotfreespace = slotsize = slotneeded = 0; 179 if ((nameiop == CREATE || nameiop == RENAME) && 180 (flags & ISLASTCN)) { 181 slotstatus = NONE; 182 slotneeded = (sizeof(struct direct) - MAXNAMLEN + 183 cnp->cn_namelen + 3) &~ 3; 184 } 185 186 /* 187 * If there is cached information on a previous search of 188 * this directory, pick up where we last left off. 189 * We cache only lookups as these are the most common 190 * and have the greatest payoff. Caching CREATE has little 191 * benefit as it usually must search the entire directory 192 * to determine that the entry does not exist. Caching the 193 * location of the last DELETE or RENAME has not reduced 194 * profiling time and hence has been removed in the interest 195 * of simplicity. 196 */ 197 bmask = VFSTOUFS(vdp->v_mount)->um_mountp->mnt_stat.f_iosize - 1; 198 if (nameiop != LOOKUP || dp->i_diroff == 0 || 199 dp->i_diroff >= dp->i_ffs_size) { 200 entryoffsetinblock = 0; 201 dp->i_offset = 0; 202 numdirpasses = 1; 203 } else { 204 dp->i_offset = dp->i_diroff; 205 if ((entryoffsetinblock = dp->i_offset & bmask) && 206 (error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, &bp))) 207 return (error); 208 numdirpasses = 2; 209 nchstats.ncs_2passes++; 210 } 211 prevoff = dp->i_offset; 212 endsearch = roundup(dp->i_ffs_size, DIRBLKSIZ); 213 enduseful = 0; 214 215 searchloop: 216 while (dp->i_offset < endsearch) { 217 /* 218 * If necessary, get the next directory block. 219 */ 220 if ((dp->i_offset & bmask) == 0) { 221 if (bp != NULL) 222 brelse(bp); 223 error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, NULL, 224 &bp); 225 if (error) 226 return (error); 227 entryoffsetinblock = 0; 228 } 229 /* 230 * If still looking for a slot, and at a DIRBLKSIZE 231 * boundary, have to start looking for free space again. 232 */ 233 if (slotstatus == NONE && 234 (entryoffsetinblock & (DIRBLKSIZ - 1)) == 0) { 235 slotoffset = -1; 236 slotfreespace = 0; 237 } 238 /* 239 * Get pointer to next entry. 240 * Full validation checks are slow, so we only check 241 * enough to insure forward progress through the 242 * directory. Complete checks can be run by patching 243 * "dirchk" to be true. 244 */ 245 ep = (struct direct *)((char *)bp->b_data + entryoffsetinblock); 246 if (ep->d_reclen == 0 || 247 (dirchk && ufs_dirbadentry(vdp, ep, entryoffsetinblock))) { 248 int i; 249 250 ufs_dirbad(dp, dp->i_offset, "mangled entry"); 251 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 252 dp->i_offset += i; 253 entryoffsetinblock += i; 254 continue; 255 } 256 257 /* 258 * If an appropriate sized slot has not yet been found, 259 * check to see if one is available. Also accumulate space 260 * in the current block so that we can determine if 261 * compaction is viable. 262 */ 263 if (slotstatus != FOUND) { 264 int size = ufs_rw16(ep->d_reclen, needswap); 265 266 if (ep->d_ino != 0) 267 size -= DIRSIZ(FSFMT(vdp), ep, needswap); 268 if (size > 0) { 269 if (size >= slotneeded) { 270 slotstatus = FOUND; 271 slotoffset = dp->i_offset; 272 slotsize = ufs_rw16(ep->d_reclen, 273 needswap); 274 } else if (slotstatus == NONE) { 275 slotfreespace += size; 276 if (slotoffset == -1) 277 slotoffset = dp->i_offset; 278 if (slotfreespace >= slotneeded) { 279 slotstatus = COMPACT; 280 slotsize = dp->i_offset + 281 ufs_rw16(ep->d_reclen, 282 needswap) 283 - slotoffset; 284 } 285 } 286 } 287 } 288 289 /* 290 * Check for a name match. 291 */ 292 if (ep->d_ino) { 293 #if (BYTE_ORDER == LITTLE_ENDIAN) 294 if (vdp->v_mount->mnt_maxsymlinklen > 0 || 295 needswap != 0) 296 namlen = ep->d_namlen; 297 else 298 namlen = ep->d_type; 299 #else 300 if (vdp->v_mount->mnt_maxsymlinklen <= 0 301 && needswap != 0) 302 namlen = ep->d_type; 303 else 304 namlen = ep->d_namlen; 305 #endif 306 if (namlen == cnp->cn_namelen && 307 !memcmp(cnp->cn_nameptr, ep->d_name, 308 (unsigned)namlen)) { 309 /* 310 * Save directory entry's inode number and 311 * reclen in ndp->ni_ufs area, and release 312 * directory buffer. 313 */ 314 if (vdp->v_mount->mnt_maxsymlinklen > 0 && 315 ep->d_type == DT_WHT) { 316 slotstatus = FOUND; 317 slotoffset = dp->i_offset; 318 slotsize = ufs_rw16(ep->d_reclen, 319 needswap); 320 dp->i_reclen = slotsize; 321 /* 322 * This is used to set dp->i_endoff, 323 * which may be used by ufs_direnter2() 324 * as a length to truncate the 325 * directory to. Therefore, it must 326 * point past the end of the last 327 * non-empty directory entry. We don't 328 * know where that is in this case, so 329 * we effectively disable shrinking by 330 * using the existing size of the 331 * directory. 332 * 333 * Note that we wouldn't expect to 334 * shrink the directory while rewriting 335 * an existing entry anyway. 336 */ 337 enduseful = endsearch; 338 ap->a_cnp->cn_flags |= ISWHITEOUT; 339 numdirpasses--; 340 goto notfound; 341 } 342 dp->i_ino = ufs_rw32(ep->d_ino, needswap); 343 dp->i_reclen = ufs_rw16(ep->d_reclen, needswap); 344 goto found; 345 } 346 } 347 prevoff = dp->i_offset; 348 dp->i_offset += ufs_rw16(ep->d_reclen, needswap); 349 entryoffsetinblock += ufs_rw16(ep->d_reclen, needswap); 350 if (ep->d_ino) 351 enduseful = dp->i_offset; 352 } 353 notfound: 354 /* 355 * If we started in the middle of the directory and failed 356 * to find our target, we must check the beginning as well. 357 */ 358 if (numdirpasses == 2) { 359 numdirpasses--; 360 dp->i_offset = 0; 361 endsearch = dp->i_diroff; 362 goto searchloop; 363 } 364 if (bp != NULL) 365 brelse(bp); 366 /* 367 * If creating, and at end of pathname and current 368 * directory has not been removed, then can consider 369 * allowing file to be created. 370 */ 371 if ((nameiop == CREATE || nameiop == RENAME || 372 (nameiop == DELETE && 373 (ap->a_cnp->cn_flags & DOWHITEOUT) && 374 (ap->a_cnp->cn_flags & ISWHITEOUT))) && 375 (flags & ISLASTCN) && dp->i_ffs_effnlink != 0) { 376 /* 377 * Access for write is interpreted as allowing 378 * creation of files in the directory. 379 */ 380 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); 381 if (error) 382 return (error); 383 /* 384 * Return an indication of where the new directory 385 * entry should be put. If we didn't find a slot, 386 * then set dp->i_count to 0 indicating 387 * that the new slot belongs at the end of the 388 * directory. If we found a slot, then the new entry 389 * can be put in the range from dp->i_offset to 390 * dp->i_offset + dp->i_count. 391 */ 392 if (slotstatus == NONE) { 393 dp->i_offset = roundup(dp->i_ffs_size, DIRBLKSIZ); 394 dp->i_count = 0; 395 enduseful = dp->i_offset; 396 } else if (nameiop == DELETE) { 397 dp->i_offset = slotoffset; 398 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 399 dp->i_count = 0; 400 else 401 dp->i_count = dp->i_offset - prevoff; 402 } else { 403 dp->i_offset = slotoffset; 404 dp->i_count = slotsize; 405 if (enduseful < slotoffset + slotsize) 406 enduseful = slotoffset + slotsize; 407 } 408 dp->i_endoff = roundup(enduseful, DIRBLKSIZ); 409 dp->i_flag |= IN_CHANGE | IN_UPDATE; 410 /* 411 * We return with the directory locked, so that 412 * the parameters we set up above will still be 413 * valid if we actually decide to do a direnter(). 414 * We return ni_vp == NULL to indicate that the entry 415 * does not currently exist; we leave a pointer to 416 * the (locked) directory inode in ndp->ni_dvp. 417 * The pathname buffer is saved so that the name 418 * can be obtained later. 419 * 420 * NB - if the directory is unlocked, then this 421 * information cannot be used. 422 */ 423 cnp->cn_flags |= SAVENAME; 424 if (!lockparent) { 425 VOP_UNLOCK(vdp, 0); 426 cnp->cn_flags |= PDIRUNLOCK; 427 } 428 return (EJUSTRETURN); 429 } 430 /* 431 * Insert name into cache (as non-existent) if appropriate. 432 */ 433 if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) 434 cache_enter(vdp, *vpp, cnp); 435 return (ENOENT); 436 437 found: 438 if (numdirpasses == 2) 439 nchstats.ncs_pass2++; 440 /* 441 * Check that directory length properly reflects presence 442 * of this entry. 443 */ 444 if (dp->i_offset + DIRSIZ(FSFMT(vdp), ep, needswap) > 445 dp->i_ffs_size) { 446 ufs_dirbad(dp, dp->i_offset, "i_size too small"); 447 dp->i_ffs_size = dp->i_offset + 448 DIRSIZ(FSFMT(vdp), ep, needswap); 449 dp->i_flag |= IN_CHANGE | IN_UPDATE; 450 } 451 brelse(bp); 452 453 /* 454 * Found component in pathname. 455 * If the final component of path name, save information 456 * in the cache as to where the entry was found. 457 */ 458 if ((flags & ISLASTCN) && nameiop == LOOKUP) 459 dp->i_diroff = dp->i_offset &~ (DIRBLKSIZ - 1); 460 461 /* 462 * If deleting, and at end of pathname, return 463 * parameters which can be used to remove file. 464 * If the wantparent flag isn't set, we return only 465 * the directory (in ndp->ni_dvp), otherwise we go 466 * on and lock the inode, being careful with ".". 467 */ 468 if (nameiop == DELETE && (flags & ISLASTCN)) { 469 /* 470 * Write access to directory required to delete files. 471 */ 472 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); 473 if (error) 474 return (error); 475 /* 476 * Return pointer to current entry in dp->i_offset, 477 * and distance past previous entry (if there 478 * is a previous entry in this block) in dp->i_count. 479 * Save directory inode pointer in ndp->ni_dvp for dirremove(). 480 */ 481 if ((dp->i_offset & (DIRBLKSIZ - 1)) == 0) 482 dp->i_count = 0; 483 else 484 dp->i_count = dp->i_offset - prevoff; 485 if (dp->i_number == dp->i_ino) { 486 VREF(vdp); 487 *vpp = vdp; 488 return (0); 489 } 490 if (flags & ISDOTDOT) 491 VOP_UNLOCK(vdp, 0); /* race to get the inode */ 492 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 493 if (flags & ISDOTDOT) 494 vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); 495 if (error) 496 return (error); 497 /* 498 * If directory is "sticky", then user must own 499 * the directory, or the file in it, else she 500 * may not delete it (unless she's root). This 501 * implements append-only directories. 502 */ 503 if ((dp->i_ffs_mode & ISVTX) && 504 cred->cr_uid != 0 && 505 cred->cr_uid != dp->i_ffs_uid && 506 VTOI(tdp)->i_ffs_uid != cred->cr_uid) { 507 vput(tdp); 508 return (EPERM); 509 } 510 *vpp = tdp; 511 if (!lockparent) { 512 VOP_UNLOCK(vdp, 0); 513 cnp->cn_flags |= PDIRUNLOCK; 514 } 515 return (0); 516 } 517 518 /* 519 * If rewriting (RENAME), return the inode and the 520 * information required to rewrite the present directory 521 * Must get inode of directory entry to verify it's a 522 * regular file, or empty directory. 523 */ 524 if (nameiop == RENAME && wantparent && (flags & ISLASTCN)) { 525 error = VOP_ACCESS(vdp, VWRITE, cred, cnp->cn_proc); 526 if (error) 527 return (error); 528 /* 529 * Careful about locking second inode. 530 * This can only occur if the target is ".". 531 */ 532 if (dp->i_number == dp->i_ino) 533 return (EISDIR); 534 if (flags & ISDOTDOT) 535 VOP_UNLOCK(vdp, 0); /* race to get the inode */ 536 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 537 if (flags & ISDOTDOT) 538 vn_lock(vdp, LK_EXCLUSIVE | LK_RETRY); 539 if (error) 540 return (error); 541 *vpp = tdp; 542 cnp->cn_flags |= SAVENAME; 543 if (!lockparent) { 544 VOP_UNLOCK(vdp, 0); 545 cnp->cn_flags |= PDIRUNLOCK; 546 } 547 return (0); 548 } 549 550 /* 551 * Step through the translation in the name. We do not `vput' the 552 * directory because we may need it again if a symbolic link 553 * is relative to the current directory. Instead we save it 554 * unlocked as "pdp". We must get the target inode before unlocking 555 * the directory to insure that the inode will not be removed 556 * before we get it. We prevent deadlock by always fetching 557 * inodes from the root, moving down the directory tree. Thus 558 * when following backward pointers ".." we must unlock the 559 * parent directory before getting the requested directory. 560 * There is a potential race condition here if both the current 561 * and parent directories are removed before the VFS_VGET for the 562 * inode associated with ".." returns. We hope that this occurs 563 * infrequently since we cannot avoid this race condition without 564 * implementing a sophisticated deadlock detection algorithm. 565 * Note also that this simple deadlock detection scheme will not 566 * work if the file system has any hard links other than ".." 567 * that point backwards in the directory structure. 568 */ 569 pdp = vdp; 570 if (flags & ISDOTDOT) { 571 VOP_UNLOCK(pdp, 0); /* race to get the inode */ 572 cnp->cn_flags |= PDIRUNLOCK; 573 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 574 if (error) { 575 if (vn_lock(pdp, LK_EXCLUSIVE | LK_RETRY) == 0) 576 cnp->cn_flags &= ~PDIRUNLOCK; 577 return (error); 578 } 579 if (lockparent && (flags & ISLASTCN)) { 580 if ((error = vn_lock(pdp, LK_EXCLUSIVE))) { 581 vput(tdp); 582 return (error); 583 } 584 cnp->cn_flags &= ~PDIRUNLOCK; 585 } 586 *vpp = tdp; 587 } else if (dp->i_number == dp->i_ino) { 588 VREF(vdp); /* we want ourself, ie "." */ 589 *vpp = vdp; 590 } else { 591 error = VFS_VGET(vdp->v_mount, dp->i_ino, &tdp); 592 if (error) 593 return (error); 594 if (!lockparent || !(flags & ISLASTCN)) { 595 VOP_UNLOCK(pdp, 0); 596 cnp->cn_flags |= PDIRUNLOCK; 597 } 598 *vpp = tdp; 599 } 600 601 /* 602 * Insert name into cache if appropriate. 603 */ 604 if (cnp->cn_flags & MAKEENTRY) 605 cache_enter(vdp, *vpp, cnp); 606 return (0); 607 } 608 609 void 610 ufs_dirbad(ip, offset, how) 611 struct inode *ip; 612 doff_t offset; 613 char *how; 614 { 615 struct mount *mp; 616 617 mp = ITOV(ip)->v_mount; 618 printf("%s: bad dir ino %d at offset %d: %s\n", 619 mp->mnt_stat.f_mntonname, ip->i_number, offset, how); 620 if ((mp->mnt_stat.f_flags & MNT_RDONLY) == 0) 621 panic("bad dir"); 622 } 623 624 /* 625 * Do consistency checking on a directory entry: 626 * record length must be multiple of 4 627 * entry must fit in rest of its DIRBLKSIZ block 628 * record must be large enough to contain entry 629 * name is not longer than MAXNAMLEN 630 * name must be as long as advertised, and null terminated 631 */ 632 int 633 ufs_dirbadentry(dp, ep, entryoffsetinblock) 634 struct vnode *dp; 635 struct direct *ep; 636 int entryoffsetinblock; 637 { 638 int i; 639 int namlen; 640 const int needswap = UFS_MPNEEDSWAP(dp->v_mount); 641 642 #if (BYTE_ORDER == LITTLE_ENDIAN) 643 if (dp->v_mount->mnt_maxsymlinklen > 0 || needswap != 0) 644 namlen = ep->d_namlen; 645 else 646 namlen = ep->d_type; 647 #else 648 if (dp->v_mount->mnt_maxsymlinklen <= 0 && needswap != 0) 649 namlen = ep->d_type; 650 else 651 namlen = ep->d_namlen; 652 #endif 653 if ((ufs_rw16(ep->d_reclen, needswap) & 0x3) != 0 || 654 ufs_rw16(ep->d_reclen, needswap) > 655 DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 656 ufs_rw16(ep->d_reclen, needswap) < 657 DIRSIZ(FSFMT(dp), ep, needswap) || 658 namlen > MAXNAMLEN) { 659 /*return (1); */ 660 printf("First bad, reclen=%x, DIRSIZ=%lu, namlen=%d, flags=%x " 661 "entryoffsetinblock=%d\n", 662 ufs_rw16(ep->d_reclen, needswap), 663 (u_long)DIRSIZ(FSFMT(dp), ep, needswap), 664 namlen, dp->v_mount->mnt_flag, entryoffsetinblock); 665 goto bad; 666 } 667 if (ep->d_ino == 0) 668 return (0); 669 for (i = 0; i < namlen; i++) 670 if (ep->d_name[i] == '\0') { 671 /*return (1); */ 672 printf("Second bad\n"); 673 goto bad; 674 } 675 if (ep->d_name[i]) 676 goto bad; 677 return (0); 678 bad: 679 return (1); 680 } 681 682 /* 683 * Construct a new directory entry after a call to namei, using the 684 * parameters that it left in the componentname argument cnp. The 685 * argument ip is the inode to which the new directory entry will refer. 686 */ 687 void 688 ufs_makedirentry(ip, cnp, newdirp) 689 struct inode *ip; 690 struct componentname *cnp; 691 struct direct *newdirp; 692 { 693 #ifdef DIAGNOSTIC 694 if ((cnp->cn_flags & SAVENAME) == 0) 695 panic("makedirentry: missing name"); 696 #endif 697 newdirp->d_ino = ip->i_number; 698 newdirp->d_namlen = cnp->cn_namelen; 699 memcpy(newdirp->d_name, cnp->cn_nameptr, (unsigned)cnp->cn_namelen + 1); 700 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0) 701 newdirp->d_type = IFTODT(ip->i_ffs_mode); 702 else { 703 newdirp->d_type = 0; 704 } 705 } 706 707 /* 708 * Write a directory entry after a call to namei, using the parameters 709 * that it left in nameidata. The argument dirp is the new directory 710 * entry contents. Dvp is a pointer to the directory to be written, 711 * which was left locked by namei. Remaining parameters (dp->i_offset, 712 * dp->i_count) indicate how the space for the new entry is to be obtained. 713 * Non-null bp indicates that a directory is being created (for the 714 * soft dependency code). 715 */ 716 int 717 ufs_direnter(dvp, tvp, dirp, cnp, newdirbp) 718 struct vnode *dvp; 719 struct vnode *tvp; 720 struct direct *dirp; 721 struct componentname *cnp; 722 struct buf *newdirbp; 723 { 724 struct ucred *cr; 725 struct proc *p; 726 int newentrysize; 727 struct inode *dp; 728 struct buf *bp; 729 u_int dsize; 730 struct direct *ep, *nep; 731 int error, ret, blkoff, loc, spacefree, flags; 732 char *dirbuf; 733 struct timespec ts; 734 const int needswap = UFS_MPNEEDSWAP(dvp->v_mount); 735 736 error = 0; 737 cr = cnp->cn_cred; 738 p = cnp->cn_proc; 739 740 dp = VTOI(dvp); 741 newentrysize = DIRSIZ(0, dirp, 0); 742 743 if (dp->i_count == 0) { 744 /* 745 * If dp->i_count is 0, then namei could find no 746 * space in the directory. Here, dp->i_offset will 747 * be on a directory block boundary and we will write the 748 * new entry into a fresh block. 749 */ 750 if (dp->i_offset & (DIRBLKSIZ - 1)) 751 panic("ufs_direnter: newblk"); 752 flags = B_CLRBUF; 753 if (!DOINGSOFTDEP(dvp)) 754 flags |= B_SYNC; 755 if ((error = VOP_BALLOC(dvp, (off_t)dp->i_offset, DIRBLKSIZ, 756 cr, flags, &bp)) != 0) { 757 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 758 bdwrite(newdirbp); 759 return (error); 760 } 761 dp->i_ffs_size = dp->i_offset + DIRBLKSIZ; 762 dp->i_flag |= IN_CHANGE | IN_UPDATE; 763 uvm_vnp_setsize(dvp, dp->i_ffs_size); 764 dirp->d_reclen = ufs_rw16(DIRBLKSIZ, needswap); 765 dirp->d_ino = ufs_rw32(dirp->d_ino, needswap); 766 if (dvp->v_mount->mnt_maxsymlinklen <= 0) { 767 #if (BYTE_ORDER == LITTLE_ENDIAN) 768 if (needswap == 0) { 769 #else 770 if (needswap != 0) { 771 #endif 772 u_char tmp = dirp->d_namlen; 773 dirp->d_namlen = dirp->d_type; 774 dirp->d_type = tmp; 775 } 776 } 777 blkoff = dp->i_offset & 778 (VFSTOUFS(dvp->v_mount)->um_mountp->mnt_stat.f_iosize - 1); 779 memcpy((caddr_t)bp->b_data + blkoff, (caddr_t)dirp, 780 newentrysize); 781 if (DOINGSOFTDEP(dvp)) { 782 /* 783 * Ensure that the entire newly allocated block is a 784 * valid directory so that future growth within the 785 * block does not have to ensure that the block is 786 * written before the inode. 787 */ 788 blkoff += DIRBLKSIZ; 789 while (blkoff < bp->b_bcount) { 790 ((struct direct *) 791 (bp->b_data + blkoff))->d_reclen = DIRBLKSIZ; 792 blkoff += DIRBLKSIZ; 793 } 794 if (softdep_setup_directory_add(bp, dp, dp->i_offset, 795 ufs_rw32(dirp->d_ino, needswap), newdirbp, 1) == 0) { 796 bdwrite(bp); 797 TIMEVAL_TO_TIMESPEC(&time, &ts); 798 return VOP_UPDATE(dvp, &ts, &ts, UPDATE_DIROP); 799 } 800 /* We have just allocated a directory block in an 801 * indirect block. Rather than tracking when it gets 802 * claimed by the inode, we simply do a VOP_FSYNC 803 * now to ensure that it is there (in case the user 804 * does a future fsync). Note that we have to unlock 805 * the inode for the entry that we just entered, as 806 * the VOP_FSYNC may need to lock other inodes which 807 * can lead to deadlock if we also hold a lock on 808 * the newly entered node. 809 */ 810 error = VOP_BWRITE(bp); 811 if (error != 0) 812 return (error); 813 if (tvp != NULL) 814 VOP_UNLOCK(tvp, 0); 815 error = VOP_FSYNC(dvp, p->p_ucred, FSYNC_WAIT, 0, 0, p); 816 if (tvp != 0) 817 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); 818 return (error); 819 } else { 820 error = VOP_BWRITE(bp); 821 } 822 TIMEVAL_TO_TIMESPEC(&time, &ts); 823 ret = VOP_UPDATE(dvp, &ts, &ts, UPDATE_DIROP); 824 if (error == 0) 825 return (ret); 826 return (error); 827 } 828 829 /* 830 * If dp->i_count is non-zero, then namei found space for the new 831 * entry in the range dp->i_offset to dp->i_offset + dp->i_count 832 * in the directory. To use this space, we may have to compact 833 * the entries located there, by copying them together towards the 834 * beginning of the block, leaving the free space in one usable 835 * chunk at the end. 836 */ 837 838 /* 839 * Increase size of directory if entry eats into new space. 840 * This should never push the size past a new multiple of 841 * DIRBLKSIZE. 842 * 843 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 844 */ 845 if (dp->i_offset + dp->i_count > dp->i_ffs_size) 846 dp->i_ffs_size = dp->i_offset + dp->i_count; 847 /* 848 * Get the block containing the space for the new directory entry. 849 */ 850 error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, &dirbuf, &bp); 851 if (error) { 852 if (DOINGSOFTDEP(dvp) && newdirbp != NULL) 853 bdwrite(newdirbp); 854 return (error); 855 } 856 /* 857 * Find space for the new entry. In the simple case, the entry at 858 * offset base will have the space. If it does not, then namei 859 * arranged that compacting the region dp->i_offset to 860 * dp->i_offset + dp->i_count would yield the space. 861 */ 862 ep = (struct direct *)dirbuf; 863 dsize = DIRSIZ(FSFMT(dvp), ep, needswap); 864 spacefree = ufs_rw16(ep->d_reclen, needswap) - dsize; 865 for (loc = ufs_rw16(ep->d_reclen, needswap); loc < dp->i_count; ) { 866 nep = (struct direct *)(dirbuf + loc); 867 if (ep->d_ino) { 868 /* trim the existing slot */ 869 ep->d_reclen = ufs_rw16(dsize, needswap); 870 ep = (struct direct *)((char *)ep + dsize); 871 } else { 872 /* overwrite; nothing there; header is ours */ 873 spacefree += dsize; 874 } 875 dsize = DIRSIZ(FSFMT(dvp), nep, needswap); 876 spacefree += ufs_rw16(nep->d_reclen, needswap) - dsize; 877 loc += ufs_rw16(nep->d_reclen, needswap); 878 if (DOINGSOFTDEP(dvp)) 879 softdep_change_directoryentry_offset(dp, dirbuf, 880 (caddr_t)nep, (caddr_t)ep, dsize); 881 else 882 memcpy((caddr_t)ep, (caddr_t)nep, dsize); 883 } 884 /* 885 * Update the pointer fields in the previous entry (if any), 886 * copy in the new entry, and write out the block. 887 */ 888 if (ep->d_ino == 0 || 889 (ufs_rw32(ep->d_ino, needswap) == WINO && 890 memcmp(ep->d_name, dirp->d_name, dirp->d_namlen) == 0)) { 891 if (spacefree + dsize < newentrysize) 892 panic("ufs_direnter: compact1"); 893 dirp->d_reclen = spacefree + dsize; 894 } else { 895 if (spacefree < newentrysize) 896 panic("ufs_direnter: compact2"); 897 dirp->d_reclen = spacefree; 898 ep->d_reclen = ufs_rw16(dsize, needswap); 899 ep = (struct direct *)((char *)ep + dsize); 900 } 901 dirp->d_reclen = ufs_rw16(dirp->d_reclen, needswap); 902 dirp->d_ino = ufs_rw32(dirp->d_ino, needswap); 903 if (dvp->v_mount->mnt_maxsymlinklen <= 0) { 904 #if (BYTE_ORDER == LITTLE_ENDIAN) 905 if (needswap == 0) { 906 #else 907 if (needswap != 0) { 908 #endif 909 u_char tmp = dirp->d_namlen; 910 dirp->d_namlen = dirp->d_type; 911 dirp->d_type = tmp; 912 } 913 } 914 memcpy((caddr_t)ep, (caddr_t)dirp, (u_int)newentrysize); 915 if (DOINGSOFTDEP(dvp)) { 916 softdep_setup_directory_add(bp, dp, 917 dp->i_offset + (caddr_t)ep - dirbuf, 918 ufs_rw32(dirp->d_ino, needswap), newdirbp, 0); 919 bdwrite(bp); 920 } else { 921 error = VOP_BWRITE(bp); 922 } 923 dp->i_flag |= IN_CHANGE | IN_UPDATE; 924 /* 925 * If all went well, and the directory can be shortened, proceed 926 * with the truncation. Note that we have to unlock the inode for 927 * the entry that we just entered, as the truncation may need to 928 * lock other inodes which can lead to deadlock if we also hold a 929 * lock on the newly entered node. 930 */ 931 if (error == 0 && dp->i_endoff && dp->i_endoff < dp->i_ffs_size) { 932 if (tvp != NULL) 933 VOP_UNLOCK(tvp, 0); 934 (void) VOP_TRUNCATE(dvp, (off_t)dp->i_endoff, IO_SYNC, cr, p); 935 if (tvp != NULL) 936 vn_lock(tvp, LK_EXCLUSIVE | LK_RETRY); 937 } 938 return (error); 939 } 940 941 /* 942 * Remove a directory entry after a call to namei, using 943 * the parameters which it left in nameidata. The entry 944 * dp->i_offset contains the offset into the directory of the 945 * entry to be eliminated. The dp->i_count field contains the 946 * size of the previous record in the directory. If this 947 * is 0, the first entry is being deleted, so we need only 948 * zero the inode number to mark the entry as free. If the 949 * entry is not the first in the directory, we must reclaim 950 * the space of the now empty record by adding the record size 951 * to the size of the previous entry. 952 */ 953 int 954 ufs_dirremove(dvp, ip, flags, isrmdir) 955 struct vnode *dvp; 956 struct inode *ip; 957 int flags; 958 int isrmdir; 959 { 960 struct inode *dp; 961 struct direct *ep; 962 struct buf *bp; 963 int error; 964 #ifdef FFS_EI 965 const int needswap = UFS_MPNEEDSWAP(dvp->v_mount); 966 #endif 967 968 dp = VTOI(dvp); 969 970 if (flags & DOWHITEOUT) { 971 /* 972 * Whiteout entry: set d_ino to WINO. 973 */ 974 error = VOP_BLKATOFF(dvp, (off_t)dp->i_offset, (char **)&ep, 975 &bp); 976 if (error) 977 return (error); 978 ep->d_ino = ufs_rw32(WINO, needswap); 979 ep->d_type = DT_WHT; 980 goto out; 981 } 982 983 if ((error = VOP_BLKATOFF(dvp, 984 (off_t)(dp->i_offset - dp->i_count), (char **)&ep, &bp)) != 0) 985 return (error); 986 987 if (dp->i_count == 0) { 988 /* 989 * First entry in block: set d_ino to zero. 990 */ 991 ep->d_ino = 0; 992 } else { 993 /* 994 * Collapse new free space into previous entry. 995 */ 996 ep->d_reclen = 997 ufs_rw16(ufs_rw16(ep->d_reclen, needswap) + dp->i_reclen, 998 needswap); 999 } 1000 out: 1001 if (DOINGSOFTDEP(dvp)) { 1002 if (ip) { 1003 ip->i_ffs_effnlink--; 1004 softdep_change_linkcnt(ip); 1005 softdep_setup_remove(bp, dp, ip, isrmdir); 1006 } 1007 bdwrite(bp); 1008 } else { 1009 if (ip) { 1010 ip->i_ffs_effnlink--; 1011 ip->i_ffs_nlink--; 1012 ip->i_flag |= IN_CHANGE; 1013 } 1014 error = VOP_BWRITE(bp); 1015 } 1016 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1017 return (error); 1018 } 1019 1020 /* 1021 * Rewrite an existing directory entry to point at the inode 1022 * supplied. The parameters describing the directory entry are 1023 * set up by a call to namei. 1024 */ 1025 int 1026 ufs_dirrewrite(dp, oip, newinum, newtype, isrmdir) 1027 struct inode *dp, *oip; 1028 ino_t newinum; 1029 int newtype; 1030 int isrmdir; 1031 { 1032 struct buf *bp; 1033 struct direct *ep; 1034 struct vnode *vdp = ITOV(dp); 1035 int error; 1036 1037 error = VOP_BLKATOFF(vdp, (off_t)dp->i_offset, (char **)&ep, &bp); 1038 if (error) 1039 return (error); 1040 ep->d_ino = ufs_rw32(newinum, UFS_MPNEEDSWAP(vdp->v_mount)); 1041 if (vdp->v_mount->mnt_maxsymlinklen > 0) 1042 ep->d_type = newtype; 1043 oip->i_ffs_effnlink--; 1044 if (DOINGSOFTDEP(vdp)) { 1045 softdep_change_linkcnt(oip); 1046 softdep_setup_directory_change(bp, dp, oip, newinum, isrmdir); 1047 bdwrite(bp); 1048 } else { 1049 oip->i_ffs_nlink--; 1050 oip->i_flag |= IN_CHANGE; 1051 error = VOP_BWRITE(bp); 1052 } 1053 dp->i_flag |= IN_CHANGE | IN_UPDATE; 1054 return (error); 1055 } 1056 1057 /* 1058 * Check if a directory is empty or not. 1059 * Inode supplied must be locked. 1060 * 1061 * Using a struct dirtemplate here is not precisely 1062 * what we want, but better than using a struct direct. 1063 * 1064 * NB: does not handle corrupted directories. 1065 */ 1066 int 1067 ufs_dirempty(ip, parentino, cred) 1068 struct inode *ip; 1069 ino_t parentino; 1070 struct ucred *cred; 1071 { 1072 off_t off; 1073 struct dirtemplate dbuf; 1074 struct direct *dp = (struct direct *)&dbuf; 1075 int error, namlen; 1076 size_t count; 1077 const int needswap = UFS_IPNEEDSWAP(ip); 1078 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1079 1080 for (off = 0; off < ip->i_ffs_size; 1081 off += ufs_rw16(dp->d_reclen, needswap)) { 1082 error = vn_rdwr(UIO_READ, ITOV(ip), (caddr_t)dp, MINDIRSIZ, off, 1083 UIO_SYSSPACE, IO_NODELOCKED, cred, &count, (struct proc *)0); 1084 /* 1085 * Since we read MINDIRSIZ, residual must 1086 * be 0 unless we're at end of file. 1087 */ 1088 if (error || count != 0) 1089 return (0); 1090 /* avoid infinite loops */ 1091 if (dp->d_reclen == 0) 1092 return (0); 1093 /* skip empty entries */ 1094 if (dp->d_ino == 0 || ufs_rw32(dp->d_ino, needswap) == WINO) 1095 continue; 1096 /* accept only "." and ".." */ 1097 #if (BYTE_ORDER == LITTLE_ENDIAN) 1098 if (ITOV(ip)->v_mount->mnt_maxsymlinklen > 0 || needswap != 0) 1099 namlen = dp->d_namlen; 1100 else 1101 namlen = dp->d_type; 1102 #else 1103 if (ITOV(ip)->v_mount->mnt_maxsymlinklen <= 0 && needswap != 0) 1104 namlen = dp->d_type; 1105 else 1106 namlen = dp->d_namlen; 1107 #endif 1108 if (namlen > 2) 1109 return (0); 1110 if (dp->d_name[0] != '.') 1111 return (0); 1112 /* 1113 * At this point namlen must be 1 or 2. 1114 * 1 implies ".", 2 implies ".." if second 1115 * char is also "." 1116 */ 1117 if (namlen == 1 && 1118 ufs_rw32(dp->d_ino, needswap) == ip->i_number) 1119 continue; 1120 if (dp->d_name[1] == '.' && 1121 ufs_rw32(dp->d_ino, needswap) == parentino) 1122 continue; 1123 return (0); 1124 } 1125 return (1); 1126 } 1127 1128 /* 1129 * Check if source directory is in the path of the target directory. 1130 * Target is supplied locked, source is unlocked. 1131 * The target is always vput before returning. 1132 */ 1133 int 1134 ufs_checkpath(source, target, cred) 1135 struct inode *source, *target; 1136 struct ucred *cred; 1137 { 1138 struct vnode *vp = ITOV(target); 1139 int error, rootino, namlen; 1140 struct dirtemplate dirbuf; 1141 const int needswap = UFS_MPNEEDSWAP(vp->v_mount); 1142 1143 vp = ITOV(target); 1144 if (target->i_number == source->i_number) { 1145 error = EEXIST; 1146 goto out; 1147 } 1148 rootino = ROOTINO; 1149 error = 0; 1150 if (target->i_number == rootino) 1151 goto out; 1152 1153 for (;;) { 1154 if (vp->v_type != VDIR) { 1155 error = ENOTDIR; 1156 break; 1157 } 1158 error = vn_rdwr(UIO_READ, vp, (caddr_t)&dirbuf, 1159 sizeof (struct dirtemplate), (off_t)0, UIO_SYSSPACE, 1160 IO_NODELOCKED, cred, NULL, (struct proc *)0); 1161 if (error != 0) 1162 break; 1163 #if (BYTE_ORDER == LITTLE_ENDIAN) 1164 if (vp->v_mount->mnt_maxsymlinklen > 0 || 1165 needswap != 0) 1166 namlen = dirbuf.dotdot_namlen; 1167 else 1168 namlen = dirbuf.dotdot_type; 1169 #else 1170 if (vp->v_mount->mnt_maxsymlinklen == 0 && 1171 needswap != 0) 1172 namlen = dirbuf.dotdot_type; 1173 else 1174 namlen = dirbuf.dotdot_namlen; 1175 #endif 1176 if (namlen != 2 || 1177 dirbuf.dotdot_name[0] != '.' || 1178 dirbuf.dotdot_name[1] != '.') { 1179 error = ENOTDIR; 1180 break; 1181 } 1182 if (ufs_rw32(dirbuf.dotdot_ino, needswap) == source->i_number) { 1183 error = EINVAL; 1184 break; 1185 } 1186 if (ufs_rw32(dirbuf.dotdot_ino, needswap) == rootino) 1187 break; 1188 vput(vp); 1189 error = VFS_VGET(vp->v_mount, 1190 ufs_rw32(dirbuf.dotdot_ino, needswap), &vp); 1191 if (error) { 1192 vp = NULL; 1193 break; 1194 } 1195 } 1196 1197 out: 1198 if (error == ENOTDIR) 1199 printf("checkpath: .. not a directory\n"); 1200 if (vp != NULL) 1201 vput(vp); 1202 return (error); 1203 } 1204