1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)ufs_lookup.c 7.5 (Berkeley) 01/12/89 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "inode.h" 12 #include "fs.h" 13 #include "mount.h" 14 #include "dir.h" 15 #include "user.h" 16 #include "buf.h" 17 #include "conf.h" 18 #include "uio.h" 19 #include "kernel.h" 20 #include "malloc.h" 21 22 struct buf *blkatoff(); 23 int dirchk = 0; 24 25 /* 26 * Structures associated with name cacheing. 27 */ 28 #define NCHHASH 32 /* size of hash table */ 29 30 #if ((NCHHASH)&((NCHHASH)-1)) != 0 31 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH)) 32 #else 33 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1)) 34 #endif 35 36 union nchash { 37 union nchash *nch_head[2]; 38 struct namecache *nch_chain[2]; 39 } nchash[NCHHASH]; 40 #define nch_forw nch_chain[0] 41 #define nch_back nch_chain[1] 42 43 struct namecache *nchhead, **nchtail; /* LRU chain pointers */ 44 struct nchstats nchstats; /* cache effectiveness statistics */ 45 46 /* 47 * Convert a pathname into a pointer to a locked inode. 48 * This is a very central and rather complicated routine. 49 * If the file system is not maintained in a strict tree hierarchy, 50 * this can result in a deadlock situation (see comments in code below). 51 * 52 * The flag argument is LOOKUP, CREATE, or DELETE depending on whether 53 * the name is to be looked up, created, or deleted. When CREATE or 54 * DELETE is specified, information usable in creating or deleteing a 55 * directory entry is also calculated. If flag has LOCKPARENT or'ed 56 * into it and the target of the pathname exists, namei returns both 57 * the target and its parent directory locked. When creating and 58 * LOCKPARENT is specified, the target may not be ".". When deleting 59 * and LOCKPARENT is specified, the target may be ".", but the caller 60 * must check to insure it does an irele and iput instead of two iputs. 61 * 62 * The FOLLOW flag is set when symbolic links are to be followed 63 * when they occur at the end of the name translation process. 64 * Symbolic links are always followed for all other pathname 65 * components other than the last. 66 * 67 * The segflg defines whether the name is to be copied from user 68 * space or kernel space. 69 * 70 * Name caching works as follows: 71 * 72 * Names found by directory scans are retained in a cache 73 * for future reference. It is managed LRU, so frequently 74 * used names will hang around. Cache is indexed by hash value 75 * obtained from (ino,dev,name) where ino & dev refer to the 76 * directory containing name. 77 * 78 * For simplicity (and economy of storage), names longer than 79 * a maximum length of NCHNAMLEN are not cached; they occur 80 * infrequently in any case, and are almost never of interest. 81 * 82 * Upon reaching the last segment of a path, if the reference 83 * is for DELETE, or NOCACHE is set (rewrite), and the 84 * name is located in the cache, it will be dropped. 85 * 86 * Overall outline of namei: 87 * 88 * copy in name 89 * get starting directory 90 * dirloop: 91 * check accessibility of directory 92 * dirloop2: 93 * copy next component of name to ndp->ni_dent 94 * handle degenerate case where name is null string 95 * look for name in cache, if found, then if at end of path 96 * and deleting or creating, drop it, else to haveino 97 * search for name in directory, to found or notfound 98 * notfound: 99 * if creating, return locked directory, leaving info on avail. slots 100 * else return error 101 * found: 102 * if at end of path and deleting, return information to allow delete 103 * if at end of path and rewriting (CREATE and LOCKPARENT), lock target 104 * inode and return info to allow rewrite 105 * if .. and on mounted filesys, look in mount table for parent 106 * if not at end, add name to cache; if at end and neither creating 107 * nor deleting, add name to cache 108 * haveino: 109 * if symbolic link, massage name in buffer and continue at dirloop 110 * if more components of name, do next level at dirloop 111 * return the answer as locked inode 112 * 113 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, 114 * but unlocked. 115 */ 116 struct inode * 117 namei(ndp) 118 register struct nameidata *ndp; 119 { 120 register char *cp; /* pointer into pathname argument */ 121 /* these variables refer to things which must be freed or unlocked */ 122 register struct inode *dp = 0; /* the directory we are searching */ 123 register struct namecache *ncp; /* cache slot for entry */ 124 register struct fs *fs; /* file system that directory is in */ 125 register struct buf *bp = 0; /* a buffer of directory entries */ 126 register struct direct *ep; /* the current directory entry */ 127 int entryoffsetinblock; /* offset of ep in bp's buffer */ 128 register caddr_t nbp; /* buffer storing path name argument */ 129 /* these variables hold information about the search for a slot */ 130 enum {NONE, COMPACT, FOUND} slotstatus; 131 int slotoffset = -1; /* offset of area with free space */ 132 int slotsize; /* size of area at slotoffset */ 133 int slotfreespace; /* amount of space free in slot */ 134 int slotneeded; /* size of the entry we're seeking */ 135 /* */ 136 int numdirpasses; /* strategy for directory search */ 137 int endsearch; /* offset to end directory search */ 138 int prevoff; /* ndp->ni_offset of previous entry */ 139 int nlink = 0; /* number of symbolic links taken */ 140 struct inode *pdp; /* saved dp during symlink work */ 141 int error, i; 142 int lockparent; 143 int docache; /* == 0 do not cache last component */ 144 int makeentry; /* != 0 if name to be added to cache */ 145 unsigned hash; /* value of name hash for entry */ 146 union nchash *nhp; /* cache chain head for entry */ 147 int isdotdot; /* != 0 if current name is ".." */ 148 int flag; /* op ie, LOOKUP, CREATE, or DELETE */ 149 off_t enduseful; /* pointer past last used dir slot */ 150 151 lockparent = ndp->ni_nameiop & LOCKPARENT; 152 docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE; 153 flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW); 154 if (flag == DELETE || lockparent) 155 docache = 0; 156 /* 157 * Get a buffer for the name to be translated, and copy the 158 * name into the buffer. 159 */ 160 MALLOC(nbp, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); 161 if (ndp->ni_segflg == UIO_SYSSPACE) 162 error = copystr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0); 163 else 164 error = copyinstr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0); 165 if (error) { 166 u.u_error = error; 167 goto bad; 168 } 169 170 /* 171 * Get starting directory. 172 */ 173 cp = nbp; 174 if (*cp == '/') { 175 while (*cp == '/') 176 cp++; 177 if ((dp = u.u_rdir) == NULL) 178 dp = rootdir; 179 } else 180 dp = u.u_cdir; 181 fs = dp->i_fs; 182 ILOCK(dp); 183 dp->i_count++; 184 ndp->ni_pdir = (struct inode *)0xc0000000; /* illegal */ 185 ndp->ni_endoff = 0; 186 187 /* 188 * We come to dirloop to search a new directory. 189 * The directory must be locked so that it can be 190 * iput, and fs must be already set to dp->i_fs. 191 */ 192 dirloop: 193 /* 194 * Check accessiblity of directory. 195 */ 196 if ((dp->i_mode&IFMT) != IFDIR) { 197 u.u_error = ENOTDIR; 198 goto bad; 199 } 200 if (access(dp, IEXEC)) 201 goto bad; 202 203 dirloop2: 204 /* 205 * Copy next component of name to ndp->ni_dent. 206 */ 207 hash = 0; 208 for (i = 0; *cp != 0 && *cp != '/'; cp++) { 209 if (i >= MAXNAMLEN) { 210 u.u_error = ENAMETOOLONG; 211 goto bad; 212 } 213 if (*cp & 0200) 214 if ((*cp&0377) == ('/'|0200) || flag != DELETE) { 215 u.u_error = EINVAL; 216 goto bad; 217 } 218 ndp->ni_dent.d_name[i++] = *cp; 219 hash += (unsigned char)*cp * i; 220 } 221 ndp->ni_dent.d_namlen = i; 222 ndp->ni_dent.d_name[i] = '\0'; 223 isdotdot = (i == 2 && 224 ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.'); 225 makeentry = 1; 226 if (*cp == '\0' && docache == 0) 227 makeentry = 0; 228 229 /* 230 * Check for degenerate name (e.g. / or "") 231 * which is a way of talking about a directory, 232 * e.g. like "/." or ".". 233 */ 234 if (ndp->ni_dent.d_name[0] == '\0') { 235 if (flag != LOOKUP || lockparent) { 236 u.u_error = EISDIR; 237 goto bad; 238 } 239 FREE(nbp, M_NAMEI); 240 return (dp); 241 } 242 243 /* 244 * Special handling for ".." allowing chdir out of mounted 245 * file system: indirect .. in root inode to reevaluate 246 * in directory file system was mounted on. 247 */ 248 if (isdotdot) { 249 for (;;) { 250 if (dp == u.u_rdir || dp == rootdir) { 251 ndp->ni_dent.d_ino = dp->i_number; 252 pdp = dp; 253 dp->i_count++; 254 goto haveino; 255 } 256 if (dp->i_number != ROOTINO) 257 break; 258 for (i = 1; i < NMOUNT; i++) { 259 if (mount[i].m_fs != NULL && 260 mount[i].m_dev == dp->i_dev) { 261 iput(dp); 262 dp = mount[i].m_inodp; 263 ILOCK(dp); 264 dp->i_count++; 265 fs = dp->i_fs; 266 break; 267 } 268 } 269 } 270 } 271 272 /* 273 * We now have a segment name to search for, and a directory to search. 274 * 275 * Before tediously performing a linear scan of the directory, 276 * check the name cache to see if the directory/name pair 277 * we are looking for is known already. We don't do this 278 * if the segment name is long, simply so the cache can avoid 279 * holding long names (which would either waste space, or 280 * add greatly to the complexity). 281 */ 282 if (ndp->ni_dent.d_namlen > NCHNAMLEN) { 283 nchstats.ncs_long++; 284 makeentry = 0; 285 } else { 286 nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)]; 287 for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp; 288 ncp = ncp->nc_forw) { 289 if (ncp->nc_ino == dp->i_number && 290 ncp->nc_dev == dp->i_dev && 291 ncp->nc_nlen == ndp->ni_dent.d_namlen && 292 !bcmp(ncp->nc_name, ndp->ni_dent.d_name, 293 (unsigned)ncp->nc_nlen)) 294 break; 295 } 296 if (ncp == (struct namecache *)nhp) { 297 nchstats.ncs_miss++; 298 ncp = NULL; 299 } else { 300 if (ncp->nc_id != ncp->nc_ip->i_id) 301 nchstats.ncs_falsehits++; 302 else if (!makeentry) 303 nchstats.ncs_badhits++; 304 else { 305 /* 306 * move this slot to end of LRU 307 * chain, if not already there 308 */ 309 if (ncp->nc_nxt) { 310 /* remove from LRU chain */ 311 *ncp->nc_prev = ncp->nc_nxt; 312 ncp->nc_nxt->nc_prev = ncp->nc_prev; 313 314 /* and replace at end of it */ 315 ncp->nc_nxt = NULL; 316 ncp->nc_prev = nchtail; 317 *nchtail = ncp; 318 nchtail = &ncp->nc_nxt; 319 } 320 321 /* 322 * Get the next inode in the path. 323 * See comment above other `IUNLOCK' code for 324 * an explaination of the locking protocol. 325 */ 326 pdp = dp; 327 if (!isdotdot || dp != u.u_rdir) 328 dp = ncp->nc_ip; 329 if (dp == NULL) 330 panic("namei: null cache ino"); 331 if (pdp == dp) 332 dp->i_count++; 333 else if (isdotdot) { 334 IUNLOCK(pdp); 335 igrab(dp); 336 } else { 337 igrab(dp); 338 IUNLOCK(pdp); 339 } 340 341 /* 342 * Verify that the inode that we got 343 * did not change while we were waiting 344 * for it to be locked. 345 */ 346 if (ncp->nc_id != ncp->nc_ip->i_id) { 347 iput(dp); 348 ILOCK(pdp); 349 dp = pdp; 350 nchstats.ncs_falsehits++; 351 } else { 352 ndp->ni_dent.d_ino = dp->i_number; 353 /* ni_dent.d_reclen is garbage ... */ 354 nchstats.ncs_goodhits++; 355 goto haveino; 356 } 357 } 358 359 /* 360 * Last component and we are renaming or deleting, 361 * the cache entry is invalid, or otherwise don't 362 * want cache entry to exist. 363 */ 364 /* remove from LRU chain */ 365 *ncp->nc_prev = ncp->nc_nxt; 366 if (ncp->nc_nxt) 367 ncp->nc_nxt->nc_prev = ncp->nc_prev; 368 else 369 nchtail = ncp->nc_prev; 370 remque(ncp); /* remove from hash chain */ 371 /* insert at head of LRU list (first to grab) */ 372 ncp->nc_nxt = nchhead; 373 ncp->nc_prev = &nchhead; 374 nchhead->nc_prev = &ncp->nc_nxt; 375 nchhead = ncp; 376 /* and make a dummy hash chain */ 377 ncp->nc_forw = ncp; 378 ncp->nc_back = ncp; 379 ncp = NULL; 380 } 381 } 382 383 /* 384 * Suppress search for slots unless creating 385 * file and at end of pathname, in which case 386 * we watch for a place to put the new file in 387 * case it doesn't already exist. 388 */ 389 slotstatus = FOUND; 390 if (flag == CREATE && *cp == 0) { 391 slotstatus = NONE; 392 slotfreespace = 0; 393 slotneeded = DIRSIZ(&ndp->ni_dent); 394 } 395 /* 396 * If this is the same directory that this process 397 * previously searched, pick up where we last left off. 398 * We cache only lookups as these are the most common 399 * and have the greatest payoff. Caching CREATE has little 400 * benefit as it usually must search the entire directory 401 * to determine that the entry does not exist. Caching the 402 * location of the last DELETE has not reduced profiling time 403 * and hence has been removed in the interest of simplicity. 404 */ 405 if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber || 406 dp->i_dev != u.u_ncache.nc_dev) { 407 ndp->ni_offset = 0; 408 numdirpasses = 1; 409 } else { 410 if (u.u_ncache.nc_prevoffset > dp->i_size) 411 u.u_ncache.nc_prevoffset = 0; 412 ndp->ni_offset = u.u_ncache.nc_prevoffset; 413 entryoffsetinblock = blkoff(fs, ndp->ni_offset); 414 if (entryoffsetinblock != 0) { 415 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 416 if (bp == 0) 417 goto bad; 418 } 419 numdirpasses = 2; 420 nchstats.ncs_2passes++; 421 } 422 endsearch = roundup(dp->i_size, DIRBLKSIZ); 423 enduseful = 0; 424 425 searchloop: 426 while (ndp->ni_offset < endsearch) { 427 /* 428 * If offset is on a block boundary, 429 * read the next directory block. 430 * Release previous if it exists. 431 */ 432 if (blkoff(fs, ndp->ni_offset) == 0) { 433 if (bp != NULL) 434 brelse(bp); 435 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 436 if (bp == 0) 437 goto bad; 438 entryoffsetinblock = 0; 439 } 440 /* 441 * If still looking for a slot, and at a DIRBLKSIZE 442 * boundary, have to start looking for free space again. 443 */ 444 if (slotstatus == NONE && 445 (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) { 446 slotoffset = -1; 447 slotfreespace = 0; 448 } 449 /* 450 * Get pointer to next entry. 451 * Full validation checks are slow, so we only check 452 * enough to insure forward progress through the 453 * directory. Complete checks can be run by patching 454 * "dirchk" to be true. 455 */ 456 ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock); 457 if (ep->d_reclen == 0 || 458 dirchk && dirbadentry(ep, entryoffsetinblock)) { 459 dirbad(dp, ndp->ni_offset, "mangled entry"); 460 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 461 ndp->ni_offset += i; 462 entryoffsetinblock += i; 463 continue; 464 } 465 466 /* 467 * If an appropriate sized slot has not yet been found, 468 * check to see if one is available. Also accumulate space 469 * in the current block so that we can determine if 470 * compaction is viable. 471 */ 472 if (slotstatus != FOUND) { 473 int size = ep->d_reclen; 474 475 if (ep->d_ino != 0) 476 size -= DIRSIZ(ep); 477 if (size > 0) { 478 if (size >= slotneeded) { 479 slotstatus = FOUND; 480 slotoffset = ndp->ni_offset; 481 slotsize = ep->d_reclen; 482 } else if (slotstatus == NONE) { 483 slotfreespace += size; 484 if (slotoffset == -1) 485 slotoffset = ndp->ni_offset; 486 if (slotfreespace >= slotneeded) { 487 slotstatus = COMPACT; 488 slotsize = ndp->ni_offset + 489 ep->d_reclen - slotoffset; 490 } 491 } 492 } 493 } 494 495 /* 496 * Check for a name match. 497 */ 498 if (ep->d_ino) { 499 if (ep->d_namlen == ndp->ni_dent.d_namlen && 500 !bcmp(ndp->ni_dent.d_name, ep->d_name, 501 (unsigned)ep->d_namlen)) 502 goto found; 503 } 504 prevoff = ndp->ni_offset; 505 ndp->ni_offset += ep->d_reclen; 506 entryoffsetinblock += ep->d_reclen; 507 if (ep->d_ino) 508 enduseful = ndp->ni_offset; 509 } 510 /* notfound: */ 511 /* 512 * If we started in the middle of the directory and failed 513 * to find our target, we must check the beginning as well. 514 */ 515 if (numdirpasses == 2) { 516 numdirpasses--; 517 ndp->ni_offset = 0; 518 endsearch = u.u_ncache.nc_prevoffset; 519 goto searchloop; 520 } 521 /* 522 * If creating, and at end of pathname and current 523 * directory has not been removed, then can consider 524 * allowing file to be created. 525 */ 526 if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { 527 /* 528 * Access for write is interpreted as allowing 529 * creation of files in the directory. 530 */ 531 if (access(dp, IWRITE)) 532 goto bad; 533 /* 534 * Return an indication of where the new directory 535 * entry should be put. If we didn't find a slot, 536 * then set ndp->ni_count to 0 indicating that the new 537 * slot belongs at the end of the directory. If we found 538 * a slot, then the new entry can be put in the range 539 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count) 540 */ 541 if (slotstatus == NONE) { 542 ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ); 543 ndp->ni_count = 0; 544 enduseful = ndp->ni_offset; 545 } else { 546 ndp->ni_offset = slotoffset; 547 ndp->ni_count = slotsize; 548 if (enduseful < slotoffset + slotsize) 549 enduseful = slotoffset + slotsize; 550 } 551 ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ); 552 dp->i_flag |= IUPD|ICHG; 553 if (bp) 554 brelse(bp); 555 FREE(nbp, M_NAMEI); 556 /* 557 * We return with the directory locked, so that 558 * the parameters we set up above will still be 559 * valid if we actually decide to do a direnter(). 560 * We return NULL to indicate that the entry doesn't 561 * currently exist, leaving a pointer to the (locked) 562 * directory inode in ndp->ni_pdir. 563 */ 564 ndp->ni_pdir = dp; 565 return (NULL); 566 } 567 u.u_error = ENOENT; 568 goto bad; 569 found: 570 if (numdirpasses == 2) 571 nchstats.ncs_pass2++; 572 /* 573 * Check that directory length properly reflects presence 574 * of this entry. 575 */ 576 if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) { 577 dirbad(dp, ndp->ni_offset, "i_size too small"); 578 dp->i_size = entryoffsetinblock + DIRSIZ(ep); 579 dp->i_flag |= IUPD|ICHG; 580 } 581 582 /* 583 * Found component in pathname. 584 * If the final component of path name, save information 585 * in the cache as to where the entry was found. 586 */ 587 if (*cp == '\0' && flag == LOOKUP) { 588 u.u_ncache.nc_prevoffset = ndp->ni_offset &~ (DIRBLKSIZ - 1); 589 u.u_ncache.nc_inumber = dp->i_number; 590 u.u_ncache.nc_dev = dp->i_dev; 591 } 592 /* 593 * Save directory entry's inode number and reclen in ndp->ni_dent, 594 * and release directory buffer. 595 */ 596 ndp->ni_dent.d_ino = ep->d_ino; 597 ndp->ni_dent.d_reclen = ep->d_reclen; 598 brelse(bp); 599 bp = NULL; 600 601 /* 602 * If deleting, and at end of pathname, return 603 * parameters which can be used to remove file. 604 * If the lockparent flag isn't set, we return only 605 * the directory (in ndp->ni_pdir), otherwise we go 606 * on and lock the inode, being careful with ".". 607 */ 608 if (flag == DELETE && *cp == 0) { 609 /* 610 * Write access to directory required to delete files. 611 */ 612 if (access(dp, IWRITE)) 613 goto bad; 614 ndp->ni_pdir = dp; /* for dirremove() */ 615 /* 616 * Return pointer to current entry in ndp->ni_offset, 617 * and distance past previous entry (if there 618 * is a previous entry in this block) in ndp->ni_count. 619 * Save directory inode pointer in ndp->ni_pdir for dirremove(). 620 */ 621 if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0) 622 ndp->ni_count = 0; 623 else 624 ndp->ni_count = ndp->ni_offset - prevoff; 625 if (lockparent) { 626 if (dp->i_number == ndp->ni_dent.d_ino) 627 dp->i_count++; 628 else { 629 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 630 if (dp == NULL) { 631 iput(ndp->ni_pdir); 632 goto bad; 633 } 634 /* 635 * If directory is "sticky", then user must own 636 * the directory, or the file in it, else he 637 * may not delete it (unless he's root). This 638 * implements append-only directories. 639 */ 640 if ((ndp->ni_pdir->i_mode & ISVTX) && 641 u.u_uid != 0 && 642 u.u_uid != ndp->ni_pdir->i_uid && 643 dp->i_uid != u.u_uid) { 644 iput(ndp->ni_pdir); 645 u.u_error = EPERM; 646 goto bad; 647 } 648 } 649 } 650 FREE(nbp, M_NAMEI); 651 return (dp); 652 } 653 654 /* 655 * If rewriting (rename), return the inode and the 656 * information required to rewrite the present directory 657 * Must get inode of directory entry to verify it's a 658 * regular file, or empty directory. 659 */ 660 if ((flag == CREATE && lockparent) && *cp == 0) { 661 if (access(dp, IWRITE)) 662 goto bad; 663 ndp->ni_pdir = dp; /* for dirrewrite() */ 664 /* 665 * Careful about locking second inode. 666 * This can only occur if the target is ".". 667 */ 668 if (dp->i_number == ndp->ni_dent.d_ino) { 669 u.u_error = EISDIR; /* XXX */ 670 goto bad; 671 } 672 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 673 if (dp == NULL) { 674 iput(ndp->ni_pdir); 675 goto bad; 676 } 677 FREE(nbp, M_NAMEI); 678 return (dp); 679 } 680 681 /* 682 * Check for symbolic link, which may require us to massage the 683 * name before we continue translation. We do not `iput' the 684 * directory because we may need it again if the symbolic link 685 * is relative to the current directory. Instead we save it 686 * unlocked as "pdp". We must get the target inode before unlocking 687 * the directory to insure that the inode will not be removed 688 * before we get it. We prevent deadlock by always fetching 689 * inodes from the root, moving down the directory tree. Thus 690 * when following backward pointers ".." we must unlock the 691 * parent directory before getting the requested directory. 692 * There is a potential race condition here if both the current 693 * and parent directories are removed before the `iget' for the 694 * inode associated with ".." returns. We hope that this occurs 695 * infrequently since we cannot avoid this race condition without 696 * implementing a sophisticated deadlock detection algorithm. 697 * Note also that this simple deadlock detection scheme will not 698 * work if the file system has any hard links other than ".." 699 * that point backwards in the directory structure. 700 */ 701 pdp = dp; 702 if (isdotdot) { 703 IUNLOCK(pdp); /* race to get the inode */ 704 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 705 if (dp == NULL) 706 goto bad2; 707 } else if (dp->i_number == ndp->ni_dent.d_ino) { 708 dp->i_count++; /* we want ourself, ie "." */ 709 } else { 710 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 711 IUNLOCK(pdp); 712 if (dp == NULL) 713 goto bad2; 714 } 715 716 /* 717 * Insert name into cache if appropriate. 718 */ 719 if (makeentry) { 720 if (ncp != NULL) 721 panic("namei: duplicating cache"); 722 /* 723 * Free the cache slot at head of lru chain. 724 */ 725 if (ncp = nchhead) { 726 /* remove from lru chain */ 727 *ncp->nc_prev = ncp->nc_nxt; 728 if (ncp->nc_nxt) 729 ncp->nc_nxt->nc_prev = ncp->nc_prev; 730 else 731 nchtail = ncp->nc_prev; 732 remque(ncp); /* remove from old hash chain */ 733 /* grab the inode we just found */ 734 ncp->nc_ip = dp; 735 /* fill in cache info */ 736 ncp->nc_ino = pdp->i_number; /* parents inum */ 737 ncp->nc_dev = pdp->i_dev; /* & device */ 738 ncp->nc_idev = dp->i_dev; /* our device */ 739 ncp->nc_id = dp->i_id; /* identifier */ 740 ncp->nc_nlen = ndp->ni_dent.d_namlen; 741 bcopy(ndp->ni_dent.d_name, ncp->nc_name, 742 (unsigned)ncp->nc_nlen); 743 /* link at end of lru chain */ 744 ncp->nc_nxt = NULL; 745 ncp->nc_prev = nchtail; 746 *nchtail = ncp; 747 nchtail = &ncp->nc_nxt; 748 /* and insert on hash chain */ 749 insque(ncp, nhp); 750 } 751 } 752 753 haveino: 754 fs = dp->i_fs; 755 756 /* 757 * Check for symbolic link 758 */ 759 if ((dp->i_mode & IFMT) == IFLNK && 760 ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) { 761 u_int pathlen = strlen(cp) + 1; 762 763 if (dp->i_size + pathlen >= MAXPATHLEN - 1) { 764 u.u_error = ENAMETOOLONG; 765 goto bad2; 766 } 767 if (++nlink > MAXSYMLINKS) { 768 u.u_error = ELOOP; 769 goto bad2; 770 } 771 ovbcopy(cp, nbp + dp->i_size, pathlen); 772 u.u_error = 773 rdwri(UIO_READ, dp, nbp, (int)dp->i_size, 774 (off_t)0, 1, (int *)0); 775 if (u.u_error) 776 goto bad2; 777 cp = nbp; 778 iput(dp); 779 if (*cp == '/') { 780 irele(pdp); 781 while (*cp == '/') 782 cp++; 783 if ((dp = u.u_rdir) == NULL) 784 dp = rootdir; 785 ILOCK(dp); 786 dp->i_count++; 787 } else { 788 dp = pdp; 789 ILOCK(dp); 790 } 791 fs = dp->i_fs; 792 goto dirloop; 793 } 794 795 /* 796 * Not a symbolic link. If more pathname, 797 * continue at next component, else return. 798 */ 799 if (*cp == '/') { 800 while (*cp == '/') 801 cp++; 802 irele(pdp); 803 goto dirloop; 804 } 805 FREE(nbp, M_NAMEI); 806 if (lockparent) 807 ndp->ni_pdir = pdp; 808 else 809 irele(pdp); 810 return (dp); 811 bad2: 812 irele(pdp); 813 bad: 814 if (bp) 815 brelse(bp); 816 if (dp) 817 iput(dp); 818 FREE(nbp, M_NAMEI); 819 return (NULL); 820 } 821 822 823 dirbad(ip, offset, how) 824 struct inode *ip; 825 off_t offset; 826 char *how; 827 { 828 829 printf("%s: bad dir ino %d at offset %d: %s\n", 830 ip->i_fs->fs_fsmnt, ip->i_number, offset, how); 831 } 832 833 /* 834 * Do consistency checking on a directory entry: 835 * record length must be multiple of 4 836 * entry must fit in rest of its DIRBLKSIZ block 837 * record must be large enough to contain entry 838 * name is not longer than MAXNAMLEN 839 * name must be as long as advertised, and null terminated 840 */ 841 dirbadentry(ep, entryoffsetinblock) 842 register struct direct *ep; 843 int entryoffsetinblock; 844 { 845 register int i; 846 847 if ((ep->d_reclen & 0x3) != 0 || 848 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 849 ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN) 850 return (1); 851 for (i = 0; i < ep->d_namlen; i++) 852 if (ep->d_name[i] == '\0') 853 return (1); 854 return (ep->d_name[i]); 855 } 856 857 /* 858 * Write a directory entry after a call to namei, using the parameters 859 * which it left in the u. area. The argument ip is the inode which 860 * the new directory entry will refer to. The u. area field ndp->ni_pdir is 861 * a pointer to the directory to be written, which was left locked by 862 * namei. Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate 863 * how the space for the new entry is to be gotten. 864 */ 865 direnter(ip, ndp) 866 struct inode *ip; 867 register struct nameidata *ndp; 868 { 869 register struct direct *ep, *nep; 870 register struct inode *dp = ndp->ni_pdir; 871 struct buf *bp; 872 int loc, spacefree, error = 0; 873 u_int dsize; 874 int newentrysize; 875 char *dirbuf; 876 877 ndp->ni_dent.d_ino = ip->i_number; 878 newentrysize = DIRSIZ(&ndp->ni_dent); 879 if (ndp->ni_count == 0) { 880 /* 881 * If ndp->ni_count is 0, then namei could find no space in the 882 * directory. In this case ndp->ni_offset will be on a directory 883 * block boundary and we will write the new entry into a fresh 884 * block. 885 */ 886 if (ndp->ni_offset&(DIRBLKSIZ-1)) 887 panic("wdir: newblk"); 888 ndp->ni_dent.d_reclen = DIRBLKSIZ; 889 error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 890 newentrysize, ndp->ni_offset, 1, (int *)0); 891 if (DIRBLKSIZ > dp->i_fs->fs_fsize) 892 panic("wdir: blksize"); /* XXX - should grow w/bmap() */ 893 else 894 dp->i_size = roundup(dp->i_size, DIRBLKSIZ); 895 iput(dp); 896 return (error); 897 } 898 899 /* 900 * If ndp->ni_count is non-zero, then namei found space for the new 901 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count. 902 * in the directory. To use this space, we may have to compact 903 * the entries located there, by copying them together towards 904 * the beginning of the block, leaving the free space in 905 * one usable chunk at the end. 906 */ 907 908 /* 909 * Increase size of directory if entry eats into new space. 910 * This should never push the size past a new multiple of 911 * DIRBLKSIZE. 912 * 913 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 914 */ 915 if (ndp->ni_offset + ndp->ni_count > dp->i_size) 916 dp->i_size = ndp->ni_offset + ndp->ni_count; 917 /* 918 * Get the block containing the space for the new directory 919 * entry. Should return error by result instead of u.u_error. 920 */ 921 bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf); 922 if (bp == 0) { 923 iput(dp); 924 return (u.u_error); 925 } 926 /* 927 * Find space for the new entry. In the simple case, the 928 * entry at offset base will have the space. If it does 929 * not, then namei arranged that compacting the region 930 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space. 931 */ 932 ep = (struct direct *)dirbuf; 933 dsize = DIRSIZ(ep); 934 spacefree = ep->d_reclen - dsize; 935 for (loc = ep->d_reclen; loc < ndp->ni_count; ) { 936 nep = (struct direct *)(dirbuf + loc); 937 if (ep->d_ino) { 938 /* trim the existing slot */ 939 ep->d_reclen = dsize; 940 ep = (struct direct *)((char *)ep + dsize); 941 } else { 942 /* overwrite; nothing there; header is ours */ 943 spacefree += dsize; 944 } 945 dsize = DIRSIZ(nep); 946 spacefree += nep->d_reclen - dsize; 947 loc += nep->d_reclen; 948 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 949 } 950 /* 951 * Update the pointer fields in the previous entry (if any), 952 * copy in the new entry, and write out the block. 953 */ 954 if (ep->d_ino == 0) { 955 if (spacefree + dsize < newentrysize) 956 panic("wdir: compact1"); 957 ndp->ni_dent.d_reclen = spacefree + dsize; 958 } else { 959 if (spacefree < newentrysize) 960 panic("wdir: compact2"); 961 ndp->ni_dent.d_reclen = spacefree; 962 ep->d_reclen = dsize; 963 ep = (struct direct *)((char *)ep + dsize); 964 } 965 bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize); 966 bwrite(bp); 967 dp->i_flag |= IUPD|ICHG; 968 if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size) 969 itrunc(dp, (u_long)ndp->ni_endoff); 970 iput(dp); 971 return (error); 972 } 973 974 /* 975 * Remove a directory entry after a call to namei, using the 976 * parameters which it left in the u. area. The u. entry 977 * ni_offset contains the offset into the directory of the 978 * entry to be eliminated. The ni_count field contains the 979 * size of the previous record in the directory. If this 980 * is 0, the first entry is being deleted, so we need only 981 * zero the inode number to mark the entry as free. If the 982 * entry isn't the first in the directory, we must reclaim 983 * the space of the now empty record by adding the record size 984 * to the size of the previous entry. 985 */ 986 dirremove(ndp) 987 register struct nameidata *ndp; 988 { 989 register struct inode *dp = ndp->ni_pdir; 990 register struct buf *bp; 991 struct direct *ep; 992 993 if (ndp->ni_count == 0) { 994 /* 995 * First entry in block: set d_ino to zero. 996 */ 997 ndp->ni_dent.d_ino = 0; 998 (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 999 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1000 } else { 1001 /* 1002 * Collapse new free space into previous entry. 1003 */ 1004 bp = blkatoff(dp, ndp->ni_offset - ndp->ni_count, (char **)&ep); 1005 if (bp == 0) 1006 return (0); 1007 ep->d_reclen += ndp->ni_dent.d_reclen; 1008 bwrite(bp); 1009 dp->i_flag |= IUPD|ICHG; 1010 } 1011 return (1); 1012 } 1013 1014 /* 1015 * Rewrite an existing directory entry to point at the inode 1016 * supplied. The parameters describing the directory entry are 1017 * set up by a call to namei. 1018 */ 1019 dirrewrite(dp, ip, ndp) 1020 struct inode *dp, *ip; 1021 struct nameidata *ndp; 1022 { 1023 1024 ndp->ni_dent.d_ino = ip->i_number; 1025 u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1026 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1027 iput(dp); 1028 } 1029 1030 /* 1031 * Return buffer with contents of block "offset" 1032 * from the beginning of directory "ip". If "res" 1033 * is non-zero, fill it in with a pointer to the 1034 * remaining space in the directory. 1035 */ 1036 struct buf * 1037 blkatoff(ip, offset, res) 1038 struct inode *ip; 1039 off_t offset; 1040 char **res; 1041 { 1042 register struct fs *fs = ip->i_fs; 1043 daddr_t lbn = lblkno(fs, offset); 1044 int bsize = blksize(fs, ip, lbn); 1045 register struct buf *bp; 1046 daddr_t bn; 1047 1048 bn = bmap(ip, lbn, B_READ, bsize); 1049 if (u.u_error) 1050 return (0); 1051 if (bn == (daddr_t)-1) { 1052 dirbad(ip, offset, "hole in dir"); 1053 return (0); 1054 } 1055 bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize); 1056 if (bp->b_flags & B_ERROR) { 1057 brelse(bp); 1058 return (0); 1059 } 1060 if (res) 1061 *res = bp->b_un.b_addr + blkoff(fs, offset); 1062 return (bp); 1063 } 1064 1065 /* 1066 * Check if a directory is empty or not. 1067 * Inode supplied must be locked. 1068 * 1069 * Using a struct dirtemplate here is not precisely 1070 * what we want, but better than using a struct direct. 1071 * 1072 * NB: does not handle corrupted directories. 1073 */ 1074 dirempty(ip, parentino) 1075 register struct inode *ip; 1076 ino_t parentino; 1077 { 1078 register off_t off; 1079 struct dirtemplate dbuf; 1080 register struct direct *dp = (struct direct *)&dbuf; 1081 int error, count; 1082 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1083 1084 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1085 error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ, 1086 off, 1, &count); 1087 /* 1088 * Since we read MINDIRSIZ, residual must 1089 * be 0 unless we're at end of file. 1090 */ 1091 if (error || count != 0) 1092 return (0); 1093 /* avoid infinite loops */ 1094 if (dp->d_reclen == 0) 1095 return (0); 1096 /* skip empty entries */ 1097 if (dp->d_ino == 0) 1098 continue; 1099 /* accept only "." and ".." */ 1100 if (dp->d_namlen > 2) 1101 return (0); 1102 if (dp->d_name[0] != '.') 1103 return (0); 1104 /* 1105 * At this point d_namlen must be 1 or 2. 1106 * 1 implies ".", 2 implies ".." if second 1107 * char is also "." 1108 */ 1109 if (dp->d_namlen == 1) 1110 continue; 1111 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1112 continue; 1113 return (0); 1114 } 1115 return (1); 1116 } 1117 1118 /* 1119 * Check if source directory is in the path of the target directory. 1120 * Target is supplied locked, source is unlocked. 1121 * The target is always iput() before returning. 1122 */ 1123 checkpath(source, target) 1124 struct inode *source, *target; 1125 { 1126 struct dirtemplate dirbuf; 1127 register struct inode *ip; 1128 int error = 0; 1129 1130 ip = target; 1131 if (ip->i_number == source->i_number) { 1132 error = EEXIST; 1133 goto out; 1134 } 1135 if (ip->i_number == ROOTINO) 1136 goto out; 1137 1138 for (;;) { 1139 if ((ip->i_mode&IFMT) != IFDIR) { 1140 error = ENOTDIR; 1141 break; 1142 } 1143 error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, 1144 sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); 1145 if (error != 0) 1146 break; 1147 if (dirbuf.dotdot_namlen != 2 || 1148 dirbuf.dotdot_name[0] != '.' || 1149 dirbuf.dotdot_name[1] != '.') { 1150 error = ENOTDIR; 1151 break; 1152 } 1153 if (dirbuf.dotdot_ino == source->i_number) { 1154 error = EINVAL; 1155 break; 1156 } 1157 if (dirbuf.dotdot_ino == ROOTINO) 1158 break; 1159 iput(ip); 1160 ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino); 1161 if (ip == NULL) { 1162 error = u.u_error; 1163 break; 1164 } 1165 } 1166 1167 out: 1168 if (error == ENOTDIR) 1169 printf("checkpath: .. not a directory\n"); 1170 if (ip != NULL) 1171 iput(ip); 1172 return (error); 1173 } 1174 1175 /* 1176 * Name cache initialization, from main() when we are booting 1177 */ 1178 nchinit() 1179 { 1180 register union nchash *nchp; 1181 register struct namecache *ncp; 1182 1183 nchhead = 0; 1184 nchtail = &nchhead; 1185 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) { 1186 ncp->nc_forw = ncp; /* hash chain */ 1187 ncp->nc_back = ncp; 1188 ncp->nc_nxt = NULL; /* lru chain */ 1189 *nchtail = ncp; 1190 ncp->nc_prev = nchtail; 1191 nchtail = &ncp->nc_nxt; 1192 /* all else is zero already */ 1193 } 1194 for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) { 1195 nchp->nch_head[0] = nchp; 1196 nchp->nch_head[1] = nchp; 1197 } 1198 } 1199 1200 /* 1201 * Cache flush, called when filesys is umounted to 1202 * remove entries that would now be invalid 1203 * 1204 * The line "nxtcp = nchhead" near the end is to avoid potential problems 1205 * if the cache lru chain is modified while we are dumping the 1206 * inode. This makes the algorithm O(n^2), but do you think I care? 1207 */ 1208 nchinval(dev) 1209 register dev_t dev; 1210 { 1211 register struct namecache *ncp, *nxtcp; 1212 1213 for (ncp = nchhead; ncp; ncp = nxtcp) { 1214 nxtcp = ncp->nc_nxt; 1215 if (ncp->nc_ip == NULL || 1216 (ncp->nc_idev != dev && ncp->nc_dev != dev)) 1217 continue; 1218 /* free the resources we had */ 1219 ncp->nc_idev = NODEV; 1220 ncp->nc_dev = NODEV; 1221 ncp->nc_id = NULL; 1222 ncp->nc_ino = 0; 1223 ncp->nc_ip = NULL; 1224 remque(ncp); /* remove entry from its hash chain */ 1225 ncp->nc_forw = ncp; /* and make a dummy one */ 1226 ncp->nc_back = ncp; 1227 /* delete this entry from LRU chain */ 1228 *ncp->nc_prev = nxtcp; 1229 if (nxtcp) 1230 nxtcp->nc_prev = ncp->nc_prev; 1231 else 1232 nchtail = ncp->nc_prev; 1233 /* cause rescan of list, it may have altered */ 1234 nxtcp = nchhead; 1235 /* put the now-free entry at head of LRU */ 1236 ncp->nc_nxt = nxtcp; 1237 ncp->nc_prev = &nchhead; 1238 nxtcp->nc_prev = &ncp->nc_nxt; 1239 nchhead = ncp; 1240 } 1241 } 1242 1243 /* 1244 * Name cache invalidation of all entries. 1245 */ 1246 cacheinvalall() 1247 { 1248 register struct namecache *ncp; 1249 1250 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) 1251 ncp->nc_id = 0; 1252 } 1253