1 /* 2 * Copyright (c) 1982, 1986 Regents of the University of California. 3 * All rights reserved. The Berkeley software License Agreement 4 * specifies the terms and conditions for redistribution. 5 * 6 * @(#)ufs_lookup.c 7.4 (Berkeley) 06/21/87 7 */ 8 9 #include "param.h" 10 #include "systm.h" 11 #include "inode.h" 12 #include "fs.h" 13 #include "mount.h" 14 #include "dir.h" 15 #include "user.h" 16 #include "buf.h" 17 #include "conf.h" 18 #include "uio.h" 19 #include "kernel.h" 20 #include "malloc.h" 21 22 struct buf *blkatoff(); 23 int dirchk = 0; 24 25 /* 26 * Structures associated with name cacheing. 27 */ 28 #define NCHHASH 32 /* size of hash table */ 29 30 #if ((NCHHASH)&((NCHHASH)-1)) != 0 31 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH)) 32 #else 33 #define NHASH(h, i, d) ((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1)) 34 #endif 35 36 union nchash { 37 union nchash *nch_head[2]; 38 struct namecache *nch_chain[2]; 39 } nchash[NCHHASH]; 40 #define nch_forw nch_chain[0] 41 #define nch_back nch_chain[1] 42 43 struct namecache *nchhead, **nchtail; /* LRU chain pointers */ 44 struct nchstats nchstats; /* cache effectiveness statistics */ 45 46 /* 47 * Convert a pathname into a pointer to a locked inode. 48 * This is a very central and rather complicated routine. 49 * If the file system is not maintained in a strict tree hierarchy, 50 * this can result in a deadlock situation (see comments in code below). 51 * 52 * The flag argument is LOOKUP, CREATE, or DELETE depending on whether 53 * the name is to be looked up, created, or deleted. When CREATE or 54 * DELETE is specified, information usable in creating or deleteing a 55 * directory entry is also calculated. If flag has LOCKPARENT or'ed 56 * into it and the target of the pathname exists, namei returns both 57 * the target and its parent directory locked. When creating and 58 * LOCKPARENT is specified, the target may not be ".". When deleting 59 * and LOCKPARENT is specified, the target may be ".", but the caller 60 * must check to insure it does an irele and iput instead of two iputs. 61 * 62 * The FOLLOW flag is set when symbolic links are to be followed 63 * when they occur at the end of the name translation process. 64 * Symbolic links are always followed for all other pathname 65 * components other than the last. 66 * 67 * The segflg defines whether the name is to be copied from user 68 * space or kernel space. 69 * 70 * Name caching works as follows: 71 * 72 * Names found by directory scans are retained in a cache 73 * for future reference. It is managed LRU, so frequently 74 * used names will hang around. Cache is indexed by hash value 75 * obtained from (ino,dev,name) where ino & dev refer to the 76 * directory containing name. 77 * 78 * For simplicity (and economy of storage), names longer than 79 * a maximum length of NCHNAMLEN are not cached; they occur 80 * infrequently in any case, and are almost never of interest. 81 * 82 * Upon reaching the last segment of a path, if the reference 83 * is for DELETE, or NOCACHE is set (rewrite), and the 84 * name is located in the cache, it will be dropped. 85 * 86 * Overall outline of namei: 87 * 88 * copy in name 89 * get starting directory 90 * dirloop: 91 * check accessibility of directory 92 * dirloop2: 93 * copy next component of name to ndp->ni_dent 94 * handle degenerate case where name is null string 95 * look for name in cache, if found, then if at end of path 96 * and deleting or creating, drop it, else to haveino 97 * search for name in directory, to found or notfound 98 * notfound: 99 * if creating, return locked directory, leaving info on avail. slots 100 * else return error 101 * found: 102 * if at end of path and deleting, return information to allow delete 103 * if at end of path and rewriting (CREATE and LOCKPARENT), lock target 104 * inode and return info to allow rewrite 105 * if .. and on mounted filesys, look in mount table for parent 106 * if not at end, add name to cache; if at end and neither creating 107 * nor deleting, add name to cache 108 * haveino: 109 * if symbolic link, massage name in buffer and continue at dirloop 110 * if more components of name, do next level at dirloop 111 * return the answer as locked inode 112 * 113 * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode, 114 * but unlocked. 115 */ 116 struct inode * 117 namei(ndp) 118 register struct nameidata *ndp; 119 { 120 register char *cp; /* pointer into pathname argument */ 121 /* these variables refer to things which must be freed or unlocked */ 122 register struct inode *dp = 0; /* the directory we are searching */ 123 register struct namecache *ncp; /* cache slot for entry */ 124 register struct fs *fs; /* file system that directory is in */ 125 register struct buf *bp = 0; /* a buffer of directory entries */ 126 register struct direct *ep; /* the current directory entry */ 127 int entryoffsetinblock; /* offset of ep in bp's buffer */ 128 register caddr_t nbp; /* buffer storing path name argument */ 129 /* these variables hold information about the search for a slot */ 130 enum {NONE, COMPACT, FOUND} slotstatus; 131 int slotoffset = -1; /* offset of area with free space */ 132 int slotsize; /* size of area at slotoffset */ 133 int slotfreespace; /* amount of space free in slot */ 134 int slotneeded; /* size of the entry we're seeking */ 135 /* */ 136 int numdirpasses; /* strategy for directory search */ 137 int endsearch; /* offset to end directory search */ 138 int prevoff; /* ndp->ni_offset of previous entry */ 139 int nlink = 0; /* number of symbolic links taken */ 140 struct inode *pdp; /* saved dp during symlink work */ 141 int error, i; 142 int lockparent; 143 int docache; /* == 0 do not cache last component */ 144 int makeentry; /* != 0 if name to be added to cache */ 145 unsigned hash; /* value of name hash for entry */ 146 union nchash *nhp; /* cache chain head for entry */ 147 int isdotdot; /* != 0 if current name is ".." */ 148 int flag; /* op ie, LOOKUP, CREATE, or DELETE */ 149 off_t enduseful; /* pointer past last used dir slot */ 150 151 lockparent = ndp->ni_nameiop & LOCKPARENT; 152 docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE; 153 flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW); 154 if (flag == DELETE || lockparent) 155 docache = 0; 156 /* 157 * Get a buffer for the name to be translated, and copy the 158 * name into the buffer. 159 */ 160 MALLOC(nbp, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK); 161 if (ndp->ni_segflg == UIO_SYSSPACE) 162 error = copystr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0); 163 else 164 error = copyinstr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0); 165 if (error) { 166 u.u_error = error; 167 goto bad; 168 } 169 170 /* 171 * Get starting directory. 172 */ 173 cp = nbp; 174 if (*cp == '/') { 175 while (*cp == '/') 176 cp++; 177 if ((dp = u.u_rdir) == NULL) 178 dp = rootdir; 179 } else 180 dp = u.u_cdir; 181 fs = dp->i_fs; 182 ILOCK(dp); 183 dp->i_count++; 184 ndp->ni_pdir = (struct inode *)0xc0000000; /* illegal */ 185 ndp->ni_endoff = 0; 186 187 /* 188 * We come to dirloop to search a new directory. 189 * The directory must be locked so that it can be 190 * iput, and fs must be already set to dp->i_fs. 191 */ 192 dirloop: 193 /* 194 * Check accessiblity of directory. 195 */ 196 if ((dp->i_mode&IFMT) != IFDIR) { 197 u.u_error = ENOTDIR; 198 goto bad; 199 } 200 if (access(dp, IEXEC)) 201 goto bad; 202 203 dirloop2: 204 /* 205 * Copy next component of name to ndp->ni_dent. 206 */ 207 hash = 0; 208 for (i = 0; *cp != 0 && *cp != '/'; cp++) { 209 if (i >= MAXNAMLEN) { 210 u.u_error = ENAMETOOLONG; 211 goto bad; 212 } 213 if (*cp & 0200) 214 if ((*cp&0377) == ('/'|0200) || flag != DELETE) { 215 u.u_error = EINVAL; 216 goto bad; 217 } 218 ndp->ni_dent.d_name[i++] = *cp; 219 hash += (unsigned char)*cp * i; 220 } 221 ndp->ni_dent.d_namlen = i; 222 ndp->ni_dent.d_name[i] = '\0'; 223 isdotdot = (i == 2 && 224 ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.'); 225 makeentry = 1; 226 if (*cp == '\0' && docache == 0) 227 makeentry = 0; 228 229 /* 230 * Check for degenerate name (e.g. / or "") 231 * which is a way of talking about a directory, 232 * e.g. like "/." or ".". 233 */ 234 if (ndp->ni_dent.d_name[0] == '\0') { 235 if (flag != LOOKUP || lockparent) { 236 u.u_error = EISDIR; 237 goto bad; 238 } 239 FREE(nbp, M_NAMEI); 240 return (dp); 241 } 242 243 /* 244 * We now have a segment name to search for, and a directory to search. 245 * 246 * Before tediously performing a linear scan of the directory, 247 * check the name cache to see if the directory/name pair 248 * we are looking for is known already. We don't do this 249 * if the segment name is long, simply so the cache can avoid 250 * holding long names (which would either waste space, or 251 * add greatly to the complexity). 252 */ 253 if (ndp->ni_dent.d_namlen > NCHNAMLEN) { 254 nchstats.ncs_long++; 255 makeentry = 0; 256 } else { 257 nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)]; 258 for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp; 259 ncp = ncp->nc_forw) { 260 if (ncp->nc_ino == dp->i_number && 261 ncp->nc_dev == dp->i_dev && 262 ncp->nc_nlen == ndp->ni_dent.d_namlen && 263 !bcmp(ncp->nc_name, ndp->ni_dent.d_name, 264 (unsigned)ncp->nc_nlen)) 265 break; 266 } 267 if (ncp == (struct namecache *)nhp) { 268 nchstats.ncs_miss++; 269 ncp = NULL; 270 } else { 271 if (ncp->nc_id != ncp->nc_ip->i_id) 272 nchstats.ncs_falsehits++; 273 else if (!makeentry) 274 nchstats.ncs_badhits++; 275 else { 276 /* 277 * move this slot to end of LRU 278 * chain, if not already there 279 */ 280 if (ncp->nc_nxt) { 281 /* remove from LRU chain */ 282 *ncp->nc_prev = ncp->nc_nxt; 283 ncp->nc_nxt->nc_prev = ncp->nc_prev; 284 285 /* and replace at end of it */ 286 ncp->nc_nxt = NULL; 287 ncp->nc_prev = nchtail; 288 *nchtail = ncp; 289 nchtail = &ncp->nc_nxt; 290 } 291 292 /* 293 * Get the next inode in the path. 294 * See comment above other `IUNLOCK' code for 295 * an explaination of the locking protocol. 296 */ 297 pdp = dp; 298 if (!isdotdot || dp != u.u_rdir) 299 dp = ncp->nc_ip; 300 if (dp == NULL) 301 panic("namei: null cache ino"); 302 if (pdp == dp) 303 dp->i_count++; 304 else if (isdotdot) { 305 IUNLOCK(pdp); 306 igrab(dp); 307 } else { 308 igrab(dp); 309 IUNLOCK(pdp); 310 } 311 312 /* 313 * Verify that the inode that we got 314 * did not change while we were waiting 315 * for it to be locked. 316 */ 317 if (ncp->nc_id != ncp->nc_ip->i_id) { 318 iput(dp); 319 ILOCK(pdp); 320 dp = pdp; 321 nchstats.ncs_falsehits++; 322 } else { 323 ndp->ni_dent.d_ino = dp->i_number; 324 /* ni_dent.d_reclen is garbage ... */ 325 nchstats.ncs_goodhits++; 326 goto haveino; 327 } 328 } 329 330 /* 331 * Last component and we are renaming or deleting, 332 * the cache entry is invalid, or otherwise don't 333 * want cache entry to exist. 334 */ 335 /* remove from LRU chain */ 336 *ncp->nc_prev = ncp->nc_nxt; 337 if (ncp->nc_nxt) 338 ncp->nc_nxt->nc_prev = ncp->nc_prev; 339 else 340 nchtail = ncp->nc_prev; 341 remque(ncp); /* remove from hash chain */ 342 /* insert at head of LRU list (first to grab) */ 343 ncp->nc_nxt = nchhead; 344 ncp->nc_prev = &nchhead; 345 nchhead->nc_prev = &ncp->nc_nxt; 346 nchhead = ncp; 347 /* and make a dummy hash chain */ 348 ncp->nc_forw = ncp; 349 ncp->nc_back = ncp; 350 ncp = NULL; 351 } 352 } 353 354 /* 355 * Suppress search for slots unless creating 356 * file and at end of pathname, in which case 357 * we watch for a place to put the new file in 358 * case it doesn't already exist. 359 */ 360 slotstatus = FOUND; 361 if (flag == CREATE && *cp == 0) { 362 slotstatus = NONE; 363 slotfreespace = 0; 364 slotneeded = DIRSIZ(&ndp->ni_dent); 365 } 366 /* 367 * If this is the same directory that this process 368 * previously searched, pick up where we last left off. 369 * We cache only lookups as these are the most common 370 * and have the greatest payoff. Caching CREATE has little 371 * benefit as it usually must search the entire directory 372 * to determine that the entry does not exist. Caching the 373 * location of the last DELETE has not reduced profiling time 374 * and hence has been removed in the interest of simplicity. 375 */ 376 if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber || 377 dp->i_dev != u.u_ncache.nc_dev) { 378 ndp->ni_offset = 0; 379 numdirpasses = 1; 380 } else { 381 if (u.u_ncache.nc_prevoffset > dp->i_size) 382 u.u_ncache.nc_prevoffset = 0; 383 ndp->ni_offset = u.u_ncache.nc_prevoffset; 384 entryoffsetinblock = blkoff(fs, ndp->ni_offset); 385 if (entryoffsetinblock != 0) { 386 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 387 if (bp == 0) 388 goto bad; 389 } 390 numdirpasses = 2; 391 nchstats.ncs_2passes++; 392 } 393 endsearch = roundup(dp->i_size, DIRBLKSIZ); 394 enduseful = 0; 395 396 searchloop: 397 while (ndp->ni_offset < endsearch) { 398 /* 399 * If offset is on a block boundary, 400 * read the next directory block. 401 * Release previous if it exists. 402 */ 403 if (blkoff(fs, ndp->ni_offset) == 0) { 404 if (bp != NULL) 405 brelse(bp); 406 bp = blkatoff(dp, ndp->ni_offset, (char **)0); 407 if (bp == 0) 408 goto bad; 409 entryoffsetinblock = 0; 410 } 411 /* 412 * If still looking for a slot, and at a DIRBLKSIZE 413 * boundary, have to start looking for free space again. 414 */ 415 if (slotstatus == NONE && 416 (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) { 417 slotoffset = -1; 418 slotfreespace = 0; 419 } 420 /* 421 * Get pointer to next entry. 422 * Full validation checks are slow, so we only check 423 * enough to insure forward progress through the 424 * directory. Complete checks can be run by patching 425 * "dirchk" to be true. 426 */ 427 ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock); 428 if (ep->d_reclen == 0 || 429 dirchk && dirbadentry(ep, entryoffsetinblock)) { 430 dirbad(dp, ndp->ni_offset, "mangled entry"); 431 i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)); 432 ndp->ni_offset += i; 433 entryoffsetinblock += i; 434 continue; 435 } 436 437 /* 438 * If an appropriate sized slot has not yet been found, 439 * check to see if one is available. Also accumulate space 440 * in the current block so that we can determine if 441 * compaction is viable. 442 */ 443 if (slotstatus != FOUND) { 444 int size = ep->d_reclen; 445 446 if (ep->d_ino != 0) 447 size -= DIRSIZ(ep); 448 if (size > 0) { 449 if (size >= slotneeded) { 450 slotstatus = FOUND; 451 slotoffset = ndp->ni_offset; 452 slotsize = ep->d_reclen; 453 } else if (slotstatus == NONE) { 454 slotfreespace += size; 455 if (slotoffset == -1) 456 slotoffset = ndp->ni_offset; 457 if (slotfreespace >= slotneeded) { 458 slotstatus = COMPACT; 459 slotsize = ndp->ni_offset + 460 ep->d_reclen - slotoffset; 461 } 462 } 463 } 464 } 465 466 /* 467 * Check for a name match. 468 */ 469 if (ep->d_ino) { 470 if (ep->d_namlen == ndp->ni_dent.d_namlen && 471 !bcmp(ndp->ni_dent.d_name, ep->d_name, 472 (unsigned)ep->d_namlen)) 473 goto found; 474 } 475 prevoff = ndp->ni_offset; 476 ndp->ni_offset += ep->d_reclen; 477 entryoffsetinblock += ep->d_reclen; 478 if (ep->d_ino) 479 enduseful = ndp->ni_offset; 480 } 481 /* notfound: */ 482 /* 483 * If we started in the middle of the directory and failed 484 * to find our target, we must check the beginning as well. 485 */ 486 if (numdirpasses == 2) { 487 numdirpasses--; 488 ndp->ni_offset = 0; 489 endsearch = u.u_ncache.nc_prevoffset; 490 goto searchloop; 491 } 492 /* 493 * If creating, and at end of pathname and current 494 * directory has not been removed, then can consider 495 * allowing file to be created. 496 */ 497 if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) { 498 /* 499 * Access for write is interpreted as allowing 500 * creation of files in the directory. 501 */ 502 if (access(dp, IWRITE)) 503 goto bad; 504 /* 505 * Return an indication of where the new directory 506 * entry should be put. If we didn't find a slot, 507 * then set ndp->ni_count to 0 indicating that the new 508 * slot belongs at the end of the directory. If we found 509 * a slot, then the new entry can be put in the range 510 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count) 511 */ 512 if (slotstatus == NONE) { 513 ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ); 514 ndp->ni_count = 0; 515 enduseful = ndp->ni_offset; 516 } else { 517 ndp->ni_offset = slotoffset; 518 ndp->ni_count = slotsize; 519 if (enduseful < slotoffset + slotsize) 520 enduseful = slotoffset + slotsize; 521 } 522 ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ); 523 dp->i_flag |= IUPD|ICHG; 524 if (bp) 525 brelse(bp); 526 FREE(nbp, M_NAMEI); 527 /* 528 * We return with the directory locked, so that 529 * the parameters we set up above will still be 530 * valid if we actually decide to do a direnter(). 531 * We return NULL to indicate that the entry doesn't 532 * currently exist, leaving a pointer to the (locked) 533 * directory inode in ndp->ni_pdir. 534 */ 535 ndp->ni_pdir = dp; 536 return (NULL); 537 } 538 u.u_error = ENOENT; 539 goto bad; 540 found: 541 if (numdirpasses == 2) 542 nchstats.ncs_pass2++; 543 /* 544 * Check that directory length properly reflects presence 545 * of this entry. 546 */ 547 if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) { 548 dirbad(dp, ndp->ni_offset, "i_size too small"); 549 dp->i_size = entryoffsetinblock + DIRSIZ(ep); 550 dp->i_flag |= IUPD|ICHG; 551 } 552 553 /* 554 * Found component in pathname. 555 * If the final component of path name, save information 556 * in the cache as to where the entry was found. 557 */ 558 if (*cp == '\0' && flag == LOOKUP) { 559 u.u_ncache.nc_prevoffset = ndp->ni_offset &~ (DIRBLKSIZ - 1); 560 u.u_ncache.nc_inumber = dp->i_number; 561 u.u_ncache.nc_dev = dp->i_dev; 562 } 563 /* 564 * Save directory entry's inode number and reclen in ndp->ni_dent, 565 * and release directory buffer. 566 */ 567 ndp->ni_dent.d_ino = ep->d_ino; 568 ndp->ni_dent.d_reclen = ep->d_reclen; 569 brelse(bp); 570 bp = NULL; 571 572 /* 573 * If deleting, and at end of pathname, return 574 * parameters which can be used to remove file. 575 * If the lockparent flag isn't set, we return only 576 * the directory (in ndp->ni_pdir), otherwise we go 577 * on and lock the inode, being careful with ".". 578 */ 579 if (flag == DELETE && *cp == 0) { 580 /* 581 * Write access to directory required to delete files. 582 */ 583 if (access(dp, IWRITE)) 584 goto bad; 585 ndp->ni_pdir = dp; /* for dirremove() */ 586 /* 587 * Return pointer to current entry in ndp->ni_offset, 588 * and distance past previous entry (if there 589 * is a previous entry in this block) in ndp->ni_count. 590 * Save directory inode pointer in ndp->ni_pdir for dirremove(). 591 */ 592 if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0) 593 ndp->ni_count = 0; 594 else 595 ndp->ni_count = ndp->ni_offset - prevoff; 596 if (lockparent) { 597 if (dp->i_number == ndp->ni_dent.d_ino) 598 dp->i_count++; 599 else { 600 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 601 if (dp == NULL) { 602 iput(ndp->ni_pdir); 603 goto bad; 604 } 605 /* 606 * If directory is "sticky", then user must own 607 * the directory, or the file in it, else he 608 * may not delete it (unless he's root). This 609 * implements append-only directories. 610 */ 611 if ((ndp->ni_pdir->i_mode & ISVTX) && 612 u.u_uid != 0 && 613 u.u_uid != ndp->ni_pdir->i_uid && 614 dp->i_uid != u.u_uid) { 615 iput(ndp->ni_pdir); 616 u.u_error = EPERM; 617 goto bad; 618 } 619 } 620 } 621 FREE(nbp, M_NAMEI); 622 return (dp); 623 } 624 625 /* 626 * Special handling for ".." allowing chdir out of mounted 627 * file system: indirect .. in root inode to reevaluate 628 * in directory file system was mounted on. 629 */ 630 if (isdotdot) { 631 if (dp == u.u_rdir) { 632 ndp->ni_dent.d_ino = dp->i_number; 633 makeentry = 0; 634 } else if (ndp->ni_dent.d_ino == ROOTINO && 635 dp->i_number == ROOTINO) { 636 for (i = 1; i < NMOUNT; i++) 637 if (mount[i].m_fs != NULL && 638 mount[i].m_dev == dp->i_dev) { 639 iput(dp); 640 dp = mount[i].m_inodp; 641 ILOCK(dp); 642 dp->i_count++; 643 fs = dp->i_fs; 644 cp -= 2; /* back over .. */ 645 goto dirloop2; 646 } 647 } 648 } 649 650 /* 651 * If rewriting (rename), return the inode and the 652 * information required to rewrite the present directory 653 * Must get inode of directory entry to verify it's a 654 * regular file, or empty directory. 655 */ 656 if ((flag == CREATE && lockparent) && *cp == 0) { 657 if (access(dp, IWRITE)) 658 goto bad; 659 ndp->ni_pdir = dp; /* for dirrewrite() */ 660 /* 661 * Careful about locking second inode. 662 * This can only occur if the target is ".". 663 */ 664 if (dp->i_number == ndp->ni_dent.d_ino) { 665 u.u_error = EISDIR; /* XXX */ 666 goto bad; 667 } 668 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 669 if (dp == NULL) { 670 iput(ndp->ni_pdir); 671 goto bad; 672 } 673 FREE(nbp, M_NAMEI); 674 return (dp); 675 } 676 677 /* 678 * Check for symbolic link, which may require us to massage the 679 * name before we continue translation. We do not `iput' the 680 * directory because we may need it again if the symbolic link 681 * is relative to the current directory. Instead we save it 682 * unlocked as "pdp". We must get the target inode before unlocking 683 * the directory to insure that the inode will not be removed 684 * before we get it. We prevent deadlock by always fetching 685 * inodes from the root, moving down the directory tree. Thus 686 * when following backward pointers ".." we must unlock the 687 * parent directory before getting the requested directory. 688 * There is a potential race condition here if both the current 689 * and parent directories are removed before the `iget' for the 690 * inode associated with ".." returns. We hope that this occurs 691 * infrequently since we cannot avoid this race condition without 692 * implementing a sophisticated deadlock detection algorithm. 693 * Note also that this simple deadlock detection scheme will not 694 * work if the file system has any hard links other than ".." 695 * that point backwards in the directory structure. 696 */ 697 pdp = dp; 698 if (isdotdot) { 699 IUNLOCK(pdp); /* race to get the inode */ 700 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 701 if (dp == NULL) 702 goto bad2; 703 } else if (dp->i_number == ndp->ni_dent.d_ino) { 704 dp->i_count++; /* we want ourself, ie "." */ 705 } else { 706 dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino); 707 IUNLOCK(pdp); 708 if (dp == NULL) 709 goto bad2; 710 } 711 712 /* 713 * Insert name into cache if appropriate. 714 */ 715 if (makeentry) { 716 if (ncp != NULL) 717 panic("namei: duplicating cache"); 718 /* 719 * Free the cache slot at head of lru chain. 720 */ 721 if (ncp = nchhead) { 722 /* remove from lru chain */ 723 *ncp->nc_prev = ncp->nc_nxt; 724 if (ncp->nc_nxt) 725 ncp->nc_nxt->nc_prev = ncp->nc_prev; 726 else 727 nchtail = ncp->nc_prev; 728 remque(ncp); /* remove from old hash chain */ 729 /* grab the inode we just found */ 730 ncp->nc_ip = dp; 731 /* fill in cache info */ 732 ncp->nc_ino = pdp->i_number; /* parents inum */ 733 ncp->nc_dev = pdp->i_dev; /* & device */ 734 ncp->nc_idev = dp->i_dev; /* our device */ 735 ncp->nc_id = dp->i_id; /* identifier */ 736 ncp->nc_nlen = ndp->ni_dent.d_namlen; 737 bcopy(ndp->ni_dent.d_name, ncp->nc_name, 738 (unsigned)ncp->nc_nlen); 739 /* link at end of lru chain */ 740 ncp->nc_nxt = NULL; 741 ncp->nc_prev = nchtail; 742 *nchtail = ncp; 743 nchtail = &ncp->nc_nxt; 744 /* and insert on hash chain */ 745 insque(ncp, nhp); 746 } 747 } 748 749 haveino: 750 fs = dp->i_fs; 751 752 /* 753 * Check for symbolic link 754 */ 755 if ((dp->i_mode & IFMT) == IFLNK && 756 ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) { 757 u_int pathlen = strlen(cp) + 1; 758 759 if (dp->i_size + pathlen >= MAXPATHLEN - 1) { 760 u.u_error = ENAMETOOLONG; 761 goto bad2; 762 } 763 if (++nlink > MAXSYMLINKS) { 764 u.u_error = ELOOP; 765 goto bad2; 766 } 767 ovbcopy(cp, nbp + dp->i_size, pathlen); 768 u.u_error = 769 rdwri(UIO_READ, dp, nbp, (int)dp->i_size, 770 (off_t)0, 1, (int *)0); 771 if (u.u_error) 772 goto bad2; 773 cp = nbp; 774 iput(dp); 775 if (*cp == '/') { 776 irele(pdp); 777 while (*cp == '/') 778 cp++; 779 if ((dp = u.u_rdir) == NULL) 780 dp = rootdir; 781 ILOCK(dp); 782 dp->i_count++; 783 } else { 784 dp = pdp; 785 ILOCK(dp); 786 } 787 fs = dp->i_fs; 788 goto dirloop; 789 } 790 791 /* 792 * Not a symbolic link. If more pathname, 793 * continue at next component, else return. 794 */ 795 if (*cp == '/') { 796 while (*cp == '/') 797 cp++; 798 irele(pdp); 799 goto dirloop; 800 } 801 FREE(nbp, M_NAMEI); 802 if (lockparent) 803 ndp->ni_pdir = pdp; 804 else 805 irele(pdp); 806 return (dp); 807 bad2: 808 irele(pdp); 809 bad: 810 if (bp) 811 brelse(bp); 812 if (dp) 813 iput(dp); 814 FREE(nbp, M_NAMEI); 815 return (NULL); 816 } 817 818 819 dirbad(ip, offset, how) 820 struct inode *ip; 821 off_t offset; 822 char *how; 823 { 824 825 printf("%s: bad dir ino %d at offset %d: %s\n", 826 ip->i_fs->fs_fsmnt, ip->i_number, offset, how); 827 } 828 829 /* 830 * Do consistency checking on a directory entry: 831 * record length must be multiple of 4 832 * entry must fit in rest of its DIRBLKSIZ block 833 * record must be large enough to contain entry 834 * name is not longer than MAXNAMLEN 835 * name must be as long as advertised, and null terminated 836 */ 837 dirbadentry(ep, entryoffsetinblock) 838 register struct direct *ep; 839 int entryoffsetinblock; 840 { 841 register int i; 842 843 if ((ep->d_reclen & 0x3) != 0 || 844 ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) || 845 ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN) 846 return (1); 847 for (i = 0; i < ep->d_namlen; i++) 848 if (ep->d_name[i] == '\0') 849 return (1); 850 return (ep->d_name[i]); 851 } 852 853 /* 854 * Write a directory entry after a call to namei, using the parameters 855 * which it left in the u. area. The argument ip is the inode which 856 * the new directory entry will refer to. The u. area field ndp->ni_pdir is 857 * a pointer to the directory to be written, which was left locked by 858 * namei. Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate 859 * how the space for the new entry is to be gotten. 860 */ 861 direnter(ip, ndp) 862 struct inode *ip; 863 register struct nameidata *ndp; 864 { 865 register struct direct *ep, *nep; 866 register struct inode *dp = ndp->ni_pdir; 867 struct buf *bp; 868 int loc, spacefree, error = 0; 869 u_int dsize; 870 int newentrysize; 871 char *dirbuf; 872 873 ndp->ni_dent.d_ino = ip->i_number; 874 newentrysize = DIRSIZ(&ndp->ni_dent); 875 if (ndp->ni_count == 0) { 876 /* 877 * If ndp->ni_count is 0, then namei could find no space in the 878 * directory. In this case ndp->ni_offset will be on a directory 879 * block boundary and we will write the new entry into a fresh 880 * block. 881 */ 882 if (ndp->ni_offset&(DIRBLKSIZ-1)) 883 panic("wdir: newblk"); 884 ndp->ni_dent.d_reclen = DIRBLKSIZ; 885 error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 886 newentrysize, ndp->ni_offset, 1, (int *)0); 887 if (DIRBLKSIZ > dp->i_fs->fs_fsize) 888 panic("wdir: blksize"); /* XXX - should grow w/bmap() */ 889 else 890 dp->i_size = roundup(dp->i_size, DIRBLKSIZ); 891 iput(dp); 892 return (error); 893 } 894 895 /* 896 * If ndp->ni_count is non-zero, then namei found space for the new 897 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count. 898 * in the directory. To use this space, we may have to compact 899 * the entries located there, by copying them together towards 900 * the beginning of the block, leaving the free space in 901 * one usable chunk at the end. 902 */ 903 904 /* 905 * Increase size of directory if entry eats into new space. 906 * This should never push the size past a new multiple of 907 * DIRBLKSIZE. 908 * 909 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN. 910 */ 911 if (ndp->ni_offset + ndp->ni_count > dp->i_size) 912 dp->i_size = ndp->ni_offset + ndp->ni_count; 913 /* 914 * Get the block containing the space for the new directory 915 * entry. Should return error by result instead of u.u_error. 916 */ 917 bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf); 918 if (bp == 0) { 919 iput(dp); 920 return (u.u_error); 921 } 922 /* 923 * Find space for the new entry. In the simple case, the 924 * entry at offset base will have the space. If it does 925 * not, then namei arranged that compacting the region 926 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space. 927 */ 928 ep = (struct direct *)dirbuf; 929 dsize = DIRSIZ(ep); 930 spacefree = ep->d_reclen - dsize; 931 for (loc = ep->d_reclen; loc < ndp->ni_count; ) { 932 nep = (struct direct *)(dirbuf + loc); 933 if (ep->d_ino) { 934 /* trim the existing slot */ 935 ep->d_reclen = dsize; 936 ep = (struct direct *)((char *)ep + dsize); 937 } else { 938 /* overwrite; nothing there; header is ours */ 939 spacefree += dsize; 940 } 941 dsize = DIRSIZ(nep); 942 spacefree += nep->d_reclen - dsize; 943 loc += nep->d_reclen; 944 bcopy((caddr_t)nep, (caddr_t)ep, dsize); 945 } 946 /* 947 * Update the pointer fields in the previous entry (if any), 948 * copy in the new entry, and write out the block. 949 */ 950 if (ep->d_ino == 0) { 951 if (spacefree + dsize < newentrysize) 952 panic("wdir: compact1"); 953 ndp->ni_dent.d_reclen = spacefree + dsize; 954 } else { 955 if (spacefree < newentrysize) 956 panic("wdir: compact2"); 957 ndp->ni_dent.d_reclen = spacefree; 958 ep->d_reclen = dsize; 959 ep = (struct direct *)((char *)ep + dsize); 960 } 961 bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize); 962 bwrite(bp); 963 dp->i_flag |= IUPD|ICHG; 964 if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size) 965 itrunc(dp, (u_long)ndp->ni_endoff); 966 iput(dp); 967 return (error); 968 } 969 970 /* 971 * Remove a directory entry after a call to namei, using the 972 * parameters which it left in the u. area. The u. entry 973 * ni_offset contains the offset into the directory of the 974 * entry to be eliminated. The ni_count field contains the 975 * size of the previous record in the directory. If this 976 * is 0, the first entry is being deleted, so we need only 977 * zero the inode number to mark the entry as free. If the 978 * entry isn't the first in the directory, we must reclaim 979 * the space of the now empty record by adding the record size 980 * to the size of the previous entry. 981 */ 982 dirremove(ndp) 983 register struct nameidata *ndp; 984 { 985 register struct inode *dp = ndp->ni_pdir; 986 register struct buf *bp; 987 struct direct *ep; 988 989 if (ndp->ni_count == 0) { 990 /* 991 * First entry in block: set d_ino to zero. 992 */ 993 ndp->ni_dent.d_ino = 0; 994 (void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 995 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 996 } else { 997 /* 998 * Collapse new free space into previous entry. 999 */ 1000 bp = blkatoff(dp, ndp->ni_offset - ndp->ni_count, (char **)&ep); 1001 if (bp == 0) 1002 return (0); 1003 ep->d_reclen += ndp->ni_dent.d_reclen; 1004 bwrite(bp); 1005 dp->i_flag |= IUPD|ICHG; 1006 } 1007 return (1); 1008 } 1009 1010 /* 1011 * Rewrite an existing directory entry to point at the inode 1012 * supplied. The parameters describing the directory entry are 1013 * set up by a call to namei. 1014 */ 1015 dirrewrite(dp, ip, ndp) 1016 struct inode *dp, *ip; 1017 struct nameidata *ndp; 1018 { 1019 1020 ndp->ni_dent.d_ino = ip->i_number; 1021 u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent, 1022 (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0); 1023 iput(dp); 1024 } 1025 1026 /* 1027 * Return buffer with contents of block "offset" 1028 * from the beginning of directory "ip". If "res" 1029 * is non-zero, fill it in with a pointer to the 1030 * remaining space in the directory. 1031 */ 1032 struct buf * 1033 blkatoff(ip, offset, res) 1034 struct inode *ip; 1035 off_t offset; 1036 char **res; 1037 { 1038 register struct fs *fs = ip->i_fs; 1039 daddr_t lbn = lblkno(fs, offset); 1040 int bsize = blksize(fs, ip, lbn); 1041 register struct buf *bp; 1042 daddr_t bn; 1043 1044 bn = bmap(ip, lbn, B_READ, bsize); 1045 if (u.u_error) 1046 return (0); 1047 if (bn == (daddr_t)-1) { 1048 dirbad(ip, offset, "hole in dir"); 1049 return (0); 1050 } 1051 bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize); 1052 if (bp->b_flags & B_ERROR) { 1053 brelse(bp); 1054 return (0); 1055 } 1056 if (res) 1057 *res = bp->b_un.b_addr + blkoff(fs, offset); 1058 return (bp); 1059 } 1060 1061 /* 1062 * Check if a directory is empty or not. 1063 * Inode supplied must be locked. 1064 * 1065 * Using a struct dirtemplate here is not precisely 1066 * what we want, but better than using a struct direct. 1067 * 1068 * NB: does not handle corrupted directories. 1069 */ 1070 dirempty(ip, parentino) 1071 register struct inode *ip; 1072 ino_t parentino; 1073 { 1074 register off_t off; 1075 struct dirtemplate dbuf; 1076 register struct direct *dp = (struct direct *)&dbuf; 1077 int error, count; 1078 #define MINDIRSIZ (sizeof (struct dirtemplate) / 2) 1079 1080 for (off = 0; off < ip->i_size; off += dp->d_reclen) { 1081 error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ, 1082 off, 1, &count); 1083 /* 1084 * Since we read MINDIRSIZ, residual must 1085 * be 0 unless we're at end of file. 1086 */ 1087 if (error || count != 0) 1088 return (0); 1089 /* avoid infinite loops */ 1090 if (dp->d_reclen == 0) 1091 return (0); 1092 /* skip empty entries */ 1093 if (dp->d_ino == 0) 1094 continue; 1095 /* accept only "." and ".." */ 1096 if (dp->d_namlen > 2) 1097 return (0); 1098 if (dp->d_name[0] != '.') 1099 return (0); 1100 /* 1101 * At this point d_namlen must be 1 or 2. 1102 * 1 implies ".", 2 implies ".." if second 1103 * char is also "." 1104 */ 1105 if (dp->d_namlen == 1) 1106 continue; 1107 if (dp->d_name[1] == '.' && dp->d_ino == parentino) 1108 continue; 1109 return (0); 1110 } 1111 return (1); 1112 } 1113 1114 /* 1115 * Check if source directory is in the path of the target directory. 1116 * Target is supplied locked, source is unlocked. 1117 * The target is always iput() before returning. 1118 */ 1119 checkpath(source, target) 1120 struct inode *source, *target; 1121 { 1122 struct dirtemplate dirbuf; 1123 register struct inode *ip; 1124 int error = 0; 1125 1126 ip = target; 1127 if (ip->i_number == source->i_number) { 1128 error = EEXIST; 1129 goto out; 1130 } 1131 if (ip->i_number == ROOTINO) 1132 goto out; 1133 1134 for (;;) { 1135 if ((ip->i_mode&IFMT) != IFDIR) { 1136 error = ENOTDIR; 1137 break; 1138 } 1139 error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf, 1140 sizeof (struct dirtemplate), (off_t)0, 1, (int *)0); 1141 if (error != 0) 1142 break; 1143 if (dirbuf.dotdot_namlen != 2 || 1144 dirbuf.dotdot_name[0] != '.' || 1145 dirbuf.dotdot_name[1] != '.') { 1146 error = ENOTDIR; 1147 break; 1148 } 1149 if (dirbuf.dotdot_ino == source->i_number) { 1150 error = EINVAL; 1151 break; 1152 } 1153 if (dirbuf.dotdot_ino == ROOTINO) 1154 break; 1155 iput(ip); 1156 ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino); 1157 if (ip == NULL) { 1158 error = u.u_error; 1159 break; 1160 } 1161 } 1162 1163 out: 1164 if (error == ENOTDIR) 1165 printf("checkpath: .. not a directory\n"); 1166 if (ip != NULL) 1167 iput(ip); 1168 return (error); 1169 } 1170 1171 /* 1172 * Name cache initialization, from main() when we are booting 1173 */ 1174 nchinit() 1175 { 1176 register union nchash *nchp; 1177 register struct namecache *ncp; 1178 1179 nchhead = 0; 1180 nchtail = &nchhead; 1181 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) { 1182 ncp->nc_forw = ncp; /* hash chain */ 1183 ncp->nc_back = ncp; 1184 ncp->nc_nxt = NULL; /* lru chain */ 1185 *nchtail = ncp; 1186 ncp->nc_prev = nchtail; 1187 nchtail = &ncp->nc_nxt; 1188 /* all else is zero already */ 1189 } 1190 for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) { 1191 nchp->nch_head[0] = nchp; 1192 nchp->nch_head[1] = nchp; 1193 } 1194 } 1195 1196 /* 1197 * Cache flush, called when filesys is umounted to 1198 * remove entries that would now be invalid 1199 * 1200 * The line "nxtcp = nchhead" near the end is to avoid potential problems 1201 * if the cache lru chain is modified while we are dumping the 1202 * inode. This makes the algorithm O(n^2), but do you think I care? 1203 */ 1204 nchinval(dev) 1205 register dev_t dev; 1206 { 1207 register struct namecache *ncp, *nxtcp; 1208 1209 for (ncp = nchhead; ncp; ncp = nxtcp) { 1210 nxtcp = ncp->nc_nxt; 1211 if (ncp->nc_ip == NULL || 1212 (ncp->nc_idev != dev && ncp->nc_dev != dev)) 1213 continue; 1214 /* free the resources we had */ 1215 ncp->nc_idev = NODEV; 1216 ncp->nc_dev = NODEV; 1217 ncp->nc_id = NULL; 1218 ncp->nc_ino = 0; 1219 ncp->nc_ip = NULL; 1220 remque(ncp); /* remove entry from its hash chain */ 1221 ncp->nc_forw = ncp; /* and make a dummy one */ 1222 ncp->nc_back = ncp; 1223 /* delete this entry from LRU chain */ 1224 *ncp->nc_prev = nxtcp; 1225 if (nxtcp) 1226 nxtcp->nc_prev = ncp->nc_prev; 1227 else 1228 nchtail = ncp->nc_prev; 1229 /* cause rescan of list, it may have altered */ 1230 nxtcp = nchhead; 1231 /* put the now-free entry at head of LRU */ 1232 ncp->nc_nxt = nxtcp; 1233 ncp->nc_prev = &nchhead; 1234 nxtcp->nc_prev = &ncp->nc_nxt; 1235 nchhead = ncp; 1236 } 1237 } 1238 1239 /* 1240 * Name cache invalidation of all entries. 1241 */ 1242 cacheinvalall() 1243 { 1244 register struct namecache *ncp; 1245 1246 for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) 1247 ncp->nc_id = 0; 1248 } 1249