xref: /original-bsd/sys/ufs/ffs/ufs_lookup.c (revision 05e388f5)
1 /*
2  * Copyright (c) 1982, 1986 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)ufs_lookup.c	7.1 (Berkeley) 06/05/86
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "inode.h"
12 #include "fs.h"
13 #include "mount.h"
14 #include "dir.h"
15 #include "user.h"
16 #include "buf.h"
17 #include "conf.h"
18 #include "uio.h"
19 #include "kernel.h"
20 
21 struct	buf *blkatoff();
22 struct	buf *freenamebuf;
23 int	dirchk = 0;
24 
25 /*
26  * Structures associated with name cacheing.
27  */
28 #define	NCHHASH		32	/* size of hash table */
29 
30 #if	((NCHHASH)&((NCHHASH)-1)) != 0
31 #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH))
32 #else
33 #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1))
34 #endif
35 
36 union nchash {
37 	union	nchash *nch_head[2];
38 	struct	namecache *nch_chain[2];
39 } nchash[NCHHASH];
40 #define	nch_forw	nch_chain[0]
41 #define	nch_back	nch_chain[1]
42 
43 struct	namecache *nchhead, **nchtail;	/* LRU chain pointers */
44 struct	nchstats nchstats;		/* cache effectiveness statistics */
45 
46 /*
47  * Convert a pathname into a pointer to a locked inode.
48  * This is a very central and rather complicated routine.
49  * If the file system is not maintained in a strict tree hierarchy,
50  * this can result in a deadlock situation (see comments in code below).
51  *
52  * The flag argument is LOOKUP, CREATE, or DELETE depending on whether
53  * the name is to be looked up, created, or deleted. When CREATE or
54  * DELETE is specified, information usable in creating or deleteing a
55  * directory entry is also calculated. If flag has LOCKPARENT or'ed
56  * into it and the target of the pathname exists, namei returns both
57  * the target and its parent directory locked. When creating and
58  * LOCKPARENT is specified, the target may not be ".".  When deleting
59  * and LOCKPARENT is specified, the target may be ".", but the caller
60  * must check to insure it does an irele and iput instead of two iputs.
61  *
62  * The FOLLOW flag is set when symbolic links are to be followed
63  * when they occur at the end of the name translation process.
64  * Symbolic links are always followed for all other pathname
65  * components other than the last.
66  *
67  * The segflg defines whether the name is to be copied from user
68  * space or kernel space.
69  *
70  * Name caching works as follows:
71  *
72  * Names found by directory scans are retained in a cache
73  * for future reference.  It is managed LRU, so frequently
74  * used names will hang around.  Cache is indexed by hash value
75  * obtained from (ino,dev,name) where ino & dev refer to the
76  * directory containing name.
77  *
78  * For simplicity (and economy of storage), names longer than
79  * a maximum length of NCHNAMLEN are not cached; they occur
80  * infrequently in any case, and are almost never of interest.
81  *
82  * Upon reaching the last segment of a path, if the reference
83  * is for DELETE, or NOCACHE is set (rewrite), and the
84  * name is located in the cache, it will be dropped.
85  *
86  * Overall outline of namei:
87  *
88  *	copy in name
89  *	get starting directory
90  * dirloop:
91  *	check accessibility of directory
92  * dirloop2:
93  *	copy next component of name to ndp->ni_dent
94  *	handle degenerate case where name is null string
95  *	look for name in cache, if found, then if at end of path
96  *	  and deleting or creating, drop it, else to haveino
97  *	search for name in directory, to found or notfound
98  * notfound:
99  *	if creating, return locked directory, leaving info on avail. slots
100  *	else return error
101  * found:
102  *	if at end of path and deleting, return information to allow delete
103  *	if at end of path and rewriting (CREATE and LOCKPARENT), lock target
104  *	  inode and return info to allow rewrite
105  *	if .. and on mounted filesys, look in mount table for parent
106  *	if not at end, add name to cache; if at end and neither creating
107  *	  nor deleting, add name to cache
108  * haveino:
109  *	if symbolic link, massage name in buffer and continue at dirloop
110  *	if more components of name, do next level at dirloop
111  *	return the answer as locked inode
112  *
113  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode,
114  *	 but unlocked.
115  */
116 struct inode *
117 namei(ndp)
118 	register struct nameidata *ndp;
119 {
120 	register char *cp;		/* pointer into pathname argument */
121 /* these variables refer to things which must be freed or unlocked */
122 	register struct inode *dp = 0;	/* the directory we are searching */
123 	register struct namecache *ncp;	/* cache slot for entry */
124 	register struct fs *fs;		/* file system that directory is in */
125 	register struct buf *bp = 0;	/* a buffer of directory entries */
126 	register struct direct *ep;	/* the current directory entry */
127 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
128 	register struct buf *nbp;	/* buffer storing path name argument */
129 /* these variables hold information about the search for a slot */
130 	enum {NONE, COMPACT, FOUND} slotstatus;
131 	int slotoffset = -1;		/* offset of area with free space */
132 	int slotsize;			/* size of area at slotoffset */
133 	int slotfreespace;		/* amount of space free in slot */
134 	int slotneeded;			/* size of the entry we're seeking */
135 /* */
136 	int numdirpasses;		/* strategy for directory search */
137 	int endsearch;			/* offset to end directory search */
138 	int prevoff;			/* ndp->ni_offset of previous entry */
139 	int nlink = 0;			/* number of symbolic links taken */
140 	struct inode *pdp;		/* saved dp during symlink work */
141 	int error, i;
142 	int lockparent;
143 	int docache;			/* == 0 do not cache last component */
144 	int makeentry;			/* != 0 if name to be added to cache */
145 	unsigned hash;			/* value of name hash for entry */
146 	union nchash *nhp;		/* cache chain head for entry */
147 	int isdotdot;			/* != 0 if current name is ".." */
148 	int flag;			/* op ie, LOOKUP, CREATE, or DELETE */
149 	off_t enduseful;		/* pointer past last used dir slot */
150 
151 	lockparent = ndp->ni_nameiop & LOCKPARENT;
152 	docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE;
153 	flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW);
154 	if (flag == DELETE || lockparent)
155 		docache = 0;
156 	/*
157 	 * Get a buffer for the name to be translated, and copy the
158 	 * name into the buffer.
159 	 */
160 	nbp = freenamebuf;
161 	if (nbp == NULL)
162 		nbp = geteblk(MAXPATHLEN);
163 	else
164 		freenamebuf = nbp->av_forw;
165 	if (ndp->ni_segflg == UIO_SYSSPACE)
166 		error = copystr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN,
167 		    (u_int *)0);
168 	else
169 		error = copyinstr(ndp->ni_dirp, nbp->b_un.b_addr, MAXPATHLEN,
170 		    (u_int *)0);
171 	if (error) {
172 		u.u_error = error;
173 		goto bad;
174 	}
175 
176 	/*
177 	 * Get starting directory.
178 	 */
179 	cp = nbp->b_un.b_addr;
180 	if (*cp == '/') {
181 		while (*cp == '/')
182 			cp++;
183 		if ((dp = u.u_rdir) == NULL)
184 			dp = rootdir;
185 	} else
186 		dp = u.u_cdir;
187 	fs = dp->i_fs;
188 	ILOCK(dp);
189 	dp->i_count++;
190 	ndp->ni_pdir = (struct inode *)0xc0000000;		/* illegal */
191 	ndp->ni_endoff = 0;
192 
193 	/*
194 	 * We come to dirloop to search a new directory.
195 	 * The directory must be locked so that it can be
196 	 * iput, and fs must be already set to dp->i_fs.
197 	 */
198 dirloop:
199 	/*
200 	 * Check accessiblity of directory.
201 	 */
202 	if ((dp->i_mode&IFMT) != IFDIR) {
203 		u.u_error = ENOTDIR;
204 		goto bad;
205 	}
206 	if (access(dp, IEXEC))
207 		goto bad;
208 
209 dirloop2:
210 	/*
211 	 * Copy next component of name to ndp->ni_dent.
212 	 */
213 	hash = 0;
214 	for (i = 0; *cp != 0 && *cp != '/'; cp++) {
215 		if (i >= MAXNAMLEN) {
216 			u.u_error = ENAMETOOLONG;
217 			goto bad;
218 		}
219 		if (*cp & 0200)
220 			if ((*cp&0377) == ('/'|0200) || flag != DELETE) {
221 				u.u_error = EINVAL;
222 				goto bad;
223 			}
224 		ndp->ni_dent.d_name[i++] = *cp;
225 		hash += (unsigned char)*cp * i;
226 	}
227 	ndp->ni_dent.d_namlen = i;
228 	ndp->ni_dent.d_name[i] = '\0';
229 	isdotdot = (i == 2 &&
230 		ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.');
231 	makeentry = 1;
232 	if (*cp == '\0' && docache == 0)
233 		makeentry = 0;
234 
235 	/*
236 	 * Check for degenerate name (e.g. / or "")
237 	 * which is a way of talking about a directory,
238 	 * e.g. like "/." or ".".
239 	 */
240 	if (ndp->ni_dent.d_name[0] == '\0') {
241 		if (flag != LOOKUP || lockparent) {
242 			u.u_error = EISDIR;
243 			goto bad;
244 		}
245 		nbp->av_forw = freenamebuf;
246 		freenamebuf = nbp;
247 		return (dp);
248 	}
249 
250 	/*
251 	 * We now have a segment name to search for, and a directory to search.
252 	 *
253 	 * Before tediously performing a linear scan of the directory,
254 	 * check the name cache to see if the directory/name pair
255 	 * we are looking for is known already.  We don't do this
256 	 * if the segment name is long, simply so the cache can avoid
257 	 * holding long names (which would either waste space, or
258 	 * add greatly to the complexity).
259 	 */
260 	if (ndp->ni_dent.d_namlen > NCHNAMLEN) {
261 		nchstats.ncs_long++;
262 		makeentry = 0;
263 	} else {
264 		nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)];
265 		for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp;
266 		    ncp = ncp->nc_forw) {
267 			if (ncp->nc_ino == dp->i_number &&
268 			    ncp->nc_dev == dp->i_dev &&
269 			    ncp->nc_nlen == ndp->ni_dent.d_namlen &&
270 			    !bcmp(ncp->nc_name, ndp->ni_dent.d_name,
271 				(unsigned)ncp->nc_nlen))
272 				break;
273 		}
274 		if (ncp == (struct namecache *)nhp) {
275 			nchstats.ncs_miss++;
276 			ncp = NULL;
277 		} else {
278 			if (ncp->nc_id != ncp->nc_ip->i_id)
279 				nchstats.ncs_falsehits++;
280 			else if (!makeentry)
281 				nchstats.ncs_badhits++;
282 			else {
283 				/*
284 				 * move this slot to end of LRU
285 				 * chain, if not already there
286 				 */
287 				if (ncp->nc_nxt) {
288 					/* remove from LRU chain */
289 					*ncp->nc_prev = ncp->nc_nxt;
290 					ncp->nc_nxt->nc_prev = ncp->nc_prev;
291 
292 					/* and replace at end of it */
293 					ncp->nc_nxt = NULL;
294 					ncp->nc_prev = nchtail;
295 					*nchtail = ncp;
296 					nchtail = &ncp->nc_nxt;
297 				}
298 
299 				/*
300 				 * Get the next inode in the path.
301 				 * See comment above other `IUNLOCK' code for
302 				 * an explaination of the locking protocol.
303 				 */
304 				pdp = dp;
305 				if (!isdotdot || dp != u.u_rdir)
306 					dp = ncp->nc_ip;
307 				if (dp == NULL)
308 					panic("namei: null cache ino");
309 				if (pdp == dp)
310 					dp->i_count++;
311 				else if (isdotdot) {
312 					IUNLOCK(pdp);
313 					igrab(dp);
314 				} else {
315 					igrab(dp);
316 					IUNLOCK(pdp);
317 				}
318 
319 				/*
320 				 * Verify that the inode that we got
321 				 * did not change while we were waiting
322 				 * for it to be locked.
323 				 */
324 				if (ncp->nc_id != ncp->nc_ip->i_id) {
325 					iput(dp);
326 					ILOCK(pdp);
327 					dp = pdp;
328 					nchstats.ncs_falsehits++;
329 				} else {
330 					ndp->ni_dent.d_ino = dp->i_number;
331 					/* ni_dent.d_reclen is garbage ... */
332 					nchstats.ncs_goodhits++;
333 					goto haveino;
334 				}
335 			}
336 
337 			/*
338 			 * Last component and we are renaming or deleting,
339 			 * the cache entry is invalid, or otherwise don't
340 			 * want cache entry to exist.
341 			 */
342 			/* remove from LRU chain */
343 			*ncp->nc_prev = ncp->nc_nxt;
344 			if (ncp->nc_nxt)
345 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
346 			else
347 				nchtail = ncp->nc_prev;
348 			remque(ncp);		/* remove from hash chain */
349 			/* insert at head of LRU list (first to grab) */
350 			ncp->nc_nxt = nchhead;
351 			ncp->nc_prev = &nchhead;
352 			nchhead->nc_prev = &ncp->nc_nxt;
353 			nchhead = ncp;
354 			/* and make a dummy hash chain */
355 			ncp->nc_forw = ncp;
356 			ncp->nc_back = ncp;
357 			ncp = NULL;
358 		}
359 	}
360 
361 	/*
362 	 * Suppress search for slots unless creating
363 	 * file and at end of pathname, in which case
364 	 * we watch for a place to put the new file in
365 	 * case it doesn't already exist.
366 	 */
367 	slotstatus = FOUND;
368 	if (flag == CREATE && *cp == 0) {
369 		slotstatus = NONE;
370 		slotfreespace = 0;
371 		slotneeded = DIRSIZ(&ndp->ni_dent);
372 	}
373 	/*
374 	 * If this is the same directory that this process
375 	 * previously searched, pick up where we last left off.
376 	 * We cache only lookups as these are the most common
377 	 * and have the greatest payoff. Caching CREATE has little
378 	 * benefit as it usually must search the entire directory
379 	 * to determine that the entry does not exist. Caching the
380 	 * location of the last DELETE has not reduced profiling time
381 	 * and hence has been removed in the interest of simplicity.
382 	 */
383 	if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber ||
384 	    dp->i_dev != u.u_ncache.nc_dev) {
385 		ndp->ni_offset = 0;
386 		numdirpasses = 1;
387 	} else {
388 		if (u.u_ncache.nc_prevoffset > dp->i_size)
389 			u.u_ncache.nc_prevoffset = 0;
390 		ndp->ni_offset = u.u_ncache.nc_prevoffset;
391 		entryoffsetinblock = blkoff(fs, ndp->ni_offset);
392 		if (entryoffsetinblock != 0) {
393 			bp = blkatoff(dp, ndp->ni_offset, (char **)0);
394 			if (bp == 0)
395 				goto bad;
396 		}
397 		numdirpasses = 2;
398 		nchstats.ncs_2passes++;
399 	}
400 	endsearch = roundup(dp->i_size, DIRBLKSIZ);
401 	enduseful = 0;
402 
403 searchloop:
404 	while (ndp->ni_offset < endsearch) {
405 		/*
406 		 * If offset is on a block boundary,
407 		 * read the next directory block.
408 		 * Release previous if it exists.
409 		 */
410 		if (blkoff(fs, ndp->ni_offset) == 0) {
411 			if (bp != NULL)
412 				brelse(bp);
413 			bp = blkatoff(dp, ndp->ni_offset, (char **)0);
414 			if (bp == 0)
415 				goto bad;
416 			entryoffsetinblock = 0;
417 		}
418 		/*
419 		 * If still looking for a slot, and at a DIRBLKSIZE
420 		 * boundary, have to start looking for free space again.
421 		 */
422 		if (slotstatus == NONE &&
423 		    (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) {
424 			slotoffset = -1;
425 			slotfreespace = 0;
426 		}
427 		/*
428 		 * Get pointer to next entry.
429 		 * Full validation checks are slow, so we only check
430 		 * enough to insure forward progress through the
431 		 * directory. Complete checks can be run by patching
432 		 * "dirchk" to be true.
433 		 */
434 		ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
435 		if (ep->d_reclen == 0 ||
436 		    dirchk && dirbadentry(ep, entryoffsetinblock)) {
437 			dirbad(dp, ndp->ni_offset, "mangled entry");
438 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
439 			ndp->ni_offset += i;
440 			entryoffsetinblock += i;
441 			continue;
442 		}
443 
444 		/*
445 		 * If an appropriate sized slot has not yet been found,
446 		 * check to see if one is available. Also accumulate space
447 		 * in the current block so that we can determine if
448 		 * compaction is viable.
449 		 */
450 		if (slotstatus != FOUND) {
451 			int size = ep->d_reclen;
452 
453 			if (ep->d_ino != 0)
454 				size -= DIRSIZ(ep);
455 			if (size > 0) {
456 				if (size >= slotneeded) {
457 					slotstatus = FOUND;
458 					slotoffset = ndp->ni_offset;
459 					slotsize = ep->d_reclen;
460 				} else if (slotstatus == NONE) {
461 					slotfreespace += size;
462 					if (slotoffset == -1)
463 						slotoffset = ndp->ni_offset;
464 					if (slotfreespace >= slotneeded) {
465 						slotstatus = COMPACT;
466 						slotsize = ndp->ni_offset +
467 						      ep->d_reclen - slotoffset;
468 					}
469 				}
470 			}
471 		}
472 
473 		/*
474 		 * Check for a name match.
475 		 */
476 		if (ep->d_ino) {
477 			if (ep->d_namlen == ndp->ni_dent.d_namlen &&
478 			    !bcmp(ndp->ni_dent.d_name, ep->d_name,
479 				(unsigned)ep->d_namlen))
480 				goto found;
481 		}
482 		prevoff = ndp->ni_offset;
483 		ndp->ni_offset += ep->d_reclen;
484 		entryoffsetinblock += ep->d_reclen;
485 		if (ep->d_ino)
486 			enduseful = ndp->ni_offset;
487 	}
488 /* notfound: */
489 	/*
490 	 * If we started in the middle of the directory and failed
491 	 * to find our target, we must check the beginning as well.
492 	 */
493 	if (numdirpasses == 2) {
494 		numdirpasses--;
495 		ndp->ni_offset = 0;
496 		endsearch = u.u_ncache.nc_prevoffset;
497 		goto searchloop;
498 	}
499 	/*
500 	 * If creating, and at end of pathname and current
501 	 * directory has not been removed, then can consider
502 	 * allowing file to be created.
503 	 */
504 	if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) {
505 		/*
506 		 * Access for write is interpreted as allowing
507 		 * creation of files in the directory.
508 		 */
509 		if (access(dp, IWRITE))
510 			goto bad;
511 		/*
512 		 * Return an indication of where the new directory
513 		 * entry should be put.  If we didn't find a slot,
514 		 * then set ndp->ni_count to 0 indicating that the new
515 		 * slot belongs at the end of the directory. If we found
516 		 * a slot, then the new entry can be put in the range
517 		 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
518 		 */
519 		if (slotstatus == NONE) {
520 			ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
521 			ndp->ni_count = 0;
522 			enduseful = ndp->ni_offset;
523 		} else {
524 			ndp->ni_offset = slotoffset;
525 			ndp->ni_count = slotsize;
526 			if (enduseful < slotoffset + slotsize)
527 				enduseful = slotoffset + slotsize;
528 		}
529 		ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
530 		dp->i_flag |= IUPD|ICHG;
531 		if (bp)
532 			brelse(bp);
533 		nbp->av_forw = freenamebuf;
534 		freenamebuf = nbp;
535 		/*
536 		 * We return with the directory locked, so that
537 		 * the parameters we set up above will still be
538 		 * valid if we actually decide to do a direnter().
539 		 * We return NULL to indicate that the entry doesn't
540 		 * currently exist, leaving a pointer to the (locked)
541 		 * directory inode in ndp->ni_pdir.
542 		 */
543 		ndp->ni_pdir = dp;
544 		return (NULL);
545 	}
546 	u.u_error = ENOENT;
547 	goto bad;
548 found:
549 	if (numdirpasses == 2)
550 		nchstats.ncs_pass2++;
551 	/*
552 	 * Check that directory length properly reflects presence
553 	 * of this entry.
554 	 */
555 	if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
556 		dirbad(dp, ndp->ni_offset, "i_size too small");
557 		dp->i_size = entryoffsetinblock + DIRSIZ(ep);
558 		dp->i_flag |= IUPD|ICHG;
559 	}
560 
561 	/*
562 	 * Found component in pathname.
563 	 * If the final component of path name, save information
564 	 * in the cache as to where the entry was found.
565 	 */
566 	if (*cp == '\0' && flag == LOOKUP) {
567 		u.u_ncache.nc_prevoffset = ndp->ni_offset &~ (DIRBLKSIZ - 1);
568 		u.u_ncache.nc_inumber = dp->i_number;
569 		u.u_ncache.nc_dev = dp->i_dev;
570 	}
571 	/*
572 	 * Save directory entry's inode number and reclen in ndp->ni_dent,
573 	 * and release directory buffer.
574 	 */
575 	ndp->ni_dent.d_ino = ep->d_ino;
576 	ndp->ni_dent.d_reclen = ep->d_reclen;
577 	brelse(bp);
578 	bp = NULL;
579 
580 	/*
581 	 * If deleting, and at end of pathname, return
582 	 * parameters which can be used to remove file.
583 	 * If the lockparent flag isn't set, we return only
584 	 * the directory (in ndp->ni_pdir), otherwise we go
585 	 * on and lock the inode, being careful with ".".
586 	 */
587 	if (flag == DELETE && *cp == 0) {
588 		/*
589 		 * Write access to directory required to delete files.
590 		 */
591 		if (access(dp, IWRITE))
592 			goto bad;
593 		ndp->ni_pdir = dp;		/* for dirremove() */
594 		/*
595 		 * Return pointer to current entry in ndp->ni_offset,
596 		 * and distance past previous entry (if there
597 		 * is a previous entry in this block) in ndp->ni_count.
598 		 * Save directory inode pointer in ndp->ni_pdir for dirremove().
599 		 */
600 		if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
601 			ndp->ni_count = 0;
602 		else
603 			ndp->ni_count = ndp->ni_offset - prevoff;
604 		if (lockparent) {
605 			if (dp->i_number == ndp->ni_dent.d_ino)
606 				dp->i_count++;
607 			else {
608 				dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
609 				if (dp == NULL) {
610 					iput(ndp->ni_pdir);
611 					goto bad;
612 				}
613 				/*
614 				 * If directory is "sticky", then user must own
615 				 * the directory, or the file in it, else he
616 				 * may not delete it (unless he's root). This
617 				 * implements append-only directories.
618 				 */
619 				if ((ndp->ni_pdir->i_mode & ISVTX) &&
620 				    u.u_uid != 0 &&
621 				    u.u_uid != ndp->ni_pdir->i_uid &&
622 				    dp->i_uid != u.u_uid) {
623 					iput(ndp->ni_pdir);
624 					u.u_error = EPERM;
625 					goto bad;
626 				}
627 			}
628 		}
629 		nbp->av_forw = freenamebuf;
630 		freenamebuf = nbp;
631 		return (dp);
632 	}
633 
634 	/*
635 	 * Special handling for ".." allowing chdir out of mounted
636 	 * file system: indirect .. in root inode to reevaluate
637 	 * in directory file system was mounted on.
638 	 */
639 	if (isdotdot) {
640 		if (dp == u.u_rdir) {
641 			ndp->ni_dent.d_ino = dp->i_number;
642 			makeentry = 0;
643 		} else if (ndp->ni_dent.d_ino == ROOTINO &&
644 		   dp->i_number == ROOTINO) {
645 			for (i = 1; i < NMOUNT; i++)
646 			if (mount[i].m_bufp != NULL &&
647 			   mount[i].m_dev == dp->i_dev) {
648 				iput(dp);
649 				dp = mount[i].m_inodp;
650 				ILOCK(dp);
651 				dp->i_count++;
652 				fs = dp->i_fs;
653 				cp -= 2;     /* back over .. */
654 				goto dirloop2;
655 			}
656 		}
657 	}
658 
659 	/*
660 	 * If rewriting (rename), return the inode and the
661 	 * information required to rewrite the present directory
662 	 * Must get inode of directory entry to verify it's a
663 	 * regular file, or empty directory.
664 	 */
665 	if ((flag == CREATE && lockparent) && *cp == 0) {
666 		if (access(dp, IWRITE))
667 			goto bad;
668 		ndp->ni_pdir = dp;		/* for dirrewrite() */
669 		/*
670 		 * Careful about locking second inode.
671 		 * This can only occur if the target is ".".
672 		 */
673 		if (dp->i_number == ndp->ni_dent.d_ino) {
674 			u.u_error = EISDIR;		/* XXX */
675 			goto bad;
676 		}
677 		dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
678 		if (dp == NULL) {
679 			iput(ndp->ni_pdir);
680 			goto bad;
681 		}
682 		nbp->av_forw = freenamebuf;
683 		freenamebuf = nbp;
684 		return (dp);
685 	}
686 
687 	/*
688 	 * Check for symbolic link, which may require us to massage the
689 	 * name before we continue translation.  We do not `iput' the
690 	 * directory because we may need it again if the symbolic link
691 	 * is relative to the current directory.  Instead we save it
692 	 * unlocked as "pdp".  We must get the target inode before unlocking
693 	 * the directory to insure that the inode will not be removed
694 	 * before we get it.  We prevent deadlock by always fetching
695 	 * inodes from the root, moving down the directory tree. Thus
696 	 * when following backward pointers ".." we must unlock the
697 	 * parent directory before getting the requested directory.
698 	 * There is a potential race condition here if both the current
699 	 * and parent directories are removed before the `iget' for the
700 	 * inode associated with ".." returns.  We hope that this occurs
701 	 * infrequently since we cannot avoid this race condition without
702 	 * implementing a sophisticated deadlock detection algorithm.
703 	 * Note also that this simple deadlock detection scheme will not
704 	 * work if the file system has any hard links other than ".."
705 	 * that point backwards in the directory structure.
706 	 */
707 	pdp = dp;
708 	if (isdotdot) {
709 		IUNLOCK(pdp);	/* race to get the inode */
710 		dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
711 		if (dp == NULL)
712 			goto bad2;
713 	} else if (dp->i_number == ndp->ni_dent.d_ino) {
714 		dp->i_count++;	/* we want ourself, ie "." */
715 	} else {
716 		dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
717 		IUNLOCK(pdp);
718 		if (dp == NULL)
719 			goto bad2;
720 	}
721 
722 	/*
723 	 * Insert name into cache if appropriate.
724 	 */
725 	if (makeentry) {
726 		if (ncp != NULL)
727 			panic("namei: duplicating cache");
728 		/*
729 		 * Free the cache slot at head of lru chain.
730 		 */
731 		if (ncp = nchhead) {
732 			/* remove from lru chain */
733 			*ncp->nc_prev = ncp->nc_nxt;
734 			if (ncp->nc_nxt)
735 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
736 			else
737 				nchtail = ncp->nc_prev;
738 			remque(ncp);		/* remove from old hash chain */
739 			/* grab the inode we just found */
740 			ncp->nc_ip = dp;
741 			/* fill in cache info */
742 			ncp->nc_ino = pdp->i_number;	/* parents inum */
743 			ncp->nc_dev = pdp->i_dev;	/* & device */
744 			ncp->nc_idev = dp->i_dev;	/* our device */
745 			ncp->nc_id = dp->i_id;		/* identifier */
746 			ncp->nc_nlen = ndp->ni_dent.d_namlen;
747 			bcopy(ndp->ni_dent.d_name, ncp->nc_name,
748 			    (unsigned)ncp->nc_nlen);
749 			/* link at end of lru chain */
750 			ncp->nc_nxt = NULL;
751 			ncp->nc_prev = nchtail;
752 			*nchtail = ncp;
753 			nchtail = &ncp->nc_nxt;
754 			/* and insert on hash chain */
755 			insque(ncp, nhp);
756 		}
757 	}
758 
759 haveino:
760 	fs = dp->i_fs;
761 
762 	/*
763 	 * Check for symbolic link
764 	 */
765 	if ((dp->i_mode & IFMT) == IFLNK &&
766 	    ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) {
767 		u_int pathlen = strlen(cp) + 1;
768 
769 		if (dp->i_size + pathlen >= MAXPATHLEN - 1) {
770 			u.u_error = ENAMETOOLONG;
771 			goto bad2;
772 		}
773 		if (++nlink > MAXSYMLINKS) {
774 			u.u_error = ELOOP;
775 			goto bad2;
776 		}
777 		ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen);
778 		u.u_error =
779 		    rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size,
780 			(off_t)0, 1, (int *)0);
781 		if (u.u_error)
782 			goto bad2;
783 		cp = nbp->b_un.b_addr;
784 		iput(dp);
785 		if (*cp == '/') {
786 			irele(pdp);
787 			while (*cp == '/')
788 				cp++;
789 			if ((dp = u.u_rdir) == NULL)
790 				dp = rootdir;
791 			ILOCK(dp);
792 			dp->i_count++;
793 		} else {
794 			dp = pdp;
795 			ILOCK(dp);
796 		}
797 		fs = dp->i_fs;
798 		goto dirloop;
799 	}
800 
801 	/*
802 	 * Not a symbolic link.  If more pathname,
803 	 * continue at next component, else return.
804 	 */
805 	if (*cp == '/') {
806 		while (*cp == '/')
807 			cp++;
808 		irele(pdp);
809 		goto dirloop;
810 	}
811 	nbp->av_forw = freenamebuf;
812 	freenamebuf = nbp;
813 	if (lockparent)
814 		ndp->ni_pdir = pdp;
815 	else
816 		irele(pdp);
817 	return (dp);
818 bad2:
819 	irele(pdp);
820 bad:
821 	if (bp)
822 		brelse(bp);
823 	if (dp)
824 		iput(dp);
825 	nbp->av_forw = freenamebuf;
826 	freenamebuf = nbp;
827 	return (NULL);
828 }
829 
830 
831 dirbad(ip, offset, how)
832 	struct inode *ip;
833 	off_t offset;
834 	char *how;
835 {
836 
837 	printf("%s: bad dir ino %d at offset %d: %s\n",
838 	    ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
839 }
840 
841 /*
842  * Do consistency checking on a directory entry:
843  *	record length must be multiple of 4
844  *	entry must fit in rest of its DIRBLKSIZ block
845  *	record must be large enough to contain entry
846  *	name is not longer than MAXNAMLEN
847  *	name must be as long as advertised, and null terminated
848  */
849 dirbadentry(ep, entryoffsetinblock)
850 	register struct direct *ep;
851 	int entryoffsetinblock;
852 {
853 	register int i;
854 
855 	if ((ep->d_reclen & 0x3) != 0 ||
856 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
857 	    ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
858 		return (1);
859 	for (i = 0; i < ep->d_namlen; i++)
860 		if (ep->d_name[i] == '\0')
861 			return (1);
862 	return (ep->d_name[i]);
863 }
864 
865 /*
866  * Write a directory entry after a call to namei, using the parameters
867  * which it left in the u. area.  The argument ip is the inode which
868  * the new directory entry will refer to.  The u. area field ndp->ni_pdir is
869  * a pointer to the directory to be written, which was left locked by
870  * namei.  Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
871  * how the space for the new entry is to be gotten.
872  */
873 direnter(ip, ndp)
874 	struct inode *ip;
875 	register struct nameidata *ndp;
876 {
877 	register struct direct *ep, *nep;
878 	register struct inode *dp = ndp->ni_pdir;
879 	struct buf *bp;
880 	int loc, spacefree, error = 0;
881 	u_int dsize;
882 	int newentrysize;
883 	char *dirbuf;
884 
885 	ndp->ni_dent.d_ino = ip->i_number;
886 	newentrysize = DIRSIZ(&ndp->ni_dent);
887 	if (ndp->ni_count == 0) {
888 		/*
889 		 * If ndp->ni_count is 0, then namei could find no space in the
890 		 * directory. In this case ndp->ni_offset will be on a directory
891 		 * block boundary and we will write the new entry into a fresh
892 		 * block.
893 		 */
894 		if (ndp->ni_offset&(DIRBLKSIZ-1))
895 			panic("wdir: newblk");
896 		ndp->ni_dent.d_reclen = DIRBLKSIZ;
897 		error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
898 		    newentrysize, ndp->ni_offset, 1, (int *)0);
899 		if (DIRBLKSIZ > dp->i_fs->fs_fsize)
900 			panic("wdir: blksize"); /* XXX - should grow w/bmap() */
901 		else
902 			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
903 		iput(dp);
904 		return (error);
905 	}
906 
907 	/*
908 	 * If ndp->ni_count is non-zero, then namei found space for the new
909 	 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
910 	 * in the directory.  To use this space, we may have to compact
911 	 * the entries located there, by copying them together towards
912 	 * the beginning of the block, leaving the free space in
913 	 * one usable chunk at the end.
914 	 */
915 
916 	/*
917 	 * Increase size of directory if entry eats into new space.
918 	 * This should never push the size past a new multiple of
919 	 * DIRBLKSIZE.
920 	 *
921 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
922 	 */
923 	if (ndp->ni_offset + ndp->ni_count > dp->i_size)
924 		dp->i_size = ndp->ni_offset + ndp->ni_count;
925 	/*
926 	 * Get the block containing the space for the new directory
927 	 * entry.  Should return error by result instead of u.u_error.
928 	 */
929 	bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf);
930 	if (bp == 0) {
931 		iput(dp);
932 		return (u.u_error);
933 	}
934 	/*
935 	 * Find space for the new entry.  In the simple case, the
936 	 * entry at offset base will have the space.  If it does
937 	 * not, then namei arranged that compacting the region
938 	 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
939 	 */
940 	ep = (struct direct *)dirbuf;
941 	dsize = DIRSIZ(ep);
942 	spacefree = ep->d_reclen - dsize;
943 	for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
944 		nep = (struct direct *)(dirbuf + loc);
945 		if (ep->d_ino) {
946 			/* trim the existing slot */
947 			ep->d_reclen = dsize;
948 			ep = (struct direct *)((char *)ep + dsize);
949 		} else {
950 			/* overwrite; nothing there; header is ours */
951 			spacefree += dsize;
952 		}
953 		dsize = DIRSIZ(nep);
954 		spacefree += nep->d_reclen - dsize;
955 		loc += nep->d_reclen;
956 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
957 	}
958 	/*
959 	 * Update the pointer fields in the previous entry (if any),
960 	 * copy in the new entry, and write out the block.
961 	 */
962 	if (ep->d_ino == 0) {
963 		if (spacefree + dsize < newentrysize)
964 			panic("wdir: compact1");
965 		ndp->ni_dent.d_reclen = spacefree + dsize;
966 	} else {
967 		if (spacefree < newentrysize)
968 			panic("wdir: compact2");
969 		ndp->ni_dent.d_reclen = spacefree;
970 		ep->d_reclen = dsize;
971 		ep = (struct direct *)((char *)ep + dsize);
972 	}
973 	bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
974 	bwrite(bp);
975 	dp->i_flag |= IUPD|ICHG;
976 	if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
977 		itrunc(dp, (u_long)ndp->ni_endoff);
978 	iput(dp);
979 	return (error);
980 }
981 
982 /*
983  * Remove a directory entry after a call to namei, using the
984  * parameters which it left in the u. area.  The u. entry
985  * ni_offset contains the offset into the directory of the
986  * entry to be eliminated.  The ni_count field contains the
987  * size of the previous record in the directory.  If this
988  * is 0, the first entry is being deleted, so we need only
989  * zero the inode number to mark the entry as free.  If the
990  * entry isn't the first in the directory, we must reclaim
991  * the space of the now empty record by adding the record size
992  * to the size of the previous entry.
993  */
994 dirremove(ndp)
995 	register struct nameidata *ndp;
996 {
997 	register struct inode *dp = ndp->ni_pdir;
998 	register struct buf *bp;
999 	struct direct *ep;
1000 
1001 	if (ndp->ni_count == 0) {
1002 		/*
1003 		 * First entry in block: set d_ino to zero.
1004 		 */
1005 		ndp->ni_dent.d_ino = 0;
1006 		(void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
1007 		    (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1008 	} else {
1009 		/*
1010 		 * Collapse new free space into previous entry.
1011 		 */
1012 		bp = blkatoff(dp, ndp->ni_offset - ndp->ni_count, (char **)&ep);
1013 		if (bp == 0)
1014 			return (0);
1015 		ep->d_reclen += ndp->ni_dent.d_reclen;
1016 		bwrite(bp);
1017 		dp->i_flag |= IUPD|ICHG;
1018 	}
1019 	return (1);
1020 }
1021 
1022 /*
1023  * Rewrite an existing directory entry to point at the inode
1024  * supplied.  The parameters describing the directory entry are
1025  * set up by a call to namei.
1026  */
1027 dirrewrite(dp, ip, ndp)
1028 	struct inode *dp, *ip;
1029 	struct nameidata *ndp;
1030 {
1031 
1032 	ndp->ni_dent.d_ino = ip->i_number;
1033 	u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
1034 		(int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1035 	iput(dp);
1036 }
1037 
1038 /*
1039  * Return buffer with contents of block "offset"
1040  * from the beginning of directory "ip".  If "res"
1041  * is non-zero, fill it in with a pointer to the
1042  * remaining space in the directory.
1043  */
1044 struct buf *
1045 blkatoff(ip, offset, res)
1046 	struct inode *ip;
1047 	off_t offset;
1048 	char **res;
1049 {
1050 	register struct fs *fs = ip->i_fs;
1051 	daddr_t lbn = lblkno(fs, offset);
1052 	int bsize = blksize(fs, ip, lbn);
1053 	register struct buf *bp;
1054 	daddr_t bn;
1055 
1056 	bn = bmap(ip, lbn, B_READ, bsize);
1057 	if (u.u_error)
1058 		return (0);
1059 	if (bn == (daddr_t)-1) {
1060 		dirbad(ip, offset, "hole in dir");
1061 		return (0);
1062 	}
1063 	bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize);
1064 	if (bp->b_flags & B_ERROR) {
1065 		brelse(bp);
1066 		return (0);
1067 	}
1068 	if (res)
1069 		*res = bp->b_un.b_addr + blkoff(fs, offset);
1070 	return (bp);
1071 }
1072 
1073 /*
1074  * Check if a directory is empty or not.
1075  * Inode supplied must be locked.
1076  *
1077  * Using a struct dirtemplate here is not precisely
1078  * what we want, but better than using a struct direct.
1079  *
1080  * NB: does not handle corrupted directories.
1081  */
1082 dirempty(ip, parentino)
1083 	register struct inode *ip;
1084 	ino_t parentino;
1085 {
1086 	register off_t off;
1087 	struct dirtemplate dbuf;
1088 	register struct direct *dp = (struct direct *)&dbuf;
1089 	int error, count;
1090 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1091 
1092 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
1093 		error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ,
1094 		    off, 1, &count);
1095 		/*
1096 		 * Since we read MINDIRSIZ, residual must
1097 		 * be 0 unless we're at end of file.
1098 		 */
1099 		if (error || count != 0)
1100 			return (0);
1101 		/* avoid infinite loops */
1102 		if (dp->d_reclen == 0)
1103 			return (0);
1104 		/* skip empty entries */
1105 		if (dp->d_ino == 0)
1106 			continue;
1107 		/* accept only "." and ".." */
1108 		if (dp->d_namlen > 2)
1109 			return (0);
1110 		if (dp->d_name[0] != '.')
1111 			return (0);
1112 		/*
1113 		 * At this point d_namlen must be 1 or 2.
1114 		 * 1 implies ".", 2 implies ".." if second
1115 		 * char is also "."
1116 		 */
1117 		if (dp->d_namlen == 1)
1118 			continue;
1119 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
1120 			continue;
1121 		return (0);
1122 	}
1123 	return (1);
1124 }
1125 
1126 /*
1127  * Check if source directory is in the path of the target directory.
1128  * Target is supplied locked, source is unlocked.
1129  * The target is always iput() before returning.
1130  */
1131 checkpath(source, target)
1132 	struct inode *source, *target;
1133 {
1134 	struct dirtemplate dirbuf;
1135 	register struct inode *ip;
1136 	int error = 0;
1137 
1138 	ip = target;
1139 	if (ip->i_number == source->i_number) {
1140 		error = EEXIST;
1141 		goto out;
1142 	}
1143 	if (ip->i_number == ROOTINO)
1144 		goto out;
1145 
1146 	for (;;) {
1147 		if ((ip->i_mode&IFMT) != IFDIR) {
1148 			error = ENOTDIR;
1149 			break;
1150 		}
1151 		error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf,
1152 			sizeof (struct dirtemplate), (off_t)0, 1, (int *)0);
1153 		if (error != 0)
1154 			break;
1155 		if (dirbuf.dotdot_namlen != 2 ||
1156 		    dirbuf.dotdot_name[0] != '.' ||
1157 		    dirbuf.dotdot_name[1] != '.') {
1158 			error = ENOTDIR;
1159 			break;
1160 		}
1161 		if (dirbuf.dotdot_ino == source->i_number) {
1162 			error = EINVAL;
1163 			break;
1164 		}
1165 		if (dirbuf.dotdot_ino == ROOTINO)
1166 			break;
1167 		iput(ip);
1168 		ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino);
1169 		if (ip == NULL) {
1170 			error = u.u_error;
1171 			break;
1172 		}
1173 	}
1174 
1175 out:
1176 	if (error == ENOTDIR)
1177 		printf("checkpath: .. not a directory\n");
1178 	if (ip != NULL)
1179 		iput(ip);
1180 	return (error);
1181 }
1182 
1183 /*
1184  * Name cache initialization, from main() when we are booting
1185  */
1186 nchinit()
1187 {
1188 	register union nchash *nchp;
1189 	register struct namecache *ncp;
1190 
1191 	nchhead = 0;
1192 	nchtail = &nchhead;
1193 	for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) {
1194 		ncp->nc_forw = ncp;			/* hash chain */
1195 		ncp->nc_back = ncp;
1196 		ncp->nc_nxt = NULL;			/* lru chain */
1197 		*nchtail = ncp;
1198 		ncp->nc_prev = nchtail;
1199 		nchtail = &ncp->nc_nxt;
1200 		/* all else is zero already */
1201 	}
1202 	for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) {
1203 		nchp->nch_head[0] = nchp;
1204 		nchp->nch_head[1] = nchp;
1205 	}
1206 }
1207 
1208 /*
1209  * Cache flush, called when filesys is umounted to
1210  * remove entries that would now be invalid
1211  *
1212  * The line "nxtcp = nchhead" near the end is to avoid potential problems
1213  * if the cache lru chain is modified while we are dumping the
1214  * inode.  This makes the algorithm O(n^2), but do you think I care?
1215  */
1216 nchinval(dev)
1217 	register dev_t dev;
1218 {
1219 	register struct namecache *ncp, *nxtcp;
1220 
1221 	for (ncp = nchhead; ncp; ncp = nxtcp) {
1222 		nxtcp = ncp->nc_nxt;
1223 		if (ncp->nc_ip == NULL ||
1224 		    (ncp->nc_idev != dev && ncp->nc_dev != dev))
1225 			continue;
1226 		/* free the resources we had */
1227 		ncp->nc_idev = NODEV;
1228 		ncp->nc_dev = NODEV;
1229 		ncp->nc_id = NULL;
1230 		ncp->nc_ino = 0;
1231 		ncp->nc_ip = NULL;
1232 		remque(ncp);		/* remove entry from its hash chain */
1233 		ncp->nc_forw = ncp;	/* and make a dummy one */
1234 		ncp->nc_back = ncp;
1235 		/* delete this entry from LRU chain */
1236 		*ncp->nc_prev = nxtcp;
1237 		if (nxtcp)
1238 			nxtcp->nc_prev = ncp->nc_prev;
1239 		else
1240 			nchtail = ncp->nc_prev;
1241 		/* cause rescan of list, it may have altered */
1242 		nxtcp = nchhead;
1243 		/* put the now-free entry at head of LRU */
1244 		ncp->nc_nxt = nxtcp;
1245 		ncp->nc_prev = &nchhead;
1246 		nxtcp->nc_prev = &ncp->nc_nxt;
1247 		nchhead = ncp;
1248 	}
1249 }
1250 
1251 /*
1252  * Name cache invalidation of all entries.
1253  */
1254 cacheinvalall()
1255 {
1256 	register struct namecache *ncp;
1257 
1258 	for (ncp = namecache; ncp < &namecache[nchsize]; ncp++)
1259 		ncp->nc_id = 0;
1260 }
1261