xref: /original-bsd/sys/ufs/ufs/ufs_lookup.c (revision 0db3846e)
1 /*
2  * Copyright (c) 1982, 1986 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)ufs_lookup.c	7.4 (Berkeley) 06/21/87
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "inode.h"
12 #include "fs.h"
13 #include "mount.h"
14 #include "dir.h"
15 #include "user.h"
16 #include "buf.h"
17 #include "conf.h"
18 #include "uio.h"
19 #include "kernel.h"
20 #include "malloc.h"
21 
22 struct	buf *blkatoff();
23 int	dirchk = 0;
24 
25 /*
26  * Structures associated with name cacheing.
27  */
28 #define	NCHHASH		32	/* size of hash table */
29 
30 #if	((NCHHASH)&((NCHHASH)-1)) != 0
31 #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH))
32 #else
33 #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1))
34 #endif
35 
36 union nchash {
37 	union	nchash *nch_head[2];
38 	struct	namecache *nch_chain[2];
39 } nchash[NCHHASH];
40 #define	nch_forw	nch_chain[0]
41 #define	nch_back	nch_chain[1]
42 
43 struct	namecache *nchhead, **nchtail;	/* LRU chain pointers */
44 struct	nchstats nchstats;		/* cache effectiveness statistics */
45 
46 /*
47  * Convert a pathname into a pointer to a locked inode.
48  * This is a very central and rather complicated routine.
49  * If the file system is not maintained in a strict tree hierarchy,
50  * this can result in a deadlock situation (see comments in code below).
51  *
52  * The flag argument is LOOKUP, CREATE, or DELETE depending on whether
53  * the name is to be looked up, created, or deleted. When CREATE or
54  * DELETE is specified, information usable in creating or deleteing a
55  * directory entry is also calculated. If flag has LOCKPARENT or'ed
56  * into it and the target of the pathname exists, namei returns both
57  * the target and its parent directory locked. When creating and
58  * LOCKPARENT is specified, the target may not be ".".  When deleting
59  * and LOCKPARENT is specified, the target may be ".", but the caller
60  * must check to insure it does an irele and iput instead of two iputs.
61  *
62  * The FOLLOW flag is set when symbolic links are to be followed
63  * when they occur at the end of the name translation process.
64  * Symbolic links are always followed for all other pathname
65  * components other than the last.
66  *
67  * The segflg defines whether the name is to be copied from user
68  * space or kernel space.
69  *
70  * Name caching works as follows:
71  *
72  * Names found by directory scans are retained in a cache
73  * for future reference.  It is managed LRU, so frequently
74  * used names will hang around.  Cache is indexed by hash value
75  * obtained from (ino,dev,name) where ino & dev refer to the
76  * directory containing name.
77  *
78  * For simplicity (and economy of storage), names longer than
79  * a maximum length of NCHNAMLEN are not cached; they occur
80  * infrequently in any case, and are almost never of interest.
81  *
82  * Upon reaching the last segment of a path, if the reference
83  * is for DELETE, or NOCACHE is set (rewrite), and the
84  * name is located in the cache, it will be dropped.
85  *
86  * Overall outline of namei:
87  *
88  *	copy in name
89  *	get starting directory
90  * dirloop:
91  *	check accessibility of directory
92  * dirloop2:
93  *	copy next component of name to ndp->ni_dent
94  *	handle degenerate case where name is null string
95  *	look for name in cache, if found, then if at end of path
96  *	  and deleting or creating, drop it, else to haveino
97  *	search for name in directory, to found or notfound
98  * notfound:
99  *	if creating, return locked directory, leaving info on avail. slots
100  *	else return error
101  * found:
102  *	if at end of path and deleting, return information to allow delete
103  *	if at end of path and rewriting (CREATE and LOCKPARENT), lock target
104  *	  inode and return info to allow rewrite
105  *	if .. and on mounted filesys, look in mount table for parent
106  *	if not at end, add name to cache; if at end and neither creating
107  *	  nor deleting, add name to cache
108  * haveino:
109  *	if symbolic link, massage name in buffer and continue at dirloop
110  *	if more components of name, do next level at dirloop
111  *	return the answer as locked inode
112  *
113  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode,
114  *	 but unlocked.
115  */
116 struct inode *
117 namei(ndp)
118 	register struct nameidata *ndp;
119 {
120 	register char *cp;		/* pointer into pathname argument */
121 /* these variables refer to things which must be freed or unlocked */
122 	register struct inode *dp = 0;	/* the directory we are searching */
123 	register struct namecache *ncp;	/* cache slot for entry */
124 	register struct fs *fs;		/* file system that directory is in */
125 	register struct buf *bp = 0;	/* a buffer of directory entries */
126 	register struct direct *ep;	/* the current directory entry */
127 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
128 	register caddr_t nbp;		/* buffer storing path name argument */
129 /* these variables hold information about the search for a slot */
130 	enum {NONE, COMPACT, FOUND} slotstatus;
131 	int slotoffset = -1;		/* offset of area with free space */
132 	int slotsize;			/* size of area at slotoffset */
133 	int slotfreespace;		/* amount of space free in slot */
134 	int slotneeded;			/* size of the entry we're seeking */
135 /* */
136 	int numdirpasses;		/* strategy for directory search */
137 	int endsearch;			/* offset to end directory search */
138 	int prevoff;			/* ndp->ni_offset of previous entry */
139 	int nlink = 0;			/* number of symbolic links taken */
140 	struct inode *pdp;		/* saved dp during symlink work */
141 	int error, i;
142 	int lockparent;
143 	int docache;			/* == 0 do not cache last component */
144 	int makeentry;			/* != 0 if name to be added to cache */
145 	unsigned hash;			/* value of name hash for entry */
146 	union nchash *nhp;		/* cache chain head for entry */
147 	int isdotdot;			/* != 0 if current name is ".." */
148 	int flag;			/* op ie, LOOKUP, CREATE, or DELETE */
149 	off_t enduseful;		/* pointer past last used dir slot */
150 
151 	lockparent = ndp->ni_nameiop & LOCKPARENT;
152 	docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE;
153 	flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW);
154 	if (flag == DELETE || lockparent)
155 		docache = 0;
156 	/*
157 	 * Get a buffer for the name to be translated, and copy the
158 	 * name into the buffer.
159 	 */
160 	MALLOC(nbp, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
161 	if (ndp->ni_segflg == UIO_SYSSPACE)
162 		error = copystr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0);
163 	else
164 		error = copyinstr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0);
165 	if (error) {
166 		u.u_error = error;
167 		goto bad;
168 	}
169 
170 	/*
171 	 * Get starting directory.
172 	 */
173 	cp = nbp;
174 	if (*cp == '/') {
175 		while (*cp == '/')
176 			cp++;
177 		if ((dp = u.u_rdir) == NULL)
178 			dp = rootdir;
179 	} else
180 		dp = u.u_cdir;
181 	fs = dp->i_fs;
182 	ILOCK(dp);
183 	dp->i_count++;
184 	ndp->ni_pdir = (struct inode *)0xc0000000;		/* illegal */
185 	ndp->ni_endoff = 0;
186 
187 	/*
188 	 * We come to dirloop to search a new directory.
189 	 * The directory must be locked so that it can be
190 	 * iput, and fs must be already set to dp->i_fs.
191 	 */
192 dirloop:
193 	/*
194 	 * Check accessiblity of directory.
195 	 */
196 	if ((dp->i_mode&IFMT) != IFDIR) {
197 		u.u_error = ENOTDIR;
198 		goto bad;
199 	}
200 	if (access(dp, IEXEC))
201 		goto bad;
202 
203 dirloop2:
204 	/*
205 	 * Copy next component of name to ndp->ni_dent.
206 	 */
207 	hash = 0;
208 	for (i = 0; *cp != 0 && *cp != '/'; cp++) {
209 		if (i >= MAXNAMLEN) {
210 			u.u_error = ENAMETOOLONG;
211 			goto bad;
212 		}
213 		if (*cp & 0200)
214 			if ((*cp&0377) == ('/'|0200) || flag != DELETE) {
215 				u.u_error = EINVAL;
216 				goto bad;
217 			}
218 		ndp->ni_dent.d_name[i++] = *cp;
219 		hash += (unsigned char)*cp * i;
220 	}
221 	ndp->ni_dent.d_namlen = i;
222 	ndp->ni_dent.d_name[i] = '\0';
223 	isdotdot = (i == 2 &&
224 		ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.');
225 	makeentry = 1;
226 	if (*cp == '\0' && docache == 0)
227 		makeentry = 0;
228 
229 	/*
230 	 * Check for degenerate name (e.g. / or "")
231 	 * which is a way of talking about a directory,
232 	 * e.g. like "/." or ".".
233 	 */
234 	if (ndp->ni_dent.d_name[0] == '\0') {
235 		if (flag != LOOKUP || lockparent) {
236 			u.u_error = EISDIR;
237 			goto bad;
238 		}
239 		FREE(nbp, M_NAMEI);
240 		return (dp);
241 	}
242 
243 	/*
244 	 * We now have a segment name to search for, and a directory to search.
245 	 *
246 	 * Before tediously performing a linear scan of the directory,
247 	 * check the name cache to see if the directory/name pair
248 	 * we are looking for is known already.  We don't do this
249 	 * if the segment name is long, simply so the cache can avoid
250 	 * holding long names (which would either waste space, or
251 	 * add greatly to the complexity).
252 	 */
253 	if (ndp->ni_dent.d_namlen > NCHNAMLEN) {
254 		nchstats.ncs_long++;
255 		makeentry = 0;
256 	} else {
257 		nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)];
258 		for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp;
259 		    ncp = ncp->nc_forw) {
260 			if (ncp->nc_ino == dp->i_number &&
261 			    ncp->nc_dev == dp->i_dev &&
262 			    ncp->nc_nlen == ndp->ni_dent.d_namlen &&
263 			    !bcmp(ncp->nc_name, ndp->ni_dent.d_name,
264 				(unsigned)ncp->nc_nlen))
265 				break;
266 		}
267 		if (ncp == (struct namecache *)nhp) {
268 			nchstats.ncs_miss++;
269 			ncp = NULL;
270 		} else {
271 			if (ncp->nc_id != ncp->nc_ip->i_id)
272 				nchstats.ncs_falsehits++;
273 			else if (!makeentry)
274 				nchstats.ncs_badhits++;
275 			else {
276 				/*
277 				 * move this slot to end of LRU
278 				 * chain, if not already there
279 				 */
280 				if (ncp->nc_nxt) {
281 					/* remove from LRU chain */
282 					*ncp->nc_prev = ncp->nc_nxt;
283 					ncp->nc_nxt->nc_prev = ncp->nc_prev;
284 
285 					/* and replace at end of it */
286 					ncp->nc_nxt = NULL;
287 					ncp->nc_prev = nchtail;
288 					*nchtail = ncp;
289 					nchtail = &ncp->nc_nxt;
290 				}
291 
292 				/*
293 				 * Get the next inode in the path.
294 				 * See comment above other `IUNLOCK' code for
295 				 * an explaination of the locking protocol.
296 				 */
297 				pdp = dp;
298 				if (!isdotdot || dp != u.u_rdir)
299 					dp = ncp->nc_ip;
300 				if (dp == NULL)
301 					panic("namei: null cache ino");
302 				if (pdp == dp)
303 					dp->i_count++;
304 				else if (isdotdot) {
305 					IUNLOCK(pdp);
306 					igrab(dp);
307 				} else {
308 					igrab(dp);
309 					IUNLOCK(pdp);
310 				}
311 
312 				/*
313 				 * Verify that the inode that we got
314 				 * did not change while we were waiting
315 				 * for it to be locked.
316 				 */
317 				if (ncp->nc_id != ncp->nc_ip->i_id) {
318 					iput(dp);
319 					ILOCK(pdp);
320 					dp = pdp;
321 					nchstats.ncs_falsehits++;
322 				} else {
323 					ndp->ni_dent.d_ino = dp->i_number;
324 					/* ni_dent.d_reclen is garbage ... */
325 					nchstats.ncs_goodhits++;
326 					goto haveino;
327 				}
328 			}
329 
330 			/*
331 			 * Last component and we are renaming or deleting,
332 			 * the cache entry is invalid, or otherwise don't
333 			 * want cache entry to exist.
334 			 */
335 			/* remove from LRU chain */
336 			*ncp->nc_prev = ncp->nc_nxt;
337 			if (ncp->nc_nxt)
338 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
339 			else
340 				nchtail = ncp->nc_prev;
341 			remque(ncp);		/* remove from hash chain */
342 			/* insert at head of LRU list (first to grab) */
343 			ncp->nc_nxt = nchhead;
344 			ncp->nc_prev = &nchhead;
345 			nchhead->nc_prev = &ncp->nc_nxt;
346 			nchhead = ncp;
347 			/* and make a dummy hash chain */
348 			ncp->nc_forw = ncp;
349 			ncp->nc_back = ncp;
350 			ncp = NULL;
351 		}
352 	}
353 
354 	/*
355 	 * Suppress search for slots unless creating
356 	 * file and at end of pathname, in which case
357 	 * we watch for a place to put the new file in
358 	 * case it doesn't already exist.
359 	 */
360 	slotstatus = FOUND;
361 	if (flag == CREATE && *cp == 0) {
362 		slotstatus = NONE;
363 		slotfreespace = 0;
364 		slotneeded = DIRSIZ(&ndp->ni_dent);
365 	}
366 	/*
367 	 * If this is the same directory that this process
368 	 * previously searched, pick up where we last left off.
369 	 * We cache only lookups as these are the most common
370 	 * and have the greatest payoff. Caching CREATE has little
371 	 * benefit as it usually must search the entire directory
372 	 * to determine that the entry does not exist. Caching the
373 	 * location of the last DELETE has not reduced profiling time
374 	 * and hence has been removed in the interest of simplicity.
375 	 */
376 	if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber ||
377 	    dp->i_dev != u.u_ncache.nc_dev) {
378 		ndp->ni_offset = 0;
379 		numdirpasses = 1;
380 	} else {
381 		if (u.u_ncache.nc_prevoffset > dp->i_size)
382 			u.u_ncache.nc_prevoffset = 0;
383 		ndp->ni_offset = u.u_ncache.nc_prevoffset;
384 		entryoffsetinblock = blkoff(fs, ndp->ni_offset);
385 		if (entryoffsetinblock != 0) {
386 			bp = blkatoff(dp, ndp->ni_offset, (char **)0);
387 			if (bp == 0)
388 				goto bad;
389 		}
390 		numdirpasses = 2;
391 		nchstats.ncs_2passes++;
392 	}
393 	endsearch = roundup(dp->i_size, DIRBLKSIZ);
394 	enduseful = 0;
395 
396 searchloop:
397 	while (ndp->ni_offset < endsearch) {
398 		/*
399 		 * If offset is on a block boundary,
400 		 * read the next directory block.
401 		 * Release previous if it exists.
402 		 */
403 		if (blkoff(fs, ndp->ni_offset) == 0) {
404 			if (bp != NULL)
405 				brelse(bp);
406 			bp = blkatoff(dp, ndp->ni_offset, (char **)0);
407 			if (bp == 0)
408 				goto bad;
409 			entryoffsetinblock = 0;
410 		}
411 		/*
412 		 * If still looking for a slot, and at a DIRBLKSIZE
413 		 * boundary, have to start looking for free space again.
414 		 */
415 		if (slotstatus == NONE &&
416 		    (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) {
417 			slotoffset = -1;
418 			slotfreespace = 0;
419 		}
420 		/*
421 		 * Get pointer to next entry.
422 		 * Full validation checks are slow, so we only check
423 		 * enough to insure forward progress through the
424 		 * directory. Complete checks can be run by patching
425 		 * "dirchk" to be true.
426 		 */
427 		ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
428 		if (ep->d_reclen == 0 ||
429 		    dirchk && dirbadentry(ep, entryoffsetinblock)) {
430 			dirbad(dp, ndp->ni_offset, "mangled entry");
431 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
432 			ndp->ni_offset += i;
433 			entryoffsetinblock += i;
434 			continue;
435 		}
436 
437 		/*
438 		 * If an appropriate sized slot has not yet been found,
439 		 * check to see if one is available. Also accumulate space
440 		 * in the current block so that we can determine if
441 		 * compaction is viable.
442 		 */
443 		if (slotstatus != FOUND) {
444 			int size = ep->d_reclen;
445 
446 			if (ep->d_ino != 0)
447 				size -= DIRSIZ(ep);
448 			if (size > 0) {
449 				if (size >= slotneeded) {
450 					slotstatus = FOUND;
451 					slotoffset = ndp->ni_offset;
452 					slotsize = ep->d_reclen;
453 				} else if (slotstatus == NONE) {
454 					slotfreespace += size;
455 					if (slotoffset == -1)
456 						slotoffset = ndp->ni_offset;
457 					if (slotfreespace >= slotneeded) {
458 						slotstatus = COMPACT;
459 						slotsize = ndp->ni_offset +
460 						      ep->d_reclen - slotoffset;
461 					}
462 				}
463 			}
464 		}
465 
466 		/*
467 		 * Check for a name match.
468 		 */
469 		if (ep->d_ino) {
470 			if (ep->d_namlen == ndp->ni_dent.d_namlen &&
471 			    !bcmp(ndp->ni_dent.d_name, ep->d_name,
472 				(unsigned)ep->d_namlen))
473 				goto found;
474 		}
475 		prevoff = ndp->ni_offset;
476 		ndp->ni_offset += ep->d_reclen;
477 		entryoffsetinblock += ep->d_reclen;
478 		if (ep->d_ino)
479 			enduseful = ndp->ni_offset;
480 	}
481 /* notfound: */
482 	/*
483 	 * If we started in the middle of the directory and failed
484 	 * to find our target, we must check the beginning as well.
485 	 */
486 	if (numdirpasses == 2) {
487 		numdirpasses--;
488 		ndp->ni_offset = 0;
489 		endsearch = u.u_ncache.nc_prevoffset;
490 		goto searchloop;
491 	}
492 	/*
493 	 * If creating, and at end of pathname and current
494 	 * directory has not been removed, then can consider
495 	 * allowing file to be created.
496 	 */
497 	if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) {
498 		/*
499 		 * Access for write is interpreted as allowing
500 		 * creation of files in the directory.
501 		 */
502 		if (access(dp, IWRITE))
503 			goto bad;
504 		/*
505 		 * Return an indication of where the new directory
506 		 * entry should be put.  If we didn't find a slot,
507 		 * then set ndp->ni_count to 0 indicating that the new
508 		 * slot belongs at the end of the directory. If we found
509 		 * a slot, then the new entry can be put in the range
510 		 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
511 		 */
512 		if (slotstatus == NONE) {
513 			ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
514 			ndp->ni_count = 0;
515 			enduseful = ndp->ni_offset;
516 		} else {
517 			ndp->ni_offset = slotoffset;
518 			ndp->ni_count = slotsize;
519 			if (enduseful < slotoffset + slotsize)
520 				enduseful = slotoffset + slotsize;
521 		}
522 		ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
523 		dp->i_flag |= IUPD|ICHG;
524 		if (bp)
525 			brelse(bp);
526 		FREE(nbp, M_NAMEI);
527 		/*
528 		 * We return with the directory locked, so that
529 		 * the parameters we set up above will still be
530 		 * valid if we actually decide to do a direnter().
531 		 * We return NULL to indicate that the entry doesn't
532 		 * currently exist, leaving a pointer to the (locked)
533 		 * directory inode in ndp->ni_pdir.
534 		 */
535 		ndp->ni_pdir = dp;
536 		return (NULL);
537 	}
538 	u.u_error = ENOENT;
539 	goto bad;
540 found:
541 	if (numdirpasses == 2)
542 		nchstats.ncs_pass2++;
543 	/*
544 	 * Check that directory length properly reflects presence
545 	 * of this entry.
546 	 */
547 	if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
548 		dirbad(dp, ndp->ni_offset, "i_size too small");
549 		dp->i_size = entryoffsetinblock + DIRSIZ(ep);
550 		dp->i_flag |= IUPD|ICHG;
551 	}
552 
553 	/*
554 	 * Found component in pathname.
555 	 * If the final component of path name, save information
556 	 * in the cache as to where the entry was found.
557 	 */
558 	if (*cp == '\0' && flag == LOOKUP) {
559 		u.u_ncache.nc_prevoffset = ndp->ni_offset &~ (DIRBLKSIZ - 1);
560 		u.u_ncache.nc_inumber = dp->i_number;
561 		u.u_ncache.nc_dev = dp->i_dev;
562 	}
563 	/*
564 	 * Save directory entry's inode number and reclen in ndp->ni_dent,
565 	 * and release directory buffer.
566 	 */
567 	ndp->ni_dent.d_ino = ep->d_ino;
568 	ndp->ni_dent.d_reclen = ep->d_reclen;
569 	brelse(bp);
570 	bp = NULL;
571 
572 	/*
573 	 * If deleting, and at end of pathname, return
574 	 * parameters which can be used to remove file.
575 	 * If the lockparent flag isn't set, we return only
576 	 * the directory (in ndp->ni_pdir), otherwise we go
577 	 * on and lock the inode, being careful with ".".
578 	 */
579 	if (flag == DELETE && *cp == 0) {
580 		/*
581 		 * Write access to directory required to delete files.
582 		 */
583 		if (access(dp, IWRITE))
584 			goto bad;
585 		ndp->ni_pdir = dp;		/* for dirremove() */
586 		/*
587 		 * Return pointer to current entry in ndp->ni_offset,
588 		 * and distance past previous entry (if there
589 		 * is a previous entry in this block) in ndp->ni_count.
590 		 * Save directory inode pointer in ndp->ni_pdir for dirremove().
591 		 */
592 		if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
593 			ndp->ni_count = 0;
594 		else
595 			ndp->ni_count = ndp->ni_offset - prevoff;
596 		if (lockparent) {
597 			if (dp->i_number == ndp->ni_dent.d_ino)
598 				dp->i_count++;
599 			else {
600 				dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
601 				if (dp == NULL) {
602 					iput(ndp->ni_pdir);
603 					goto bad;
604 				}
605 				/*
606 				 * If directory is "sticky", then user must own
607 				 * the directory, or the file in it, else he
608 				 * may not delete it (unless he's root). This
609 				 * implements append-only directories.
610 				 */
611 				if ((ndp->ni_pdir->i_mode & ISVTX) &&
612 				    u.u_uid != 0 &&
613 				    u.u_uid != ndp->ni_pdir->i_uid &&
614 				    dp->i_uid != u.u_uid) {
615 					iput(ndp->ni_pdir);
616 					u.u_error = EPERM;
617 					goto bad;
618 				}
619 			}
620 		}
621 		FREE(nbp, M_NAMEI);
622 		return (dp);
623 	}
624 
625 	/*
626 	 * Special handling for ".." allowing chdir out of mounted
627 	 * file system: indirect .. in root inode to reevaluate
628 	 * in directory file system was mounted on.
629 	 */
630 	if (isdotdot) {
631 		if (dp == u.u_rdir) {
632 			ndp->ni_dent.d_ino = dp->i_number;
633 			makeentry = 0;
634 		} else if (ndp->ni_dent.d_ino == ROOTINO &&
635 		   dp->i_number == ROOTINO) {
636 			for (i = 1; i < NMOUNT; i++)
637 			if (mount[i].m_fs != NULL &&
638 			   mount[i].m_dev == dp->i_dev) {
639 				iput(dp);
640 				dp = mount[i].m_inodp;
641 				ILOCK(dp);
642 				dp->i_count++;
643 				fs = dp->i_fs;
644 				cp -= 2;     /* back over .. */
645 				goto dirloop2;
646 			}
647 		}
648 	}
649 
650 	/*
651 	 * If rewriting (rename), return the inode and the
652 	 * information required to rewrite the present directory
653 	 * Must get inode of directory entry to verify it's a
654 	 * regular file, or empty directory.
655 	 */
656 	if ((flag == CREATE && lockparent) && *cp == 0) {
657 		if (access(dp, IWRITE))
658 			goto bad;
659 		ndp->ni_pdir = dp;		/* for dirrewrite() */
660 		/*
661 		 * Careful about locking second inode.
662 		 * This can only occur if the target is ".".
663 		 */
664 		if (dp->i_number == ndp->ni_dent.d_ino) {
665 			u.u_error = EISDIR;		/* XXX */
666 			goto bad;
667 		}
668 		dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
669 		if (dp == NULL) {
670 			iput(ndp->ni_pdir);
671 			goto bad;
672 		}
673 		FREE(nbp, M_NAMEI);
674 		return (dp);
675 	}
676 
677 	/*
678 	 * Check for symbolic link, which may require us to massage the
679 	 * name before we continue translation.  We do not `iput' the
680 	 * directory because we may need it again if the symbolic link
681 	 * is relative to the current directory.  Instead we save it
682 	 * unlocked as "pdp".  We must get the target inode before unlocking
683 	 * the directory to insure that the inode will not be removed
684 	 * before we get it.  We prevent deadlock by always fetching
685 	 * inodes from the root, moving down the directory tree. Thus
686 	 * when following backward pointers ".." we must unlock the
687 	 * parent directory before getting the requested directory.
688 	 * There is a potential race condition here if both the current
689 	 * and parent directories are removed before the `iget' for the
690 	 * inode associated with ".." returns.  We hope that this occurs
691 	 * infrequently since we cannot avoid this race condition without
692 	 * implementing a sophisticated deadlock detection algorithm.
693 	 * Note also that this simple deadlock detection scheme will not
694 	 * work if the file system has any hard links other than ".."
695 	 * that point backwards in the directory structure.
696 	 */
697 	pdp = dp;
698 	if (isdotdot) {
699 		IUNLOCK(pdp);	/* race to get the inode */
700 		dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
701 		if (dp == NULL)
702 			goto bad2;
703 	} else if (dp->i_number == ndp->ni_dent.d_ino) {
704 		dp->i_count++;	/* we want ourself, ie "." */
705 	} else {
706 		dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
707 		IUNLOCK(pdp);
708 		if (dp == NULL)
709 			goto bad2;
710 	}
711 
712 	/*
713 	 * Insert name into cache if appropriate.
714 	 */
715 	if (makeentry) {
716 		if (ncp != NULL)
717 			panic("namei: duplicating cache");
718 		/*
719 		 * Free the cache slot at head of lru chain.
720 		 */
721 		if (ncp = nchhead) {
722 			/* remove from lru chain */
723 			*ncp->nc_prev = ncp->nc_nxt;
724 			if (ncp->nc_nxt)
725 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
726 			else
727 				nchtail = ncp->nc_prev;
728 			remque(ncp);		/* remove from old hash chain */
729 			/* grab the inode we just found */
730 			ncp->nc_ip = dp;
731 			/* fill in cache info */
732 			ncp->nc_ino = pdp->i_number;	/* parents inum */
733 			ncp->nc_dev = pdp->i_dev;	/* & device */
734 			ncp->nc_idev = dp->i_dev;	/* our device */
735 			ncp->nc_id = dp->i_id;		/* identifier */
736 			ncp->nc_nlen = ndp->ni_dent.d_namlen;
737 			bcopy(ndp->ni_dent.d_name, ncp->nc_name,
738 			    (unsigned)ncp->nc_nlen);
739 			/* link at end of lru chain */
740 			ncp->nc_nxt = NULL;
741 			ncp->nc_prev = nchtail;
742 			*nchtail = ncp;
743 			nchtail = &ncp->nc_nxt;
744 			/* and insert on hash chain */
745 			insque(ncp, nhp);
746 		}
747 	}
748 
749 haveino:
750 	fs = dp->i_fs;
751 
752 	/*
753 	 * Check for symbolic link
754 	 */
755 	if ((dp->i_mode & IFMT) == IFLNK &&
756 	    ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) {
757 		u_int pathlen = strlen(cp) + 1;
758 
759 		if (dp->i_size + pathlen >= MAXPATHLEN - 1) {
760 			u.u_error = ENAMETOOLONG;
761 			goto bad2;
762 		}
763 		if (++nlink > MAXSYMLINKS) {
764 			u.u_error = ELOOP;
765 			goto bad2;
766 		}
767 		ovbcopy(cp, nbp + dp->i_size, pathlen);
768 		u.u_error =
769 		    rdwri(UIO_READ, dp, nbp, (int)dp->i_size,
770 			(off_t)0, 1, (int *)0);
771 		if (u.u_error)
772 			goto bad2;
773 		cp = nbp;
774 		iput(dp);
775 		if (*cp == '/') {
776 			irele(pdp);
777 			while (*cp == '/')
778 				cp++;
779 			if ((dp = u.u_rdir) == NULL)
780 				dp = rootdir;
781 			ILOCK(dp);
782 			dp->i_count++;
783 		} else {
784 			dp = pdp;
785 			ILOCK(dp);
786 		}
787 		fs = dp->i_fs;
788 		goto dirloop;
789 	}
790 
791 	/*
792 	 * Not a symbolic link.  If more pathname,
793 	 * continue at next component, else return.
794 	 */
795 	if (*cp == '/') {
796 		while (*cp == '/')
797 			cp++;
798 		irele(pdp);
799 		goto dirloop;
800 	}
801 	FREE(nbp, M_NAMEI);
802 	if (lockparent)
803 		ndp->ni_pdir = pdp;
804 	else
805 		irele(pdp);
806 	return (dp);
807 bad2:
808 	irele(pdp);
809 bad:
810 	if (bp)
811 		brelse(bp);
812 	if (dp)
813 		iput(dp);
814 	FREE(nbp, M_NAMEI);
815 	return (NULL);
816 }
817 
818 
819 dirbad(ip, offset, how)
820 	struct inode *ip;
821 	off_t offset;
822 	char *how;
823 {
824 
825 	printf("%s: bad dir ino %d at offset %d: %s\n",
826 	    ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
827 }
828 
829 /*
830  * Do consistency checking on a directory entry:
831  *	record length must be multiple of 4
832  *	entry must fit in rest of its DIRBLKSIZ block
833  *	record must be large enough to contain entry
834  *	name is not longer than MAXNAMLEN
835  *	name must be as long as advertised, and null terminated
836  */
837 dirbadentry(ep, entryoffsetinblock)
838 	register struct direct *ep;
839 	int entryoffsetinblock;
840 {
841 	register int i;
842 
843 	if ((ep->d_reclen & 0x3) != 0 ||
844 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
845 	    ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
846 		return (1);
847 	for (i = 0; i < ep->d_namlen; i++)
848 		if (ep->d_name[i] == '\0')
849 			return (1);
850 	return (ep->d_name[i]);
851 }
852 
853 /*
854  * Write a directory entry after a call to namei, using the parameters
855  * which it left in the u. area.  The argument ip is the inode which
856  * the new directory entry will refer to.  The u. area field ndp->ni_pdir is
857  * a pointer to the directory to be written, which was left locked by
858  * namei.  Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
859  * how the space for the new entry is to be gotten.
860  */
861 direnter(ip, ndp)
862 	struct inode *ip;
863 	register struct nameidata *ndp;
864 {
865 	register struct direct *ep, *nep;
866 	register struct inode *dp = ndp->ni_pdir;
867 	struct buf *bp;
868 	int loc, spacefree, error = 0;
869 	u_int dsize;
870 	int newentrysize;
871 	char *dirbuf;
872 
873 	ndp->ni_dent.d_ino = ip->i_number;
874 	newentrysize = DIRSIZ(&ndp->ni_dent);
875 	if (ndp->ni_count == 0) {
876 		/*
877 		 * If ndp->ni_count is 0, then namei could find no space in the
878 		 * directory. In this case ndp->ni_offset will be on a directory
879 		 * block boundary and we will write the new entry into a fresh
880 		 * block.
881 		 */
882 		if (ndp->ni_offset&(DIRBLKSIZ-1))
883 			panic("wdir: newblk");
884 		ndp->ni_dent.d_reclen = DIRBLKSIZ;
885 		error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
886 		    newentrysize, ndp->ni_offset, 1, (int *)0);
887 		if (DIRBLKSIZ > dp->i_fs->fs_fsize)
888 			panic("wdir: blksize"); /* XXX - should grow w/bmap() */
889 		else
890 			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
891 		iput(dp);
892 		return (error);
893 	}
894 
895 	/*
896 	 * If ndp->ni_count is non-zero, then namei found space for the new
897 	 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
898 	 * in the directory.  To use this space, we may have to compact
899 	 * the entries located there, by copying them together towards
900 	 * the beginning of the block, leaving the free space in
901 	 * one usable chunk at the end.
902 	 */
903 
904 	/*
905 	 * Increase size of directory if entry eats into new space.
906 	 * This should never push the size past a new multiple of
907 	 * DIRBLKSIZE.
908 	 *
909 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
910 	 */
911 	if (ndp->ni_offset + ndp->ni_count > dp->i_size)
912 		dp->i_size = ndp->ni_offset + ndp->ni_count;
913 	/*
914 	 * Get the block containing the space for the new directory
915 	 * entry.  Should return error by result instead of u.u_error.
916 	 */
917 	bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf);
918 	if (bp == 0) {
919 		iput(dp);
920 		return (u.u_error);
921 	}
922 	/*
923 	 * Find space for the new entry.  In the simple case, the
924 	 * entry at offset base will have the space.  If it does
925 	 * not, then namei arranged that compacting the region
926 	 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
927 	 */
928 	ep = (struct direct *)dirbuf;
929 	dsize = DIRSIZ(ep);
930 	spacefree = ep->d_reclen - dsize;
931 	for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
932 		nep = (struct direct *)(dirbuf + loc);
933 		if (ep->d_ino) {
934 			/* trim the existing slot */
935 			ep->d_reclen = dsize;
936 			ep = (struct direct *)((char *)ep + dsize);
937 		} else {
938 			/* overwrite; nothing there; header is ours */
939 			spacefree += dsize;
940 		}
941 		dsize = DIRSIZ(nep);
942 		spacefree += nep->d_reclen - dsize;
943 		loc += nep->d_reclen;
944 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
945 	}
946 	/*
947 	 * Update the pointer fields in the previous entry (if any),
948 	 * copy in the new entry, and write out the block.
949 	 */
950 	if (ep->d_ino == 0) {
951 		if (spacefree + dsize < newentrysize)
952 			panic("wdir: compact1");
953 		ndp->ni_dent.d_reclen = spacefree + dsize;
954 	} else {
955 		if (spacefree < newentrysize)
956 			panic("wdir: compact2");
957 		ndp->ni_dent.d_reclen = spacefree;
958 		ep->d_reclen = dsize;
959 		ep = (struct direct *)((char *)ep + dsize);
960 	}
961 	bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
962 	bwrite(bp);
963 	dp->i_flag |= IUPD|ICHG;
964 	if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
965 		itrunc(dp, (u_long)ndp->ni_endoff);
966 	iput(dp);
967 	return (error);
968 }
969 
970 /*
971  * Remove a directory entry after a call to namei, using the
972  * parameters which it left in the u. area.  The u. entry
973  * ni_offset contains the offset into the directory of the
974  * entry to be eliminated.  The ni_count field contains the
975  * size of the previous record in the directory.  If this
976  * is 0, the first entry is being deleted, so we need only
977  * zero the inode number to mark the entry as free.  If the
978  * entry isn't the first in the directory, we must reclaim
979  * the space of the now empty record by adding the record size
980  * to the size of the previous entry.
981  */
982 dirremove(ndp)
983 	register struct nameidata *ndp;
984 {
985 	register struct inode *dp = ndp->ni_pdir;
986 	register struct buf *bp;
987 	struct direct *ep;
988 
989 	if (ndp->ni_count == 0) {
990 		/*
991 		 * First entry in block: set d_ino to zero.
992 		 */
993 		ndp->ni_dent.d_ino = 0;
994 		(void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
995 		    (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
996 	} else {
997 		/*
998 		 * Collapse new free space into previous entry.
999 		 */
1000 		bp = blkatoff(dp, ndp->ni_offset - ndp->ni_count, (char **)&ep);
1001 		if (bp == 0)
1002 			return (0);
1003 		ep->d_reclen += ndp->ni_dent.d_reclen;
1004 		bwrite(bp);
1005 		dp->i_flag |= IUPD|ICHG;
1006 	}
1007 	return (1);
1008 }
1009 
1010 /*
1011  * Rewrite an existing directory entry to point at the inode
1012  * supplied.  The parameters describing the directory entry are
1013  * set up by a call to namei.
1014  */
1015 dirrewrite(dp, ip, ndp)
1016 	struct inode *dp, *ip;
1017 	struct nameidata *ndp;
1018 {
1019 
1020 	ndp->ni_dent.d_ino = ip->i_number;
1021 	u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
1022 		(int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1023 	iput(dp);
1024 }
1025 
1026 /*
1027  * Return buffer with contents of block "offset"
1028  * from the beginning of directory "ip".  If "res"
1029  * is non-zero, fill it in with a pointer to the
1030  * remaining space in the directory.
1031  */
1032 struct buf *
1033 blkatoff(ip, offset, res)
1034 	struct inode *ip;
1035 	off_t offset;
1036 	char **res;
1037 {
1038 	register struct fs *fs = ip->i_fs;
1039 	daddr_t lbn = lblkno(fs, offset);
1040 	int bsize = blksize(fs, ip, lbn);
1041 	register struct buf *bp;
1042 	daddr_t bn;
1043 
1044 	bn = bmap(ip, lbn, B_READ, bsize);
1045 	if (u.u_error)
1046 		return (0);
1047 	if (bn == (daddr_t)-1) {
1048 		dirbad(ip, offset, "hole in dir");
1049 		return (0);
1050 	}
1051 	bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize);
1052 	if (bp->b_flags & B_ERROR) {
1053 		brelse(bp);
1054 		return (0);
1055 	}
1056 	if (res)
1057 		*res = bp->b_un.b_addr + blkoff(fs, offset);
1058 	return (bp);
1059 }
1060 
1061 /*
1062  * Check if a directory is empty or not.
1063  * Inode supplied must be locked.
1064  *
1065  * Using a struct dirtemplate here is not precisely
1066  * what we want, but better than using a struct direct.
1067  *
1068  * NB: does not handle corrupted directories.
1069  */
1070 dirempty(ip, parentino)
1071 	register struct inode *ip;
1072 	ino_t parentino;
1073 {
1074 	register off_t off;
1075 	struct dirtemplate dbuf;
1076 	register struct direct *dp = (struct direct *)&dbuf;
1077 	int error, count;
1078 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1079 
1080 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
1081 		error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ,
1082 		    off, 1, &count);
1083 		/*
1084 		 * Since we read MINDIRSIZ, residual must
1085 		 * be 0 unless we're at end of file.
1086 		 */
1087 		if (error || count != 0)
1088 			return (0);
1089 		/* avoid infinite loops */
1090 		if (dp->d_reclen == 0)
1091 			return (0);
1092 		/* skip empty entries */
1093 		if (dp->d_ino == 0)
1094 			continue;
1095 		/* accept only "." and ".." */
1096 		if (dp->d_namlen > 2)
1097 			return (0);
1098 		if (dp->d_name[0] != '.')
1099 			return (0);
1100 		/*
1101 		 * At this point d_namlen must be 1 or 2.
1102 		 * 1 implies ".", 2 implies ".." if second
1103 		 * char is also "."
1104 		 */
1105 		if (dp->d_namlen == 1)
1106 			continue;
1107 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
1108 			continue;
1109 		return (0);
1110 	}
1111 	return (1);
1112 }
1113 
1114 /*
1115  * Check if source directory is in the path of the target directory.
1116  * Target is supplied locked, source is unlocked.
1117  * The target is always iput() before returning.
1118  */
1119 checkpath(source, target)
1120 	struct inode *source, *target;
1121 {
1122 	struct dirtemplate dirbuf;
1123 	register struct inode *ip;
1124 	int error = 0;
1125 
1126 	ip = target;
1127 	if (ip->i_number == source->i_number) {
1128 		error = EEXIST;
1129 		goto out;
1130 	}
1131 	if (ip->i_number == ROOTINO)
1132 		goto out;
1133 
1134 	for (;;) {
1135 		if ((ip->i_mode&IFMT) != IFDIR) {
1136 			error = ENOTDIR;
1137 			break;
1138 		}
1139 		error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf,
1140 			sizeof (struct dirtemplate), (off_t)0, 1, (int *)0);
1141 		if (error != 0)
1142 			break;
1143 		if (dirbuf.dotdot_namlen != 2 ||
1144 		    dirbuf.dotdot_name[0] != '.' ||
1145 		    dirbuf.dotdot_name[1] != '.') {
1146 			error = ENOTDIR;
1147 			break;
1148 		}
1149 		if (dirbuf.dotdot_ino == source->i_number) {
1150 			error = EINVAL;
1151 			break;
1152 		}
1153 		if (dirbuf.dotdot_ino == ROOTINO)
1154 			break;
1155 		iput(ip);
1156 		ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino);
1157 		if (ip == NULL) {
1158 			error = u.u_error;
1159 			break;
1160 		}
1161 	}
1162 
1163 out:
1164 	if (error == ENOTDIR)
1165 		printf("checkpath: .. not a directory\n");
1166 	if (ip != NULL)
1167 		iput(ip);
1168 	return (error);
1169 }
1170 
1171 /*
1172  * Name cache initialization, from main() when we are booting
1173  */
1174 nchinit()
1175 {
1176 	register union nchash *nchp;
1177 	register struct namecache *ncp;
1178 
1179 	nchhead = 0;
1180 	nchtail = &nchhead;
1181 	for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) {
1182 		ncp->nc_forw = ncp;			/* hash chain */
1183 		ncp->nc_back = ncp;
1184 		ncp->nc_nxt = NULL;			/* lru chain */
1185 		*nchtail = ncp;
1186 		ncp->nc_prev = nchtail;
1187 		nchtail = &ncp->nc_nxt;
1188 		/* all else is zero already */
1189 	}
1190 	for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) {
1191 		nchp->nch_head[0] = nchp;
1192 		nchp->nch_head[1] = nchp;
1193 	}
1194 }
1195 
1196 /*
1197  * Cache flush, called when filesys is umounted to
1198  * remove entries that would now be invalid
1199  *
1200  * The line "nxtcp = nchhead" near the end is to avoid potential problems
1201  * if the cache lru chain is modified while we are dumping the
1202  * inode.  This makes the algorithm O(n^2), but do you think I care?
1203  */
1204 nchinval(dev)
1205 	register dev_t dev;
1206 {
1207 	register struct namecache *ncp, *nxtcp;
1208 
1209 	for (ncp = nchhead; ncp; ncp = nxtcp) {
1210 		nxtcp = ncp->nc_nxt;
1211 		if (ncp->nc_ip == NULL ||
1212 		    (ncp->nc_idev != dev && ncp->nc_dev != dev))
1213 			continue;
1214 		/* free the resources we had */
1215 		ncp->nc_idev = NODEV;
1216 		ncp->nc_dev = NODEV;
1217 		ncp->nc_id = NULL;
1218 		ncp->nc_ino = 0;
1219 		ncp->nc_ip = NULL;
1220 		remque(ncp);		/* remove entry from its hash chain */
1221 		ncp->nc_forw = ncp;	/* and make a dummy one */
1222 		ncp->nc_back = ncp;
1223 		/* delete this entry from LRU chain */
1224 		*ncp->nc_prev = nxtcp;
1225 		if (nxtcp)
1226 			nxtcp->nc_prev = ncp->nc_prev;
1227 		else
1228 			nchtail = ncp->nc_prev;
1229 		/* cause rescan of list, it may have altered */
1230 		nxtcp = nchhead;
1231 		/* put the now-free entry at head of LRU */
1232 		ncp->nc_nxt = nxtcp;
1233 		ncp->nc_prev = &nchhead;
1234 		nxtcp->nc_prev = &ncp->nc_nxt;
1235 		nchhead = ncp;
1236 	}
1237 }
1238 
1239 /*
1240  * Name cache invalidation of all entries.
1241  */
1242 cacheinvalall()
1243 {
1244 	register struct namecache *ncp;
1245 
1246 	for (ncp = namecache; ncp < &namecache[nchsize]; ncp++)
1247 		ncp->nc_id = 0;
1248 }
1249