xref: /original-bsd/sys/ufs/ffs/ufs_lookup.c (revision 3d751ffd)
1 /*
2  * Copyright (c) 1982, 1986 Regents of the University of California.
3  * All rights reserved.  The Berkeley software License Agreement
4  * specifies the terms and conditions for redistribution.
5  *
6  *	@(#)ufs_lookup.c	7.5 (Berkeley) 01/12/89
7  */
8 
9 #include "param.h"
10 #include "systm.h"
11 #include "inode.h"
12 #include "fs.h"
13 #include "mount.h"
14 #include "dir.h"
15 #include "user.h"
16 #include "buf.h"
17 #include "conf.h"
18 #include "uio.h"
19 #include "kernel.h"
20 #include "malloc.h"
21 
22 struct	buf *blkatoff();
23 int	dirchk = 0;
24 
25 /*
26  * Structures associated with name cacheing.
27  */
28 #define	NCHHASH		32	/* size of hash table */
29 
30 #if	((NCHHASH)&((NCHHASH)-1)) != 0
31 #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH))
32 #else
33 #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1))
34 #endif
35 
36 union nchash {
37 	union	nchash *nch_head[2];
38 	struct	namecache *nch_chain[2];
39 } nchash[NCHHASH];
40 #define	nch_forw	nch_chain[0]
41 #define	nch_back	nch_chain[1]
42 
43 struct	namecache *nchhead, **nchtail;	/* LRU chain pointers */
44 struct	nchstats nchstats;		/* cache effectiveness statistics */
45 
46 /*
47  * Convert a pathname into a pointer to a locked inode.
48  * This is a very central and rather complicated routine.
49  * If the file system is not maintained in a strict tree hierarchy,
50  * this can result in a deadlock situation (see comments in code below).
51  *
52  * The flag argument is LOOKUP, CREATE, or DELETE depending on whether
53  * the name is to be looked up, created, or deleted. When CREATE or
54  * DELETE is specified, information usable in creating or deleteing a
55  * directory entry is also calculated. If flag has LOCKPARENT or'ed
56  * into it and the target of the pathname exists, namei returns both
57  * the target and its parent directory locked. When creating and
58  * LOCKPARENT is specified, the target may not be ".".  When deleting
59  * and LOCKPARENT is specified, the target may be ".", but the caller
60  * must check to insure it does an irele and iput instead of two iputs.
61  *
62  * The FOLLOW flag is set when symbolic links are to be followed
63  * when they occur at the end of the name translation process.
64  * Symbolic links are always followed for all other pathname
65  * components other than the last.
66  *
67  * The segflg defines whether the name is to be copied from user
68  * space or kernel space.
69  *
70  * Name caching works as follows:
71  *
72  * Names found by directory scans are retained in a cache
73  * for future reference.  It is managed LRU, so frequently
74  * used names will hang around.  Cache is indexed by hash value
75  * obtained from (ino,dev,name) where ino & dev refer to the
76  * directory containing name.
77  *
78  * For simplicity (and economy of storage), names longer than
79  * a maximum length of NCHNAMLEN are not cached; they occur
80  * infrequently in any case, and are almost never of interest.
81  *
82  * Upon reaching the last segment of a path, if the reference
83  * is for DELETE, or NOCACHE is set (rewrite), and the
84  * name is located in the cache, it will be dropped.
85  *
86  * Overall outline of namei:
87  *
88  *	copy in name
89  *	get starting directory
90  * dirloop:
91  *	check accessibility of directory
92  * dirloop2:
93  *	copy next component of name to ndp->ni_dent
94  *	handle degenerate case where name is null string
95  *	look for name in cache, if found, then if at end of path
96  *	  and deleting or creating, drop it, else to haveino
97  *	search for name in directory, to found or notfound
98  * notfound:
99  *	if creating, return locked directory, leaving info on avail. slots
100  *	else return error
101  * found:
102  *	if at end of path and deleting, return information to allow delete
103  *	if at end of path and rewriting (CREATE and LOCKPARENT), lock target
104  *	  inode and return info to allow rewrite
105  *	if .. and on mounted filesys, look in mount table for parent
106  *	if not at end, add name to cache; if at end and neither creating
107  *	  nor deleting, add name to cache
108  * haveino:
109  *	if symbolic link, massage name in buffer and continue at dirloop
110  *	if more components of name, do next level at dirloop
111  *	return the answer as locked inode
112  *
113  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode,
114  *	 but unlocked.
115  */
116 struct inode *
117 namei(ndp)
118 	register struct nameidata *ndp;
119 {
120 	register char *cp;		/* pointer into pathname argument */
121 /* these variables refer to things which must be freed or unlocked */
122 	register struct inode *dp = 0;	/* the directory we are searching */
123 	register struct namecache *ncp;	/* cache slot for entry */
124 	register struct fs *fs;		/* file system that directory is in */
125 	register struct buf *bp = 0;	/* a buffer of directory entries */
126 	register struct direct *ep;	/* the current directory entry */
127 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
128 	register caddr_t nbp;		/* buffer storing path name argument */
129 /* these variables hold information about the search for a slot */
130 	enum {NONE, COMPACT, FOUND} slotstatus;
131 	int slotoffset = -1;		/* offset of area with free space */
132 	int slotsize;			/* size of area at slotoffset */
133 	int slotfreespace;		/* amount of space free in slot */
134 	int slotneeded;			/* size of the entry we're seeking */
135 /* */
136 	int numdirpasses;		/* strategy for directory search */
137 	int endsearch;			/* offset to end directory search */
138 	int prevoff;			/* ndp->ni_offset of previous entry */
139 	int nlink = 0;			/* number of symbolic links taken */
140 	struct inode *pdp;		/* saved dp during symlink work */
141 	int error, i;
142 	int lockparent;
143 	int docache;			/* == 0 do not cache last component */
144 	int makeentry;			/* != 0 if name to be added to cache */
145 	unsigned hash;			/* value of name hash for entry */
146 	union nchash *nhp;		/* cache chain head for entry */
147 	int isdotdot;			/* != 0 if current name is ".." */
148 	int flag;			/* op ie, LOOKUP, CREATE, or DELETE */
149 	off_t enduseful;		/* pointer past last used dir slot */
150 
151 	lockparent = ndp->ni_nameiop & LOCKPARENT;
152 	docache = (ndp->ni_nameiop & NOCACHE) ^ NOCACHE;
153 	flag = ndp->ni_nameiop &~ (LOCKPARENT|NOCACHE|FOLLOW);
154 	if (flag == DELETE || lockparent)
155 		docache = 0;
156 	/*
157 	 * Get a buffer for the name to be translated, and copy the
158 	 * name into the buffer.
159 	 */
160 	MALLOC(nbp, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
161 	if (ndp->ni_segflg == UIO_SYSSPACE)
162 		error = copystr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0);
163 	else
164 		error = copyinstr(ndp->ni_dirp, nbp, MAXPATHLEN, (u_int *)0);
165 	if (error) {
166 		u.u_error = error;
167 		goto bad;
168 	}
169 
170 	/*
171 	 * Get starting directory.
172 	 */
173 	cp = nbp;
174 	if (*cp == '/') {
175 		while (*cp == '/')
176 			cp++;
177 		if ((dp = u.u_rdir) == NULL)
178 			dp = rootdir;
179 	} else
180 		dp = u.u_cdir;
181 	fs = dp->i_fs;
182 	ILOCK(dp);
183 	dp->i_count++;
184 	ndp->ni_pdir = (struct inode *)0xc0000000;		/* illegal */
185 	ndp->ni_endoff = 0;
186 
187 	/*
188 	 * We come to dirloop to search a new directory.
189 	 * The directory must be locked so that it can be
190 	 * iput, and fs must be already set to dp->i_fs.
191 	 */
192 dirloop:
193 	/*
194 	 * Check accessiblity of directory.
195 	 */
196 	if ((dp->i_mode&IFMT) != IFDIR) {
197 		u.u_error = ENOTDIR;
198 		goto bad;
199 	}
200 	if (access(dp, IEXEC))
201 		goto bad;
202 
203 dirloop2:
204 	/*
205 	 * Copy next component of name to ndp->ni_dent.
206 	 */
207 	hash = 0;
208 	for (i = 0; *cp != 0 && *cp != '/'; cp++) {
209 		if (i >= MAXNAMLEN) {
210 			u.u_error = ENAMETOOLONG;
211 			goto bad;
212 		}
213 		if (*cp & 0200)
214 			if ((*cp&0377) == ('/'|0200) || flag != DELETE) {
215 				u.u_error = EINVAL;
216 				goto bad;
217 			}
218 		ndp->ni_dent.d_name[i++] = *cp;
219 		hash += (unsigned char)*cp * i;
220 	}
221 	ndp->ni_dent.d_namlen = i;
222 	ndp->ni_dent.d_name[i] = '\0';
223 	isdotdot = (i == 2 &&
224 		ndp->ni_dent.d_name[0] == '.' && ndp->ni_dent.d_name[1] == '.');
225 	makeentry = 1;
226 	if (*cp == '\0' && docache == 0)
227 		makeentry = 0;
228 
229 	/*
230 	 * Check for degenerate name (e.g. / or "")
231 	 * which is a way of talking about a directory,
232 	 * e.g. like "/." or ".".
233 	 */
234 	if (ndp->ni_dent.d_name[0] == '\0') {
235 		if (flag != LOOKUP || lockparent) {
236 			u.u_error = EISDIR;
237 			goto bad;
238 		}
239 		FREE(nbp, M_NAMEI);
240 		return (dp);
241 	}
242 
243 	/*
244 	 * Special handling for ".." allowing chdir out of mounted
245 	 * file system: indirect .. in root inode to reevaluate
246 	 * in directory file system was mounted on.
247 	 */
248 	if (isdotdot) {
249 		for (;;) {
250 			if (dp == u.u_rdir || dp == rootdir) {
251 				ndp->ni_dent.d_ino = dp->i_number;
252 				pdp = dp;
253 				dp->i_count++;
254 				goto haveino;
255 			}
256 			if (dp->i_number != ROOTINO)
257 				break;
258 			for (i = 1; i < NMOUNT; i++) {
259 				if (mount[i].m_fs != NULL &&
260 				    mount[i].m_dev == dp->i_dev) {
261 					iput(dp);
262 					dp = mount[i].m_inodp;
263 					ILOCK(dp);
264 					dp->i_count++;
265 					fs = dp->i_fs;
266 					break;
267 				}
268 			}
269 		}
270 	}
271 
272 	/*
273 	 * We now have a segment name to search for, and a directory to search.
274 	 *
275 	 * Before tediously performing a linear scan of the directory,
276 	 * check the name cache to see if the directory/name pair
277 	 * we are looking for is known already.  We don't do this
278 	 * if the segment name is long, simply so the cache can avoid
279 	 * holding long names (which would either waste space, or
280 	 * add greatly to the complexity).
281 	 */
282 	if (ndp->ni_dent.d_namlen > NCHNAMLEN) {
283 		nchstats.ncs_long++;
284 		makeentry = 0;
285 	} else {
286 		nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)];
287 		for (ncp = nhp->nch_forw; ncp != (struct namecache *)nhp;
288 		    ncp = ncp->nc_forw) {
289 			if (ncp->nc_ino == dp->i_number &&
290 			    ncp->nc_dev == dp->i_dev &&
291 			    ncp->nc_nlen == ndp->ni_dent.d_namlen &&
292 			    !bcmp(ncp->nc_name, ndp->ni_dent.d_name,
293 				(unsigned)ncp->nc_nlen))
294 				break;
295 		}
296 		if (ncp == (struct namecache *)nhp) {
297 			nchstats.ncs_miss++;
298 			ncp = NULL;
299 		} else {
300 			if (ncp->nc_id != ncp->nc_ip->i_id)
301 				nchstats.ncs_falsehits++;
302 			else if (!makeentry)
303 				nchstats.ncs_badhits++;
304 			else {
305 				/*
306 				 * move this slot to end of LRU
307 				 * chain, if not already there
308 				 */
309 				if (ncp->nc_nxt) {
310 					/* remove from LRU chain */
311 					*ncp->nc_prev = ncp->nc_nxt;
312 					ncp->nc_nxt->nc_prev = ncp->nc_prev;
313 
314 					/* and replace at end of it */
315 					ncp->nc_nxt = NULL;
316 					ncp->nc_prev = nchtail;
317 					*nchtail = ncp;
318 					nchtail = &ncp->nc_nxt;
319 				}
320 
321 				/*
322 				 * Get the next inode in the path.
323 				 * See comment above other `IUNLOCK' code for
324 				 * an explaination of the locking protocol.
325 				 */
326 				pdp = dp;
327 				if (!isdotdot || dp != u.u_rdir)
328 					dp = ncp->nc_ip;
329 				if (dp == NULL)
330 					panic("namei: null cache ino");
331 				if (pdp == dp)
332 					dp->i_count++;
333 				else if (isdotdot) {
334 					IUNLOCK(pdp);
335 					igrab(dp);
336 				} else {
337 					igrab(dp);
338 					IUNLOCK(pdp);
339 				}
340 
341 				/*
342 				 * Verify that the inode that we got
343 				 * did not change while we were waiting
344 				 * for it to be locked.
345 				 */
346 				if (ncp->nc_id != ncp->nc_ip->i_id) {
347 					iput(dp);
348 					ILOCK(pdp);
349 					dp = pdp;
350 					nchstats.ncs_falsehits++;
351 				} else {
352 					ndp->ni_dent.d_ino = dp->i_number;
353 					/* ni_dent.d_reclen is garbage ... */
354 					nchstats.ncs_goodhits++;
355 					goto haveino;
356 				}
357 			}
358 
359 			/*
360 			 * Last component and we are renaming or deleting,
361 			 * the cache entry is invalid, or otherwise don't
362 			 * want cache entry to exist.
363 			 */
364 			/* remove from LRU chain */
365 			*ncp->nc_prev = ncp->nc_nxt;
366 			if (ncp->nc_nxt)
367 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
368 			else
369 				nchtail = ncp->nc_prev;
370 			remque(ncp);		/* remove from hash chain */
371 			/* insert at head of LRU list (first to grab) */
372 			ncp->nc_nxt = nchhead;
373 			ncp->nc_prev = &nchhead;
374 			nchhead->nc_prev = &ncp->nc_nxt;
375 			nchhead = ncp;
376 			/* and make a dummy hash chain */
377 			ncp->nc_forw = ncp;
378 			ncp->nc_back = ncp;
379 			ncp = NULL;
380 		}
381 	}
382 
383 	/*
384 	 * Suppress search for slots unless creating
385 	 * file and at end of pathname, in which case
386 	 * we watch for a place to put the new file in
387 	 * case it doesn't already exist.
388 	 */
389 	slotstatus = FOUND;
390 	if (flag == CREATE && *cp == 0) {
391 		slotstatus = NONE;
392 		slotfreespace = 0;
393 		slotneeded = DIRSIZ(&ndp->ni_dent);
394 	}
395 	/*
396 	 * If this is the same directory that this process
397 	 * previously searched, pick up where we last left off.
398 	 * We cache only lookups as these are the most common
399 	 * and have the greatest payoff. Caching CREATE has little
400 	 * benefit as it usually must search the entire directory
401 	 * to determine that the entry does not exist. Caching the
402 	 * location of the last DELETE has not reduced profiling time
403 	 * and hence has been removed in the interest of simplicity.
404 	 */
405 	if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber ||
406 	    dp->i_dev != u.u_ncache.nc_dev) {
407 		ndp->ni_offset = 0;
408 		numdirpasses = 1;
409 	} else {
410 		if (u.u_ncache.nc_prevoffset > dp->i_size)
411 			u.u_ncache.nc_prevoffset = 0;
412 		ndp->ni_offset = u.u_ncache.nc_prevoffset;
413 		entryoffsetinblock = blkoff(fs, ndp->ni_offset);
414 		if (entryoffsetinblock != 0) {
415 			bp = blkatoff(dp, ndp->ni_offset, (char **)0);
416 			if (bp == 0)
417 				goto bad;
418 		}
419 		numdirpasses = 2;
420 		nchstats.ncs_2passes++;
421 	}
422 	endsearch = roundup(dp->i_size, DIRBLKSIZ);
423 	enduseful = 0;
424 
425 searchloop:
426 	while (ndp->ni_offset < endsearch) {
427 		/*
428 		 * If offset is on a block boundary,
429 		 * read the next directory block.
430 		 * Release previous if it exists.
431 		 */
432 		if (blkoff(fs, ndp->ni_offset) == 0) {
433 			if (bp != NULL)
434 				brelse(bp);
435 			bp = blkatoff(dp, ndp->ni_offset, (char **)0);
436 			if (bp == 0)
437 				goto bad;
438 			entryoffsetinblock = 0;
439 		}
440 		/*
441 		 * If still looking for a slot, and at a DIRBLKSIZE
442 		 * boundary, have to start looking for free space again.
443 		 */
444 		if (slotstatus == NONE &&
445 		    (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) {
446 			slotoffset = -1;
447 			slotfreespace = 0;
448 		}
449 		/*
450 		 * Get pointer to next entry.
451 		 * Full validation checks are slow, so we only check
452 		 * enough to insure forward progress through the
453 		 * directory. Complete checks can be run by patching
454 		 * "dirchk" to be true.
455 		 */
456 		ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
457 		if (ep->d_reclen == 0 ||
458 		    dirchk && dirbadentry(ep, entryoffsetinblock)) {
459 			dirbad(dp, ndp->ni_offset, "mangled entry");
460 			i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
461 			ndp->ni_offset += i;
462 			entryoffsetinblock += i;
463 			continue;
464 		}
465 
466 		/*
467 		 * If an appropriate sized slot has not yet been found,
468 		 * check to see if one is available. Also accumulate space
469 		 * in the current block so that we can determine if
470 		 * compaction is viable.
471 		 */
472 		if (slotstatus != FOUND) {
473 			int size = ep->d_reclen;
474 
475 			if (ep->d_ino != 0)
476 				size -= DIRSIZ(ep);
477 			if (size > 0) {
478 				if (size >= slotneeded) {
479 					slotstatus = FOUND;
480 					slotoffset = ndp->ni_offset;
481 					slotsize = ep->d_reclen;
482 				} else if (slotstatus == NONE) {
483 					slotfreespace += size;
484 					if (slotoffset == -1)
485 						slotoffset = ndp->ni_offset;
486 					if (slotfreespace >= slotneeded) {
487 						slotstatus = COMPACT;
488 						slotsize = ndp->ni_offset +
489 						      ep->d_reclen - slotoffset;
490 					}
491 				}
492 			}
493 		}
494 
495 		/*
496 		 * Check for a name match.
497 		 */
498 		if (ep->d_ino) {
499 			if (ep->d_namlen == ndp->ni_dent.d_namlen &&
500 			    !bcmp(ndp->ni_dent.d_name, ep->d_name,
501 				(unsigned)ep->d_namlen))
502 				goto found;
503 		}
504 		prevoff = ndp->ni_offset;
505 		ndp->ni_offset += ep->d_reclen;
506 		entryoffsetinblock += ep->d_reclen;
507 		if (ep->d_ino)
508 			enduseful = ndp->ni_offset;
509 	}
510 /* notfound: */
511 	/*
512 	 * If we started in the middle of the directory and failed
513 	 * to find our target, we must check the beginning as well.
514 	 */
515 	if (numdirpasses == 2) {
516 		numdirpasses--;
517 		ndp->ni_offset = 0;
518 		endsearch = u.u_ncache.nc_prevoffset;
519 		goto searchloop;
520 	}
521 	/*
522 	 * If creating, and at end of pathname and current
523 	 * directory has not been removed, then can consider
524 	 * allowing file to be created.
525 	 */
526 	if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) {
527 		/*
528 		 * Access for write is interpreted as allowing
529 		 * creation of files in the directory.
530 		 */
531 		if (access(dp, IWRITE))
532 			goto bad;
533 		/*
534 		 * Return an indication of where the new directory
535 		 * entry should be put.  If we didn't find a slot,
536 		 * then set ndp->ni_count to 0 indicating that the new
537 		 * slot belongs at the end of the directory. If we found
538 		 * a slot, then the new entry can be put in the range
539 		 * [ndp->ni_offset .. ndp->ni_offset + ndp->ni_count)
540 		 */
541 		if (slotstatus == NONE) {
542 			ndp->ni_offset = roundup(dp->i_size, DIRBLKSIZ);
543 			ndp->ni_count = 0;
544 			enduseful = ndp->ni_offset;
545 		} else {
546 			ndp->ni_offset = slotoffset;
547 			ndp->ni_count = slotsize;
548 			if (enduseful < slotoffset + slotsize)
549 				enduseful = slotoffset + slotsize;
550 		}
551 		ndp->ni_endoff = roundup(enduseful, DIRBLKSIZ);
552 		dp->i_flag |= IUPD|ICHG;
553 		if (bp)
554 			brelse(bp);
555 		FREE(nbp, M_NAMEI);
556 		/*
557 		 * We return with the directory locked, so that
558 		 * the parameters we set up above will still be
559 		 * valid if we actually decide to do a direnter().
560 		 * We return NULL to indicate that the entry doesn't
561 		 * currently exist, leaving a pointer to the (locked)
562 		 * directory inode in ndp->ni_pdir.
563 		 */
564 		ndp->ni_pdir = dp;
565 		return (NULL);
566 	}
567 	u.u_error = ENOENT;
568 	goto bad;
569 found:
570 	if (numdirpasses == 2)
571 		nchstats.ncs_pass2++;
572 	/*
573 	 * Check that directory length properly reflects presence
574 	 * of this entry.
575 	 */
576 	if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
577 		dirbad(dp, ndp->ni_offset, "i_size too small");
578 		dp->i_size = entryoffsetinblock + DIRSIZ(ep);
579 		dp->i_flag |= IUPD|ICHG;
580 	}
581 
582 	/*
583 	 * Found component in pathname.
584 	 * If the final component of path name, save information
585 	 * in the cache as to where the entry was found.
586 	 */
587 	if (*cp == '\0' && flag == LOOKUP) {
588 		u.u_ncache.nc_prevoffset = ndp->ni_offset &~ (DIRBLKSIZ - 1);
589 		u.u_ncache.nc_inumber = dp->i_number;
590 		u.u_ncache.nc_dev = dp->i_dev;
591 	}
592 	/*
593 	 * Save directory entry's inode number and reclen in ndp->ni_dent,
594 	 * and release directory buffer.
595 	 */
596 	ndp->ni_dent.d_ino = ep->d_ino;
597 	ndp->ni_dent.d_reclen = ep->d_reclen;
598 	brelse(bp);
599 	bp = NULL;
600 
601 	/*
602 	 * If deleting, and at end of pathname, return
603 	 * parameters which can be used to remove file.
604 	 * If the lockparent flag isn't set, we return only
605 	 * the directory (in ndp->ni_pdir), otherwise we go
606 	 * on and lock the inode, being careful with ".".
607 	 */
608 	if (flag == DELETE && *cp == 0) {
609 		/*
610 		 * Write access to directory required to delete files.
611 		 */
612 		if (access(dp, IWRITE))
613 			goto bad;
614 		ndp->ni_pdir = dp;		/* for dirremove() */
615 		/*
616 		 * Return pointer to current entry in ndp->ni_offset,
617 		 * and distance past previous entry (if there
618 		 * is a previous entry in this block) in ndp->ni_count.
619 		 * Save directory inode pointer in ndp->ni_pdir for dirremove().
620 		 */
621 		if ((ndp->ni_offset&(DIRBLKSIZ-1)) == 0)
622 			ndp->ni_count = 0;
623 		else
624 			ndp->ni_count = ndp->ni_offset - prevoff;
625 		if (lockparent) {
626 			if (dp->i_number == ndp->ni_dent.d_ino)
627 				dp->i_count++;
628 			else {
629 				dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
630 				if (dp == NULL) {
631 					iput(ndp->ni_pdir);
632 					goto bad;
633 				}
634 				/*
635 				 * If directory is "sticky", then user must own
636 				 * the directory, or the file in it, else he
637 				 * may not delete it (unless he's root). This
638 				 * implements append-only directories.
639 				 */
640 				if ((ndp->ni_pdir->i_mode & ISVTX) &&
641 				    u.u_uid != 0 &&
642 				    u.u_uid != ndp->ni_pdir->i_uid &&
643 				    dp->i_uid != u.u_uid) {
644 					iput(ndp->ni_pdir);
645 					u.u_error = EPERM;
646 					goto bad;
647 				}
648 			}
649 		}
650 		FREE(nbp, M_NAMEI);
651 		return (dp);
652 	}
653 
654 	/*
655 	 * If rewriting (rename), return the inode and the
656 	 * information required to rewrite the present directory
657 	 * Must get inode of directory entry to verify it's a
658 	 * regular file, or empty directory.
659 	 */
660 	if ((flag == CREATE && lockparent) && *cp == 0) {
661 		if (access(dp, IWRITE))
662 			goto bad;
663 		ndp->ni_pdir = dp;		/* for dirrewrite() */
664 		/*
665 		 * Careful about locking second inode.
666 		 * This can only occur if the target is ".".
667 		 */
668 		if (dp->i_number == ndp->ni_dent.d_ino) {
669 			u.u_error = EISDIR;		/* XXX */
670 			goto bad;
671 		}
672 		dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
673 		if (dp == NULL) {
674 			iput(ndp->ni_pdir);
675 			goto bad;
676 		}
677 		FREE(nbp, M_NAMEI);
678 		return (dp);
679 	}
680 
681 	/*
682 	 * Check for symbolic link, which may require us to massage the
683 	 * name before we continue translation.  We do not `iput' the
684 	 * directory because we may need it again if the symbolic link
685 	 * is relative to the current directory.  Instead we save it
686 	 * unlocked as "pdp".  We must get the target inode before unlocking
687 	 * the directory to insure that the inode will not be removed
688 	 * before we get it.  We prevent deadlock by always fetching
689 	 * inodes from the root, moving down the directory tree. Thus
690 	 * when following backward pointers ".." we must unlock the
691 	 * parent directory before getting the requested directory.
692 	 * There is a potential race condition here if both the current
693 	 * and parent directories are removed before the `iget' for the
694 	 * inode associated with ".." returns.  We hope that this occurs
695 	 * infrequently since we cannot avoid this race condition without
696 	 * implementing a sophisticated deadlock detection algorithm.
697 	 * Note also that this simple deadlock detection scheme will not
698 	 * work if the file system has any hard links other than ".."
699 	 * that point backwards in the directory structure.
700 	 */
701 	pdp = dp;
702 	if (isdotdot) {
703 		IUNLOCK(pdp);	/* race to get the inode */
704 		dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
705 		if (dp == NULL)
706 			goto bad2;
707 	} else if (dp->i_number == ndp->ni_dent.d_ino) {
708 		dp->i_count++;	/* we want ourself, ie "." */
709 	} else {
710 		dp = iget(dp->i_dev, fs, ndp->ni_dent.d_ino);
711 		IUNLOCK(pdp);
712 		if (dp == NULL)
713 			goto bad2;
714 	}
715 
716 	/*
717 	 * Insert name into cache if appropriate.
718 	 */
719 	if (makeentry) {
720 		if (ncp != NULL)
721 			panic("namei: duplicating cache");
722 		/*
723 		 * Free the cache slot at head of lru chain.
724 		 */
725 		if (ncp = nchhead) {
726 			/* remove from lru chain */
727 			*ncp->nc_prev = ncp->nc_nxt;
728 			if (ncp->nc_nxt)
729 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
730 			else
731 				nchtail = ncp->nc_prev;
732 			remque(ncp);		/* remove from old hash chain */
733 			/* grab the inode we just found */
734 			ncp->nc_ip = dp;
735 			/* fill in cache info */
736 			ncp->nc_ino = pdp->i_number;	/* parents inum */
737 			ncp->nc_dev = pdp->i_dev;	/* & device */
738 			ncp->nc_idev = dp->i_dev;	/* our device */
739 			ncp->nc_id = dp->i_id;		/* identifier */
740 			ncp->nc_nlen = ndp->ni_dent.d_namlen;
741 			bcopy(ndp->ni_dent.d_name, ncp->nc_name,
742 			    (unsigned)ncp->nc_nlen);
743 			/* link at end of lru chain */
744 			ncp->nc_nxt = NULL;
745 			ncp->nc_prev = nchtail;
746 			*nchtail = ncp;
747 			nchtail = &ncp->nc_nxt;
748 			/* and insert on hash chain */
749 			insque(ncp, nhp);
750 		}
751 	}
752 
753 haveino:
754 	fs = dp->i_fs;
755 
756 	/*
757 	 * Check for symbolic link
758 	 */
759 	if ((dp->i_mode & IFMT) == IFLNK &&
760 	    ((ndp->ni_nameiop & FOLLOW) || *cp == '/')) {
761 		u_int pathlen = strlen(cp) + 1;
762 
763 		if (dp->i_size + pathlen >= MAXPATHLEN - 1) {
764 			u.u_error = ENAMETOOLONG;
765 			goto bad2;
766 		}
767 		if (++nlink > MAXSYMLINKS) {
768 			u.u_error = ELOOP;
769 			goto bad2;
770 		}
771 		ovbcopy(cp, nbp + dp->i_size, pathlen);
772 		u.u_error =
773 		    rdwri(UIO_READ, dp, nbp, (int)dp->i_size,
774 			(off_t)0, 1, (int *)0);
775 		if (u.u_error)
776 			goto bad2;
777 		cp = nbp;
778 		iput(dp);
779 		if (*cp == '/') {
780 			irele(pdp);
781 			while (*cp == '/')
782 				cp++;
783 			if ((dp = u.u_rdir) == NULL)
784 				dp = rootdir;
785 			ILOCK(dp);
786 			dp->i_count++;
787 		} else {
788 			dp = pdp;
789 			ILOCK(dp);
790 		}
791 		fs = dp->i_fs;
792 		goto dirloop;
793 	}
794 
795 	/*
796 	 * Not a symbolic link.  If more pathname,
797 	 * continue at next component, else return.
798 	 */
799 	if (*cp == '/') {
800 		while (*cp == '/')
801 			cp++;
802 		irele(pdp);
803 		goto dirloop;
804 	}
805 	FREE(nbp, M_NAMEI);
806 	if (lockparent)
807 		ndp->ni_pdir = pdp;
808 	else
809 		irele(pdp);
810 	return (dp);
811 bad2:
812 	irele(pdp);
813 bad:
814 	if (bp)
815 		brelse(bp);
816 	if (dp)
817 		iput(dp);
818 	FREE(nbp, M_NAMEI);
819 	return (NULL);
820 }
821 
822 
823 dirbad(ip, offset, how)
824 	struct inode *ip;
825 	off_t offset;
826 	char *how;
827 {
828 
829 	printf("%s: bad dir ino %d at offset %d: %s\n",
830 	    ip->i_fs->fs_fsmnt, ip->i_number, offset, how);
831 }
832 
833 /*
834  * Do consistency checking on a directory entry:
835  *	record length must be multiple of 4
836  *	entry must fit in rest of its DIRBLKSIZ block
837  *	record must be large enough to contain entry
838  *	name is not longer than MAXNAMLEN
839  *	name must be as long as advertised, and null terminated
840  */
841 dirbadentry(ep, entryoffsetinblock)
842 	register struct direct *ep;
843 	int entryoffsetinblock;
844 {
845 	register int i;
846 
847 	if ((ep->d_reclen & 0x3) != 0 ||
848 	    ep->d_reclen > DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1)) ||
849 	    ep->d_reclen < DIRSIZ(ep) || ep->d_namlen > MAXNAMLEN)
850 		return (1);
851 	for (i = 0; i < ep->d_namlen; i++)
852 		if (ep->d_name[i] == '\0')
853 			return (1);
854 	return (ep->d_name[i]);
855 }
856 
857 /*
858  * Write a directory entry after a call to namei, using the parameters
859  * which it left in the u. area.  The argument ip is the inode which
860  * the new directory entry will refer to.  The u. area field ndp->ni_pdir is
861  * a pointer to the directory to be written, which was left locked by
862  * namei.  Remaining parameters (ndp->ni_offset, ndp->ni_count) indicate
863  * how the space for the new entry is to be gotten.
864  */
865 direnter(ip, ndp)
866 	struct inode *ip;
867 	register struct nameidata *ndp;
868 {
869 	register struct direct *ep, *nep;
870 	register struct inode *dp = ndp->ni_pdir;
871 	struct buf *bp;
872 	int loc, spacefree, error = 0;
873 	u_int dsize;
874 	int newentrysize;
875 	char *dirbuf;
876 
877 	ndp->ni_dent.d_ino = ip->i_number;
878 	newentrysize = DIRSIZ(&ndp->ni_dent);
879 	if (ndp->ni_count == 0) {
880 		/*
881 		 * If ndp->ni_count is 0, then namei could find no space in the
882 		 * directory. In this case ndp->ni_offset will be on a directory
883 		 * block boundary and we will write the new entry into a fresh
884 		 * block.
885 		 */
886 		if (ndp->ni_offset&(DIRBLKSIZ-1))
887 			panic("wdir: newblk");
888 		ndp->ni_dent.d_reclen = DIRBLKSIZ;
889 		error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
890 		    newentrysize, ndp->ni_offset, 1, (int *)0);
891 		if (DIRBLKSIZ > dp->i_fs->fs_fsize)
892 			panic("wdir: blksize"); /* XXX - should grow w/bmap() */
893 		else
894 			dp->i_size = roundup(dp->i_size, DIRBLKSIZ);
895 		iput(dp);
896 		return (error);
897 	}
898 
899 	/*
900 	 * If ndp->ni_count is non-zero, then namei found space for the new
901 	 * entry in the range ndp->ni_offset to ndp->ni_offset + ndp->ni_count.
902 	 * in the directory.  To use this space, we may have to compact
903 	 * the entries located there, by copying them together towards
904 	 * the beginning of the block, leaving the free space in
905 	 * one usable chunk at the end.
906 	 */
907 
908 	/*
909 	 * Increase size of directory if entry eats into new space.
910 	 * This should never push the size past a new multiple of
911 	 * DIRBLKSIZE.
912 	 *
913 	 * N.B. - THIS IS AN ARTIFACT OF 4.2 AND SHOULD NEVER HAPPEN.
914 	 */
915 	if (ndp->ni_offset + ndp->ni_count > dp->i_size)
916 		dp->i_size = ndp->ni_offset + ndp->ni_count;
917 	/*
918 	 * Get the block containing the space for the new directory
919 	 * entry.  Should return error by result instead of u.u_error.
920 	 */
921 	bp = blkatoff(dp, ndp->ni_offset, (char **)&dirbuf);
922 	if (bp == 0) {
923 		iput(dp);
924 		return (u.u_error);
925 	}
926 	/*
927 	 * Find space for the new entry.  In the simple case, the
928 	 * entry at offset base will have the space.  If it does
929 	 * not, then namei arranged that compacting the region
930 	 * ndp->ni_offset to ndp->ni_offset+ndp->ni_count would yield the space.
931 	 */
932 	ep = (struct direct *)dirbuf;
933 	dsize = DIRSIZ(ep);
934 	spacefree = ep->d_reclen - dsize;
935 	for (loc = ep->d_reclen; loc < ndp->ni_count; ) {
936 		nep = (struct direct *)(dirbuf + loc);
937 		if (ep->d_ino) {
938 			/* trim the existing slot */
939 			ep->d_reclen = dsize;
940 			ep = (struct direct *)((char *)ep + dsize);
941 		} else {
942 			/* overwrite; nothing there; header is ours */
943 			spacefree += dsize;
944 		}
945 		dsize = DIRSIZ(nep);
946 		spacefree += nep->d_reclen - dsize;
947 		loc += nep->d_reclen;
948 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
949 	}
950 	/*
951 	 * Update the pointer fields in the previous entry (if any),
952 	 * copy in the new entry, and write out the block.
953 	 */
954 	if (ep->d_ino == 0) {
955 		if (spacefree + dsize < newentrysize)
956 			panic("wdir: compact1");
957 		ndp->ni_dent.d_reclen = spacefree + dsize;
958 	} else {
959 		if (spacefree < newentrysize)
960 			panic("wdir: compact2");
961 		ndp->ni_dent.d_reclen = spacefree;
962 		ep->d_reclen = dsize;
963 		ep = (struct direct *)((char *)ep + dsize);
964 	}
965 	bcopy((caddr_t)&ndp->ni_dent, (caddr_t)ep, (u_int)newentrysize);
966 	bwrite(bp);
967 	dp->i_flag |= IUPD|ICHG;
968 	if (ndp->ni_endoff && ndp->ni_endoff < dp->i_size)
969 		itrunc(dp, (u_long)ndp->ni_endoff);
970 	iput(dp);
971 	return (error);
972 }
973 
974 /*
975  * Remove a directory entry after a call to namei, using the
976  * parameters which it left in the u. area.  The u. entry
977  * ni_offset contains the offset into the directory of the
978  * entry to be eliminated.  The ni_count field contains the
979  * size of the previous record in the directory.  If this
980  * is 0, the first entry is being deleted, so we need only
981  * zero the inode number to mark the entry as free.  If the
982  * entry isn't the first in the directory, we must reclaim
983  * the space of the now empty record by adding the record size
984  * to the size of the previous entry.
985  */
986 dirremove(ndp)
987 	register struct nameidata *ndp;
988 {
989 	register struct inode *dp = ndp->ni_pdir;
990 	register struct buf *bp;
991 	struct direct *ep;
992 
993 	if (ndp->ni_count == 0) {
994 		/*
995 		 * First entry in block: set d_ino to zero.
996 		 */
997 		ndp->ni_dent.d_ino = 0;
998 		(void) rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
999 		    (int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1000 	} else {
1001 		/*
1002 		 * Collapse new free space into previous entry.
1003 		 */
1004 		bp = blkatoff(dp, ndp->ni_offset - ndp->ni_count, (char **)&ep);
1005 		if (bp == 0)
1006 			return (0);
1007 		ep->d_reclen += ndp->ni_dent.d_reclen;
1008 		bwrite(bp);
1009 		dp->i_flag |= IUPD|ICHG;
1010 	}
1011 	return (1);
1012 }
1013 
1014 /*
1015  * Rewrite an existing directory entry to point at the inode
1016  * supplied.  The parameters describing the directory entry are
1017  * set up by a call to namei.
1018  */
1019 dirrewrite(dp, ip, ndp)
1020 	struct inode *dp, *ip;
1021 	struct nameidata *ndp;
1022 {
1023 
1024 	ndp->ni_dent.d_ino = ip->i_number;
1025 	u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&ndp->ni_dent,
1026 		(int)DIRSIZ(&ndp->ni_dent), ndp->ni_offset, 1, (int *)0);
1027 	iput(dp);
1028 }
1029 
1030 /*
1031  * Return buffer with contents of block "offset"
1032  * from the beginning of directory "ip".  If "res"
1033  * is non-zero, fill it in with a pointer to the
1034  * remaining space in the directory.
1035  */
1036 struct buf *
1037 blkatoff(ip, offset, res)
1038 	struct inode *ip;
1039 	off_t offset;
1040 	char **res;
1041 {
1042 	register struct fs *fs = ip->i_fs;
1043 	daddr_t lbn = lblkno(fs, offset);
1044 	int bsize = blksize(fs, ip, lbn);
1045 	register struct buf *bp;
1046 	daddr_t bn;
1047 
1048 	bn = bmap(ip, lbn, B_READ, bsize);
1049 	if (u.u_error)
1050 		return (0);
1051 	if (bn == (daddr_t)-1) {
1052 		dirbad(ip, offset, "hole in dir");
1053 		return (0);
1054 	}
1055 	bp = bread(ip->i_dev, fsbtodb(fs, bn), bsize);
1056 	if (bp->b_flags & B_ERROR) {
1057 		brelse(bp);
1058 		return (0);
1059 	}
1060 	if (res)
1061 		*res = bp->b_un.b_addr + blkoff(fs, offset);
1062 	return (bp);
1063 }
1064 
1065 /*
1066  * Check if a directory is empty or not.
1067  * Inode supplied must be locked.
1068  *
1069  * Using a struct dirtemplate here is not precisely
1070  * what we want, but better than using a struct direct.
1071  *
1072  * NB: does not handle corrupted directories.
1073  */
1074 dirempty(ip, parentino)
1075 	register struct inode *ip;
1076 	ino_t parentino;
1077 {
1078 	register off_t off;
1079 	struct dirtemplate dbuf;
1080 	register struct direct *dp = (struct direct *)&dbuf;
1081 	int error, count;
1082 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1083 
1084 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
1085 		error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ,
1086 		    off, 1, &count);
1087 		/*
1088 		 * Since we read MINDIRSIZ, residual must
1089 		 * be 0 unless we're at end of file.
1090 		 */
1091 		if (error || count != 0)
1092 			return (0);
1093 		/* avoid infinite loops */
1094 		if (dp->d_reclen == 0)
1095 			return (0);
1096 		/* skip empty entries */
1097 		if (dp->d_ino == 0)
1098 			continue;
1099 		/* accept only "." and ".." */
1100 		if (dp->d_namlen > 2)
1101 			return (0);
1102 		if (dp->d_name[0] != '.')
1103 			return (0);
1104 		/*
1105 		 * At this point d_namlen must be 1 or 2.
1106 		 * 1 implies ".", 2 implies ".." if second
1107 		 * char is also "."
1108 		 */
1109 		if (dp->d_namlen == 1)
1110 			continue;
1111 		if (dp->d_name[1] == '.' && dp->d_ino == parentino)
1112 			continue;
1113 		return (0);
1114 	}
1115 	return (1);
1116 }
1117 
1118 /*
1119  * Check if source directory is in the path of the target directory.
1120  * Target is supplied locked, source is unlocked.
1121  * The target is always iput() before returning.
1122  */
1123 checkpath(source, target)
1124 	struct inode *source, *target;
1125 {
1126 	struct dirtemplate dirbuf;
1127 	register struct inode *ip;
1128 	int error = 0;
1129 
1130 	ip = target;
1131 	if (ip->i_number == source->i_number) {
1132 		error = EEXIST;
1133 		goto out;
1134 	}
1135 	if (ip->i_number == ROOTINO)
1136 		goto out;
1137 
1138 	for (;;) {
1139 		if ((ip->i_mode&IFMT) != IFDIR) {
1140 			error = ENOTDIR;
1141 			break;
1142 		}
1143 		error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf,
1144 			sizeof (struct dirtemplate), (off_t)0, 1, (int *)0);
1145 		if (error != 0)
1146 			break;
1147 		if (dirbuf.dotdot_namlen != 2 ||
1148 		    dirbuf.dotdot_name[0] != '.' ||
1149 		    dirbuf.dotdot_name[1] != '.') {
1150 			error = ENOTDIR;
1151 			break;
1152 		}
1153 		if (dirbuf.dotdot_ino == source->i_number) {
1154 			error = EINVAL;
1155 			break;
1156 		}
1157 		if (dirbuf.dotdot_ino == ROOTINO)
1158 			break;
1159 		iput(ip);
1160 		ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino);
1161 		if (ip == NULL) {
1162 			error = u.u_error;
1163 			break;
1164 		}
1165 	}
1166 
1167 out:
1168 	if (error == ENOTDIR)
1169 		printf("checkpath: .. not a directory\n");
1170 	if (ip != NULL)
1171 		iput(ip);
1172 	return (error);
1173 }
1174 
1175 /*
1176  * Name cache initialization, from main() when we are booting
1177  */
1178 nchinit()
1179 {
1180 	register union nchash *nchp;
1181 	register struct namecache *ncp;
1182 
1183 	nchhead = 0;
1184 	nchtail = &nchhead;
1185 	for (ncp = namecache; ncp < &namecache[nchsize]; ncp++) {
1186 		ncp->nc_forw = ncp;			/* hash chain */
1187 		ncp->nc_back = ncp;
1188 		ncp->nc_nxt = NULL;			/* lru chain */
1189 		*nchtail = ncp;
1190 		ncp->nc_prev = nchtail;
1191 		nchtail = &ncp->nc_nxt;
1192 		/* all else is zero already */
1193 	}
1194 	for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) {
1195 		nchp->nch_head[0] = nchp;
1196 		nchp->nch_head[1] = nchp;
1197 	}
1198 }
1199 
1200 /*
1201  * Cache flush, called when filesys is umounted to
1202  * remove entries that would now be invalid
1203  *
1204  * The line "nxtcp = nchhead" near the end is to avoid potential problems
1205  * if the cache lru chain is modified while we are dumping the
1206  * inode.  This makes the algorithm O(n^2), but do you think I care?
1207  */
1208 nchinval(dev)
1209 	register dev_t dev;
1210 {
1211 	register struct namecache *ncp, *nxtcp;
1212 
1213 	for (ncp = nchhead; ncp; ncp = nxtcp) {
1214 		nxtcp = ncp->nc_nxt;
1215 		if (ncp->nc_ip == NULL ||
1216 		    (ncp->nc_idev != dev && ncp->nc_dev != dev))
1217 			continue;
1218 		/* free the resources we had */
1219 		ncp->nc_idev = NODEV;
1220 		ncp->nc_dev = NODEV;
1221 		ncp->nc_id = NULL;
1222 		ncp->nc_ino = 0;
1223 		ncp->nc_ip = NULL;
1224 		remque(ncp);		/* remove entry from its hash chain */
1225 		ncp->nc_forw = ncp;	/* and make a dummy one */
1226 		ncp->nc_back = ncp;
1227 		/* delete this entry from LRU chain */
1228 		*ncp->nc_prev = nxtcp;
1229 		if (nxtcp)
1230 			nxtcp->nc_prev = ncp->nc_prev;
1231 		else
1232 			nchtail = ncp->nc_prev;
1233 		/* cause rescan of list, it may have altered */
1234 		nxtcp = nchhead;
1235 		/* put the now-free entry at head of LRU */
1236 		ncp->nc_nxt = nxtcp;
1237 		ncp->nc_prev = &nchhead;
1238 		nxtcp->nc_prev = &ncp->nc_nxt;
1239 		nchhead = ncp;
1240 	}
1241 }
1242 
1243 /*
1244  * Name cache invalidation of all entries.
1245  */
1246 cacheinvalall()
1247 {
1248 	register struct namecache *ncp;
1249 
1250 	for (ncp = namecache; ncp < &namecache[nchsize]; ncp++)
1251 		ncp->nc_id = 0;
1252 }
1253