xref: /original-bsd/sys/ufs/ufs/ufs_lookup.c (revision 81a135f6)
1 /*	ufs_lookup.c	6.6	84/02/15	*/
2 
3 #include "../h/param.h"
4 #include "../h/systm.h"
5 #include "../h/inode.h"
6 #include "../h/fs.h"
7 #include "../h/mount.h"
8 #include "../h/dir.h"
9 #include "../h/user.h"
10 #include "../h/buf.h"
11 #include "../h/conf.h"
12 #include "../h/uio.h"
13 #include "../h/nami.h"
14 #include "../h/kernel.h"
15 
16 struct	buf *blkatoff();
17 int	dirchk = 0;
18 
19 /*
20  * Structures associated with name cacheing.
21  */
22 #define	NCHHASH		32	/* size of hash table */
23 
24 #if	((NCHHASH)&((NCHHASH)-1)) != 0
25 #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) % (NCHHASH))
26 #else
27 #define	NHASH(h, i, d)	((unsigned)((h) + (i) + 13 * (int)(d)) & ((NCHHASH)-1))
28 #endif
29 
30 union	nchash	{
31 	union	nchash	*nch_head[2];
32 	struct	nch	*nch_chain[2];
33 } nchash[NCHHASH];
34 #define	nch_forw	nch_chain[0]
35 #define	nch_back	nch_chain[1]
36 
37 struct	nch	*nchhead, **nchtail;	/* LRU chain pointers */
38 struct	nchstats nchstats;		/* cache effectiveness statistics */
39 
40 /*
41  * Convert a pathname into a pointer to a locked inode,
42  * with side effects usable in creating and removing files.
43  * This is a very central and rather complicated routine.
44  *
45  * The func argument gives the routine which returns successive
46  * characters of the name to be translated.
47  *
48  * The flag argument is (LOOKUP, CREATE, DELETE) depending on whether
49  * the name is to be (looked up, created, deleted).  If flag has
50  * LOCKPARENT or'ed into it and the target of the pathname exists,
51  * namei returns both the target and its parent directory locked.
52  * If the file system is not maintained in a strict tree hierarchy,
53  * this can result in a deadlock situation.  When creating and
54  * LOCKPARENT is specified, the target may not be ".".  When deleting
55  * and LOCKPARENT is specified, the target may be ".", but the caller
56  * must check to insure it does an irele and iput instead of two iputs.
57  *
58  * The follow argument is 1 when symbolic links are to be followed
59  * when they occur at the end of the name translation process.
60  *
61  * Name caching works as follows:
62  *
63  *	names found by directory scans are retained in a cache
64  *	for future reference.  It is managed LRU, so frequently
65  *	used names will hang around.  Cache is indexed by hash value
66  *	obtained from (ino,dev,name) where ino & dev refer to the
67  *	directory containing name.
68  *
69  *	For simplicity (and economy of storage), names longer than
70  *	some (small) maximum length are not cached, they occur
71  *	infrequently in any case, and are almost never of interest.
72  *
73  *	Upon reaching the last segment of a path, if the reference
74  *	is for DELETE, or NOCACHE is set (rewrite), and the
75  *	name is located in the cache, it will be dropped.
76  *
77  *	We must be sure never to enter the name ".." into the cache
78  *	because of the extremely kludgey way that rename() alters
79  *	".." in a situation like
80  *		mv a/x b/x
81  *	where x is a directory, and x/.. is the ".." in question.
82  *
83  * Overall outline of namei:
84  *
85  *	copy in name
86  *	get starting directory
87  * dirloop:
88  *	check accessibility of directory
89  * dirloop2:
90  *	copy next component of name to u.u_dent
91  *	handle degenerate case where name is null string
92  *	look for name in cache, if found, then if at end of path
93  *	  and deleting or creating, drop it, else to haveino
94  *	search for name in directory, to found or notfound
95  * notfound:
96  *	if creating, return locked directory, leaving info on avail. slots
97  *	else return error
98  * found:
99  *	if at end of path and deleting, return information to allow delete
100  *	if at end of path and rewriting (create and LOCKPARENT), lock target
101  *	  inode and return info to allow rewrite
102  *	if .. and on mounted filesys, look in mount table for parent
103  *	if not at end, if neither creating nor deleting, add name to cache
104  * haveino:
105  *	if symbolic link, massage name in buffer and continue at dirloop
106  *	if more components of name, do next level at dirloop
107  *	return the answer as locked inode
108  *
109  * NOTE: (LOOKUP | LOCKPARENT) currently returns the parent inode,
110  *	 but unlocked.
111  */
112 struct inode *
113 namei(func, flag, follow)
114 	int (*func)(), flag, follow;
115 {
116 	register char *cp;		/* pointer into pathname argument */
117 /* these variables refer to things which must be freed or unlocked */
118 	register struct inode *dp = 0;	/* the directory we are searching */
119 	register struct nch *ncp;	/* cache slot for entry */
120 	register struct fs *fs;		/* file system that directory is in */
121 	register struct buf *bp = 0;	/* a buffer of directory entries */
122 	register struct direct *ep;	/* the current directory entry */
123 	int entryoffsetinblock;		/* offset of ep in bp's buffer */
124 	register struct buf *nbp;	/* buffer storing path name argument */
125 /* these variables hold information about the search for a slot */
126 	enum {NONE, COMPACT, FOUND} slotstatus;
127 	int slotoffset = -1;		/* offset of area with free space */
128 	int slotsize;			/* size of area at slotoffset */
129 	int slotfreespace;		/* amount of space free in slot */
130 	int slotneeded;			/* size of the entry we're seeking */
131 /* */
132 	int numdirpasses;		/* strategy for directory search */
133 	int endsearch;			/* offset to end directory search */
134 	int prevoff;			/* u.u_offset of previous entry */
135 	int nlink = 0;			/* number of symbolic links taken */
136 	struct inode *pdp;		/* saved dp during symlink work */
137 	int i;
138 	int lockparent;
139 	int docache;
140 	unsigned hash;			/* value of name hash for entry */
141 	union nchash *nhp;		/* cache chain head for entry */
142 	int isdotdot;			/* != 0 if current name is ".." */
143 
144 	lockparent = flag & LOCKPARENT;
145 	docache = (flag & NOCACHE) ^ NOCACHE;
146 	flag &= ~(LOCKPARENT|NOCACHE);
147 	if (flag == DELETE)
148 		docache = 0;
149 	/*
150 	 * Get a buffer for the name to be translated, and copy the
151 	 * name into the buffer.
152 	 */
153 	nbp = geteblk(MAXPATHLEN);
154 	for (cp = nbp->b_un.b_addr; *cp = (*func)(); ) {
155 		if ((*cp&0377) == ('/'|0200) || (*cp&0200) && flag != LOOKUP) {
156 			u.u_error = EPERM;
157 			goto bad;
158 		}
159 		cp++;
160 		if (cp >= nbp->b_un.b_addr + MAXPATHLEN) {
161 			u.u_error = ENOENT;
162 			goto bad;
163 		}
164 	}
165 	if (u.u_error)
166 		goto bad;
167 
168 	/*
169 	 * Get starting directory.
170 	 */
171 	cp = nbp->b_un.b_addr;
172 	if (*cp == '/') {
173 		while (*cp == '/')
174 			cp++;
175 		if ((dp = u.u_rdir) == NULL)
176 			dp = rootdir;
177 	} else
178 		dp = u.u_cdir;
179 	fs = dp->i_fs;
180 	ilock(dp);
181 	dp->i_count++;
182 	u.u_pdir = (struct inode *)0xc0000000;		/* illegal */
183 
184 	/*
185 	 * We come to dirloop to search a new directory.
186 	 * The directory must be locked so that it can be
187 	 * iput, and fs must be already set to dp->i_fs.
188 	 */
189 dirloop:
190 	/*
191 	 * Check accessiblity of directory.
192 	 */
193 	if ((dp->i_mode&IFMT) != IFDIR) {
194 		u.u_error = ENOTDIR;
195 		goto bad;
196 	}
197 	if (access(dp, IEXEC))
198 		goto bad;
199 
200 dirloop2:
201 	/*
202 	 * Copy next component of name to u.u_dent.
203 	 */
204 	hash = 0;
205 	for (i = 0; *cp != 0 && *cp != '/'; cp++) {
206 		if (i >= MAXNAMLEN) {
207 			u.u_error = ENOENT;
208 			goto bad;
209 		}
210 		u.u_dent.d_name[i++] = *cp;
211 		hash += (unsigned char)*cp * i;
212 	}
213 	u.u_dent.d_namlen = i;
214 	u.u_dent.d_name[i] = 0;
215 
216 	/*
217 	 * Check for degenerate name (e.g. / or "")
218 	 * which is a way of talking about a directory,
219 	 * e.g. like "/." or ".".
220 	 */
221 	if (u.u_dent.d_name[0] == 0) {
222 		if (flag != LOOKUP || lockparent) {
223 			u.u_error = EISDIR;
224 			goto bad;
225 		}
226 		brelse(nbp);
227 		return (dp);
228 	}
229 
230 	/*
231 	 * We now have a segment name to search for, and a directory to search.
232 	 *
233 	 * Before tediously performing a linear scan of the directory,
234 	 * check the name cache to see if the directory/name pair
235 	 * we are looking for is known already.  We don't do this
236 	 * if the segment name is long, simply so the cache can avoid
237 	 * holding long names (which would either waste space, or
238 	 * add greatly to the complexity).
239 	 */
240 	if (u.u_dent.d_namlen > NCHNAMLEN) {
241 		nchstats.ncs_long++;
242 		docache = 0;
243 	} else {
244 		nhp = &nchash[NHASH(hash, dp->i_number, dp->i_dev)];
245 		for (ncp = nhp->nch_forw; ncp != (struct nch *)nhp;
246 		    ncp = ncp->nc_forw) {
247 			if (ncp->nc_ino == dp->i_number &&
248 			    ncp->nc_dev == dp->i_dev &&
249 			    ncp->nc_nlen == u.u_dent.d_namlen &&
250 			    !bcmp(ncp->nc_name, u.u_dent.d_name, ncp->nc_nlen))
251 				break;
252 		}
253 
254 		if (ncp == (struct nch *)nhp) {
255 			nchstats.ncs_miss++;
256 			ncp = NULL;
257 		} else {
258 			if (*cp == '/' || docache) {
259 
260 				nchstats.ncs_goodhits++;
261 
262 					/*
263 					 * move this slot to end of LRU
264 					 * chain, if not already there
265 					 */
266 				if (ncp->nc_nxt) {
267 						/* remove from LRU chain */
268 					*ncp->nc_prev = ncp->nc_nxt;
269 					ncp->nc_nxt->nc_prev = ncp->nc_prev;
270 
271 						/* and replace at end of it */
272 					ncp->nc_nxt = NULL;
273 					ncp->nc_prev = nchtail;
274 					*nchtail = ncp;
275 					nchtail = &ncp->nc_nxt;
276 				}
277 
278 				pdp = dp;
279 				dp = ncp->nc_ip;
280 				if (dp == NULL)
281 					panic("nami: null cache ino");
282 				if (pdp != dp) {
283 					ilock(dp);
284 					dp->i_count++;
285 					iunlock(pdp);
286 				} else
287 					dp->i_count++;
288 
289 				u.u_dent.d_ino = dp->i_number;
290 				/* u_dent.d_reclen is garbage ... */
291 
292 				goto haveino;
293 			}
294 
295 			/*
296 			 * last segment and we are renaming or deleting
297 			 * or otherwise don't want cache entry to exist
298 			 */
299 
300 			nchstats.ncs_badhits++;
301 
302 				/* remove from LRU chain */
303 			*ncp->nc_prev = ncp->nc_nxt;
304 			if (ncp->nc_nxt)
305 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
306 			else
307 				nchtail = ncp->nc_prev;
308 
309 				/* remove from hash chain */
310 			remque(ncp);
311 
312 				/* release ref on the inode */
313 			irele(ncp->nc_ip);
314 			ncp->nc_ip = NULL;
315 
316 				/* insert at head of LRU list (first to grab) */
317 			ncp->nc_nxt = nchhead;
318 			ncp->nc_prev = &nchhead;
319 			nchhead->nc_prev = &ncp->nc_nxt;
320 			nchhead = ncp;
321 
322 				/* and make a dummy hash chain */
323 			ncp->nc_forw = ncp;
324 			ncp->nc_back = ncp;
325 
326 			ncp = NULL;
327 		}
328 	}
329 
330 	/*
331 	 * Suppress search for slots unless creating
332 	 * file and at end of pathname, in which case
333 	 * we watch for a place to put the new file in
334 	 * case it doesn't already exist.
335 	 */
336 	slotstatus = FOUND;
337 	if (flag == CREATE && *cp == 0) {
338 		slotstatus = NONE;
339 		slotfreespace = 0;
340 		slotneeded = DIRSIZ(&u.u_dent);
341 	}
342 	/*
343 	 * If this is the same directory that this process
344 	 * previously searched, pick up where we last left off.
345 	 * We cache only lookups as these are the most common
346 	 * and have the greatest payoff. Caching CREATE has little
347 	 * benefit as it usually must search the entire directory
348 	 * to determine that the entry does not exist. Caching the
349 	 * location of the last DELETE has not reduced profiling time
350 	 * and hence has been removed in the interest of simplicity.
351 	 */
352 	if (flag != LOOKUP || dp->i_number != u.u_ncache.nc_inumber ||
353 	    dp->i_dev != u.u_ncache.nc_dev) {
354 		u.u_offset = 0;
355 		numdirpasses = 1;
356 	} else {
357 		if ((dp->i_flag & ICHG) || dp->i_ctime >= u.u_ncache.nc_time) {
358 			u.u_ncache.nc_prevoffset &= ~(DIRBLKSIZ - 1);
359 			u.u_ncache.nc_time = time.tv_sec;
360 		}
361 		u.u_offset = u.u_ncache.nc_prevoffset;
362 		entryoffsetinblock = blkoff(fs, u.u_offset);
363 		if (entryoffsetinblock != 0) {
364 			bp = blkatoff(dp, u.u_offset, (char **)0);
365 			if (bp == 0)
366 				goto bad;
367 		}
368 		numdirpasses = 2;
369 		nchstats.ncs_2passes++;
370 	}
371 	endsearch = roundup(dp->i_size, DIRBLKSIZ);
372 
373 searchloop:
374 	while (u.u_offset < endsearch) {
375 		/*
376 		 * If offset is on a block boundary,
377 		 * read the next directory block.
378 		 * Release previous if it exists.
379 		 */
380 		if (blkoff(fs, u.u_offset) == 0) {
381 			if (bp != NULL)
382 				brelse(bp);
383 			bp = blkatoff(dp, u.u_offset, (char **)0);
384 			if (bp == 0)
385 				goto bad;
386 			entryoffsetinblock = 0;
387 		}
388 
389 		/*
390 		 * If still looking for a slot, and at a DIRBLKSIZE
391 		 * boundary, have to start looking for free space
392 		 * again.
393 		 */
394 		if (slotstatus == NONE &&
395 		    (entryoffsetinblock&(DIRBLKSIZ-1)) == 0) {
396 			slotoffset = -1;
397 			slotfreespace = 0;
398 		}
399 
400 		/*
401 		 * Get pointer to next entry, and do consistency checking:
402 		 *	record length must be multiple of 4
403 		 *	record length must not be zero
404 		 *	entry must fit in rest of this DIRBLKSIZ block
405 		 *	record must be large enough to contain name
406 		 * When dirchk is set we also check:
407 		 *	name is not longer than MAXNAMLEN
408 		 *	name must be as long as advertised, and null terminated
409 		 * Checking last two conditions is done only when dirchk is
410 		 * set, to save time.
411 		 */
412 		ep = (struct direct *)(bp->b_un.b_addr + entryoffsetinblock);
413 		i = DIRBLKSIZ - (entryoffsetinblock & (DIRBLKSIZ - 1));
414 		if ((ep->d_reclen & 0x3) || ep->d_reclen == 0 ||
415 		    ep->d_reclen > i || DIRSIZ(ep) > ep->d_reclen ||
416 		    dirchk && (ep->d_namlen > MAXNAMLEN || dirbadname(ep))) {
417 			dirbad(dp, "mangled entry");
418 			u.u_offset += i;
419 			entryoffsetinblock += i;
420 			continue;
421 		}
422 
423 		/*
424 		 * If an appropriate sized slot has not yet been found,
425 		 * check to see if one is available. Also accumulate space
426 		 * in the current block so that we can determine if
427 		 * compaction is viable.
428 		 */
429 		if (slotstatus != FOUND) {
430 			int size = ep->d_reclen;
431 
432 			if (ep->d_ino != 0)
433 				size -= DIRSIZ(ep);
434 			if (size > 0) {
435 				if (size >= slotneeded) {
436 					slotstatus = FOUND;
437 					slotoffset = u.u_offset;
438 					slotsize = ep->d_reclen;
439 				} else if (slotstatus == NONE) {
440 					slotfreespace += size;
441 					if (slotoffset == -1)
442 						slotoffset = u.u_offset;
443 					if (slotfreespace >= slotneeded) {
444 						slotstatus = COMPACT;
445 						slotsize =
446 						    u.u_offset+ep->d_reclen -
447 						      slotoffset;
448 					}
449 				}
450 			}
451 		}
452 
453 		/*
454 		 * Check for a name match.
455 		 */
456 		if (ep->d_ino) {
457 			if (ep->d_namlen == u.u_dent.d_namlen &&
458 			    !bcmp(u.u_dent.d_name, ep->d_name, ep->d_namlen))
459 				goto found;
460 		}
461 		prevoff = u.u_offset;
462 		u.u_offset += ep->d_reclen;
463 		entryoffsetinblock += ep->d_reclen;
464 	}
465 /* notfound: */
466 	/*
467 	 * If we started in the middle of the directory and failed
468 	 * to find our target, we must check the beginning as well.
469 	 */
470 	if (numdirpasses == 2) {
471 		numdirpasses--;
472 		u.u_offset = 0;
473 		endsearch = u.u_ncache.nc_prevoffset;
474 		goto searchloop;
475 	}
476 	/*
477 	 * If creating, and at end of pathname and current
478 	 * directory has not been removed, then can consider
479 	 * allowing file to be created.
480 	 */
481 	if (flag == CREATE && *cp == 0 && dp->i_nlink != 0) {
482 		/*
483 		 * Access for write is interpreted as allowing
484 		 * creation of files in the directory.
485 		 */
486 		if (access(dp, IWRITE))
487 			goto bad;
488 		/*
489 		 * Return an indication of where the new directory
490 		 * entry should be put.  If we didn't find a slot,
491 		 * then set u.u_count to 0 indicating that the
492 		 * new slot belongs at the end of the directory.
493 		 * If we found a slot, then the new entry can be
494 		 * put in the range [u.u_offset..u.u_offset+u.u_count)
495 		 */
496 		if (slotstatus == NONE) {
497 			u.u_offset = roundup(dp->i_size, DIRBLKSIZ);
498 			u.u_count = 0;
499 		} else {
500 			u.u_offset = slotoffset;
501 			u.u_count = slotsize;
502 		}
503 		dp->i_flag |= IUPD|ICHG;
504 		if (bp)
505 			brelse(bp);
506 		brelse(nbp);
507 		/*
508 		 * We return with the directory locked, so that
509 		 * the parameters we set up above will still be
510 		 * valid if we actually decide to do a direnter().
511 		 * We return NULL to indicate that the entry doesn't
512 		 * currently exist, leaving a pointer to the (locked)
513 		 * directory inode in u.u_pdir.
514 		 */
515 		u.u_pdir = dp;
516 		return (NULL);
517 	}
518 	u.u_error = ENOENT;
519 	goto bad;
520 found:
521 	if (numdirpasses == 2)
522 		nchstats.ncs_pass2++;
523 	/*
524 	 * Check that directory length properly reflects presence
525 	 * of this entry.
526 	 */
527 	if (entryoffsetinblock + DIRSIZ(ep) > dp->i_size) {
528 		dirbad(dp, "i_size too small");
529 		dp->i_size = entryoffsetinblock + DIRSIZ(ep);
530 		dp->i_flag |= IUPD|ICHG;
531 	}
532 
533 	/*
534 	 * Found component in pathname.
535 	 * If the final component of path name, save information
536 	 * in the cache as to where the entry was found.
537 	 */
538 	if (*cp == '\0' && flag == LOOKUP) {
539 		u.u_ncache.nc_prevoffset = u.u_offset;
540 		u.u_ncache.nc_inumber = dp->i_number;
541 		u.u_ncache.nc_dev = dp->i_dev;
542 		u.u_ncache.nc_time = time.tv_sec;
543 	}
544 	/*
545 	 * Save directory entry in u.u_dent,
546 	 * and release directory buffer.
547 	 */
548 	bcopy((caddr_t)ep, (caddr_t)&u.u_dent, (u_int)DIRSIZ(ep));
549 	brelse(bp);
550 	bp = NULL;
551 
552 	/*
553 	 * If deleting, and at end of pathname, return
554 	 * parameters which can be used to remove file.
555 	 * If the lockparent flag isn't set, we return only
556 	 * the directory (in u.u_pdir), otherwise we go
557 	 * on and lock the inode, being careful with ".".
558 	 */
559 	if (flag == DELETE && *cp == 0) {
560 		/*
561 		 * Write access to directory required to delete files.
562 		 */
563 		if (access(dp, IWRITE))
564 			goto bad;
565 		u.u_pdir = dp;		/* for dirremove() */
566 		/*
567 		 * Return pointer to current entry in u.u_offset,
568 		 * and distance past previous entry (if there
569 		 * is a previous entry in this block) in u.u_count.
570 		 * Save directory inode pointer in u.u_pdir for dirremove().
571 		 */
572 		if ((u.u_offset&(DIRBLKSIZ-1)) == 0)
573 			u.u_count = 0;
574 		else
575 			u.u_count = u.u_offset - prevoff;
576 		if (lockparent) {
577 			if (dp->i_number == u.u_dent.d_ino)
578 				dp->i_count++;
579 			else {
580 				dp = iget(dp->i_dev, fs, u.u_dent.d_ino);
581 				if (dp == NULL) {
582 					iput(u.u_pdir);
583 					goto bad;
584 				}
585 				/*
586 				 * If directory is "sticky", then user must own
587 				 * the directory, or the file in it, else he
588 				 * may not delete it (unless he's root). This
589 				 * implements append-only directories.
590 				 */
591 				if ((u.u_pdir->i_mode & ISVTX) &&
592 				    u.u_uid != 0 &&
593 				    u.u_uid != u.u_pdir->i_uid &&
594 				    dp->i_uid != u.u_uid) {
595 					iput(u.u_pdir);
596 					u.u_error = EPERM;
597 					goto bad;
598 				}
599 			}
600 		}
601 		brelse(nbp);
602 		return (dp);
603 	}
604 
605 	/*
606 	 * Special handling for ".." allowing chdir out of mounted
607 	 * file system: indirect .. in root inode to reevaluate
608 	 * in directory file system was mounted on.
609 	 */
610 	isdotdot = 0;
611 	if (bcmp(u.u_dent.d_name, "..", 3) == 0) {
612 		isdotdot++;
613 		if (dp == u.u_rdir)
614 			u.u_dent.d_ino = dp->i_number;
615 		else if (u.u_dent.d_ino == ROOTINO &&
616 		   dp->i_number == ROOTINO) {
617 			for (i = 1; i < NMOUNT; i++)
618 			if (mount[i].m_bufp != NULL &&
619 			   mount[i].m_dev == dp->i_dev) {
620 				iput(dp);
621 				dp = mount[i].m_inodp;
622 				ilock(dp);
623 				dp->i_count++;
624 				fs = dp->i_fs;
625 				cp -= 2;     /* back over .. */
626 				goto dirloop2;
627 			}
628 		}
629 	}
630 
631 	/*
632 	 * If rewriting (rename), return the inode and the
633 	 * information required to rewrite the present directory
634 	 * Must get inode of directory entry to verify it's a
635 	 * regular file, or empty directory.
636 	 */
637 	if ((flag == CREATE && lockparent) && *cp == 0) {
638 		if (access(dp, IWRITE))
639 			goto bad;
640 		u.u_pdir = dp;		/* for dirrewrite() */
641 		/*
642 		 * Careful about locking second inode.
643 		 * This can only occur if the target is ".".
644 		 */
645 		if (dp->i_number == u.u_dent.d_ino) {
646 			u.u_error = EISDIR;		/* XXX */
647 			goto bad;
648 		}
649 		dp = iget(dp->i_dev, fs, u.u_dent.d_ino);
650 		if (dp == NULL) {
651 			iput(u.u_pdir);
652 			goto bad;
653 		}
654 		brelse(nbp);
655 		return (dp);
656 	}
657 
658 	/*
659 	 * Check for symbolic link, which may require us to massage the
660 	 * name before we continue translation.  We do not `iput' the
661 	 * directory because we may need it again if the symbolic link
662 	 * is relative to the current directory.  Instead we save it
663 	 * unlocked as "pdp".  We must get the target inode before unlocking
664 	 * the directory to insure that the inode will not be removed
665 	 * before we get it.  We prevent deadlock by always fetching
666 	 * inodes from the root, moving down the directory tree. Thus
667 	 * when following backward pointers ".." we must unlock the
668 	 * parent directory before getting the requested directory.
669 	 * There is a potential race condition here if both the current
670 	 * and parent directories are removed before the `iget' for the
671 	 * inode associated with ".." returns.  We hope that this occurs
672 	 * infrequently since we cannot avoid this race condition without
673 	 * implementing a sophisticated deadlock detection algorithm.
674 	 * Note also that this simple deadlock detection scheme will not
675 	 * work if the file system has any hard links other than ".."
676 	 * that point backwards in the directory structure.
677 	 */
678 	pdp = dp;
679 	if (isdotdot) {
680 		iunlock(pdp);	/* race to get the inode */
681 		dp = iget(dp->i_dev, fs, u.u_dent.d_ino);
682 		if (dp == NULL)
683 			goto bad2;
684 	} else if (dp->i_number == u.u_dent.d_ino) {
685 		dp->i_count++;	/* we want ourself, ie "." */
686 	} else {
687 		dp = iget(dp->i_dev, fs, u.u_dent.d_ino);
688 		iunlock(pdp);
689 		if (dp == NULL)
690 			goto bad2;
691 	}
692 
693 	/*
694 	 * insert name into cache (if we want it, and it isn't "." or "..")
695 	 *
696 	 * all other cases where making a cache entry would be wrong
697 	 * have already departed from the code sequence somewhere above.
698 	 */
699 	if (bcmp(u.u_dent.d_name, ".", 2) != 0 && !isdotdot && docache) {
700 		if (ncp != NULL)
701 			panic("nami: duplicating cache");
702 
703 			/*
704 			 * free the cache slot at head of lru chain
705 			 */
706 		if (ncp = nchhead) {
707 				/* remove from lru chain */
708 			*ncp->nc_prev = ncp->nc_nxt;
709 			if (ncp->nc_nxt)
710 				ncp->nc_nxt->nc_prev = ncp->nc_prev;
711 			else
712 				nchtail = ncp->nc_prev;
713 
714 				/* remove from old hash chain */
715 			remque(ncp);
716 
717 				/* drop hold on inode (if we had one) */
718 			if (ncp->nc_ip)
719 				irele(ncp->nc_ip);
720 
721 				/* grab the inode we just found */
722 			ncp->nc_ip = dp;
723 			dp->i_count++;
724 
725 				/* fill in cache info */
726 			ncp->nc_ino = pdp->i_number;	/* parents inum */
727 			ncp->nc_dev = pdp->i_dev;	/* & device */
728 			ncp->nc_idev = dp->i_dev;	/* our device */
729 			ncp->nc_nlen = u.u_dent.d_namlen;
730 			bcopy(u.u_dent.d_name, ncp->nc_name, ncp->nc_nlen);
731 
732 				/* link at end of lru chain */
733 			ncp->nc_nxt = NULL;
734 			ncp->nc_prev = nchtail;
735 			*nchtail = ncp;
736 			nchtail = &ncp->nc_nxt;
737 
738 				/* and insert on hash chain */
739 			insque(ncp, nhp);
740 		}
741 	}
742 
743 haveino:
744 	fs = dp->i_fs;
745 
746 	/*
747 	 * Check for symbolic link
748 	 */
749 	if ((dp->i_mode & IFMT) == IFLNK && (follow || *cp == '/')) {
750 		u_int pathlen = strlen(cp) + 1;
751 
752 		if (dp->i_size + pathlen >= MAXPATHLEN - 1 ||
753 		    ++nlink > MAXSYMLINKS) {
754 			u.u_error = ELOOP;
755 			goto bad2;
756 		}
757 		ovbcopy(cp, nbp->b_un.b_addr + dp->i_size, pathlen);
758 		u.u_error =
759 		    rdwri(UIO_READ, dp, nbp->b_un.b_addr, (int)dp->i_size,
760 			0, 1, (int *)0);
761 		if (u.u_error)
762 			goto bad2;
763 		cp = nbp->b_un.b_addr;
764 		iput(dp);
765 		if (*cp == '/') {
766 			irele(pdp);
767 			while (*cp == '/')
768 				cp++;
769 			if ((dp = u.u_rdir) == NULL)
770 				dp = rootdir;
771 			ilock(dp);
772 			dp->i_count++;
773 		} else {
774 			dp = pdp;
775 			ilock(dp);
776 		}
777 		fs = dp->i_fs;
778 		goto dirloop;
779 	}
780 
781 	/*
782 	 * Not a symbolic link.  If more pathname,
783 	 * continue at next component, else return.
784 	 */
785 	if (*cp == '/') {
786 		while (*cp == '/')
787 			cp++;
788 		irele(pdp);
789 		goto dirloop;
790 	}
791 	brelse(nbp);
792 	if (lockparent)
793 		u.u_pdir = pdp;
794 	else
795 		irele(pdp);
796 	return (dp);
797 bad2:
798 	irele(pdp);
799 bad:
800 	if (bp)
801 		brelse(bp);
802 	if (dp)
803 		iput(dp);
804 	brelse(nbp);
805 	return (NULL);
806 }
807 
808 
809 dirbad(ip, how)
810 	struct inode *ip;
811 	char *how;
812 {
813 
814 	printf("%s: bad dir ino %d at offset %d: %s\n",
815 	    ip->i_fs->fs_fsmnt, ip->i_number, u.u_offset, how);
816 }
817 
818 dirbadname(ep)
819 	register struct direct *ep;
820 {
821 	register int i;
822 
823 	for (i = 0; i < ep->d_namlen; i++)
824 		if (ep->d_name[i] == 0)
825 			return (1);
826 	return (ep->d_name[i]);
827 }
828 
829 /*
830  * Write a directory entry after a call to namei, using the parameters
831  * which it left in the u. area.  The argument ip is the inode which
832  * the new directory entry will refer to.  The u. area field u.u_pdir is
833  * a pointer to the directory to be written, which was left locked by
834  * namei.  Remaining parameters (u.u_offset, u.u_count) indicate
835  * how the space for the new entry is to be gotten.
836  */
837 direnter(ip)
838 	struct inode *ip;
839 {
840 	register struct direct *ep, *nep;
841 	struct buf *bp;
842 	int loc, spacefree, error = 0;
843 	u_int dsize;
844 	int newentrysize;
845 	char *dirbuf;
846 
847 	u.u_dent.d_ino = ip->i_number;
848 	u.u_segflg = 1;
849 	newentrysize = DIRSIZ(&u.u_dent);
850 	if (u.u_count == 0) {
851 		/*
852 		 * If u.u_count is 0, then namei could find no space in the
853 		 * directory.  In this case u.u_offset will be on a directory
854 		 * block boundary and we will write the new entry into a fresh
855 		 * block.
856 		 */
857 		if (u.u_offset&(DIRBLKSIZ-1))
858 			panic("wdir: newblk");
859 		u.u_dent.d_reclen = DIRBLKSIZ;
860 		error = rdwri(UIO_WRITE, u.u_pdir, (caddr_t)&u.u_dent,
861 		    newentrysize, u.u_offset, 1, (int *)0);
862 		iput(u.u_pdir);
863 		return (error);
864 	}
865 
866 	/*
867 	 * If u.u_count is non-zero, then namei found space for the
868 	 * new entry in the range u.u_offset to u.u_offset+u.u_count.
869 	 * in the directory.  To use this space, we may have to compact
870 	 * the entries located there, by copying them together towards
871 	 * the beginning of the block, leaving the free space in
872 	 * one usable chunk at the end.
873 	 */
874 
875 	/*
876 	 * Increase size of directory if entry eats into new space.
877 	 * This should never push the size past a new multiple of
878 	 * DIRBLKSIZE.
879 	 */
880 	if (u.u_offset + u.u_count > u.u_pdir->i_size)
881 		u.u_pdir->i_size = u.u_offset + u.u_count;
882 
883 	/*
884 	 * Get the block containing the space for the new directory
885 	 * entry.  Should return error by result instead of u.u_error.
886 	 */
887 	bp = blkatoff(u.u_pdir, u.u_offset, (char **)&dirbuf);
888 	if (bp == 0) {
889 		iput(u.u_pdir);
890 		return (u.u_error);
891 	}
892 
893 	/*
894 	 * Find space for the new entry.  In the simple case, the
895 	 * entry at offset base will have the space.  If it does
896 	 * not, then namei arranged that compacting the region
897 	 * u.u_offset to u.u_offset+u.u_count would yield the space.
898 	 */
899 	ep = (struct direct *)dirbuf;
900 	dsize = DIRSIZ(ep);
901 	spacefree = ep->d_reclen - dsize;
902 	for (loc = ep->d_reclen; loc < u.u_count; ) {
903 		nep = (struct direct *)(dirbuf + loc);
904 		if (ep->d_ino) {
905 			/* trim the existing slot */
906 			ep->d_reclen = dsize;
907 			ep = (struct direct *)((char *)ep + dsize);
908 		} else {
909 			/* overwrite; nothing there; header is ours */
910 			spacefree += dsize;
911 		}
912 		dsize = DIRSIZ(nep);
913 		spacefree += nep->d_reclen - dsize;
914 		loc += nep->d_reclen;
915 		bcopy((caddr_t)nep, (caddr_t)ep, dsize);
916 	}
917 	/*
918 	 * Update the pointer fields in the previous entry (if any),
919 	 * copy in the new entry, and write out the block.
920 	 */
921 	if (ep->d_ino == 0) {
922 		if (spacefree + dsize < newentrysize)
923 			panic("wdir: compact1");
924 		u.u_dent.d_reclen = spacefree + dsize;
925 	} else {
926 		if (spacefree < newentrysize)
927 			panic("wdir: compact2");
928 		u.u_dent.d_reclen = spacefree;
929 		ep->d_reclen = dsize;
930 		ep = (struct direct *)((char *)ep + dsize);
931 	}
932 	bcopy((caddr_t)&u.u_dent, (caddr_t)ep, (u_int)newentrysize);
933 	bwrite(bp);
934 	u.u_pdir->i_flag |= IUPD|ICHG;
935 	iput(u.u_pdir);
936 	return (error);
937 }
938 
939 /*
940  * Remove a directory entry after a call to namei, using the
941  * parameters which it left in the u. area.  The u. entry
942  * u_offset contains the offset into the directory of the
943  * entry to be eliminated.  The u_count field contains the
944  * size of the previous record in the directory.  If this
945  * is 0, the first entry is being deleted, so we need only
946  * zero the inode number to mark the entry as free.  If the
947  * entry isn't the first in the directory, we must reclaim
948  * the space of the now empty record by adding the record size
949  * to the size of the previous entry.
950  */
951 dirremove()
952 {
953 	register struct inode *dp = u.u_pdir;
954 	register struct buf *bp;
955 	struct direct *ep;
956 
957 	if (u.u_count == 0) {
958 		/*
959 		 * First entry in block: set d_ino to zero.
960 		 */
961 		u.u_dent.d_ino = 0;
962 		(void) rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent,
963 		    (int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0);
964 	} else {
965 		/*
966 		 * Collapse new free space into previous entry.
967 		 */
968 		bp = blkatoff(dp, (int)(u.u_offset - u.u_count), (char **)&ep);
969 		if (bp == 0)
970 			return (0);
971 		ep->d_reclen += u.u_dent.d_reclen;
972 		bwrite(bp);
973 		dp->i_flag |= IUPD|ICHG;
974 	}
975 	return (1);
976 }
977 
978 /*
979  * Rewrite an existing directory entry to point at the inode
980  * supplied.  The parameters describing the directory entry are
981  * set up by a call to namei.
982  */
983 dirrewrite(dp, ip)
984 	struct inode *dp, *ip;
985 {
986 
987 	u.u_dent.d_ino = ip->i_number;
988 	u.u_error = rdwri(UIO_WRITE, dp, (caddr_t)&u.u_dent,
989 		(int)DIRSIZ(&u.u_dent), u.u_offset, 1, (int *)0);
990 	iput(dp);
991 }
992 
993 /*
994  * Return buffer with contents of block "offset"
995  * from the beginning of directory "ip".  If "res"
996  * is non-zero, fill it in with a pointer to the
997  * remaining space in the directory.
998  */
999 struct buf *
1000 blkatoff(ip, offset, res)
1001 	struct inode *ip;
1002 	off_t offset;
1003 	char **res;
1004 {
1005 	register struct fs *fs = ip->i_fs;
1006 	daddr_t lbn = lblkno(fs, offset);
1007 	int base = blkoff(fs, offset);
1008 	int bsize = blksize(fs, ip, lbn);
1009 	daddr_t bn = fsbtodb(fs, bmap(ip, lbn, B_WRITE, base, bsize));
1010 	register struct buf *bp;
1011 
1012 	if (u.u_error)
1013 		return (0);
1014 	bp = bread(ip->i_dev, bn, bsize);
1015 	if (bp->b_flags & B_ERROR) {
1016 		brelse(bp);
1017 		return (0);
1018 	}
1019 	if (res)
1020 		*res = bp->b_un.b_addr + base;
1021 	return (bp);
1022 }
1023 
1024 /*
1025  * Check if a directory is empty or not.
1026  * Inode supplied must be locked.
1027  *
1028  * Using a struct dirtemplate here is not precisely
1029  * what we want, but better than using a struct direct.
1030  *
1031  * NB: does not handle corrupted directories.
1032  */
1033 dirempty(ip)
1034 	register struct inode *ip;
1035 {
1036 	register off_t off;
1037 	struct dirtemplate dbuf;
1038 	register struct direct *dp = (struct direct *)&dbuf;
1039 	int error, count;
1040 #define	MINDIRSIZ (sizeof (struct dirtemplate) / 2)
1041 
1042 	for (off = 0; off < ip->i_size; off += dp->d_reclen) {
1043 		error = rdwri(UIO_READ, ip, (caddr_t)dp, MINDIRSIZ,
1044 		    off, 1, &count);
1045 		/*
1046 		 * Since we read MINDIRSIZ, residual must
1047 		 * be 0 unless we're at end of file.
1048 		 */
1049 		if (error || count != 0)
1050 			return (0);
1051 		/* skip empty entries */
1052 		if (dp->d_ino == 0)
1053 			continue;
1054 		/* accept only "." and ".." */
1055 		if (dp->d_namlen > 2)
1056 			return (0);
1057 		if (dp->d_name[0] != '.')
1058 			return (0);
1059 		/*
1060 		 * At this point d_namlen must be 1 or 2.
1061 		 * 1 implies ".", 2 implies ".." if second
1062 		 * char is also "."
1063 		 */
1064 		if (dp->d_namlen == 1 || dp->d_name[1] == '.')
1065 			continue;
1066 		return (0);
1067 	}
1068 	return (1);
1069 }
1070 
1071 /*
1072  * Check if source directory is in the path of the target directory.
1073  * Target is supplied locked, source is unlocked.
1074  * The target is always iput() before returning.
1075  */
1076 checkpath(source, target)
1077 	struct inode *source, *target;
1078 {
1079 	struct dirtemplate dirbuf;
1080 	register struct inode *ip;
1081 	int error = 0;
1082 
1083 	ip = target;
1084 	if (ip->i_number == source->i_number) {
1085 		error = EEXIST;
1086 		goto out;
1087 	}
1088 	if (ip->i_number == ROOTINO)
1089 		goto out;
1090 
1091 	for (;;) {
1092 		if ((ip->i_mode&IFMT) != IFDIR) {
1093 			error = ENOTDIR;
1094 			break;
1095 		}
1096 		error = rdwri(UIO_READ, ip, (caddr_t)&dirbuf,
1097 			sizeof (struct dirtemplate), (off_t)0, 1, (int *)0);
1098 		if (error != 0)
1099 			break;
1100 		if (dirbuf.dotdot_namlen != 2 ||
1101 		    bcmp(dirbuf.dotdot_name, "..", 3) != 0) {
1102 			error = ENOTDIR;
1103 			break;
1104 		}
1105 		if (dirbuf.dotdot_ino == source->i_number) {
1106 			error = EINVAL;
1107 			break;
1108 		}
1109 		if (dirbuf.dotdot_ino == ROOTINO)
1110 			break;
1111 		iput(ip);
1112 		ip = iget(ip->i_dev, ip->i_fs, dirbuf.dotdot_ino);
1113 		if (ip == NULL) {
1114 			error = u.u_error;
1115 			break;
1116 		}
1117 	}
1118 
1119 out:
1120 	if (error == ENOTDIR)
1121 		printf("checkpath: .. not a directory\n");
1122 	if (ip != NULL)
1123 		iput(ip);
1124 	return (error);
1125 }
1126 
1127 /*
1128  * Name cache initialization, from main() when we are booting
1129  */
1130 nchinit()
1131 {
1132 	register union nchash *nchp;
1133 	register struct nch *ncp;
1134 
1135 	nchhead = 0;
1136 	nchtail = &nchhead;
1137 
1138 	for (ncp = nch; ncp < &nch[nchsize]; ncp++) {
1139 		ncp->nc_forw = ncp;			/* hash chain */
1140 		ncp->nc_back = ncp;
1141 
1142 		ncp->nc_nxt = NULL;			/* lru chain */
1143 		*nchtail = ncp;
1144 		ncp->nc_prev = nchtail;
1145 		nchtail = &ncp->nc_nxt;
1146 
1147 		/* all else is zero already */
1148 	}
1149 
1150 	for (nchp = nchash; nchp < &nchash[NCHHASH]; nchp++) {
1151 		nchp->nch_head[0] = nchp;
1152 		nchp->nch_head[1] = nchp;
1153 	}
1154 }
1155 
1156 /*
1157  * Cache flush, called when filesys is umounted to
1158  * remove entries that would now be invalid
1159  *
1160  * The line "nxtcp = nchhead" near the end is to avoid potential problems
1161  * if the cache lru chain is modified while we are dumping the
1162  * inode.  This makes the algorithm O(n^2), but do you think I care?
1163  */
1164 nchinval(dev)
1165 	register dev_t dev;
1166 {
1167 	register struct nch *ncp, *nxtcp;
1168 
1169 	for (ncp = nchhead; ncp; ncp = nxtcp) {
1170 		nxtcp = ncp->nc_nxt;
1171 
1172 		if (ncp->nc_ip == NULL ||
1173 		    (ncp->nc_idev != dev && ncp->nc_dev != dev))
1174 			continue;
1175 
1176 		ncp->nc_idev = NODEV;
1177 		ncp->nc_dev = NODEV;
1178 		ncp->nc_ino = 0;
1179 
1180 			/* remove the entry from its hash chain */
1181 		remque(ncp);
1182 			/* and make a dummy one */
1183 		ncp->nc_forw = ncp;
1184 		ncp->nc_back = ncp;
1185 
1186 			/* delete this entry from LRU chain */
1187 		*ncp->nc_prev = nxtcp;
1188 		if (nxtcp)
1189 			nxtcp->nc_prev = ncp->nc_prev;
1190 		else
1191 			nchtail = ncp->nc_prev;
1192 
1193 			/* free the inode we had */
1194 		irele(ncp->nc_ip);
1195 		ncp->nc_ip = NULL;
1196 
1197 			/* cause rescan of list, it may have altered */
1198 		nxtcp = nchhead;
1199 			/* put the now-free entry at head of LRU */
1200 		ncp->nc_nxt = nxtcp;
1201 		ncp->nc_prev = &nchhead;
1202 		nxtcp->nc_prev = &ncp->nc_nxt;
1203 		nchhead = ncp;
1204 	}
1205 }
1206